diff --git a/edamfu/edamfu/__init__.py b/edamfu/edamfu/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/edamfu/edamfu/cli.py b/edamfu/edamfu/cli.py
new file mode 100644
index 0000000..e69de29
diff --git a/edamfu/edamfu/core.py b/edamfu/edamfu/core.py
new file mode 100644
index 0000000..efbb003
--- /dev/null
+++ b/edamfu/edamfu/core.py
@@ -0,0 +1,23 @@
+import shutil
+import tempfile
+
+from edamfu.tree import reorder_root, add_comments
+
+from edamfu.utils import (
+ escape_irrelevant_xml_entities_in_text,
+ unescape_irrelevant_xml_entities_in_text,
+ prettify_xml,
+)
+
+
+def reformat(input_file_path, output_file_path):
+    """
+    Reformat an EDAM file and write the result to `output_file_path`.
+
+    The input is first copied to a temporary file; each processing step
+    (entity escaping, root reordering, comment insertion, entity unescaping,
+    whitespace prettifying) is then given the path of that temporary file,
+    and the temporary file is finally copied to the output path.
+
+    :param input_file_path: the path to the file to reformat
+    :param output_file_path: the path the reformatted file is copied to
+    :return: `output_file_path`
+    """
+    import os  # local import: only needed to clean up the temporary file
+
+    temp_edam_file = tempfile.NamedTemporaryFile(delete=False)
+    temp_edam_file.close()
+    try:
+        shutil.copy2(input_file_path, temp_edam_file.name)
+        # the escape/unescape pair must bracket every step that parses the XML
+        escape_irrelevant_xml_entities_in_text(temp_edam_file.name)
+        reorder_root(temp_edam_file.name)
+        add_comments(temp_edam_file.name)
+        unescape_irrelevant_xml_entities_in_text(temp_edam_file.name)
+        prettify_xml(temp_edam_file.name)
+        shutil.copy2(temp_edam_file.name, output_file_path)
+    finally:
+        # delete=False keeps the file on disk after close(); remove it
+        # explicitly so that neither successful nor failed runs leave
+        # stray files behind in the temporary directory
+        os.remove(temp_edam_file.name)
+    return output_file_path
diff --git a/edamfu/edamfu/reorder.py b/edamfu/edamfu/reorder.py
new file mode 100644
index 0000000..6cc9d7f
--- /dev/null
+++ b/edamfu/edamfu/reorder.py
@@ -0,0 +1,11 @@
+import shutil
+from edamfu.core import reformat
+
+# NOTE(review): both paths below are absolute paths into one developer's home
+# directory; they have to be edited before this script can run elsewhere.
+# Input file:
+xml_file_path = (
+ "/home/hmenager/edamfu/tests/EDAM_dev.owl" # Replace with the path to your XML file
+)
+# Processed file
+sorted_path = "/home/hmenager/edamfu/tests/EDAM_dev.processed.owl"
+
+# The call sits at module level, so it runs as soon as this module is loaded.
+reformat(xml_file_path, sorted_path)
diff --git a/edamfu/edamfu/reorder_lxml.py b/edamfu/edamfu/reorder_lxml.py
new file mode 100644
index 0000000..81109c4
--- /dev/null
+++ b/edamfu/edamfu/reorder_lxml.py
@@ -0,0 +1,122 @@
+from lxml import etree
+import re
+from copy import copy
+
+def get_element_sort_key(elem, order_mapping):
+    """
+    Compute the key used to sort sibling elements.
+
+    The key is a ``(element_order, identifier)`` tuple. ``element_order`` is
+    looked up in `order_mapping` by tag (0 when the tag is not listed). The
+    identifier is the ``rdf:about`` value for ``owl:Class`` elements, the
+    ``rdf:resource`` of the ``owl:annotatedSource`` child for ``owl:Axiom``
+    elements, and ``"zzz"`` for everything else.
+
+    :param elem: the element to compute the key for
+    :param order_mapping: dict mapping a qualified tag name to a dict that
+        may hold an ``"element_order"`` entry
+    :return: the ``(element_order, identifier)`` tuple
+    """
+    owl_ns = "{http://www.w3.org/2002/07/owl#}"
+    rdf_ns = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}"
+    first_key = order_mapping.get(elem.tag, {}).get("element_order", 0)
+    secondary_key = None
+    if elem.tag == owl_ns + "Class":
+        secondary_key = elem.get(rdf_ns + "about")
+    elif elem.tag == owl_ns + "Axiom":
+        annotated_source = elem.find(owl_ns + "annotatedSource")
+        if annotated_source is not None:
+            secondary_key = annotated_source.get(rdf_ns + "resource")
+    if secondary_key is None:
+        # a missing attribute yields None, which cannot be ordered against
+        # the string identifiers of the other elements (TypeError while
+        # sorting); use the same fallback as for the other element kinds
+        secondary_key = "zzz"
+    return (first_key, secondary_key)
+
+
+## A
+## C
+## C1
+## C2
+## B
+## B1
+## B2
+def reorder_elements(element, order_mapping):
+ """
+ Build a new element with the same tag and attributes as `element`, to
+ which the children of `element`, sorted with `get_element_sort_key`,
+ are added.
+
+ :param element: the element whose children are sorted
+ :param order_mapping: mapping passed through to `get_element_sort_key`
+ :return: the newly created element
+ """
+ ## element=A
+ sorted_elements = sorted(element, key=lambda elem: get_element_sort_key(elem, order_mapping))
+ ## element=A, sorted_elements=[B,C]
+ new_element = etree.Element(element.tag, attrib=element.attrib)
+ if element.text:
+ # the text of the source element is carried over wrapped in a CDATA section
+ new_element.text = etree.CDATA(element.text)
+ for child in sorted_elements:
+ # NOTE(review): the result of the recursive call is only rebound to the
+ # loop variable and is not stored anywhere visible here, while the
+ # extend() call below receives `sorted_elements` itself -- confirm
+ # whether nested reordering was meant to take effect.
+ child = reorder_elements(child, order_mapping)
+ new_element.extend(sorted_elements)
+ # debug output: tag and number of children of the element just built
+ print(new_element.tag, len(new_element.getchildren()))
+ # NOTE(review): `ontology` is assigned but never used in this function.
+ ontology = new_element.iter("{http://www.w3.org/2002/07/owl#}Ontology")
+ return new_element
+
+def add_comments(xml_file_path):
+    """
+    Insert an XML comment holding the ``rdf:about`` URI before each child of
+    the root element that belongs to the OWL namespace, then rewrite the
+    file in place (UTF-8, with an XML declaration).
+
+    Children without an ``rdf:about`` attribute are left without a comment.
+
+    :param xml_file_path: the path to the file to process
+    """
+    tree = etree.parse(xml_file_path)
+    root = tree.getroot()
+    # findall() returns a list, so inserting siblings while looping is safe
+    for class_element in root.findall("{http://www.w3.org/2002/07/owl#}*"):
+        class_uri = class_element.get(
+            "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about"
+        )
+        if class_uri is None:
+            # no URI to show: skip rather than emit a literal " None " comment
+            continue
+        # addprevious() puts the comment directly before the element, which
+        # avoids rebuilding and scanning the list of children for each one
+        class_element.addprevious(etree.Comment(f" {class_uri} "))
+    with open(xml_file_path, "wb") as f:
+        tree.write(f, encoding="utf-8", xml_declaration=True)
+    print(f"Comments added to '{xml_file_path}'.")
+
+
+def reorder_root(
+    xml_file_path,
+    order_mapping,
+    namespaces,
+    sorted_path="/home/hmenager/edamfu/tests/EDAM_dev.sorted-lxml.owl",
+):
+    """
+    Parse an XML file, pass its root element through `reorder_elements` and
+    save the resulting element to `sorted_path`.
+
+    :param xml_file_path: the path to the XML file to read
+    :param order_mapping: mapping handed to `reorder_elements`
+    :param namespaces: not used by this function; kept so that existing
+        calls remain valid
+    :param sorted_path: the path the reordered document is written to; the
+        default is the location that used to be hard-coded in this function
+    :return: `sorted_path`
+    """
+    # entities are left unresolved and comments are dropped while parsing
+    parser = etree.XMLParser(resolve_entities=False, remove_comments=True)
+    tree = etree.parse(xml_file_path, parser=parser)
+    root = tree.getroot()
+    etree.strip_elements(root, etree.Comment, with_tail=True)
+    new_root = reorder_elements(root, order_mapping)
+    # debug output: list the owl:Ontology elements found in the new tree
+    for el in new_root.iter("{http://www.w3.org/2002/07/owl#}Ontology"):
+        print("##", el)
+
+    with open(sorted_path, "wb") as f:
+        f.write(etree.tostring(new_root, xml_declaration=True))
+
+    print(f"XML elements reordered and saved to '{sorted_path}'.")
+    return sorted_path
+
+def prettify_xml(file_path):
+ with open(file_path, 'r') as file:
+ file_content = file.read()
+ modified_content = file_content.replace('" />', '"/>')
+ modified_content = modified_content.replace('--><', '-->\n\n <')
+ modified_content = modified_content.replace('>\n \n\n\n \n\n
+"""
+
+
+OBJECT_PROPERTY_COMMENT = """
+
+
+
+
+"""
+
+CLASSES_COMMENT = """
+
+
+
+
+"""
+
+
+def escape_irrelevant_xml_entities_in_text(file_path):
+    """
+    Escapes "legacy" XML entities in the text of the file
+    This is useful to avoid adding differences due to the automated conversion
+    of these entities by the XML parser
+    Note: this function must be called before the XML file is parsed, and once
+    the processing is done, `unescape_irrelevant_xml_entities_in_text` must be
+    called to revert the temporary changes
+    :param file_path: the path to the file to process (rewritten in place)
+    """
+    # the encoding is explicit so that non-ASCII text in the file is read and
+    # written the same way whatever the locale of the machine
+    with open(file_path, "r", encoding="utf-8") as file:
+        file_content = file.read()
+    for entity, replacement in IRRELEVANT_ENTITIES_DICT.items():
+        file_content = file_content.replace(entity, replacement)
+    with open(file_path, "w", encoding="utf-8") as file:
+        file.write(file_content)
+
+
+def unescape_irrelevant_xml_entities_in_text(file_path):
+    """
+    Reverse function that unescapes "legacy" XML entities in the text of the
+    file, see `escape_irrelevant_xml_entities_in_text` for more details.
+    :param file_path: the path to the file to process (rewritten in place)
+    """
+    # the encoding is explicit so that non-ASCII text in the file is read and
+    # written the same way whatever the locale of the machine
+    with open(file_path, "r", encoding="utf-8") as file:
+        file_content = file.read()
+    for entity, replacement in REVERSED_IRRELEVANT_ENTITIES_DICT.items():
+        file_content = file_content.replace(entity, replacement)
+    with open(file_path, "w", encoding="utf-8") as file:
+        file.write(file_content)
+
+
+def add_after_last(s, old, new):
+    """
+    Insert `new` right after the last match of the pattern `old` in `s`.
+
+    :param s: the text to process
+    :param old: a regular expression, compiled with `re.DOTALL`
+    :param new: the text to insert after the last match
+    :return: the modified text, or `s` unchanged when `old` does not match
+    """
+    pattern = re.compile(old, re.DOTALL)
+    last_match = None
+    for last_match in pattern.finditer(s):
+        pass
+    if last_match is None:
+        # nothing to anchor on: hand the text back instead of returning None
+        return s
+    # work from the match offsets rather than the matched text, so patterns
+    # that contain groups behave like patterns that do not
+    end = last_match.end()
+    return s[:end] + new + s[end:]
+
+
+def prettify_xml(file_path):
+ """
+ Adjust spaces in the file to follow EDAM source "conventions"
+ :param file_path: the path to the file to process
+ """
+ with open(file_path, "r") as file:
+ file_content = file.read()
+ # no trailing whitespaces at the end of an element's attributes list
+ modified_content = file_content.replace('" />', '"/>')
+ # normalize line breaks after an element comment
+ modified_content = modified_content.replace("--><", "-->\n\n <")
+ # normalize line breaks before an element comment
+ modified_content = modified_content.replace(">\n \n",
+ "\n",
+ " \n",
+ " \n",
+ " beta12orEarlier\n",
+ " beta12orEarlier\n",
+ " \n",
+ " A bioinformatics package or tool, e.g. a standalone application or web service.\n",
+ " \n",
+ " \n",
+ " Tool\n",
+ " true\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " beta12orEarlier\n",
+ " beta12orEarlier\n",
+ " \n",
+ " A digital data archive typically based around a relational model but sometimes using an object-oriented, tree or graph-based model.\n",
+ " \n",
+ " \n",
+ " Database\n",
+ " true\n",
+ " \n",
+ "\n",
+ " \n",
+ "\n",
+ " \n",
+ "\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "b7e77467-4816-4447-8828-a88d05adfce0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\t\n",
+ " \n",
+ "\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\tbeta12orEarlier\n",
+ "\t\t\n",
+ " \n",
+ "\t\tbeta12orEarlier\n",
+ "\t\t\n",
+ " \n",
+ "\t\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\tA digital data archive typically based around a relational model but sometimes using an object-oriented, tree or graph-based model.\n",
+ "\t\t\n",
+ " \n",
+ "\t\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\tDatabase\n",
+ "\t\t\n",
+ " \n",
+ "\t\ttrue\n",
+ "\t\t\n",
+ " \n",
+ "\t\n",
+ "\t\n",
+ " \n",
+ "\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\tbeta12orEarlier\n",
+ "\t\t\n",
+ " \n",
+ "\t\tbeta12orEarlier\n",
+ "\t\t\n",
+ " \n",
+ "\t\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\tA bioinformatics package or tool, e.g. a standalone application or web service.\n",
+ "\t\t\n",
+ " \n",
+ "\t\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\tTool\n",
+ "\t\t\n",
+ " \n",
+ "\t\ttrue\n",
+ "\t\t\n",
+ " \n",
+ "\t\n",
+ "\t\n",
+ " \n",
+ "\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\t\n",
+ "\t\t\n",
+ " \n",
+ "\t\n",
+ "\t\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "from rdflib import Graph, URIRef, RDF, OWL, Namespace\n",
+ "\n",
+ "from rdflib.plugins.serializers.rdfxml import XMLSerializer\n",
+ "\n",
+ "from io import BytesIO\n",
+ "\n",
+ "import xml.dom.minidom\n",
+ "\n",
+ "def create_owl_elements(input_owl_string):\n",
+ " try:\n",
+ " # Define custom namespace prefixes\n",
+ " edam_namespace = Namespace(\"http://edamontology.org/\")\n",
+ " dc_namespace = Namespace(\"http://purl.org/dc/elements/1.1/\")\n",
+ " dcterms_namespace = Namespace(\"http://purl.org/dc/terms/\")\n",
+ " owl_namespace = Namespace(\"http://www.w3.org/2002/07/owl#\")\n",
+ " rdf_namespace = Namespace(\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\")\n",
+ " skos_namespace = Namespace(\"http://www.w3.org/2004/02/skos/core#\")\n",
+ " xml_namespace = Namespace(\"http://www.w3.org/XML/1998/namespace\")\n",
+ " xsd_namespace = Namespace(\"http://www.w3.org/2001/XMLSchema#\")\n",
+ " doap_namespace = Namespace(\"http://usefulinc.com/ns/doap#\")\n",
+ " foaf_namespace = Namespace(\"http://xmlns.com/foaf/0.1/\")\n",
+ " rdfs_namespace = Namespace(\"http://www.w3.org/2000/01/rdf-schema#\")\n",
+ " oboInOwl_namespace = Namespace(\"http://www.geneontology.org/formats/oboInOwl#\")\n",
+ " oboLegacy_namespace = Namespace(\"http://purl.obolibrary.org/obo/\")\n",
+ "\n",
+ " # Load the RDF graph from the input string with explicit namespaces\n",
+ " namespaces = {\n",
+ " \"\": edam_namespace,\n",
+ " \"dc\": dc_namespace,\n",
+ " \"dcterms\": dcterms_namespace,\n",
+ " \"owl\": owl_namespace,\n",
+ " \"rdf\": rdf_namespace,\n",
+ " \"skos\": skos_namespace,\n",
+ " \"xml\": xml_namespace,\n",
+ " \"xsd\": xsd_namespace,\n",
+ " \"doap\": doap_namespace,\n",
+ " \"foaf\": foaf_namespace,\n",
+ " \"rdfs\": rdfs_namespace,\n",
+ " \"oboInOwl\": oboInOwl_namespace,\n",
+ " \"oboLegacy\": oboLegacy_namespace, \n",
+ " }\n",
+ " graph = Graph()\n",
+ " graph.parse(data=input_owl_string, format=\"xml\", namespaces=namespaces)\n",
+ "\n",
+ "\n",
+ " # Create a new graph for the result\n",
+ " result_graph = Graph()\n",
+ " # Replace namespace prefixes in the result graph to use the \"canonical\" ones\n",
+ " for prefix, namespace in namespaces.items():\n",
+ " result_graph.bind(prefix, namespace, replace=True)\n",
+ "\n",
+ " # Iterate through each class in the input graph\n",
+ " for class_uri in graph.subjects(RDF.type, OWL.Class):\n",
+ " class_element = URIRef(class_uri)\n",
+ "\n",
+ " # Add owl:Class triple\n",
+ " result_graph.add((class_element, RDF.type, OWL.Class))\n",
+ "\n",
+ " # Add other properties to the owl:Class element\n",
+ " for triple in graph.triples((class_uri, None, None)):\n",
+ " result_graph.add((class_element, triple[1], triple[2]))\n",
+ "\n",
+ " # Create an XMLSerializer instance\n",
+ " serializer = XMLSerializer(result_graph)\n",
+ "\n",
+ " # Use BytesIO to create an in-memory file-like object for bytes\n",
+ " stream = BytesIO()\n",
+ "\n",
+ " # Serialize the modified graph to the in-memory stream\n",
+ " serializer.serialize(stream)\n",
+ "\n",
+ " # Get the resulting string from the stream\n",
+ " result_string = xml.dom.minidom.parseString(stream.getvalue()).toprettyxml()\n",
+ " \n",
+ " return result_string\n",
+ "\n",
+ " except Exception as e:\n",
+ " print(f\"An error occurred: {e}\")\n",
+ " return None\n",
+ "\n",
+ "# Perform reformatting and get the resulting OWL string\n",
+ "result_owl_string = create_owl_elements(input_owl_string)\n",
+ "\n",
+ "if result_owl_string is not None:\n",
+ " print(result_owl_string)\n",
+ "else:\n",
+ " print(\"Error processing OWL data.\")\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "78f33183-ee5d-4e7e-8329-79b0f6388e2b",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/edamfu/requirements.txt b/edamfu/requirements.txt
new file mode 100644
index 0000000..bbbc5e5
--- /dev/null
+++ b/edamfu/requirements.txt
@@ -0,0 +1,99 @@
+anyio==4.2.0
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asttokens==2.4.1
+async-lru==2.0.4
+attrs==23.2.0
+Babel==2.14.0
+beautifulsoup4==4.12.3
+bleach==6.1.0
+certifi==2024.2.2
+cffi==1.16.0
+charset-normalizer==3.3.2
+comm==0.2.1
+debugpy==1.8.0
+decorator==5.1.1
+defusedxml==0.7.1
+executing==2.0.1
+fastjsonschema==2.19.1
+fqdn==1.5.1
+h11==0.14.0
+httpcore==1.0.2
+httpx==0.26.0
+idna==3.6
+ipykernel==6.29.2
+ipython==8.21.0
+ipywidgets==8.1.1
+isodate==0.6.1
+isoduration==20.11.0
+jedi==0.19.1
+Jinja2==3.1.3
+json5==0.9.14
+jsonpointer==2.4
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+jupyter==1.0.0
+jupyter-console==6.6.3
+jupyter-events==0.9.0
+jupyter-lsp==2.2.2
+jupyter_client==8.6.0
+jupyter_core==5.7.1
+jupyter_server==2.12.5
+jupyter_server_terminals==0.5.2
+jupyterlab==4.1.0
+jupyterlab-widgets==3.0.9
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.25.2
+MarkupSafe==2.1.5
+matplotlib-inline==0.1.6
+mistune==3.0.2
+nbclient==0.9.0
+nbconvert==7.16.0
+nbformat==5.9.2
+nest-asyncio==1.6.0
+notebook==7.0.7
+notebook_shim==0.2.3
+overrides==7.7.0
+packaging==23.2
+pandocfilters==1.5.1
+parso==0.8.3
+pexpect==4.9.0
+platformdirs==4.2.0
+prometheus-client==0.19.0
+prompt-toolkit==3.0.43
+psutil==5.9.8
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pycparser==2.21
+Pygments==2.17.2
+pyparsing==3.1.1
+python-dateutil==2.8.2
+python-json-logger==2.0.7
+PyYAML==6.0.1
+pyzmq==25.1.2
+qtconsole==5.5.1
+QtPy==2.4.1
+rdflib==7.0.0
+referencing==0.33.0
+requests==2.31.0
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rpds-py==0.17.1
+Send2Trash==1.8.2
+six==1.16.0
+sniffio==1.3.0
+soupsieve==2.5
+stack-data==0.6.3
+terminado==0.18.0
+tinycss2==1.2.1
+tornado==6.4
+traitlets==5.14.1
+types-python-dateutil==2.8.19.20240106
+uri-template==1.3.0
+urllib3==2.2.0
+wcwidth==0.2.13
+webcolors==1.13
+webencodings==0.5.1
+websocket-client==1.7.0
+widgetsnbextension==4.0.9
diff --git a/edamfu/setup.py b/edamfu/setup.py
new file mode 100644
index 0000000..34c0b4c
--- /dev/null
+++ b/edamfu/setup.py
@@ -0,0 +1,20 @@
+from setuptools import setup, find_packages
+
+# Trove classifiers advertised for the package.
+CLASSIFIERS = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.6",
+    "Programming Language :: Python :: 3.7",
+    "Programming Language :: Python :: 3.8",
+]
+
+# NOTE(review): author, author_email and description look like template
+# placeholders -- confirm the intended values before publishing.
+setup(
+    name="edamfu",
+    version="1.0.0",
+    author="Your Name",
+    author_email="your_email@example.com",
+    description="A Python module for working with edamfu",
+    packages=["edamfu"],
+    classifiers=CLASSIFIERS,
+)
diff --git a/edamfu/tests/EDAM_dev.owl b/edamfu/tests/EDAM_dev.owl
new file mode 100644
index 0000000..012e3c5
--- /dev/null
+++ b/edamfu/tests/EDAM_dev.owl
@@ -0,0 +1,60979 @@
+
+
+
+ 4040
+ 03.10.2023 11:14 UTC
+ EDAM http://edamontology.org/ "EDAM relations, concept properties, and subsets"
+ EDAM_data http://edamontology.org/data_ "EDAM types of data"
+ EDAM_format http://edamontology.org/format_ "EDAM data formats"
+ EDAM_operation http://edamontology.org/operation_ "EDAM operations"
+ EDAM_topic http://edamontology.org/topic_ "EDAM topics"
+ EDAM is particularly suitable for semantic annotations and categorisation of diverse resources related to data analysis and management: e.g. tools, workflows, learning materials, or standards. EDAM is also useful in data management itself, for recording provenance metadata of processed data.
+ EDAM is a community project and its development can be followed and contributed to at https://github.com/edamontology/edamontology.
+ https://github.com/edamontology/edamontology/graphs/contributors and many more!
+ Hervé Ménager
+ Jon Ison
+ Matúš Kalaš
+ application/rdf+xml
+
+
+
+ EDAM - The ontology of data analysis and management
+ EDAM is a domain ontology of data analysis and data management in bio- and other sciences, and science-based applications. It comprises concepts related to analysis, modelling, optimisation, and data life-cycle. Targetting usability by diverse users, the structure of EDAM is relatively simple, divided into 4 main sections: Topic, Operation, Data (incl. Identifier), and Format.
+ 1.26_dev
+
+
+
+
+
+
+
+
+
+
+ Matúš Kalaš
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+ true
+ Publication reference
+ 'Citation' concept property ('citation' metadata tag) contains a dereferenceable URI, preferably including a DOI, pointing to a citeable publication of the given data format.
+ Publication
+
+ Citation
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ Version in which a concept was created.
+
+ Created in
+
+
+
+
+
+
+
+ true
+ A comment explaining why the comment should be or was deprecated, including name of person commenting (jison, mkalas etc.).
+
+ deprecation_comment
+
+
+
+
+
+
+
+ true
+ 'Documentation' trailing modifier (qualifier, 'documentation') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to a page with explanation, description, documentation, or specification of the given data format.
+ Specification
+
+ Documentation
+
+
+
+
+
+
+
+ true
+ 'Example' concept property ('example' metadata tag) lists examples of valid values of types of identifiers (accessions). Applicable to some other types of data, too.
+
+ Separated by bar ('|'). For more complex data and data formats, it can be a link to a website with examples, instead.
+ Example
+
+
+
+
+
+
+
+ true
+ 'File extension' concept property ('file_extension' metadata tag) lists examples of usual file extensions of formats.
+
+ N.B.: File extensions that are not correspondigly defined at http://filext.com are recorded in EDAM only if not in conflict with http://filext.com, and/or unique and usual within life-science computing.
+ Separated by bar ('|'), without a dot ('.') prefix, preferably not all capital characters.
+ File extension
+
+
+
+
+
+
+
+ true
+ "Supported by the given data format" here means, that the given format enables representation of data that satisfies the information standard.
+ 'Information standard' trailing modifier (qualifier, 'information_standard') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to an information standard supported by the given data format.
+ Minimum information checklist
+ Minimum information standard
+
+ Information standard
+
+
+
+
+
+
+
+ true
+ When 'true', the concept has been proposed to be deprecated.
+
+ deprecation_candidate
+
+
+
+
+
+
+
+ true
+ When 'true', the concept has been proposed to be refactored.
+
+ refactor_candidate
+
+
+
+
+
+
+
+ true
+ When 'true', the concept has been proposed or is supported within Debian as a tag.
+
+ isdebtag
+
+
+
+
+
+
+
+ true
+ 'Media type' trailing modifier (qualifier, 'media_type') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to a page specifying a media type of the given data format.
+ MIME type
+
+ Media type
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ Whether terms associated with this concept are recommended for use in annotation.
+
+ notRecommendedForAnnotation
+
+
+
+
+
+
+
+ true
+ Version in which a concept was made obsolete.
+
+ Obsolete since
+
+
+
+
+
+
+
+ true
+ EDAM concept URI of the erstwhile "parent" of a now deprecated concept.
+
+ Old parent
+
+
+
+
+
+
+
+ true
+ EDAM concept URI of an erstwhile related concept (by has_input, has_output, has_topic, is_format_of, etc.) of a now deprecated concept.
+
+ Old related
+
+
+
+
+
+
+
+ true
+ 'Ontology used' concept property ('ontology_used' metadata tag) of format concepts links to a domain ontology that is used inside the given data format, or contains a note about ontology use within the format.
+
+ Ontology used
+
+
+
+
+
+
+
+ true
+ 'Organisation' trailing modifier (qualifier, 'organisation') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to an organisation that developed, standardised, and maintains the given data format.
+ Organization
+
+ Organisation
+
+
+
+
+
+
+
+ true
+ A comment explaining the proposed refactoring, including name of person commenting (jison, mkalas etc.).
+
+ refactor_comment
+
+
+
+
+
+
+
+ true
+ 'Regular expression' concept property ('regex' metadata tag) specifies the allowed values of types of identifiers (accessions). Applicable to some other types of data, too.
+
+ Regular expression
+
+
+
+
+
+
+
+ Related term
+
+ 'Related term' concept property ('related_term'; supposedly a synonym modifier in OBO format) states a related term - not necessarily closely semantically related - that users (also non-specialists) may use when searching.
+
+
+
+
+
+
+
+
+ true
+ 'Repository' trailing modifier (qualifier, 'repository') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to the public source-code repository where the given data format is developed or maintained.
+ Public repository
+ Source-code repository
+
+ Repository
+
+
+
+
+
+
+
+ true
+ Name of thematic editor (http://biotools.readthedocs.io/en/latest/governance.html#registry-editors) responsible for this concept and its children.
+
+ thematic_editor
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_format B' defines for the subject A, that it has the object B as its data format.
+
+ false
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is (or is in a role of) 'Data', or an input, output, input or output argument of an 'Operation'. Object B can either be a concept that is a 'Format', or in unexpected cases an entity outside of an ontology that is a 'Format' or is in the role of a 'Format'. In EDAM, 'has_format' is not explicitly defined between EDAM concepts, only the inverse 'is_format_of'.
+ has format
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_function B' defines for the subject A, that it has the object B as its function.
+ OBO_REL:bearer_of
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is (or is in a role of) a function, or an entity outside of an ontology that is (or is in a role of) a function specification. In the scope of EDAM, 'has_function' serves only for relating annotated entities outside of EDAM with 'Operation' concepts.
+ has function
+
+
+
+
+
+
+
+ OBO_REL:bearer_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:bearer_of' is narrower in the sense that it only relates ontological categories (concepts) that are an 'independent_continuant' (snap:IndependentContinuant) with ontological categories that are a 'specifically_dependent_continuant' (snap:SpecificallyDependentContinuant), and broader in the sense that it relates with any borne objects not just functions of the subject.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_identifier B' defines for the subject A, that it has the object B as its identifier.
+
+ false
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is an 'Identifier', or an entity outside of an ontology that is an 'Identifier' or is in the role of an 'Identifier'. In EDAM, 'has_identifier' is not explicitly defined between EDAM concepts, only the inverse 'is_identifier_of'.
+ has identifier
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_input B' defines for the subject A, that it has the object B as a necessary or actual input or input argument.
+ OBO_REL:has_participant
+
+ true
+ Subject A can either be concept that is or has an 'Operation' function, or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that has an 'Operation' function or is an 'Operation'. Object B can be any concept or entity. In EDAM, only 'has_input' is explicitly defined between EDAM concepts ('Operation' 'has_input' 'Data'). The inverse, 'is_input_of', is not explicitly defined.
+ has input
+
+
+
+
+
+
+ OBO_REL:has_participant
+ 'OBO_REL:has_participant' is narrower in the sense that it only relates ontological categories (concepts) that are a 'process' (span:Process) with ontological categories that are a 'continuant' (snap:Continuant), and broader in the sense that it relates with any participating objects not just inputs or input arguments of the subject.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_output B' defines for the subject A, that it has the object B as a necessary or actual output or output argument.
+ OBO_REL:has_participant
+
+ true
+ Subject A can either be concept that is or has an 'Operation' function, or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that has an 'Operation' function or is an 'Operation'. Object B can be any concept or entity. In EDAM, only 'has_output' is explicitly defined between EDAM concepts ('Operation' 'has_output' 'Data'). The inverse, 'is_output_of', is not explicitly defined.
+ has output
+
+
+
+
+
+
+ OBO_REL:has_participant
+ 'OBO_REL:has_participant' is narrower in the sense that it only relates ontological categories (concepts) that are a 'process' (span:Process) with ontological categories that are a 'continuant' (snap:Continuant), and broader in the sense that it relates with any participating objects not just outputs or output arguments of the subject. It is also not clear whether an output (result) actually participates in the process that generates it.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_topic B' defines for the subject A, that it has the object B as its topic (A is in the scope of a topic B).
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is a 'Topic', or in unexpected cases an entity outside of an ontology that is a 'Topic' or is in the role of a 'Topic'. In EDAM, only 'has_topic' is explicitly defined between EDAM concepts ('Operation' or 'Data' 'has_topic' 'Topic'). The inverse, 'is_topic_of', is not explicitly defined.
+ has topic
+
+
+
+
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_format_of B' defines for the subject A, that it is a data format of the object B.
+ OBO_REL:quality_of
+
+ false
+ Subject A can either be a concept that is a 'Format', or in unexpected cases an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is a 'Format' or is in the role of a 'Format'. Object B can be any concept or entity outside of an ontology that is (or is in a role of) 'Data', or an input, output, input or output argument of an 'Operation'. In EDAM, only 'is_format_of' is explicitly defined between EDAM concepts ('Format' 'is_format_of' 'Data'). The inverse, 'has_format', is not explicitly defined.
+ is format of
+
+
+
+
+
+ OBO_REL:quality_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:quality_of' might be seen narrower in the sense that it only relates subjects that are a 'quality' (snap:Quality) with objects that are an 'independent_continuant' (snap:IndependentContinuant), and is broader in the sense that it relates any qualities of the object.
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_function_of B' defines for the subject A, that it is a function of the object B.
+ OBO_REL:function_of
+ OBO_REL:inheres_in
+
+ true
+ Subject A can either be concept that is (or is in a role of) a function, or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is (or is in a role of) a function specification. Object B can be any concept or entity. Within EDAM itself, 'is_function_of' is not used.
+ is function of
+
+
+
+
+
+
+ OBO_REL:function_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:function_of' only relates subjects that are a 'function' (snap:Function) with objects that are an 'independent_continuant' (snap:IndependentContinuant), so for example no processes. It does not define explicitly that the subject is a function of the object.
+
+
+
+
+ OBO_REL:inheres_in
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:inheres_in' is narrower in the sense that it only relates ontological categories (concepts) that are a 'specifically_dependent_continuant' (snap:SpecificallyDependentContinuant) with ontological categories that are an 'independent_continuant' (snap:IndependentContinuant), and broader in the sense that it relates any borne subjects not just functions.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_identifier_of B' defines for the subject A, that it is an identifier of the object B.
+
+ false
+ Subject A can either be a concept that is an 'Identifier', or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is an 'Identifier' or is in the role of an 'Identifier'. Object B can be any concept or entity outside of an ontology. In EDAM, only 'is_identifier_of' is explicitly defined between EDAM concepts (only 'Identifier' 'is_identifier_of' 'Data'). The inverse, 'has_identifier', is not explicitly defined.
+ is identifier of
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_input_of B' defines for the subject A, that it is a necessary or actual input or input argument of the object B.
+ OBO_REL:participates_in
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology that has an 'Operation' function or is an 'Operation'. In EDAM, 'is_input_of' is not explicitly defined between EDAM concepts, only the inverse 'has_input'.
+ is input of
+
+
+
+
+
+
+ OBO_REL:participates_in
+ 'OBO_REL:participates_in' is narrower in the sense that it only relates ontological categories (concepts) that are a 'continuant' (snap:Continuant) with ontological categories that are a 'process' (span:Process), and broader in the sense that it relates any participating subjects not just inputs or input arguments.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_output_of B' defines for the subject A, that it is a necessary or actual output or output argument of the object B.
+ OBO_REL:participates_in
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology that has an 'Operation' function or is an 'Operation'. In EDAM, 'is_output_of' is not explicitly defined between EDAM concepts, only the inverse 'has_output'.
+ is output of
+
+
+
+
+
+
+ OBO_REL:participates_in
+ 'OBO_REL:participates_in' is narrower in the sense that it only relates ontological categories (concepts) that are a 'continuant' (snap:Continuant) with ontological categories that are a 'process' (span:Process), and broader in the sense that it relates any participating subjects not just outputs or output arguments. It is also not clear whether an output (result) actually participates in the process that generates it.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_topic_of B' defines for the subject A, that it is a topic of the object B (a topic A is the scope of B).
+ OBO_REL:quality_of
+
+ true
+ Subject A can either be a concept that is a 'Topic', or in unexpected cases an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is a 'Topic' or is in the role of a 'Topic'. Object B can be any concept or entity outside of an ontology. In EDAM, 'is_topic_of' is not explicitly defined between EDAM concepts, only the inverse 'has_topic'.
+ is topic of
+
+
+
+
+
+ OBO_REL:quality_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:quality_of' might be seen narrower in the sense that it only relates subjects that are a 'quality' (snap:Quality) with objects that are an 'independent_continuant' (snap:IndependentContinuant), and is broader in the sense that it relates any qualities of the object.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A type of computational resource used in bioinformatics.
+
+ Resource type
+ true
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Information, represented in an information artefact (data record) that is 'understandable' by dedicated computational tools that can use the data as input or produce it as output.
+ Data record
+ Data set
+ Datum
+
+
+ Data
+
+
+
+
+
+
+
+
+
+
+
+
+ Data record
+ EDAM does not distinguish a data record (a tool-understandable information artefact) from data or datum (its content, the tool-understandable encoding of an information).
+
+
+
+
+ Data set
+ EDAM does not distinguish the multiplicity of data, such as one data item (datum) versus a collection of data (data set).
+
+
+
+
+ Datum
+ EDAM does not distinguish the multiplicity of data, such as one data item (datum) versus a collection of data (data set).
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A bioinformatics package or tool, e.g. a standalone application or web service.
+
+
+ Tool
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A digital data archive typically based around a relational model but sometimes using an object-oriented, tree or graph-based model.
+
+
+ Database
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An ontology of biological or bioinformatics concepts and relations, a controlled vocabulary, structured glossary etc.
+
+
+ Ontology
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A directory on disk from which files are read.
+
+ Directory metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controlled vocabulary from National Library of Medicine. The MeSH thesaurus is used to index articles in biomedical journals for the Medline/PubMED databases.
+
+ MeSH vocabulary
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controlled vocabulary for gene names (symbols) from HUGO Gene Nomenclature Committee.
+
+ HGNC vocabulary
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Compendium of controlled vocabularies for the biomedical domain (Unified Medical Language System).
+
+ UMLS vocabulary
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A text token, number or something else which identifies an entity, but which may not be persistent (stable) or unique (the same identifier may identify multiple things).
+ ID
+
+
+
+ Identifier
+
+
+
+
+
+
+
+
+ Almost exact but limited to identifying resources, and being unambiguous.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An entry (retrievable via URL) from a biological database.
+
+ Database entry
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mass of a molecule.
+
+
+ Molecular mass
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_formal_charge
+ Net charge of a molecule.
+
+
+ Molecular charge
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A specification of a chemical structure.
+ Chemical structure specification
+
+
+ Chemical formula
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR quantitative descriptor (name-value pair) of chemical structure.
+
+
+ QSAR descriptors have numeric values that quantify chemical information encoded in a symbolic representation of a molecule. They are used in quantitative structure activity relationship (QSAR) applications. Many subtypes of individual descriptors (not included in EDAM) cover various types of protein properties.
+ QSAR descriptor
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possible in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw molecular sequence (string of characters) which might include ambiguity, unknown positions and non-sequence characters.
+
+
+ Non-sequence characters may be used for example for gaps and translation stop.
+ Raw sequence
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SO:2000061
+ A molecular sequence and associated metadata.
+
+
+ Sequence record
+ http://purl.bioontology.org/ontology/MSH/D058977
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A collection of one or typically multiple molecular sequences (which can include derived data or metadata) that do not (typically) correspond to molecular sequence database records or entries and which (typically) are derived from some analytical method.
+ Alignment reference
+ SO:0001260
+
+
+ An example is an alignment reference; one or a set of reference molecular sequences, structures, or profiles used for alignment of genomic, transcriptomic, or proteomic experimental data.
+ This concept may be used for arbitrary sequence sets and associated data arising from processing.
+ Sequence set
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A character used to replace (mask) other characters in a molecular sequence.
+
+ Sequence mask character
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of sequence masking to perform.
+
+ Sequence masking is where specific characters or positions in a molecular sequence are masked (replaced) with another (mask character). The mask type indicates what is masked, for example regions that are not of interest or which are information-poor including acidic protein regions, basic protein regions, proline-rich regions, low compositional complexity regions, short-periodicity internal repeats, simple repeats and low complexity regions. Masked sequences are used in database search to eliminate statistically significant but biologically uninteresting hits.
+ Sequence mask type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+
+ The strand of a DNA sequence (forward or reverse).
+
+ The forward or 'top' strand might specify a sequence is to be used as given, the reverse or 'bottom' strand specifying the reverse complement of the sequence is to be used.
+ DNA sense specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A specification of sequence length(s).
+
+ Sequence length specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Basic or general information concerning molecular sequences.
+
+ This is used for such things as a report including the sequence identifier, type and length.
+ Sequence metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ How the annotation of a sequence feature (for example in EMBL or Swiss-Prot) was derived.
+
+
+ This might be the name and version of a software tool, the name of a database, or 'curated' to indicate a manual annotation (made by a human).
+ Sequence feature source
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of sequence hits and associated data from searching a database of sequences (for example a BLAST search). This will typically include a list of scores (often with statistical evaluation) and a set of alignments for the hits.
+ Database hits (sequence)
+ Sequence database hits
+ Sequence database search results
+ Sequence search hits
+
+
+ The score list includes the alignment score, percentage of the query sequence matched, length of the database sequence entry in this alignment, identifier of the database sequence entry, excerpt of the database sequence entry description etc.
+ Sequence search results
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on the location of matches ("hits") between sequences, sequence profiles, motifs (conserved or functional patterns) and other types of sequence signatures.
+ Profile-profile alignment
+ Protein secondary database search results
+ Search results (protein secondary database)
+ Sequence motif hits
+ Sequence motif matches
+ Sequence profile alignment
+ Sequence profile hits
+ Sequence profile matches
+ Sequence-profile alignment
+
+
+ A "profile-profile alignment" is an alignment of two sequence profiles, each profile typically representing a sequence alignment.
+ A "sequence-profile alignment" is an alignment of one or more molecular sequence(s) to one or more sequence profile(s) (each profile typically representing a sequence alignment).
+ This includes reports of hits from a search of a protein secondary or domain database. Data associated with the search or alignment might also be included, e.g. ranked list of best-scoring sequences, a graphical representation of scores etc.
+ Sequence signature matches
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data files used by motif or profile methods.
+
+ Sequence signature model
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning specific or conserved patterns in molecular sequences and the classifiers used for their identification, including sequence motifs, profiles or other diagnostic elements.
+
+
+ This can include metadata about a motif or sequence profile such as its name, length, technical details about the profile construction, and so on.
+ Sequence signature data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment of exact matches between subsequences (words) within two or more molecular sequences.
+
+ Sequence alignment (words)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A dotplot of sequence similarities identified from word-matching or character comparison.
+
+
+ Dotplot
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple molecular sequences.
+ Multiple sequence alignment
+ msa
+
+
+ Sequence alignment
+
+ http://purl.bioontology.org/ontology/MSH/D016415
+ http://semanticscience.org/resource/SIO_010066
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Some simple value controlling a sequence alignment (or similar 'match') operation.
+
+ Sequence alignment parameter
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A value representing molecular sequence similarity.
+
+
+ Sequence similarity score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Report of general information on a sequence alignment, typically including a description, sequence identifiers and alignment score.
+
+ Sequence alignment metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report of molecular sequence alignment-derived data or metadata.
+ Sequence alignment metadata
+
+
+ Use this for any computer-generated reports on sequence alignments, and for general information (metadata) on a sequence alignment, such as a description, sequence identifiers and alignment score.
+ Sequence alignment report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ "Sequence-profile alignment" and "Profile-profile alignment" are synonymous with "Sequence signature matches" which was already stated as including matches (alignment) and other data.
+ 1.25 or earlier
+
+ A profile-profile alignment (each profile typically representing a sequence alignment).
+
+
+ Profile-profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ "Sequence-profile alignment" and "Profile-profile alignment" are synonymous with "Sequence signature matches" which was already stated as including matches (alignment) and other data.
+ 1.24
+
+ Alignment of one or more molecular sequence(s) to one or more sequence profile(s) (each profile typically representing a sequence alignment).
+
+
+ Sequence-profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:phylogenetic_distance_matrix
+ A matrix of estimated evolutionary distance between molecular sequences, such as is suitable for phylogenetic tree calculation.
+ Phylogenetic distance matrix
+
+
+ Methods might perform character compatibility analysis or identify patterns of similarity in an alignment or data matrix.
+ Sequence distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic character data from which a phylogenetic tree may be generated.
+
+
+ As defined, this concept would also include molecular sequences, microsatellites, polymorphisms (RAPDs, RFLPs, or AFLPs), restriction sites and fragments
+ Phylogenetic character data
+ http://www.evolutionaryontology.org/cdao.owl#Character
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Tree
+ Moby:myTree
+ Moby:phylogenetic_tree
+ The raw data (not just an image) from which a phylogenetic tree is directly generated or plotted, such as topology, lengths (in time or in expected amounts of variance) and a confidence interval for each length.
+ Phylogeny
+
+
+ A phylogenetic tree is usually constructed from a set of sequences from which an alignment (or data matrix) is calculated. See also 'Phylogenetic tree image'.
+ Phylogenetic tree
+ http://purl.bioontology.org/ontology/MSH/D010802
+ http://www.evolutionaryontology.org/cdao.owl#Tree
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of integer or floating point numbers for amino acid or nucleotide sequence comparison.
+ Substitution matrix
+
+
+ The comparison matrix might include matrix name, optional comment, height and width (or size) of matrix, an index row/column (of characters) and data rows/columns (of integers or floats).
+ Comparison matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Predicted or actual protein topology represented as a string of protein secondary structure elements.
+
+
+ The location and size of the secondary structure elements and intervening loop regions is usually indicated.
+ Protein topology
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Secondary structure (predicted or real) of a protein.
+
+
+ Protein features report (secondary structure)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Super-secondary structure of protein sequence(s).
+
+
+ Super-secondary structures include leucine zippers, coiled coils, Helix-Turn-Helix etc.
+ Protein features report (super-secondary)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Alignment of the (1D representations of) secondary structure of two or more proteins.
+ Secondary structure alignment (protein)
+
+
+ Protein secondary structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on protein secondary structure alignment-derived data or metadata.
+
+ Secondary structure alignment metadata (protein)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:RNAStructML
+ An informative report of secondary structure (predicted or real) of an RNA molecule.
+ Secondary structure (RNA)
+
+
+ This includes thermodynamically stable or evolutionarily conserved structures such as knots, pseudoknots etc.
+ RNA secondary structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Moby:RNAStructAlignmentML
+ Alignment of the (1D representations of) secondary structure of two or more RNA molecules.
+ Secondary structure alignment (RNA)
+
+
+ RNA secondary structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report of RNA secondary structure alignment-derived data or metadata.
+
+ Secondary structure alignment metadata (RNA)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a macromolecular tertiary (3D) structure or part of a structure.
+ Coordinate model
+ Structure data
+
+
+ The coordinate data may be predicted or real.
+ Structure
+ http://purl.bioontology.org/ontology/MSH/D015394
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An entry from a molecular tertiary (3D) structure database.
+
+ Tertiary structure record
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Results (hits) from searching a database of tertiary structure.
+
+ Structure database search results
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of molecular tertiary (3D) structures.
+
+
+ A tertiary structure alignment will include the untransformed coordinates of one macromolecule, followed by the second (or subsequent) structure(s) with all the coordinates transformed (by rotation / translation) to give a superposition.
+ Structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report of molecular tertiary structure alignment-derived data.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Structure alignment report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A value representing molecular structure similarity, measured from structure alignment or some other type of structure comparison.
+
+
+ Structure similarity score
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some type of structural (3D) profile or template (representing a structure or structure alignment).
+ 3D profile
+ Structural (3D) profile
+
+
+ Structural profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A 3D profile-3D profile alignment (each profile representing structures or a structure alignment).
+ Structural profile alignment
+
+
+ Structural (3D) profile alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An alignment of a sequence to a 3D profile (representing structures or a structure alignment).
+
+ Sequence-3D profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of values used for scoring sequence-structure compatibility.
+
+
+ Protein sequence-structure scoring matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An alignment of molecular sequence to structure (from threading sequence(s) through 3D structure or representation of structure(s)).
+
+
+ Sequence-structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific amino acid.
+
+ Amino acid annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific peptide.
+
+ Peptide annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative human-readable report about one or more specific protein molecules or protein structural domains, derived from analysis of primary (sequence or structural) data.
+ Gene product annotation
+
+
+ Protein report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of primarily non-positional data describing intrinsic physical, chemical or other properties of a protein molecule or model.
+ Protein physicochemical property
+ Protein properties
+ Protein sequence statistics
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. Data may be based on analysis of nucleic acid sequence or structural data, for example reports on the surface properties (shape, hydropathy, electrostatic patches etc) of a protein structure, protein flexibility or motion, and protein architecture (spatial arrangement of secondary structure).
+ Protein property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ 3D structural motifs in a protein.
+
+ Protein structural motifs and surfaces
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Data concerning the classification of the sequences and/or structures of protein structural domain(s).
+
+ Protein domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ structural domains or 3D folds in a protein or polypeptide chain.
+
+
+ Protein features report (domains)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on architecture (spatial arrangement of secondary structure) of a protein structure.
+
+ Protein architecture report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report on an analysis or model of protein folding properties, folding pathways, residues or sites that are key to protein folding, nucleation or stabilisation centers etc.
+
+
+ Protein folding report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data on the effect of (typically point) mutation on protein folding, stability, structure and function.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein features (mutation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein-protein interaction data from for example yeast two-hybrid analysis, protein microarrays, immunoaffinity chromatography followed by mass spectrometry, phage display etc.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein interaction raw data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the interactions (predicted or known) within or between a protein, structural domain or part of a protein. This includes intra- and inter-residue contacts and distances, as well as interactions with other proteins and non-protein entities such as nucleic acid, metal atoms, water, ions etc.
+ Protein interaction record
+ Protein interaction report
+ Protein report (interaction)
+ Protein-protein interaction data
+ Atom interaction data
+ Protein non-covalent interactions report
+ Residue interaction data
+
+
+ Protein interaction data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein classification data
+ An informative report on a specific protein family or other classification or group of protein sequences or structures.
+ Protein family annotation
+
+
+ Protein family report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The maximum initial velocity or rate of a reaction. It is the limiting velocity as substrate concentrations get very large.
+
+
+ Vmax
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Km is the concentration (usually in Molar units) of substrate that leads to half-maximal velocity of an enzyme-catalysed reaction.
+
+
+ Km
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific nucleotide base.
+
+ Nucleotide base annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nucleic acid structural properties stiffness, curvature, twist/roll data or other conformational parameters or properties.
+ A report of primarily non-positional data describing intrinsic physical, chemical or other properties of a nucleic acid molecule.
+ Nucleic acid physicochemical property
+ GC-content
+ Nucleic acid property (structural)
+ Nucleic acid structural property
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Nucleic acid property
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data derived from analysis of codon usage (typically a codon usage table) of DNA sequences.
+ Codon usage report
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Codon usage data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GeneInfo
+ Moby:gene
+ Moby_namespace:Human_Readable_Description
+ A report on predicted or actual gene structure, regions which make an RNA product and features such as promoters, coding regions, splice sites etc.
+ Gene and transcript structure (report)
+ Gene annotation
+ Gene features report
+ Gene function (report)
+ Gene structure (report)
+ Nucleic acid features (gene and transcript structure)
+
+
+ This includes any report on a particular locus or gene. This might include the gene name, description, summary and so on. It can include details about the function of a gene, such as its encoded protein or a functional classification of the gene sequence along according to the encoded protein(s).
+ Gene report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on the classification of nucleic acid / gene sequences according to the functional classification of their gene products.
+
+ Gene classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ stable, naturally occurring mutations in a nucleotide sequence including alleles, naturally occurring mutations such as single base nucleotide substitutions, deletions and insertions, RFLPs and other polymorphisms.
+
+
+ DNA variation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific chromosome.
+
+
+ This includes basic information. e.g. chromosome number, length, karyotype features, chromosome sequence etc.
+ Chromosome report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about the set of genes (or allelic forms) present in an individual, organism or cell and associated with a specific physical characteristic, or a report concerning an organism's traits and phenotypes.
+ Genotype/phenotype annotation
+
+
+ Genotype/phenotype report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ PCR experiments, e.g. quantitative real-time PCR.
+
+
+ PCR experiment report
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fluorescence trace data generated by an automated DNA sequencer, which can be interpreted as a molecular sequence (reads), given associated sequencing metadata such as base-call quality scores.
+
+
+ This is the raw data produced by a DNA sequencing machine.
+ Sequence trace
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An assembly of fragments of a (typically genomic) DNA sequence.
+ Contigs
+ SO:0000353
+ SO:0001248
+
+
+ Typically, an assembly is a collection of contigs (for example ESTs and genomic DNA fragments) that are ordered, aligned and merged. Annotation of the assembled sequence might be included.
+ Sequence assembly
+
+
+
+
+
+ SO:0001248
+ Perhaps surprisingly, the definition of 'SO:assembly' is narrower than the 'SO:sequence_assembly'.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Radiation hybrid (RH) scores for one or more markers.
+ Radiation Hybrid (RH) scores
+
+
+ Radiation Hybrid (RH) scores are used in Radiation Hybrid mapping.
+ RH scores
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about the linkage of alleles.
+ Gene annotation (linkage)
+ Linkage disequilibrium (report)
+
+
+ This includes linkage disequilibrium; the non-random association of alleles or polymorphisms at two or more loci (not necessarily on the same chromosome).
+ Genetic linkage report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data quantifying the level of expression of (typically) multiple genes, derived for example from microarray experiments.
+ Gene expression pattern
+
+
+ Gene expression profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ microarray experiments including conditions, protocol, sample:data relationships etc.
+
+
+ Microarray experiment report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data on oligonucleotide probes (typically for use with DNA microarrays).
+
+ Oligonucleotide probe data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Output from a serial analysis of gene expression (SAGE) experiment.
+
+ SAGE experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Massively parallel signature sequencing (MPSS) data.
+
+ MPSS experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Sequencing by synthesis (SBS) data.
+
+ SBS experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.14
+
+ Tag to gene assignments (tag mapping) of SAGE, MPSS and SBS data. Typically this is the sequencing-based expression profile annotated with gene identifiers.
+
+
+ Sequence tag profile (with gene assignment)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein X-ray crystallographic data
+ X-ray crystallography data.
+
+
+ Electron density map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nuclear magnetic resonance (NMR) raw data, typically for a protein.
+ Protein NMR data
+
+
+ Raw NMR data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein secondary structure from protein coordinate or circular dichroism (CD) spectroscopic data.
+ CD spectrum
+ Protein circular dichroism (CD) spectroscopic data
+
+
+ CD spectra
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Volume map data from electron microscopy.
+ 3D volume map
+ EM volume map
+ Electron microscopy volume map
+
+
+ Volume map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Annotation on a structural 3D model (volume map) from electron microscopy.
+
+
+ Electron microscopy model
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Two-dimensional gel electrophoresis image.
+
+
+ 2D PAGE image
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Spectra from mass spectrometry.
+ Mass spectrometry spectra
+
+
+ Mass spectrum
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A set of peptide masses (peptide mass fingerprint) from mass spectrometry.
+ Peak list
+ Protein fingerprint
+ Molecular weights standard fingerprint
+
+
+ A molecular weight standard fingerprint is standard protonated molecular masses e.g. from trypsin (modified porcine trypsin, Promega) and keratin peptides.
+ Peptide mass fingerprint
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein or peptide identifications with evidence supporting the identifications, for example from comparing a peptide mass fingerprint (from mass spectrometry) to a sequence database, or the set of typical spectra one obtains when running a protein through a mass spectrometer.
+ 'Protein identification'
+ Peptide spectrum match
+
+
+ Peptide identification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report about a specific biological pathway or network, typically including a map (diagram) of the pathway.
+
+ Pathway or network annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A map (typically a diagram) of a biological pathway.
+
+ Biological pathway map
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A definition of a data resource serving one or more types of data, including metadata and links to the resource or data proper.
+
+ Data resource definition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information, annotation or documentation concerning a workflow (but not the workflow itself).
+
+
+ Workflow metadata
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A biological model represented in mathematical terms.
+ Biological model
+
+
+ Mathematical model
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A value representing estimated statistical significance of some observed data; typically sequence database hits.
+
+
+ Statistical estimate score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Resource definition for an EMBOSS database.
+
+ EMBOSS database resource definition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a version of software or data, for example name, version number and release date.
+
+ Development status / maturity may be part of the version information, for example in case of tools, standards, or some data records.
+ Version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A mapping of the accession numbers (or other database identifier) of entries between (typically) two biological or biomedical databases.
+
+
+ The cross-mapping is typically a table where each row is an accession number and each column is a database being cross-referenced. The cells give the accession number or identifier of the corresponding entry in a database. If a cell in the table is not filled then no mapping could be found for the database. Additional information might be given on version, date etc.
+ Database cross-mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An index of data of biological relevance.
+
+
+ Data index
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information concerning an analysis of an index of biological data.
+ Database index annotation
+
+
+ Data index report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information on bioinformatics database(s) or other data sources such as name, type, description, URL etc.
+
+
+ Database metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information about one or more bioinformatics applications or packages, such as name, type, description, or other documentation.
+
+
+ Tool metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Textual metadata on a submitted or completed job.
+
+ Job metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Textual metadata on a software author or end-user, for example a person or other software.
+
+
+ User metadata
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific chemical compound.
+ Chemical compound annotation
+ Chemical structure report
+ Small molecule annotation
+
+
+ Small molecule report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a particular strain of organism cell line including plants, virus, fungi and bacteria. The data typically includes strain number, organism type, growth conditions, source and so on.
+ Cell line annotation
+ Organism strain data
+
+
+ Cell line report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific scent.
+
+ Scent annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A term (name) from an ontology.
+ Ontology class name
+ Ontology terms
+
+
+ Ontology term
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning or derived from a concept from a biological ontology.
+ Ontology class metadata
+ Ontology term metadata
+
+
+ Ontology concept data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:BooleanQueryString
+ Moby:Global_Keyword
+ Moby:QueryString
+ Moby:Wildcard_Query
+ Keyword(s) or phrase(s) used (typically) for text-searching purposes.
+ Phrases
+ Term
+
+
+ Boolean operators (AND, OR and NOT) and wildcard characters may be allowed.
+ Keyword
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GCP_SimpleCitation
+ Moby:Publication
+ Bibliographic data that uniquely identifies a scientific article, book or other published material.
+ Bibliographic reference
+ Reference
+
+
+ A bibliographic reference might include information such as authors, title, journal name, date and (possibly) a link to the abstract or full-text of the article if available.
+ Citation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A scientific text, typically a full text article from a scientific journal.
+ Article text
+ Scientific article
+
+
+ Article
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information resulting from text mining.
+ Text mining output
+
+
+ A text mining abstract will typically include an annotated list of words or sentences extracted from one or more scientific articles.
+ Text mining report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of a biological entity or phenomenon.
+
+ Entity identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of a data resource.
+
+ Data resource identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier that identifies a particular type of data.
+ Identifier (typed)
+
+
+
+ This concept exists only to assist EDAM maintenance and navigation in graphical browsers. It does not add semantic information. This branch provides an alternative organisation of the concepts nested under 'Accession' and 'Name'. All concepts under here are already included under 'Accession' or 'Name'.
+ Identifier (by type of entity)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a bioinformatics tool, e.g. an application or web service.
+
+
+
+ Tool identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a discrete entity (any biological thing with a distinct, discrete physical existence).
+
+ Discrete entity identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of an entity feature (a physical part or region of a discrete biological entity, or a feature that can be mapped to such a thing).
+
+ Entity feature identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a collection of discrete biological entities.
+
+ Entity collection identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a physical, observable biological occurrence or event.
+
+ Phenomenon identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name or other identifier of a molecule.
+
+
+
+ Molecule identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier (e.g. character symbol) of a specific atom.
+ Atom identifier
+
+
+
+ Atom ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name of a specific molecule.
+
+
+
+ Molecule name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a molecule.
+
+ For example, 'Protein', 'DNA', 'RNA' etc.
+ Molecule type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Unique identifier of a chemical compound.
+
+ Chemical identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a chromosome.
+
+
+
+ Chromosome name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a peptide chain.
+
+
+
+ Peptide identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein.
+
+
+
+ Protein identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name of a chemical compound.
+ Chemical name
+
+
+
+ Compound name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique registry number of a chemical compound.
+
+
+
+ Chemical registry number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Code word for a ligand, for example from a PDB file.
+
+ Ligand identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a drug.
+
+
+
+ Drug identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an amino acid.
+ Residue identifier
+
+
+
+ Amino acid identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name or other identifier of a nucleotide.
+
+
+
+ Nucleotide identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a monosaccharide.
+
+
+
+ Monosaccharide identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name from Chemical Entities of Biological Interest (ChEBI) of a chemical compound.
+ ChEBI chemical name
+
+
+
+ This is the recommended chemical name for use for example in database annotation.
+ Chemical name (ChEBI)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ IUPAC recommended name of a chemical compound.
+ IUPAC chemical name
+
+
+
+ Chemical name (IUPAC)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ International Non-proprietary Name (INN or 'generic name') of a chemical compound, assigned by the World Health Organisation (WHO).
+ INN chemical name
+
+
+
+ Chemical name (INN)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Brand name of a chemical compound.
+ Brand chemical name
+
+
+
+ Chemical name (brand)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Synonymous name of a chemical compound.
+ Synonymous chemical name
+
+
+
+ Chemical name (synonymous)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CAS registry number of a chemical compound; a unique numerical identifier of chemicals in the scientific literature, as assigned by the Chemical Abstracts Service.
+ CAS chemical registry number
+ Chemical registry number (CAS)
+
+
+
+ CAS number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Beilstein registry number of a chemical compound.
+ Beilstein chemical registry number
+
+
+
+ Chemical registry number (Beilstein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gmelin registry number of a chemical compound.
+ Gmelin chemical registry number
+
+
+
+ Chemical registry number (Gmelin)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3-letter code word for a ligand (HET group) from a PDB file, for example ATP.
+ Component identifier code
+ Short ligand name
+
+
+
+ HET group name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ String of one or more ASCII characters representing an amino acid.
+
+
+
+ Amino acid name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ String of one or more ASCII characters representing a nucleotide.
+
+
+
+ Nucleotide code
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_strand_id
+ WHATIF: chain
+ Identifier of a polypeptide chain from a protein.
+ Chain identifier
+ PDB chain identifier
+ PDB strand id
+ Polypeptide chain identifier
+ Protein chain identifier
+
+
+
+ This is typically a character (for the chain) appended to a PDB identifier, e.g. 1cukA
+ Polypeptide chain ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a protein.
+
+
+
+ Protein name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name or other identifier of an enzyme or record from a database of enzymes.
+
+
+
+ Enzyme identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+\.-\.-\.-|[0-9]+\.[0-9]+\.-\.-|[0-9]+\.[0-9]+\.[0-9]+\.-|[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+
+ Moby:Annotated_EC_Number
+ Moby:EC_Number
+ An Enzyme Commission (EC) number of an enzyme.
+ EC
+ EC code
+ Enzyme Commission number
+
+
+
+ EC number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an enzyme.
+
+
+
+ Enzyme name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a restriction enzyme.
+
+
+
+ Restriction enzyme name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A specification (partial or complete) of one or more positions or regions of a molecular sequence or map.
+
+ Sequence position specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a molecular sequence feature, for example an ID of a feature that is unique within the scope of the GFF file.
+
+
+
+ Sequence feature ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:_atom_site.id
+ WHATIF: PDBx_atom_site
+ WHATIF: number
+ A position of one or more points (base or residue) in a sequence, or part of such a specification.
+ SO:0000735
+
+
+ Sequence position
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Specification of range(s) of sequence positions.
+
+
+ Sequence range
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a nucleic acid feature.
+
+ Nucleic acid feature identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a protein feature.
+
+ Protein feature identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The type of a sequence feature, typically a term or accession from the Sequence Ontology, for example an EMBL or Swiss-Prot sequence feature key.
+ Sequence feature method
+ Sequence feature type
+
+
+ A feature key indicates the biological nature of the feature or information about changes to or versions of the sequence.
+ Sequence feature key
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Typically one of the EMBL or Swiss-Prot feature qualifiers.
+
+
+ Feature qualifiers hold information about a feature beyond that provided by the feature key and location.
+ Sequence feature qualifier
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A name of a sequence feature, e.g. the name of a feature to be displayed to an end-user. Typically an EMBL or Swiss-Prot feature label.
+ Sequence feature name
+
+
+ A feature label identifies a feature of a sequence database entry. When used with the database name and the entry's primary accession number, it is a unique identifier of that feature.
+ Sequence feature label
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a sequence feature-containing entity adhering to the standard feature naming scheme used by all EMBOSS applications.
+ UFO
+
+
+ EMBOSS Uniform Feature Object
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ String of one or more ASCII characters representing a codon.
+
+ Codon name
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Moby:GeneAccessionList
+ An identifier of a gene, such as a name/symbol or a unique identifier of a gene in a database.
+
+
+
+ Gene identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:Global_GeneCommonName
+ Moby_namespace:Global_GeneSymbol
+ The short name of a gene; a single word that does not contain white space characters. It is typically derived from the gene name.
+
+
+
+ Gene symbol
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs:LocusID
+ http://www.geneontology.org/doc/GO.xrf_abbs:NCBI_Gene
+ An NCBI unique identifier of a gene.
+ Entrez gene ID
+ Gene identifier (Entrez)
+ Gene identifier (NCBI)
+ NCBI gene ID
+ NCBI geneid
+
+
+
+ Gene ID (NCBI)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An NCBI RefSeq unique identifier of a gene.
+
+ Gene identifier (NCBI RefSeq)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An NCBI UniGene unique identifier of a gene.
+
+ Gene identifier (NCBI UniGene)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An Entrez unique identifier of a gene.
+
+ Gene identifier (Entrez)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a gene or feature from the CGD database.
+ CGD ID
+
+
+
+ Gene ID (CGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a gene from DictyBase.
+
+
+
+ Gene ID (DictyBase)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a gene (or other feature) from the Ensembl database.
+ Gene ID (Ensembl)
+
+
+
+ Ensembl gene ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ S[0-9]+
+ Identifier of an entry from the SGD database.
+ SGD identifier
+
+
+
+ Gene ID (SGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9\.-]*
+ Moby_namespace:GeneDB
+ Identifier of a gene from the GeneDB database.
+ GeneDB identifier
+
+
+
+ Gene ID (GeneDB)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the TIGR database.
+
+
+
+ TIGR identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gene:[0-9]{7}
+ Identifier of a gene from the TAIR database.
+
+
+
+ TAIR accession (gene)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein structural domain.
+
+
+
+ This is typically a character or string concatenated with a PDB identifier and a chain identifier.
+ Protein domain ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a protein domain (or other node) from the SCOP database.
+
+
+
+ SCOP domain identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1nr3A00
+ Identifier of a protein domain from CATH.
+ CATH domain identifier
+
+
+
+ CATH domain ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A SCOP concise classification string (sccs) is a compact representation of a SCOP domain classification.
+
+
+
+ An sccs includes the class (alphabetical), fold, superfamily and family (all numerical) to which a given domain belongs.
+ SCOP concise classification string (sccs)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 33229
+ Unique identifier (number) of an entry in the SCOP hierarchy, for example 33229.
+ SCOP unique identifier
+ sunid
+
+
+
+ A sunid uniquely identifies an entry in the SCOP hierarchy, including leaves (the SCOP domains) and higher level nodes including entries corresponding to the protein level.
+ SCOP sunid
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3.30.1190.10.1.1.1.1.1
+ A code number identifying a node from the CATH database.
+ CATH code
+ CATH node identifier
+
+
+
+ CATH node ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a biological kingdom (Bacteria, Archaea, or Eukaryotes).
+
+
+
+ Kingdom name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a species (typically a taxonomic group) of organism.
+ Organism species
+
+
+
+ Species name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a strain of an organism variant, typically a plant, virus or bacterium.
+
+
+
+ Strain name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A string of characters that name or otherwise identify a resource on the Internet.
+ URIs
+
+
+ URI
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a biological or bioinformatics database.
+ Database identifier
+
+
+
+ Database ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a directory.
+
+
+
+ Directory name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name (or part of a name) of a file (of any type).
+
+
+
+ File name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an ontology of biological or bioinformatics concepts and relations.
+
+
+
+ Ontology name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Link
+ Moby:URL
+ A Uniform Resource Locator (URL).
+
+
+ URL
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A Uniform Resource Name (URN).
+
+
+ URN
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A Life Science Identifier (LSID) - a unique identifier of some data.
+ Life Science Identifier
+
+
+ LSIDs provide a standard way to locate and describe data. An LSID is represented as a Uniform Resource Name (URN) with the following format: URN:LSID:<Authority>:<Namespace>:<ObjectID>[:<Version>]
+ LSID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a biological or bioinformatics database.
+
+
+
+ Database name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a molecular sequence database.
+
+ Sequence database name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a file (of any type) with restricted possible values.
+
+
+
+ Enumerated file name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The extension of a file name.
+
+
+
+ A file extension is the characters appearing after the final '.' in the file name.
+ File name extension
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The base name of a file.
+
+
+
+ A file base name is the file name stripped of its directory specification and extension.
+ File base name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a QSAR descriptor.
+
+
+
+ QSAR descriptor name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of an entry from a database where the same type of identifier is used for objects (data) of different semantic type.
+
+ This concept is required for completeness. It should never have child concepts.
+ Database entry identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of molecular sequence(s) or entries from a molecular sequence database.
+
+
+
+ Sequence identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a set of molecular sequence(s).
+
+
+
+ Sequence set ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Identifier of a sequence signature (motif or profile) for example from a database of sequence patterns.
+
+ Sequence signature identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a molecular sequence alignment, for example a record from an alignment database.
+
+
+
+ Sequence alignment ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of a phylogenetic distance matrix.
+
+ Phylogenetic distance matrix identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a phylogenetic tree for example from a phylogenetic tree database.
+
+
+
+ Phylogenetic tree ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a comparison matrix.
+ Substitution matrix identifier
+
+
+
+ Comparison matrix identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique and persistent identifier of a molecular tertiary structure, typically an entry from a structure database.
+
+
+
+ Structure ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier or name of a structural (3D) profile or template (representing a structure or structure alignment).
+ Structural profile identifier
+
+
+
+ Structural (3D) profile ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of tertiary structure alignments.
+
+
+
+ Structure alignment ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an index of amino acid physicochemical and biochemical property data.
+
+
+
+ Amino acid index ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Molecular interaction ID
+ Identifier of a report of protein interactions from a protein interaction database (typically).
+
+
+
+ Protein interaction ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein family.
+ Protein secondary database record identifier
+
+
+
+ Protein family identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name of a codon usage table.
+
+
+
+ Codon usage table name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a transcription factor (or a TF binding site).
+
+
+
+ Transcription factor identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of microarray data.
+
+
+
+ Experiment annotation ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of electron microscopy data.
+
+
+
+ Electron microscopy model ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a report of gene expression (e.g. a gene expression profile) from a database.
+ Gene expression profile identifier
+
+
+
+ Gene expression report ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of genotypes and phenotypes.
+
+
+
+ Genotype and phenotype annotation ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of biological pathways or networks.
+
+
+
+ Pathway or network identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a biological or biomedical workflow, typically from a database of workflows.
+
+
+
+ Workflow ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a data type definition from some provider.
+ Data resource definition identifier
+
+
+
+ Data resource definition ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a mathematical model, typically an entry from a database.
+ Biological model identifier
+
+
+
+ Biological model ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of chemicals.
+ Chemical compound identifier
+ Compound ID
+ Small molecule identifier
+
+
+
+ Compound identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique (typically numerical) identifier of a concept in an ontology of biological or bioinformatics concepts and relations.
+
+
+
+ Ontology concept ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a scientific article.
+ Article identifier
+
+
+
+ Article ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FB[a-zA-Z_0-9]{2}[0-9]{7}
+ Identifier of an object from the FlyBase database.
+
+
+
+ FlyBase ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an object from the WormBase database, usually a human-readable name.
+
+
+
+ WormBase name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Class of an object from the WormBase database.
+
+
+
+ A WormBase class describes the type of object such as 'sequence' or 'protein'.
+ WormBase class
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A persistent, unique identifier of a molecular sequence database entry.
+ Sequence accession number
+
+
+
+ Sequence accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing a type of molecular sequence.
+
+ Sequence type might reflect the molecule (protein, nucleic acid etc) or the sequence itself (gapped, ambiguous etc).
+ Sequence type
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a sequence-based entity adhering to the standard sequence naming scheme used by all EMBOSS applications.
+ EMBOSS USA
+
+
+
+ EMBOSS Uniform Sequence Address
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a protein sequence database entry.
+ Protein sequence accession number
+
+
+
+ Sequence accession (protein)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a nucleotide sequence database entry.
+ Nucleotide sequence accession number
+
+
+
+ Sequence accession (nucleic acid)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (NC|AC|NG|NT|NW|NZ|NM|NR|XM|XR|NP|AP|XP|YP|ZP)_[0-9]+
+ Accession number of a RefSeq database entry.
+ RefSeq ID
+
+
+
+ RefSeq accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Accession number of a UniProt (protein sequence) database entry. May contain version or isoform number.
+
+ UniProt accession (extended)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a PIR sequence database entry.
+ PIR ID
+ PIR accession number
+
+
+
+ PIR identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.2
+
+ Identifier of a TREMBL sequence database entry.
+
+
+ TREMBL accession
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Primary identifier of a Gramene database entry.
+ Gramene primary ID
+
+
+
+ Gramene primary identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a (nucleic acid) entry from the EMBL/GenBank/DDBJ databases.
+
+
+
+ EMBL/GenBank/DDBJ ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of an entry (gene cluster) from the NCBI UniGene database.
+ UniGene ID
+ UniGene cluster ID
+ UniGene identifier
+
+
+
+ Sequence cluster ID (UniGene)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a dbEST database entry.
+ dbEST ID
+
+
+
+ dbEST accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a dbSNP database entry.
+ dbSNP identifier
+
+
+
+ dbSNP ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The EMBOSS type of a molecular sequence.
+
+ See the EMBOSS documentation (http://emboss.sourceforge.net/) for a definition of what this includes.
+ EMBOSS sequence type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ List of EMBOSS Uniform Sequence Addresses (EMBOSS listfile).
+
+ EMBOSS listfile
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a cluster of molecular sequence(s).
+
+
+
+ Sequence cluster ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the COG database.
+ COG ID
+
+
+
+ Sequence cluster ID (COG)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a sequence motif, for example an entry from a motif database.
+
+
+
+ Sequence motif identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a sequence profile.
+
+
+
+ A sequence profile typically represents a sequence alignment.
+ Sequence profile ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the ELMdb database of protein functional sites.
+
+
+
+ ELM ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PS[0-9]{5}
+ Accession number of an entry from the Prosite database.
+ Prosite ID
+
+
+
+ Prosite accession number
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier or name of a HMMER hidden Markov model.
+
+
+
+ HMMER hidden Markov model ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier or name of a profile from the JASPAR database.
+
+
+
+ JASPAR profile ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a sequence alignment.
+
+ Possible values include for example the EMBOSS alignment types, BLAST alignment types and so on.
+ Sequence alignment type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The type of a BLAST sequence alignment.
+
+ BLAST sequence alignment type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a phylogenetic tree.
+
+ For example 'nj', 'upgma' etc.
+ Phylogenetic tree type
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the TreeBASE database.
+
+
+
+ TreeBASE study accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the TreeFam database.
+
+
+
+ TreeFam accession number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a comparison matrix.
+
+ For example 'blosum', 'pam', 'gonnet', 'id' etc. Comparison matrix type may be required where a series of matrices of a certain type are used.
+ Comparison matrix type
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name or identifier of a comparison matrix.
+ Substitution matrix name
+
+
+
+ See for example http://www.ebi.ac.uk/Tools/webservices/help/matrix.
+ Comparison matrix name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9][a-zA-Z_0-9]{3}
+ An identifier of an entry from the PDB database.
+ PDB identifier
+ PDBID
+
+
+
+ A PDB identification code which consists of 4 characters, the first of which is a digit in the range 0 - 9; the remaining 3 are alphanumeric, and letters are upper case only. (source: https://cdn.rcsb.org/wwpdb/docs/documentation/file-format/PDB_format_1996.pdf)
+ PDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the AAindex database.
+
+
+
+ AAindex ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the BIND database.
+
+
+
+ BIND accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EBI\-[0-9]+
+ Accession number of an entry from the IntAct database.
+
+
+
+ IntAct accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a protein family.
+
+
+
+ Protein family name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an InterPro entry, usually indicating the type of protein matches for that entry.
+
+
+
+ InterPro entry name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ IPR015590
+ IPR[0-9]{6}
+ Primary accession number of an InterPro entry.
+ InterPro primary accession
+ InterPro primary accession number
+
+
+
+ Every InterPro entry has a unique accession number to provide a persistent citation of database records.
+ InterPro accession
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Secondary accession number of an InterPro entry.
+ InterPro secondary accession number
+
+
+
+ InterPro secondary accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the Gene3D database.
+
+
+
+ Gene3D ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PIRSF[0-9]{6}
+ Unique identifier of an entry from the PIRSF database.
+
+
+
+ PIRSF ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PR[0-9]{5}
+ The unique identifier of an entry in the PRINTS database.
+
+
+
+ PRINTS code
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PF[0-9]{5}
+ Accession number of a Pfam entry.
+
+
+
+ Pfam accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SM[0-9]{5}
+ Accession number of an entry from the SMART database.
+
+
+
+ SMART accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier (number) of a hidden Markov model from the Superfamily database.
+
+
+
+ Superfamily hidden Markov model number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry (family) from the TIGRFam database.
+ TIGRFam accession number
+
+
+
+ TIGRFam ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PD[0-9]+
+ A ProDom domain family accession number.
+
+
+
+ ProDom is a protein domain family database.
+ ProDom accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the TRANSFAC database.
+
+
+
+ TRANSFAC accession number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [AEP]-[a-zA-Z_0-9]{4}-[0-9]+
+ Accession number of an entry from the ArrayExpress database.
+ ArrayExpress experiment ID
+
+
+
+ ArrayExpress accession number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ PRIDE experiment accession number.
+
+
+
+ PRIDE experiment accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the EMDB electron microscopy database.
+
+
+
+ EMDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [GDS|GPL|GSE|GSM][0-9]+
+ Accession number of an entry from the GEO database.
+
+
+
+ GEO accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the GermOnline database.
+
+
+
+ GermOnline ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the EMAGE database.
+
+
+
+ EMAGE ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of an entry from a database of disease.
+
+
+
+ Disease ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the HGVbase database.
+
+
+
+ HGVbase ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry from the HIVDB database.
+
+ HIVDB identifier
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [*#+%^]?[0-9]{6}
+ Identifier of an entry from the OMIM database.
+
+
+
+ OMIM ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an object from one of the KEGG databases (excluding the GENES division).
+
+
+
+ KEGG object identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ REACT_[0-9]+(\.[0-9]+)?
+ Identifier of an entry from the Reactome database.
+ Reactome ID
+
+
+
+ Pathway ID (reactome)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry from the aMAZE database.
+
+ Pathway ID (aMAZE)
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a pathway from the BioCyc biological pathways database.
+ BioCyc pathway ID
+
+
+
+ Pathway ID (BioCyc)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the INOH database.
+ INOH identifier
+
+
+
+ Pathway ID (INOH)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the PATIKA database.
+ PATIKA ID
+
+
+
+ Pathway ID (PATIKA)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the CPDB (ConsensusPathDB) biological pathways database, which is an identifier from an external database integrated into CPDB.
+ CPDB ID
+
+
+
+ This concept refers to identifiers used by the databases collated in CPDB; CPDB identifiers are not independently defined.
+ Pathway ID (CPDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PTHR[0-9]{5}
+ Identifier of a biological pathway from the Panther Pathways database.
+ Panther Pathways ID
+
+
+
+ Pathway ID (Panther)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ MIR:00100005
+ MIR:[0-9]{8}
+ Unique identifier of a MIRIAM data resource.
+
+
+
+ This is the identifier used internally by MIRIAM for a data type.
+ MIRIAM identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a data type from the MIRIAM database.
+
+
+
+ MIRIAM data type name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ urn:miriam:pubmed:16333295|urn:miriam:obo.go:GO%3A0045202
+ The URI (URL or URN) of a data entity from the MIRIAM database.
+ identifiers.org synonym
+
+
+
+ A MIRIAM URI consists of the URI of the MIRIAM data type (PubMed, UniProt etc) followed by the identifier of an element of that data type, for example PMID for a publication or an accession number for a GO term.
+ MIRIAM URI
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UniProt|Enzyme Nomenclature
+ The primary name of a data type from the MIRIAM database.
+
+
+
+ The primary name of a MIRIAM data type is taken from a controlled vocabulary.
+ MIRIAM data type primary name
+
+
+
+
+ UniProt|Enzyme Nomenclature
+ A protein entity has the MIRIAM data type 'UniProt', and an enzyme has the MIRIAM data type 'Enzyme Nomenclature'.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A synonymous name of a data type from the MIRIAM database.
+
+
+
+ A synonymous name for a MIRIAM data type taken from a controlled vocabulary.
+ MIRIAM data type synonymous name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a Taverna workflow.
+
+
+
+ Taverna workflow ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a biological (mathematical) model.
+
+
+
+ Biological model name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (BIOMD|MODEL)[0-9]{10}
+ Unique identifier of an entry from the BioModel database.
+
+
+
+ BioModel ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Chemical structure specified in PubChem Compound Identification (CID), a non-zero integer identifier for a unique chemical structure.
+ PubChem compound accession identifier
+
+
+
+ PubChem CID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the ChemSpider database.
+
+
+
+ ChemSpider ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CHEBI:[0-9]+
+ Identifier of an entry from the ChEBI database.
+ ChEBI IDs
+ ChEBI identifier
+
+
+
+ ChEBI ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the BioPax ontology.
+
+
+
+ BioPax concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a concept from The Gene Ontology.
+ GO concept identifier
+
+
+
+ GO concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the MeSH vocabulary.
+
+
+
+ MeSH concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the HGNC controlled vocabulary.
+
+
+
+ HGNC concept ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 9662|3483|182682
+ [1-9][0-9]{0,8}
+ A stable unique identifier for each taxon (for a species, a family, an order, or any other group) in the NCBI taxonomy database.
+ NCBI tax ID
+ NCBI taxonomy identifier
+
+
+
+ NCBI taxonomy ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the Plant Ontology (PO).
+
+
+
+ Plant Ontology concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the UMLS vocabulary.
+
+
+
+ UMLS concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FMA:[0-9]+
+ An identifier of a concept from Foundational Model of Anatomy.
+
+
+
+ Classifies anatomical entities according to their shared characteristics (genus) and distinguishing characteristics (differentia). Specifies the part-whole and spatial relationships of the entities, morphological transformation of the entities during prenatal development and the postnatal life cycle and principles, rules and definitions according to which classes and relationships in the other three components of FMA are represented.
+ FMA concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the EMAP mouse ontology.
+
+
+
+ EMAP concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the ChEBI ontology.
+
+
+
+ ChEBI concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the MGED ontology.
+
+
+
+ MGED concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the myGrid ontology.
+
+
+
+ The ontology is provided as two components, the service ontology and the domain ontology. The domain ontology provides concepts for core bioinformatics data types and their relations. The service ontology describes the physical and operational features of web services.
+ myGrid concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 4963447
+ [1-9][0-9]{0,8}
+ PubMed unique identifier of an article.
+ PMID
+
+
+
+ PubMed ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (doi\:)?[0-9]{2}\.[0-9]{4}/.*
+ Digital Object Identifier (DOI) of a published article.
+ Digital Object Identifier
+
+
+
+ DOI
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Medline UI (unique identifier) of an article.
+ Medline unique identifier
+
+
+
+ The use of Medline UI has been replaced by the PubMed unique identifier.
+ Medline UI
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a computer package, application, method or function.
+
+
+
+ Tool name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The unique name of a signature (sequence classifier) method.
+
+
+
+ Signature methods from http://www.ebi.ac.uk/Tools/InterProScan/help.html#results include BlastProDom, FPrintScan, HMMPIR, HMMPfam, HMMSmart, HMMTigr, ProfileScan, ScanRegExp, SuperFamily and HAMAP.
+ Tool name (signature)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a BLAST tool.
+ BLAST name
+
+
+
+ This includes 'blastn', 'blastp', 'blastx', 'tblastn' and 'tblastx'.
+ Tool name (BLAST)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a FASTA tool.
+
+
+
+ This includes 'fasta3', 'fastx3', 'fasty3', 'fastf3', 'fasts3' and 'ssearch'.
+ Tool name (FASTA)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of an EMBOSS application.
+
+
+
+ Tool name (EMBOSS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of an EMBASSY package.
+
+
+
+ Tool name (EMBASSY package)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR constitutional descriptor.
+ QSAR constitutional descriptor
+
+
+ QSAR descriptor (constitutional)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR electronic descriptor.
+ QSAR electronic descriptor
+
+
+ QSAR descriptor (electronic)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR geometrical descriptor.
+ QSAR geometrical descriptor
+
+
+ QSAR descriptor (geometrical)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR topological descriptor.
+ QSAR topological descriptor
+
+
+ QSAR descriptor (topological)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR molecular descriptor.
+ QSAR molecular descriptor
+
+
+ QSAR descriptor (molecular)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any collection of multiple protein sequences and associated metadata that do not (typically) correspond to common sequence database records or database entries.
+
+
+ Sequence set (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any collection of multiple nucleotide sequences and associated metadata that do not (typically) correspond to common sequence database records or database entries.
+
+
+ Sequence set (nucleic acid)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A set of sequences that have been clustered or otherwise classified as belonging to a group including (typically) sequence cluster information.
+
+
+ The cluster might include sequence identifiers, short descriptions, alignment and summary information.
+ Sequence cluster
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A file of intermediate results from a PSIBLAST search that is used for priming the search in the next PSIBLAST iteration.
+
+ A Psiblast checkpoint file uses ASN.1 Binary Format and usually has the extension '.asn'.
+ Psiblast checkpoint file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Sequences generated by HMMER package in FASTA-style format.
+
+ HMMER synthetic sequences set
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A protein sequence cleaved into peptide fragments (by enzymatic or chemical cleavage) with fragment masses.
+
+
+ Proteolytic digest
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SO:0000412
+ Restriction digest fragments from digesting a nucleotide sequence with restriction sites using a restriction endonuclease.
+
+
+ Restriction digest
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Oligonucleotide primer(s) for PCR and DNA amplification, for example a minimal primer set.
+
+
+ PCR primers
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ File of sequence vectors used by EMBOSS vectorstrip application, or any file in same format.
+
+ vectorstrip cloning vector definition file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A library of nucleotide sequences to avoid during hybridisation events. Hybridisation of the internal oligo to sequences in this library is avoided, rather than priming from them. The file is in a restricted FASTA format.
+
+ Primer3 internal oligo mishybridizing library
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A nucleotide sequence library of sequences to avoid during amplification (for example repetitive sequences, or possibly the sequences of genes in a gene family that should not be amplified). The file must be in a restricted FASTA format.
+
+ Primer3 mispriming library file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ File of one or more pairs of primer sequences, as used by EMBOSS primersearch application.
+
+ primersearch primer pairs sequence record
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A cluster of protein sequences.
+ Protein sequence cluster
+
+
+ The sequences are typically related, for example a family of sequences.
+ Sequence cluster (protein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A cluster of nucleotide sequences.
+ Nucleotide sequence cluster
+
+
+ The sequences are typically related, for example a family of sequences.
+ Sequence cluster (nucleic acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The size (length) of a sequence, subsequence or region in a sequence, or range(s) of lengths.
+
+
+ Sequence length
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Size of a sequence word.
+
+ Word size is used for example in word-based sequence database search methods.
+ Word size
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Size of a sequence window.
+
+ A window is a region of fixed size but not fixed position over a molecular sequence. It is typically moved (computationally) over a sequence during scoring.
+ Window size
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Specification of range(s) of length of sequences.
+
+ Sequence length range
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Report on basic information about a molecular sequence such as name, accession number, type (nucleic or protein), length, description etc.
+
+
+ Sequence information report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report about non-positional sequence features, typically a report on general molecular sequence properties derived from sequence analysis.
+ Sequence properties report
+
+
+ Sequence property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotation of positional features of molecular sequence(s), i.e. that can be mapped to position(s) in the sequence.
+ Feature record
+ Features
+ General sequence features
+ Sequence features report
+ SO:0000110
+
+
+ This includes annotation of positional sequence features, organised into a standard feature table, or any other report of sequence features. General feature reports are a source of sequence feature table information although internal conversion would be required.
+ Sequence features
+ http://purl.bioontology.org/ontology/MSH/D058977
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Comparative data on sequence features such as statistics, intersections (and data on intersections), differences etc.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Sequence features (comparative)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report of general sequence properties derived from protein sequence data.
+
+ Sequence property (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report of general sequence properties derived from nucleotide sequence data.
+
+ Sequence property (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on sequence complexity, for example low-complexity or repeat regions in sequences.
+ Sequence property (complexity)
+
+
+ Sequence complexity report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on ambiguity in molecular sequence(s).
+ Sequence property (ambiguity)
+
+
+ Sequence ambiguity report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report (typically a table) on character or word composition / frequency of a molecular sequence(s).
+ Sequence composition
+ Sequence property (composition)
+
+
+ Sequence composition report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on peptide fragments of certain molecular weight(s) in one or more protein sequences.
+
+
+ Peptide molecular weight hits
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of third base position variability in a nucleotide sequence.
+
+
+ Base position variability plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A table of character or word composition / frequency of a molecular sequence.
+
+ Sequence composition table
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of base frequencies of a nucleotide sequence.
+
+
+ Base frequencies table
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of word composition of a nucleotide sequence.
+
+
+ Base word frequencies table
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of amino acid frequencies of a protein sequence.
+ Sequence composition (amino acid frequencies)
+
+
+ Amino acid frequencies table
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of amino acid word composition of a protein sequence.
+ Sequence composition (amino acid words)
+
+
+ Amino acid word frequencies table
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation of a molecular sequence in DAS format.
+
+ DAS sequence feature annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotation of positional sequence features, organised into a standard feature table.
+ Sequence feature table
+
+
+ Feature table
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map of (typically one) DNA sequence annotated with positional or non-positional features.
+ DNA map
+
+
+ Map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report on intrinsic positional features of a nucleotide sequence, formatted to be machine-readable.
+ Feature table (nucleic acid)
+ Nucleic acid feature table
+ Genome features
+ Genomic features
+
+
+ This includes nucleotide sequence feature annotation in any known sequence feature table format and any other report of nucleic acid features.
+ Nucleic acid features
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report on intrinsic positional features of a protein sequence.
+ Feature table (protein)
+ Protein feature table
+
+
+ This includes protein sequence feature annotation in any known sequence feature table format and any other report of protein features.
+ Protein features
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GeneticMap
+ A map showing the relative positions of genetic markers in a nucleic acid sequence, based on estimation of non-physical distance such as recombination frequencies.
+ Linkage map
+
+
+ A genetic (linkage) map indicates the proximity of two genes on a chromosome, whether two genes are linked and the frequency they are transmitted together to an offspring. They are limited to genetic markers of traits observable only in whole organisms.
+ Genetic map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map of genetic markers in a contiguous, assembled genomic sequence, with the sizes and separation of markers measured in base pairs.
+
+
+ A sequence map typically includes annotation on significant subsequences such as contigs, haplotypes and genes. The contigs shown will (typically) be a set of small overlapping clones representing a complete chromosomal segment.
+ Sequence map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map of DNA (linear or circular) annotated with physical features or landmarks such as restriction sites, cloned DNA fragments, genes or genetic markers, along with the physical distances between them.
+
+
+ Distance in a physical map is measured in base pairs. A physical map might be ordered relative to a reference map (typically a genetic map) in the process of genome sequencing.
+ Physical map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image of a sequence with matches to signatures, motifs or profiles.
+
+
+ Sequence signature map
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map showing banding patterns derived from direct observation of a stained chromosome.
+ Chromosome map
+ Cytogenic map
+ Cytologic map
+
+
+ This is the lowest-resolution physical map and can provide only rough estimates of physical (base pair) distances. Like a genetic map, they are limited to genetic markers of traits observable only in whole organisms.
+ Cytogenetic map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A gene map showing distances between loci based on relative cotransduction frequencies.
+
+
+ DNA transduction map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence map of a single gene annotated with genetic features such as introns, exons, untranslated regions, polyA signals, promoters, enhancers and (possibly) mutations defining alleles of a gene.
+
+
+ Gene map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence map of a plasmid (circular DNA).
+
+
+ Plasmid map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence map of a whole genome.
+
+
+ Genome map
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of the restriction enzyme cleavage sites (restriction sites) in a nucleic acid sequence.
+
+
+ Restriction map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image showing matches between protein sequence(s) and InterPro Entries.
+
+
+ The sequence(s) might be screened against InterPro, or be the sequences from the InterPro entry itself. Each protein is represented as a scaled horizontal line with colored bars indicating the position of the matches.
+ InterPro compact match image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image showing detailed information on matches between protein sequence(s) and InterPro Entries.
+
+
+ The sequence(s) might be screened against InterPro, or be the sequences from the InterPro entry itself.
+ InterPro detailed match image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image showing the architecture of InterPro domains in a protein sequence.
+
+
+ The sequence(s) might be screened against InterPro, or be the sequences from the InterPro entry itself. Domain architecture is shown as a series of non-overlapping domains in the protein.
+ InterPro architecture image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ SMART protein schematic in PNG format.
+
+ SMART protein schematic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Images based on GlobPlot prediction of intrinsic disordered regions and globular domains in protein sequences.
+
+
+ GlobPlot domain image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Report on the location of matches to profiles, motifs (conserved or functional patterns) or other signatures in one or more sequences.
+
+
+ Sequence motif matches
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Location of short repetitive subsequences (repeat sequences) in (typically nucleotide) sequences.
+
+ The report might include derived data map such as classification, annotation, organisation, periodicity etc.
+ Sequence features (repeats)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on predicted or actual gene structure, regions which make an RNA product and features such as promoters, coding regions, splice sites etc.
+
+ Gene and transcript structure (report)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ regions of a nucleic acid sequence containing mobile genetic elements.
+
+
+ Mobile genetic elements
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on quadruplex-forming motifs in a nucleotide sequence.
+
+ Nucleic acid features (quadruplexes)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Report on nucleosome formation potential or exclusion sequence(s).
+
+
+ Nucleosome exclusion sequences
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ A report on exonic splicing enhancers (ESE) in an exon.
+
+
+ Gene features (exonic splicing enhancer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on microRNA sequence (miRNA) or precursor, microRNA targets, miRNA binding sites in an RNA sequence etc.
+
+ Nucleic acid features (microRNA)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ protein-coding regions including coding sequences (CDS), exons, translation initiation sites and open reading frames.
+
+
+ Coding region
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A report on selenocysteine insertion sequence (SECIS) element in a DNA sequence.
+
+ Gene features (SECIS element)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ transcription factor binding sites (TFBS) in a DNA sequence.
+
+
+ Transcription factor binding sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on predicted or known key residue positions (sites) in a protein sequence, such as binding or functional sites.
+
+ Use this concept for collections of specific sites which are not necessarily contiguous, rather than contiguous stretches of amino acids.
+ Protein features (sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ signal peptides or signal peptide cleavage sites in protein sequences.
+
+
+ Protein features report (signal peptides)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ cleavage sites (for a proteolytic enzyme or agent) in a protein sequence.
+
+
+ Protein features report (cleavage sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ post-translation modifications in a protein sequence, typically describing the specific sites involved.
+
+
+ Protein features (post-translation modifications)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ catalytic residues (active site) of an enzyme.
+
+
+ Protein features report (active sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ ligand-binding (non-catalytic) residues of a protein, such as sites that bind metal, prosthetic groups or lipids.
+
+
+ Protein features report (binding sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ A report on antigenic determinant sites (epitopes) in proteins, from sequence and / or structural data.
+
+
+ Epitope mapping is commonly done during vaccine design.
+ Protein features (epitopes)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ RNA and DNA-binding proteins and binding sites in protein sequences.
+
+
+ Protein features report (nucleic acid binding sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on epitopes that bind to MHC class I molecules.
+
+ MHC Class I epitopes report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on predicted epitopes that bind to MHC class II molecules.
+
+ MHC Class II epitopes report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ A report or plot of PEST sites in a protein sequence.
+
+
+ 'PEST' motifs target proteins for proteolytic degradation and reduce the half-lives of proteins dramatically.
+ Protein features (PEST sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Scores from a sequence database search (for example a BLAST search).
+
+ Sequence database hits scores list
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignments from a sequence database search (for example a BLAST search).
+
+ Sequence database hits alignments list
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on the evaluation of the significance of sequence similarity scores from a sequence database search (for example a BLAST search).
+
+ Sequence database hits evaluation data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alphabet for the motifs (patterns) that MEME will search for.
+
+ MEME motif alphabet
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ MEME background frequencies file.
+
+ MEME background frequencies file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ File of directives for ordering and spacing of MEME motifs.
+
+ MEME motifs directive file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dirichlet distribution used by hidden Markov model analysis programs.
+
+
+ Dirichlet distribution
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+
+ Emission and transition counts of a hidden Markov model, generated once HMM has been determined, for example after residues/gaps have been assigned to match, delete and insert states.
+
+ HMM emission and transition counts
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Regular expression pattern.
+
+
+ Regular expression
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any specific or conserved pattern (typically expressed as a regular expression) in a molecular sequence.
+
+
+ Sequence motif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some type of statistical model representing a (typically multiple) sequence alignment.
+
+
+ Sequence profile
+ http://semanticscience.org/resource/SIO_010531
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report about a specific or conserved protein sequence pattern.
+ InterPro entry
+ Protein domain signature
+ Protein family signature
+ Protein region signature
+ Protein repeat signature
+ Protein site signature
+
+
+ Protein signature
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A nucleotide regular expression pattern from the Prosite database.
+
+ Prosite nucleotide pattern
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A protein regular expression pattern from the Prosite database.
+
+ Prosite protein pattern
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (typically representing a sequence alignment) that is a simple matrix of nucleotide (or amino acid) counts per position.
+ PFM
+
+
+ Position frequency matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (typically representing a sequence alignment) that is a weighted matrix of nucleotide (or amino acid) counts per position.
+ PWM
+
+
+ Contributions of individual sequences to the matrix might be uneven (weighted).
+ Position weight matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (typically representing a sequence alignment) derived from a matrix of nucleotide (or amino acid) counts per position that reflects information content at each position.
+ ICM
+
+
+ Information content matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A statistical Markov model of a system which is assumed to be a Markov process with unobserved (hidden) states. For example, a hidden Markov model representation of a set or alignment of sequences.
+ HMM
+
+
+ Hidden Markov model
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more fingerprints (sequence classifiers) as used in the PRINTS database.
+
+
+ Fingerprint
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A protein signature of the type used in the EMBASSY Signature package.
+
+ Domainatrix signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ NULL hidden Markov model representation used by the HMMER package.
+
+ HMMER NULL hidden Markov model
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein family signature (sequence classifier) from the InterPro database.
+
+ Protein family signatures cover all domains in the matching proteins and span >80% of the protein length and with no adjacent protein domain signatures or protein region signatures.
+ Protein family signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein domain signature (sequence classifier) from the InterPro database.
+
+ Protein domain signatures identify structural or functional domains or other units with defined boundaries.
+ Protein domain signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein region signature (sequence classifier) from the InterPro database.
+
+ A protein region signature defines a region which cannot be described as a protein family or domain signature.
+ Protein region signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein repeat signature (sequence classifier) from the InterPro database.
+
+ A protein repeat signature is a repeated protein motif, that is not in single copy expected to independently fold into a globular domain.
+ Protein repeat signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein site signature (sequence classifier) from the InterPro database.
+
+ A protein site signature is a classifier for a specific site in a protein.
+ Protein site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein conserved site signature (sequence classifier) from the InterPro database.
+
+ A protein conserved site signature is any short sequence pattern that may contain one or more unique residues and cannot be described as an active site, binding site or post-translational modification.
+ Protein conserved site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein active site signature (sequence classifier) from the InterPro database.
+
+ A protein active site signature corresponds to an enzyme catalytic pocket. An active site typically includes non-contiguous residues, therefore multiple signatures may be required to describe an active site; these are residues involved in enzymatic reactions for which mutational data is typically available.
+ Protein active site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein binding site signature (sequence classifier) from the InterPro database.
+
+ A protein binding site signature corresponds to a site that reversibly binds chemical compounds, which are not themselves substrates of the enzymatic reaction. This includes enzyme cofactors and residues involved in electron transport or protein structure modification.
+ Protein binding site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein post-translational modification signature (sequence classifier) from the InterPro database.
+
+ A protein post-translational modification signature corresponds to sites that undergo modification of the primary structure, typically to activate or de-activate a function. For example, methylation, sumoylation, glycosylation etc. The modification might be permanent or reversible.
+ Protein post-translational modification signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Alignment of exactly two molecular sequences.
+ Sequence alignment (pair)
+
+
+ Pair sequence alignment
+ http://semanticscience.org/resource/SIO_010068
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of more than two molecular sequences.
+
+ Sequence alignment (multiple)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple nucleotide sequences.
+ Sequence alignment (nucleic acid)
+ DNA sequence alignment
+ RNA sequence alignment
+
+
+ Nucleic acid sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple protein sequences.
+ Sequence alignment (protein)
+
+
+ Protein sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple molecular sequences of different types.
+ Sequence alignment (hybrid)
+
+
+ Hybrid sequence alignments include for example genomic DNA to EST, cDNA or mRNA.
+ Hybrid sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment of exactly two nucleotide sequences.
+
+ Sequence alignment (nucleic acid pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment of exactly two protein sequences.
+
+ Sequence alignment (protein pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of exactly two molecular sequences of different types.
+
+ Hybrid sequence alignment (pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of more than two nucleotide sequences.
+
+ Multiple nucleotide sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of more than two protein sequences.
+
+ Multiple protein sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple floating point number defining the penalty for opening or extending a gap in an alignment.
+
+
+ Alignment score or penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Whether end gaps are scored or not.
+
+ Score end gaps control
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controls the order of sequences in an output sequence alignment.
+
+ Aligned sequence order
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for opening a gap in an alignment.
+
+
+ Gap opening penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for extending a gap in an alignment.
+
+
+ Gap extension penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for gaps that are close together in an alignment.
+
+
+ Gap separation penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ A penalty for gaps at the termini of an alignment, either from the N/C terminal of protein or 5'/3' terminal of nucleotide sequences.
+
+ Terminal gap penalty
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The score for a 'match' used in various sequence database search applications with simple scoring schemes.
+
+
+ Match reward score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The score (penalty) for a 'mismatch' used in various alignment and sequence database search applications with simple scoring schemes.
+
+
+ Mismatch penalty score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ This is the threshold drop in score at which extension of word alignment is halted.
+
+
+ Drop off score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for opening a gap in an alignment.
+
+ Gap opening penalty (integer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for opening a gap in an alignment.
+
+ Gap opening penalty (float)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for extending a gap in an alignment.
+
+ Gap extension penalty (integer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for extending a gap in an alignment.
+
+ Gap extension penalty (float)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for gaps that are close together in an alignment.
+
+ Gap separation penalty (integer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for gaps that are close together in an alignment.
+
+ Gap separation penalty (float)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A number defining the penalty for opening gaps at the termini of an alignment, either from the N/C terminal of protein or 5'/3' terminal of nucleotide sequences.
+
+
+ Terminal gap opening penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A number defining the penalty for extending gaps at the termini of an alignment, either from the N/C terminal of protein or 5'/3' terminal of nucleotide sequences.
+
+
+ Terminal gap extension penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence identity is the number (%) of matches (identical characters) in positions from an alignment of two molecular sequences.
+
+
+ Sequence identity
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence similarity is the similarity (expressed as a percentage) of two molecular sequences calculated from their alignment, a scoring matrix for scoring characters substitutions and penalties for gap insertion and extension.
+
+
+ Data Type is float probably.
+ Sequence similarity
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data on molecular sequence alignment quality (estimated accuracy).
+
+ Sequence alignment metadata (quality report)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on character conservation in a molecular sequence alignment.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation. Use this concept for calculated substitution rates, relative site variability, data on sites with biased properties, highly conserved or very poorly conserved sites, regions, blocks etc.
+ Sequence alignment report (site conservation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on correlations between sites in a molecular sequence alignment, typically to identify possible covarying positions and predict contacts or structural constraints in protein structures.
+
+ Sequence alignment report (site correlation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of molecular sequences to a Domainatrix signature (representing a sequence alignment).
+
+ Sequence-profile alignment (Domainatrix signature)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment of molecular sequence(s) to a hidden Markov model(s).
+
+ Sequence-profile alignment (HMM)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment of molecular sequences to a protein fingerprint from the PRINTS database.
+
+ Sequence-profile alignment (fingerprint)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Continuous quantitative data that may be read during phylogenetic tree calculation.
+ Phylogenetic continuous quantitative characters
+ Quantitative traits
+
+
+ Phylogenetic continuous quantitative data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Character data with discrete states that may be read during phylogenetic tree calculation.
+ Discrete characters
+ Discretely coded characters
+ Phylogenetic discrete states
+
+
+ Phylogenetic discrete data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more cliques of mutually compatible characters that are generated, for example from analysis of discrete character data, and are used to generate a phylogeny.
+ Phylogenetic report (cliques)
+
+
+ Phylogenetic character cliques
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic invariants data for testing alternative tree topologies.
+ Phylogenetic report (invariants)
+
+
+ Phylogenetic invariants
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report of data concerning or derived from a phylogenetic tree, or from comparing two or more phylogenetic trees.
+
+ This is a broad data type and is used for example for reports on confidence, shape or stratigraphic (age) data derived from phylogenetic tree analysis.
+ Phylogenetic report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A model of DNA substitution that explains a DNA sequence alignment, derived from phylogenetic tree analysis.
+ Phylogenetic tree report (DNA substitution model)
+ Sequence alignment report (DNA substitution model)
+ Substitution model
+
+
+ DNA substitution model
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data about the shape of a phylogenetic tree.
+
+ Phylogenetic tree report (tree shape)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on the confidence of a phylogenetic tree.
+
+ Phylogenetic tree report (tree evaluation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Distances, such as Branch Score distance, between two or more phylogenetic trees.
+ Phylogenetic tree report (tree distances)
+
+
+ Phylogenetic tree distances
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Molecular clock and stratigraphic (age) data derived from phylogenetic tree analysis.
+
+ Phylogenetic tree report (tree stratigraphic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Independent contrasts for characters used in a phylogenetic tree, or covariances, regressions and correlations between characters for those contrasts.
+ Phylogenetic report (character contrasts)
+
+
+ Phylogenetic character contrasts
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of integer numbers for sequence comparison.
+
+ Comparison matrix (integers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of floating point numbers for sequence comparison.
+
+ Comparison matrix (floats)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of integer or floating point numbers for nucleotide comparison.
+ Nucleotide comparison matrix
+ Nucleotide substitution matrix
+
+
+ Comparison matrix (nucleotide)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of integer or floating point numbers for amino acid comparison.
+ Amino acid comparison matrix
+ Amino acid substitution matrix
+
+
+ Comparison matrix (amino acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of integer numbers for nucleotide comparison.
+
+ Nucleotide comparison matrix (integers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of floating point numbers for nucleotide comparison.
+
+ Nucleotide comparison matrix (floats)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of integer numbers for amino acid comparison.
+
+ Amino acid comparison matrix (integers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of floating point numbers for amino acid comparison.
+
+ Amino acid comparison matrix (floats)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a nucleic acid tertiary (3D) structure.
+
+
+ Nucleic acid structure
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a protein tertiary (3D) structure, or part of a structure, possibly in complex with other molecules.
+ Protein structures
+
+
+ Protein structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The structure of a protein in complex with a ligand, typically a small molecule such as an enzyme substrate or cofactor, but possibly another macromolecule.
+
+
+ This includes interactions of proteins with atoms, ions and small molecules or macromolecules such as nucleic acids or other polypeptides. For stable inter-polypeptide interactions use 'Protein complex' instead.
+ Protein-ligand complex
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a carbohydrate (3D) structure.
+
+
+ Carbohydrate structure
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the (3D) structure of a small molecule, such as any common chemical compound.
+ CHEBI:23367
+
+
+ Small molecule structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a DNA tertiary (3D) structure.
+
+
+ DNA structure
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for an RNA tertiary (3D) structure.
+
+
+ RNA structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a tRNA tertiary (3D) structure, including tmRNA, snoRNAs etc.
+
+
+ tRNA structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the tertiary (3D) structure of a polypeptide chain.
+
+
+ Protein chain
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the tertiary (3D) structure of a protein domain.
+
+
+ Protein domain
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ 3D coordinate and associated data for a protein tertiary (3D) structure (all atoms).
+
+ Protein structure (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a protein tertiary (3D) structure (typically C-alpha atoms only).
+ Protein structure (C-alpha atoms)
+
+
+ C-beta atoms from amino acid side-chains may be included.
+ C-alpha trace
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a polypeptide chain tertiary (3D) structure (all atoms).
+
+ Protein chain (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a polypeptide chain tertiary (3D) structure (typically C-alpha atoms only).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Protein chain (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a protein domain tertiary (3D) structure (all atoms).
+
+ Protein domain (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a protein domain tertiary (3D) structure (typically C-alpha atoms only).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Protein domain (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of exactly two molecular tertiary (3D) structures.
+ Pair structure alignment
+
+
+ Structure alignment (pair)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two molecular tertiary (3D) structures.
+
+ Structure alignment (multiple)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of protein tertiary (3D) structures.
+ Structure alignment (protein)
+
+
+ Protein structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of nucleic acid tertiary (3D) structures.
+ Structure alignment (nucleic acid)
+
+
+ Nucleic acid structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment (superimposition) of exactly two protein tertiary (3D) structures.
+
+ Structure alignment (protein pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two protein tertiary (3D) structures.
+
+ Multiple protein tertiary structure alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment (superimposition) of protein tertiary (3D) structures (all atoms considered).
+
+ Structure alignment (protein all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment (superimposition) of protein tertiary (3D) structures (typically C-alpha atoms only considered).
+
+ C-beta atoms from amino acid side-chains may be considered.
+ Structure alignment (protein C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of exactly two protein tertiary (3D) structures (all atoms considered).
+
+ Pairwise protein tertiary structure alignment (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of exactly two protein tertiary (3D) structures (typically C-alpha atoms only considered).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Pairwise protein tertiary structure alignment (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two protein tertiary (3D) structures (all atoms considered).
+
+ Multiple protein tertiary structure alignment (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two protein tertiary (3D) structures (typically C-alpha atoms only considered).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Multiple protein tertiary structure alignment (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment (superimposition) of exactly two nucleic acid tertiary (3D) structures.
+
+ Structure alignment (nucleic acid pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two nucleic acid tertiary (3D) structures.
+
+ Multiple nucleic acid tertiary structure alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of RNA tertiary (3D) structures.
+ Structure alignment (RNA)
+
+
+ RNA structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix to transform (rotate/translate) 3D coordinates, typically the transformation necessary to superimpose two molecular structures.
+
+
+ Structural transformation matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ DaliLite hit table of protein chain tertiary structure alignment data.
+
+ The significant and top-scoring hits for regions of the compared structures is shown. Data such as Z-Scores, number of aligned residues, root-mean-square deviation (RMSD) of atoms and sequence identity are given.
+ DaliLite hit table
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A score reflecting structural similarities of two molecules.
+
+ Molecular similarity score
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Root-mean-square deviation (RMSD) is calculated to measure the average distance between superimposed macromolecular coordinates.
+ RMSD
+
+
+ Root-mean-square deviation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A measure of the similarity between two ligand fingerprints.
+
+
+ A ligand fingerprint is derived from ligand structural data from a Protein DataBank file. It reflects the elements or groups present or absent, covalent bonds and bond orders and the bonded environment in terms of SATIS codes and BLEEP atom types.
+ Tanimoto similarity score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A matrix of 3D-1D scores reflecting the probability of amino acids to occur in different tertiary structural environments.
+
+
+ 3D-1D scoring matrix
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of 20 numerical values which quantify a property (e.g. physicochemical or biochemical) of the common amino acids.
+
+
+ Amino acid index
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Chemical classification (small, aliphatic, aromatic, polar, charged etc) of amino acids.
+ Chemical classes (amino acids)
+
+
+ Amino acid index (chemical classes)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Statistical protein contact potentials.
+ Contact potentials (amino acid pair-wise)
+
+
+ Amino acid pair-wise contact potentials
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Molecular weights of amino acids.
+ Molecular weight (amino acids)
+
+
+ Amino acid index (molecular weight)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Hydrophobic, hydrophilic or charge properties of amino acids.
+ Hydropathy (amino acids)
+
+
+ Amino acid index (hydropathy)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Experimental free energy values for the water-interface and water-octanol transitions for the amino acids.
+ White-Wimley data (amino acids)
+
+
+ Amino acid index (White-Wimley data)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Van der Waals radii of atoms for different amino acid residues.
+ van der Waals radii (amino acids)
+
+
+ Amino acid index (van der Waals radii)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on a specific enzyme.
+
+ Enzyme report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on a specific restriction enzyme such as enzyme reference data.
+
+ This might include name of enzyme, organism, isoschizomers, methylation, source, suppliers, literature references, or data on restriction enzyme patterns such as name of enzyme, recognition site, length of pattern, number of cuts made by enzyme, details of blunt or sticky end cut etc.
+ Restriction enzyme report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ List of molecular weight(s) of one or more proteins or peptides, for example cut by proteolytic enzymes or reagents.
+
+
+ The report might include associated data such as frequency of peptide fragment molecular weights.
+ Peptide molecular weights
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on the hydrophobic moment of a polypeptide sequence.
+
+
+ Hydrophobic moment is a peptide's hydrophobicity measured for different angles of rotation.
+ Peptide hydrophobic moment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The aliphatic index of a protein.
+
+
+ The aliphatic index is the relative protein volume occupied by aliphatic side chains.
+ Protein aliphatic index
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A protein sequence with annotation on hydrophobic or hydrophilic / charged regions, hydrophobicity plot etc.
+
+
+ Hydrophobic moment is a peptide's hydrophobicity measured for different angles of rotation.
+ Protein sequence hydropathy plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of the mean charge of the amino acids within a window of specified length as the window is moved along a protein sequence.
+
+
+ Protein charge plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The solubility or atomic solvation energy of a protein sequence or structure.
+ Protein solubility data
+
+
+ Protein solubility
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the crystallizability of a protein sequence.
+ Protein crystallizability data
+
+
+ Protein crystallizability
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the stability, intrinsic disorder or globularity of a protein sequence.
+ Protein globularity data
+
+
+ Protein globularity
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The titration curve of a protein.
+
+
+ Protein titration curve
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The isoelectric point of a protein.
+
+
+ Protein isoelectric point
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The pKa value of a protein.
+
+
+ Protein pKa value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The hydrogen exchange rate of a protein.
+
+
+ Protein hydrogen exchange rate
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The extinction coefficient of a protein.
+
+
+ Protein extinction coefficient
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The optical density of a protein.
+
+
+ Protein optical density
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ An informative report on protein subcellular localisation (nuclear, cytoplasmic, mitochondrial, chloroplast, plastid, membrane etc) or destination (exported / extracellular proteins).
+
+ Protein subcellular localisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on allergenicity / immunogenicity of peptides and proteins.
+ Peptide immunogenicity
+ Peptide immunogenicity report
+
+
+ This includes data on peptide ligands that elicit an immune response (immunogens), allergic cross-reactivity, predicted antigenicity (Hopp and Woods plot) etc. These data are useful in the development of peptide-specific antibodies or multi-epitope vaccines. Methods might use sequence data (for example motifs) and / or structural data.
+ Peptide immunogenicity data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A report on the immunogenicity of MHC class I or class II binding peptides.
+
+ MHC peptide immunogenicity report
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific protein 3D structure(s) or structural domains.
+ Protein property (structural)
+ Protein report (structure)
+ Protein structural property
+ Protein structure report (domain)
+ Protein structure-derived report
+
+
+ Protein structure report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on the quality of a protein three-dimensional model.
+ Protein property (structural quality)
+ Protein report (structural quality)
+ Protein structure report (quality evaluation)
+ Protein structure validation report
+
+
+ Model validation might involve checks for atomic packing, steric clashes, agreement with electron density maps etc.
+ Protein structural quality report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Data on inter-atomic or inter-residue contacts, distances and interactions in protein structure(s) or on the interactions of protein atoms or residues with non-protein groups.
+
+
+ Protein non-covalent interactions report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Informative report on flexibility or motion of a protein structure.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein flexibility or motion report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the solvent accessible or buried surface area of a protein structure.
+
+
+ This concept covers definitions of the protein surface, interior and interfaces, accessible and buried residues, surface accessible pockets, interior inaccessible cavities etc.
+ Protein solvent accessibility
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on the surface properties (shape, hydropathy, electrostatic patches etc) of a protein structure.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein surface report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phi/psi angle data or a Ramachandran plot of a protein structure.
+
+
+ Ramachandran plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the net charge distribution (dipole moment) of a protein structure.
+
+
+ Protein dipole moment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A matrix of distances between amino acid residues (for example the C-alpha atoms) in a protein structure.
+
+
+ Protein distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An amino acid residue contact map for a protein structure.
+
+
+ Protein contact map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on clusters of contacting residues in protein structures such as a key structural residue network.
+
+
+ Protein residue 3D cluster
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Patterns of hydrogen bonding in protein structures.
+
+
+ Protein hydrogen bonds
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Non-canonical atomic interactions in protein structures.
+
+ Protein non-canonical interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a node from the CATH database.
+
+ The report (for example http://www.cathdb.info/cathnode/1.10.10.10) includes CATH code (of the node and upper levels in the hierarchy), classification text (of appropriate levels in hierarchy), list of child nodes, representative domain and other relevant data and links.
+ CATH node
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a node from the SCOP database.
+
+ SCOP node
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ An EMBASSY domain classification file (DCF) of classification and other data for domains from SCOP or CATH, in EMBL-like format.
+
+
+ EMBASSY domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'class' node from the CATH database.
+
+ CATH class
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'architecture' node from the CATH database.
+
+ CATH architecture
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'topology' node from the CATH database.
+
+ CATH topology
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'homologous superfamily' node from the CATH database.
+
+ CATH homologous superfamily
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'structurally similar group' node from the CATH database.
+
+ CATH structurally similar group
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'functional category' node from the CATH database.
+
+ CATH functional category
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on known protein structural domains or folds that are recognised (identified) in protein sequence(s).
+
+ Methods use some type of mapping between sequence and fold, for example secondary structure prediction and alignment, profile comparison, sequence properties, homologous sequence search, kernel machines etc. Domains and folds might be taken from SCOP or CATH.
+ Protein fold recognition report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ protein-protein interaction(s), including interactions between protein domains.
+
+
+ Protein-protein interaction report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report on protein-ligand (small molecule) interaction(s).
+ Protein-drug interaction report
+
+
+ Protein-ligand interaction report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ protein-DNA/RNA interaction(s).
+
+
+ Protein-nucleic acid interactions report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nucleic acid melting curve: a melting curve of a double-stranded nucleic acid molecule (DNA or DNA/RNA). Shows the proportion of nucleic acid which are double-stranded versus temperature.
+ Nucleic acid probability profile: a probability profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA). Shows the probability of a base pair not being melted (i.e. remaining as double-stranded DNA) at a specified temperature
+ Nucleic acid stitch profile: stitch profile of hybridised or double stranded nucleic acid (DNA or RNA/DNA). A stitch profile diagram shows partly melted DNA conformations (with probabilities) at a range of temperatures. For example, a stitch profile might show possible loop openings with their location, size, probability and fluctuations at a given temperature.
+ Nucleic acid temperature profile: a temperature profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA). Plots melting temperature versus base position.
+ Data on the dissociation characteristics of a double-stranded nucleic acid molecule (DNA or a DNA/RNA hybrid) during heating.
+ Nucleic acid stability profile
+ Melting map
+ Nucleic acid melting curve
+
+
+ A melting (stability) profile calculated from the free energy required to unwind and separate the nucleic acid strands, plotted for sliding windows over a sequence.
+ Nucleic acid melting profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Enthalpy of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+
+ Nucleic acid enthalpy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entropy of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+
+ Nucleic acid entropy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Melting temperature of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+ Nucleic acid melting temperature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Stitch profile of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+
+ Nucleic acid stitch profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA base pair stacking energies data.
+
+
+ DNA base pair stacking energies data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA base pair twist angle data.
+
+
+ DNA base pair twist angle data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA base trimer roll angles data.
+
+
+ DNA base trimer roll angles data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ RNA parameters used by the Vienna package.
+
+ Vienna RNA parameters
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Structure constraints used by the Vienna package.
+
+ Vienna RNA structure constraints
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ RNA concentration data used by the Vienna package.
+
+ Vienna RNA concentration data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ RNA calculated energy data generated by the Vienna package.
+
+ Vienna RNA calculated energy
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dotplot of RNA base pairing probability matrix.
+
+
+ Such as generated by the Vienna package.
+ Base pairing probability matrix dotplot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about RNA/DNA folding, minimum folding energies for DNA or RNA sequences, energy landscape of RNA mutants etc.
+ Nucleic acid report (folding model)
+ Nucleic acid report (folding)
+ RNA secondary structure folding classification
+ RNA secondary structure folding probabilities
+
+
+ Nucleic acid folding report
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Table of codon usage data calculated from one or more nucleic acid sequences.
+
+
+ A codon usage table might include the codon usage table name, optional comments and a table with columns for codons and corresponding codon usage data. A genetic code can be extracted from or represented by a codon usage table.
+ Codon usage table
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A genetic code for an organism.
+
+
+ A genetic code need not include detailed codon usage information.
+ Genetic code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple measure of synonymous codon usage bias often used to predict gene expression levels.
+
+ Codon adaptation index
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of the synonymous codon usage calculated for windows over a nucleotide sequence.
+ Synonymous codon usage statistic plot
+
+
+ Codon usage bias plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The effective number of codons used in a gene sequence. This reflects how far codon usage of a gene departs from equal usage of synonymous codons.
+
+ Nc statistic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The differences in codon usage fractions between two codon usage tables.
+
+
+ Codon usage fraction difference
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about the influence of genotype on drug response.
+
+
+ The report might correlate gene expression or single-nucleotide polymorphisms with drug efficacy or toxicity.
+ Pharmacogenomic test report
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific disease.
+
+
+ For example, an informative report on a specific tumor including nature and origin of the sample, anatomic site, organ or tissue, tumor type, including morphology and/or histologic type, and so on.
+ Disease report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report on linkage disequilibrium; the non-random association of alleles or polymorphisms at two or more loci (not necessarily on the same chromosome).
+
+
+ Linkage disequilibrium (report)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A graphical 2D tabular representation of expression data, typically derived from an omics experiment. A heat map is a table where rows and columns correspond to different features and contexts (for example, cells or samples) and the cell colour represents the level of expression of a gene in that context.
+
+
+ Heat map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Affymetrix library file of information about which probes belong to which probe set.
+
+ Affymetrix probe sets library file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Affymetrix library file of information about the probe sets such as the gene name with which the probe set is associated.
+ GIN file
+
+ Affymetrix probe sets information library file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Standard protonated molecular masses from trypsin (modified porcine trypsin, Promega) and keratin peptides, used in EMBOSS.
+
+
+ Molecular weights standard fingerprint
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report typically including a map (diagram) of a metabolic pathway.
+
+
+ This includes carbohydrate, energy, lipid, nucleotide, amino acid, glycan, PK/NRP, cofactor/vitamin, secondary metabolite, xenobiotics etc.
+ Metabolic pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ genetic information processing pathways.
+
+
+ Genetic information processing pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ environmental information processing pathways.
+
+
+ Environmental information processing pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report typically including a map (diagram) of a signal transduction pathway.
+
+
+ Signal transduction pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Topic concerning cellular process pathways.
+
+
+ Cellular process pathways report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ disease pathways, typically of human disease.
+
+
+ Disease pathway or network report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ A report typically including a map (diagram) of drug structure relationships.
+
+
+ Drug structure relationship map
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ networks of protein interactions.
+
+ Protein interaction networks
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An entry (data type) from the Minimal Information Requested in the Annotation of Biochemical Models (MIRIAM) database of data resources.
+
+ A MIRIAM entry describes a MIRIAM data type including the official name, synonyms, root URI, identifier pattern (regular expression applied to a unique identifier of the data type) and documentation. Each data type can be associated with several resources. Each resource is a physical location of a service (typically a database) providing information on the elements of a data type. Several resources may exist for each data type, provided the same (mirrors) or different information. MIRIAM provides a stable and persistent reference to its data types.
+ MIRIAM datatype
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple floating point number defining the lower or upper limit of an expectation value (E-value).
+ Expectation value
+
+
+ An expectation value (E-Value) is the expected number of observations which are at least as extreme as observations expected to occur by random chance. The E-value describes the number of hits with a given score or better that are expected to occur at random when searching a database of a particular size. It decreases exponentially with the score (S) of a hit. A low E value indicates a more significant score.
+ E-value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The z-value is the number of standard deviations a data value is above or below a mean value.
+
+
+ A z-value might be specified as a threshold for reporting hits from database searches.
+ Z-value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The P-value is the probability of obtaining by random chance a result that is at least as extreme as an observed result, assuming a NULL hypothesis is true.
+
+
+ A P-value might be specified as a threshold for reporting hits from database searches.
+ P-value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a database (or ontology) version, for example name, version number and release date.
+
+ Database version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on an application version, for example name, version number and release date.
+
+ Tool version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Information on a version of the CATH database.
+
+ CATH version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Cross-mapping of Swiss-Prot codes to PDB identifiers.
+
+ Swiss-Prot to PDB mapping
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Cross-references from a sequence record to other databases.
+
+ Sequence database cross-references
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Metadata on the status of a submitted job.
+
+ Values for EBI services are 'DONE' (job has finished and the results can then be retrieved), 'ERROR' (the job failed or no results were found), 'NOT_FOUND' (the job id is no longer available; job results might be deleted), 'PENDING' (the job is in a queue waiting processing), 'RUNNING' (the job is currently being processed).
+ Job status
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ The (typically numeric) unique identifier of a submitted job.
+
+ Job ID
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of job, for example interactive or non-interactive.
+
+ Job type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report of tool-specific metadata on some analysis or process performed, for example a log of diagnostic or error messages.
+
+ Tool log
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ DaliLite log file describing all the steps taken by a DaliLite alignment of two protein structures.
+
+ DaliLite log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ STRIDE log file.
+
+ STRIDE log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ NACCESS log file.
+
+ NACCESS log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS wordfinder log file.
+
+ EMBOSS wordfinder log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS (EMBASSY) domainatrix application log file.
+
+ EMBOSS domainatrix log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS (EMBASSY) sites application log file.
+
+ EMBOSS sites log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS (EMBASSY) supermatcher error file.
+
+ EMBOSS supermatcher error file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS megamerger log file.
+
+ EMBOSS megamerger log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS whichdb log file.
+
+ EMBOSS whichdb log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS vectorstrip log file.
+
+ EMBOSS vectorstrip log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A username on a computer system or a website.
+
+
+
+ Username
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A password on a computer system, or a website.
+
+
+
+ Password
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Email
+ Moby:EmailAddress
+ A valid email address of an end-user.
+
+
+
+ Email address
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a person.
+
+
+
+ Person name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Number of iterations of an algorithm.
+
+ Number of iterations
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Number of entities (for example database hits, sequences, alignments etc) to write to an output file.
+
+ Number of output entities
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controls the order of hits (reported matches) in an output file from a database search.
+
+ Hit sort order
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A drug structure relationship map is a report (typically a map diagram) of drug structure relationships.
+ A human-readable collection of information about a specific drug.
+ Drug annotation
+ Drug structure relationship map
+
+
+ Drug report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image (for viewing or printing) of a phylogenetic tree including (typically) a plot of rooted or unrooted phylogenies, cladograms, circular trees or phenograms and associated information.
+
+
+ See also 'Phylogenetic tree'
+ Phylogenetic tree image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of RNA secondary structure, knots, pseudoknots etc.
+
+
+ RNA secondary structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of protein secondary structure.
+
+
+ Protein secondary structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of one or more molecular tertiary (3D) structures.
+
+
+ Structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of two or more aligned molecular sequences possibly annotated with alignment features.
+
+
+ Sequence alignment image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of the structure of a small chemical compound.
+ Small molecule structure image
+ Chemical structure sketch
+ Small molecule sketch
+
+
+ The molecular identifier and formula are typically included.
+ Chemical structure image
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A fate map is a plan of early stage of an embryo such as a blastula, showing areas that are of significance to development.
+
+
+ Fate map
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of spots from a microarray experiment.
+
+
+ Microarray spots image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the BioPax ontology.
+
+ BioPax term
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition from The Gene Ontology (GO).
+
+ GO
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the MeSH vocabulary.
+
+ MeSH
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the HGNC controlled vocabulary.
+
+ HGNC
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the NCBI taxonomy vocabulary.
+
+ NCBI taxonomy vocabulary
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the Plant Ontology (PO).
+
+ Plant ontology term
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the UMLS vocabulary.
+
+ UMLS
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from Foundational Model of Anatomy.
+
+ Classifies anatomical entities according to their shared characteristics (genus) and distinguishing characteristics (differentia). Specifies the part-whole and spatial relationships of the entities, morphological transformation of the entities during prenatal development and the postnatal life cycle and principles, rules and definitions according to which classes and relationships in the other three components of FMA are represented.
+ FMA
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the EMAP mouse ontology.
+
+ EMAP
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the ChEBI ontology.
+
+ ChEBI
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the MGED ontology.
+
+ MGED
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the myGrid ontology.
+
+ The ontology is provided as two components, the service ontology and the domain ontology. The domain ontology provides concepts for core bioinformatics data types and their relations. The service ontology describes the physical and operational features of web services.
+ myGrid
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition for a biological process from the Gene Ontology (GO).
+
+ Data Type is an enumerated string.
+ GO (biological process)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition for a molecular function from the Gene Ontology (GO).
+
+ Data Type is an enumerated string.
+ GO (molecular function)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition for a cellular component from the Gene Ontology (GO).
+
+ Data Type is an enumerated string.
+ GO (cellular component)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A relation type defined in an ontology.
+
+ Ontology relation type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The definition of a concept from an ontology.
+ Ontology class definition
+
+
+ Ontology concept definition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A comment on a concept from an ontology.
+
+ Ontology concept comment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Reference for a concept from an ontology.
+
+ Ontology concept reference
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Information on a published article provided by the doc2loc program.
+
+ The doc2loc output includes the url, format, type and availability code of a document for every service provider.
+ doc2loc document information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:PDB_residue_no
+ WHATIF: pdb_number
+ A residue identifier (a string) from a PDB file.
+
+
+ PDB residue number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Cartesian coordinate of an atom (in a molecular structure).
+ Cartesian coordinate
+
+
+ Atomic coordinate
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Cartesian x coordinate of an atom (in a molecular structure).
+
+
+ Atomic x coordinate
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Cartesian y coordinate of an atom (in a molecular structure).
+
+
+ Atomic y coordinate
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Cartesian z coordinate of an atom (in a molecular structure).
+
+
+ Atomic z coordinate
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_atom_name
+ WHATIF: PDBx_auth_atom_id
+ WHATIF: PDBx_type_symbol
+ WHATIF: alternate_atom
+ WHATIF: atom_type
+ Identifier (a string) of a specific atom from a PDB file for a molecular structure.
+
+
+
+ PDB atom name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on a single atom from a protein structure.
+ Atom data
+ CHEBI:33250
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein atom
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on a single amino acid residue position in a protein structure.
+ Residue
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein residue
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an atom.
+
+
+
+ Atom name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: type
+ Three-letter amino acid residue names as used in PDB files.
+
+
+
+ PDB residue name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_model_num
+ WHATIF: model_number
+ Identifier of a model structure from a PDB file.
+ Model number
+
+
+
+ PDB model number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Summary of domain classification information for a CATH domain.
+
+ The report (for example http://www.cathdb.info/domain/1cukA01) includes CATH codes for levels in the hierarchy for the domain, level descriptions and relevant data and links.
+ CATH domain report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database (based on ATOM records in PDB) for CATH domains (clustered at different levels of sequence identity).
+
+ CATH representative domain sequences (ATOM)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database (based on COMBS sequence data) for CATH domains (clustered at different levels of sequence identity).
+
+ CATH representative domain sequences (COMBS)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database for all CATH domains (based on PDB ATOM records).
+
+ CATH domain sequences (ATOM)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database for all CATH domains (based on COMBS sequence data).
+
+ CATH domain sequences (COMBS)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Information on a molecular sequence version.
+ Sequence version information
+
+
+ Sequence version
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A numerical value, that is some type of scored value arising for example from a prediction method.
+
+
+ Score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Report on general functional properties of specific protein(s).
+
+ For properties that can be mapped to a sequence, use 'Sequence report' instead.
+ Protein report (function)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Aspergillus Genome Database.
+
+ Gene name (ASPGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Candida Genome Database.
+
+ Gene name (CGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from dictyBase database.
+
+ Gene name (dictyBase)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Primary name of a gene from EcoGene Database.
+
+ Gene name (EcoGene primary)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from MaizeGDB (maize genes) database.
+
+ Gene name (MaizeGDB)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Saccharomyces Genome Database.
+
+ Gene name (SGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Tetrahymena Genome Database.
+
+ Gene name (TGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene from E.coli Genetic Stock Center.
+
+ Gene name (CGSC)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene approved by the HUGO Gene Nomenclature Committee.
+
+ Gene name (HGNC)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene from the Mouse Genome Database.
+
+ Gene name (MGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene from Bacillus subtilis Genome Sequence Project.
+
+ Gene name (Bacillus subtilis)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: ApiDB_PlasmoDB
+ Identifier of a gene from PlasmoDB Plasmodium Genome Resource.
+
+
+
+ Gene ID (PlasmoDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a gene from EcoGene Database.
+ EcoGene Accession
+ EcoGene ID
+
+
+
+ Gene ID (EcoGene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: FB
+ http://www.geneontology.org/doc/GO.xrf_abbs: FlyBase
+ Gene identifier from FlyBase database.
+
+
+
+ Gene ID (FlyBase)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Gene identifier from Glossina morsitans GeneDB database.
+
+ Gene ID (GeneDB Glossina morsitans)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Gene identifier from Leishmania major GeneDB database.
+
+ Gene ID (GeneDB Leishmania major)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ http://www.geneontology.org/doc/GO.xrf_abbs: GeneDB_Pfalciparum
+ Gene identifier from Plasmodium falciparum GeneDB database.
+
+ Gene ID (GeneDB Plasmodium falciparum)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ http://www.geneontology.org/doc/GO.xrf_abbs: GeneDB_Spombe
+ Gene identifier from Schizosaccharomyces pombe GeneDB database.
+
+ Gene ID (GeneDB Schizosaccharomyces pombe)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ http://www.geneontology.org/doc/GO.xrf_abbs: GeneDB_Tbrucei
+ Gene identifier from Trypanosoma brucei GeneDB database.
+
+ Gene ID (GeneDB Trypanosoma brucei)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: GR_GENE
+ http://www.geneontology.org/doc/GO.xrf_abbs: GR_gene
+ Gene identifier from Gramene database.
+
+
+
+ Gene ID (Gramene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: PAMGO_VMD
+ http://www.geneontology.org/doc/GO.xrf_abbs: VMD
+ Gene identifier from Virginia Bioinformatics Institute microbial database.
+
+
+
+ Gene ID (Virginia microbial)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: SGN
+ Gene identifier from Sol Genomics Network.
+
+
+
+ Gene ID (SGN)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WBGene[0-9]{8}
+ http://www.geneontology.org/doc/GO.xrf_abbs: WB
+ http://www.geneontology.org/doc/GO.xrf_abbs: WormBase
+ Gene identifier used by WormBase database.
+
+
+
+ Gene ID (WormBase)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Any name (other than the recommended one) for a gene.
+
+ Gene synonym
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of an open reading frame attributed by a sequencing project.
+
+
+
+ ORF name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A component of a larger sequence assembly.
+
+ Sequence assembly component
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on a chromosome aberration such as abnormalities in chromosome structure.
+
+ Chromosome annotation (aberration)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a clone (cloned molecular sequence) from a database.
+
+
+
+ Clone ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_ins_code
+ WHATIF: insertion_code
+ An insertion code (part of the residue number) for an amino acid residue from a PDB file.
+
+
+ PDB insertion code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: PDBx_occupancy
+ The fraction of an atom type present at a site in a molecular structure.
+
+
+ The sum of the occupancies of all the atom types at a site should not normally significantly exceed 1.0.
+ Atomic occupancy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: PDBx_B_iso_or_equiv
+ Isotropic B factor (atomic displacement parameter) for an atom from a PDB file.
+
+
+ Isotropic B factor
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A cytogenetic map showing chromosome banding patterns in mutant cell lines relative to the wild type.
+ Deletion-based cytogenetic map
+
+
+ A cytogenetic map is built from a set of mutant cell lines with sub-chromosomal deletions and a reference wild-type line ('genome deletion panel'). The panel is used to map markers onto the genome by comparing mutant to wild-type banding patterns. Markers are linked (occur in the same deleted region) if they share the same banding pattern (presence or absence) as the deletion panel.
+ Deletion map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A genetic map which shows the approximate location of quantitative trait loci (QTL) between two or more markers.
+ Quantitative trait locus map
+
+
+ QTL map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Haplotyping_Study_obj
+ A map of haplotypes in a genome or other sequence, describing common patterns of genetic variation.
+
+
+ Haplotype map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Data describing a set of multiple genetic or physical maps, typically sharing a common set of features which are mapped.
+
+
+ Map set data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+
+ A feature which may be mapped (positioned) on a genetic or other type of map.
+
+ Mappable features may be based on Gramene's notion of map features; see http://www.gramene.org/db/cmap/feature_type_info.
+ Map feature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A designation of the type of map (genetic map, physical map, sequence map etc) or map set.
+
+ Map types may be based on Gramene's notion of a map type; see http://www.gramene.org/db/cmap/map_type_info.
+ Map type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a protein fold.
+
+
+
+ Protein fold name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:BriefTaxonConcept
+ Moby:PotentialTaxon
+ The name of a group of organisms belonging to the same taxonomic rank.
+ Taxonomic rank
+ Taxonomy rank
+
+
+
+ For a complete list of taxonomic ranks see https://www.phenoscape.org/wiki/Taxonomic_Rank_Vocabulary.
+ Taxon
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique identifier of a (group of) organisms.
+
+
+
+ Organism identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a genus of organism.
+
+
+
+ Genus name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GCP_Taxon
+ Moby:TaxonName
+ Moby:TaxonScientificName
+ Moby:TaxonTCS
+ Moby:iANT_organism-xml
+ The full name for a group of organisms, reflecting their biological classification and (usually) conforming to a standard nomenclature.
+ Taxonomic information
+ Taxonomic name
+
+
+
+ Name components correspond to levels in a taxonomic hierarchy (e.g. 'Genus', 'Species', etc.) Meta information such as a reference where the name was defined and a date might be included.
+ Taxonomic classification
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:iHOPorganism
+ A unique identifier for an organism used in the iHOP database.
+
+
+
+ iHOP organism ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Common name for an organism as used in the GenBank database.
+
+
+
+ Genbank common name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a taxon from the NCBI taxonomy database.
+
+
+
+ NCBI taxon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An alternative for a word.
+
+ Synonym
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A common misspelling of a word.
+
+ Misspelling
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An abbreviation of a phrase or word.
+
+ Acronym
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term which is likely to be misleading of its meaning.
+
+ Misnomer
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Author
+ Information on the authors of a published work.
+
+
+
+ Author ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier representing an author in the DragonDB database.
+
+
+
+ DragonDB author identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:DescribedLink
+ A URI along with annotation describing the data found at the address.
+
+
+ Annotated URI
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A controlled vocabulary for words and phrases that can appear in the keywords field (KW line) of entries from the UniProt database.
+
+ UniProt keywords
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:GENEFARM_GeneID
+ Identifier of a gene from the GeneFarm database.
+
+
+
+ Gene ID (GeneFarm)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:Blattner_number
+ The Blattner identifier for a gene.
+
+
+
+ Blattner number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Moby_namespace:MIPS_GE_Maize
+ Identifier for genetic elements in MIPS Maize database.
+
+ Gene ID (MIPS Maize)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Moby_namespace:MIPS_GE_Medicago
+ Identifier for genetic elements in MIPS Medicago database.
+
+ Gene ID (MIPS Medicago)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The name of an Antirrhinum Gene from the DragonDB database.
+
+ Gene name (DragonDB)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ A unique identifier for an Arabidopsis gene, which is an acronym or abbreviation of the gene name.
+
+ Gene name (Arabidopsis)
+ true
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:iHOPsymbol
+ A unique identifier of a protein or gene used in the iHOP database.
+
+
+
+ iHOP symbol
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from the GeneFarm database.
+
+ Gene name (GeneFarm)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique name or other identifier of a genetic locus, typically conforming to a scheme that names loci (such as predicted genes) depending on their position in a molecular sequence, for example a completely sequenced genome or chromosome.
+ Locus identifier
+ Locus name
+
+
+
+ Locus ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AT[1-5]G[0-9]{5}
+ http://www.geneontology.org/doc/GO.xrf_abbs:AGI_LocusCode
+ Locus identifier for Arabidopsis Genome Initiative (TAIR, TIGR and MIPS databases).
+ AGI ID
+ AGI identifier
+ AGI locus code
+ Arabidopsis gene loci number
+
+
+
+ Locus ID (AGI)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: ASPGD
+ http://www.geneontology.org/doc/GO.xrf_abbs: ASPGDID
+ Identifier for loci from ASPGD (Aspergillus Genome Database).
+
+
+
+ Locus ID (ASPGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: Broad_MGG
+ Identifier for loci from Magnaporthe grisea Database at the Broad Institute.
+
+
+
+ Locus ID (MGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: CGD
+ http://www.geneontology.org/doc/GO.xrf_abbs: CGDID
+ Identifier for loci from CGD (Candida Genome Database).
+ CGD locus identifier
+ CGDID
+
+
+
+ Locus ID (CGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: JCVI_CMR
+ http://www.geneontology.org/doc/GO.xrf_abbs: TIGR_CMR
+ Locus identifier for Comprehensive Microbial Resource at the J. Craig Venter Institute.
+
+
+
+ Locus ID (CMR)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:LocusID
+ http://www.geneontology.org/doc/GO.xrf_abbs: NCBI_locus_tag
+ Identifier for loci from NCBI database.
+ Locus ID (NCBI)
+
+
+
+ NCBI locus tag
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: SGD
+ http://www.geneontology.org/doc/GO.xrf_abbs: SGDID
+ Identifier for loci from SGD (Saccharomyces Genome Database).
+ SGDID
+
+
+
+ Locus ID (SGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:MMP_Locus
+ Identifier of loci from Maize Mapping Project.
+
+
+
+ Locus ID (MMP)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:DDB_gene
+ Identifier of locus from DictyBase (Dictyostelium discoideum).
+
+
+
+ Locus ID (DictyBase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:EntrezGene_EntrezGeneID
+ Moby_namespace:EntrezGene_ID
+ Identifier of a locus from EntrezGene database.
+
+
+
+ Locus ID (EntrezGene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:MaizeGDB_Locus
+ Identifier of locus from MaizeGDB (Maize genome database).
+
+
+
+ Locus ID (MaizeGDB)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Moby:SO_QTL
+ A stretch of DNA that is closely linked to the genes underlying a quantitative trait (a phenotype that varies in degree and depends upon the interactions between multiple genes and their environment).
+
+ A QTL sometimes but does not necessarily correspond to a gene.
+ Quantitative trait locus
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:GeneId
+ Identifier of a gene from the KOME database.
+
+
+
+ Gene ID (KOME)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Tropgene_locus
+ Identifier of a locus from the Tropgene database.
+
+
+
+ Locus ID (Tropgene)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An alignment of molecular sequences, structures or profiles derived from them.
+
+
+ Alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data for an atom (in a molecular structure).
+ General atomic property
+
+
+ Atomic property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:SP_KW
+ http://www.geneontology.org/doc/GO.xrf_abbs: SP_KW
+ A word or phrase that can appear in the keywords field (KW line) of entries from the UniProt database.
+
+
+ UniProt keyword
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A name for a genetic locus conforming to a scheme that names loci (such as predicted genes) depending on their position in a molecular sequence, for example a completely sequenced genome or chromosome.
+
+ Ordered locus name
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GCP_MapInterval
+ Moby:GCP_MapPoint
+ Moby:GCP_MapPosition
+ Moby:GenePosition
+ Moby:HitPosition
+ Moby:Locus
+ Moby:MapPosition
+ Moby:Position
+ PDBML:_atom_site.id
+ A position in a map (for example a genetic map), either a single position (point) or a region / interval.
+ Locus
+ Map position
+
+
+ This includes positions in genomes based on a reference sequence. A position may be specified for any mappable object, i.e. anything that may have positional information such as a physical position in a chromosome. Data might include sequence region name, strand, coordinate system name, assembly name, start position and end position.
+ Sequence coordinates
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the intrinsic physical (e.g. structural) or chemical properties of one, more or all amino acids.
+ Amino acid data
+
+
+ Amino acid property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A human-readable collection of information which (typically) is generated or collated by hand and which describes a biological entity, phenomena or associated primary (e.g. sequence or structural) data, as distinct from the primary data itself and computer-generated reports derived from it.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Annotation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data describing a molecular map (genetic or physical) or a set of such maps, including various attributes of, data extracted from or derived from the analysis of them, but excluding the map(s) themselves. This includes metadata for map sets that share a common set of features which are mapped.
+ Map attribute
+ Map set data
+
+
+ Map data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data used by the Vienna RNA analysis package.
+
+ Vienna RNA structural data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Data used to replace (mask) characters in a molecular sequence.
+
+ Sequence mask parameter
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning chemical reaction(s) catalysed by enzyme(s).
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Enzyme kinetics data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot giving an approximation of the kinetics of an enzyme-catalysed reaction, assuming simple kinetics (i.e. no intermediate or product inhibition, allostericity or cooperativity). It plots initial reaction rate to the substrate concentration (S) from which the maximum rate (vmax) is apparent.
+
+
+ Michaelis Menten plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot based on the Michaelis Menten equation of enzyme kinetics plotting the ratio of the initial substrate concentration (S) against the reaction velocity (v).
+
+
+ Hanes Woolf plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Raw data from or annotation on laboratory experiments.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a genome version.
+
+ Genome version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Typically a human-readable summary of a body of facts or information indicating why a statement is true or valid. This may include a computational prediction, laboratory experiment, literature reference etc.
+
+
+ Evidence
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A molecular sequence and minimal metadata, typically an identifier of the sequence and/or a comment.
+
+
+ Sequence record lite
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more molecular sequences, possibly with associated annotation.
+ Sequences
+
+
+ This concept is a placeholder of concepts for primary sequence data including raw sequences and sequence records. It should not normally be used for derivatives such as sequence alignments, motifs or profiles.
+ Sequence
+ http://purl.bioontology.org/ontology/MSH/D008969
+ http://purl.org/biotop/biotop.owl#BioMolecularSequenceInformation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A nucleic acid sequence and minimal metadata, typically an identifier of the sequence and/or a comment.
+
+
+ Nucleic acid sequence record (lite)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A protein sequence and minimal metadata, typically an identifier of the sequence and/or a comment.
+
+
+ Protein sequence record (lite)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information including annotation on a biological entity or phenomena, computer-generated reports of analysis of primary data (e.g. sequence or structural), and metadata (data about primary data) or any other free (essentially unformatted) text, as distinct from the primary data itself.
+ Document
+ Record
+
+
+ You can use this term by default for any textual report, in case you can't find another, more specific term. Reports may be generated automatically or collated by hand and can include metadata on the origin, source, history, ownership or location of some thing.
+ Report
+ http://semanticscience.org/resource/SIO_000148
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ General data for a molecule.
+ General molecular property
+
+
+ Molecular property (general)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning molecular structural data.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Structural data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A nucleotide sequence motif.
+ Nucleic acid sequence motif
+ DNA sequence motif
+ RNA sequence motif
+
+
+ Sequence motif (nucleic acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An amino acid sequence motif.
+ Protein sequence motif
+
+
+ Sequence motif (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Some simple value controlling a search operation, typically a search of a database.
+
+ Search parameter
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of hits from searching a database of some type.
+ Database hits
+ Search results
+
+
+ Database search results
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ The secondary structure assignment (predicted or real) of a nucleic acid or protein.
+
+ Secondary structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An array of numerical values.
+ Array
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Data concerning, extracted from, or derived from the analysis of molecular alignment of some type.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Alignment data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific nucleic acid molecules.
+
+
+ Nucleic acid report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more molecular tertiary (3D) structures. It might include annotation on the structure, a computer-generated report of analysis of structural data, and metadata (data about primary data) or any other free (essentially unformatted) text, as distinct from the primary data itself.
+ Structure-derived report
+
+
+ Structure report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+
+
+ A report on nucleic acid structure-derived data, describing structural properties of a DNA molecule, or any other annotation or information about specific nucleic acid 3D structure(s).
+
+ Nucleic acid structure data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on the physical (e.g. structural) or chemical properties of molecules, or parts of a molecule.
+ Physicochemical property
+ SO:0000400
+
+
+ Molecular property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Structural data for DNA base pairs or runs of bases, such as energy or angle data.
+
+
+ DNA base structural data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a database (or ontology) entry version, such as name (or other identifier) or parent database, unique identifier of entry, data, author and so on.
+
+ Database entry version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A persistent (stable) and unique identifier, typically identifying an object (entry) from a database.
+
+
+
+ Accession
+ http://semanticscience.org/resource/SIO_000675
+ http://semanticscience.org/resource/SIO_000731
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A single nucleotide polymorphism (SNP) in a DNA sequence.
+
+
+ SNP
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Reference to a dataset (or a cross-reference between two datasets), typically one or more entries in a biological database or ontology.
+
+
+ A list of database accessions or identifiers is usually included.
+ Data reference
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a submitted job.
+
+
+
+ Job identifier
+ http://wsio.org/data_009
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+
+ A name of a thing, which need not necessarily uniquely identify it.
+ Symbolic name
+
+
+
+ Name
+ http://www.w3.org/2000/01/rdf-schema#label
+ http://semanticscience.org/resource/SIO_000116
+ http://usefulinc.com/ns/doap#name
+
+
+
+
+
+ Closely related, but focusing on labeling and human readability but not on identification.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a thing, typically an enumerated string (a string with one of a limited set of values).
+
+ Type
+ http://purl.org/dc/elements/1.1/type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Authentication data usually used to log into an account on an information system such as a web application or a database.
+
+
+
+ Account authentication
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A three-letter code used in the KEGG databases to uniquely identify organisms.
+
+
+
+ KEGG organism code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the KEGG GENES database.
+
+ Gene name (KEGG GENES)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an object from one of the BioCyc databases.
+
+
+
+ BioCyc ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a compound from the BioCyc chemical compounds database.
+ BioCyc compound ID
+ BioCyc compound identifier
+
+
+
+ Compound ID (BioCyc)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a biological reaction from the BioCyc reactions database.
+
+
+
+ Reaction ID (BioCyc)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an enzyme from the BioCyc enzymes database.
+ BioCyc enzyme ID
+
+
+
+ Enzyme ID (BioCyc)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a biological reaction from a database.
+
+
+
+ Reaction ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier that is re-used for data objects of fundamentally different types (typically served from a single database).
+
+
+
+ This branch provides an alternative organisation of the concepts nested under 'Accession' and 'Name'. All concepts under here are already included under 'Accession' or 'Name'.
+ Identifier (hybrid)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a molecular property.
+
+
+
+ Molecular property identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a codon usage table, for example a genetic code.
+ Codon usage table identifier
+
+
+
+ Codon usage table ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Primary identifier of an object from the FlyBase database.
+
+
+
+ FlyBase primary identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an object from the WormBase database.
+
+
+
+ WormBase identifier
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CE[0-9]{5}
+ Protein identifier used by WormBase database.
+
+
+
+ WormBase wormpep ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on a trinucleotide sequence that encodes an amino acid including the triplet sequence, the encoded amino acid or whether it is a start or stop codon.
+
+ Nucleic acid features (codon)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a map of a molecular sequence.
+
+
+
+ Map identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a software end-user on a website or a database (typically a person or an entity).
+
+
+
+ Person identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name or other identifier of a nucleic acid molecule.
+
+
+
+ Nucleic acid identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+
+ Frame for translation of DNA (3 forward and 3 reverse frames relative to a chromosome).
+
+ Translation frame specification
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a genetic code.
+
+
+
+ Genetic code identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Informal name for a genetic code, typically an organism name.
+
+
+
+ Genetic code name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a file format such as HTML, PNG, PDF, EMBL, GenBank and so on.
+
+
+
+ File format name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing a type of sequence profile such as frequency matrix, Gribskov profile, hidden Markov model etc.
+
+ Sequence profile type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a computer operating system such as Linux, PC or Mac.
+
+
+
+ Operating system name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A type of point or block mutation, including insertion, deletion, change, duplication and moves.
+
+ Mutation type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A logical operator such as OR, AND, XOR, and NOT.
+
+
+
+ Logical operator
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A control of the order of data that is output, for example the order of sequences in an alignment.
+
+ Possible options including sorting by score, rank, by increasing P-value (probability, i.e. most statistically significant hits given first) and so on.
+ Results sort order
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple parameter that is a toggle (boolean value), typically a control for a modal tool.
+
+ Toggle
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The width of an output sequence or alignment.
+
+ Sequence width
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for introducing or extending a gap in an alignment.
+
+
+ Gap penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A temperature concerning nucleic acid denaturation, typically the temperature at which the two strands of a hybridised or double stranded nucleic acid (DNA or RNA/DNA) molecule separate.
+ Melting temperature
+
+
+ Nucleic acid melting temperature
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The concentration of a chemical compound.
+
+
+ Concentration
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Size of the incremental 'step' a sequence window is moved over a sequence.
+
+ Window step size
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An image of a graph generated by the EMBOSS suite.
+
+ EMBOSS graph
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An application report generated by the EMBOSS suite.
+
+ EMBOSS report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An offset for a single-point sequence position.
+
+ Sequence offset
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A value that serves as a threshold for a tool (usually to control scoring or output).
+
+ Threshold
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ An informative report on a transcription factor protein.
+
+ This might include conformational or physicochemical properties, as well as sequence information for transcription factor(s) binding sites.
+ Protein report (transcription factor)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a category of biological or bioinformatics database.
+
+ Database category name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name of a sequence profile.
+
+ Sequence profile name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Specification of one or more colors.
+
+ Color
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A parameter that is used to control rendering (drawing) to a device or image.
+
+ Rendering parameter
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any arbitrary name of a molecular sequence.
+
+
+
+ Sequence name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A temporal date.
+
+ Date
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Word composition data for a molecular sequence.
+
+ Word composition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of the Fickett testcode statistic (identifying protein coding regions) in a nucleotide sequence.
+
+
+ Fickett testcode plot
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of sequence similarities identified from word-matching or character comparison.
+ Sequence conservation report
+
+
+ Use this concept for calculated substitution rates, relative site variability, data on sites with biased properties, highly conserved or very poorly conserved sites, regions, blocks etc.
+ Sequence similarity plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of a peptide sequence looking down the axis of the helix for highlighting amphipathicity and other properties.
+
+
+ Helical wheel
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of a peptide sequence in a simple 3,4,3,4 repeating pattern that emulates at a simple level the arrangement of residues around an alpha helix.
+
+
+ Useful for highlighting amphipathicity and other properties.
+ Helical net
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A plot of general physicochemical properties of a protein sequence.
+
+ Protein sequence properties plot
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of pK versus pH for a protein.
+
+
+ Protein ionisation curve
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of character or word composition / frequency of a molecular sequence.
+
+
+ Sequence composition plot
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Density plot (of base composition) for a nucleotide sequence.
+
+
+ Nucleic acid density plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of a sequence trace (nucleotide sequence versus probabilities of each of the 4 bases).
+
+
+ Sequence trace image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on siRNA duplexes in mRNA.
+
+ Nucleic acid features (siRNA)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A collection of multiple molecular sequences and (typically) associated metadata that is intended for sequential processing.
+
+ This concept may be used for sequence sets that are expected to be read and processed a single sequence at a time.
+ Sequence set (stream)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Secondary identifier of an object from the FlyBase database.
+
+
+
+ Secondary identifiers are used to handle entries that were merged with or split from other entries in the database.
+ FlyBase secondary identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The number of a certain thing.
+
+ Cardinality
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A single thing.
+
+ Exactly 1
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ One or more things.
+
+ 1 or more
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Exactly two things.
+
+ Exactly 2
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Two or more things.
+
+ 2 or more
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A fixed-size datum calculated (by using a hash function) for a molecular sequence, typically for purposes of error detection or indexing.
+ Hash
+ Hash code
+ Hash sum
+ Hash value
+
+
+ Sequence checksum
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ chemical modification of a protein.
+
+
+ Protein features report (chemical modifications)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Data on an error generated by computer system or tool.
+
+ Error
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information on any arbitrary database entry.
+
+
+ Database entry metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A cluster of similar genes.
+
+ Gene cluster
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A molecular sequence and comprehensive metadata (such as a feature table), typically corresponding to a full entry from a molecular sequence database.
+
+
+ Sequence record full
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a plasmid in a database.
+
+
+
+ Plasmid identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique identifier of a specific mutation catalogued in a database.
+
+
+
+ Mutation ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Information describing the mutation itself, the organ site, tissue and type of lesion where the mutation has been identified, description of the patient origin and life-style.
+
+ Mutation annotation (basic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on the prevalence of mutation(s), including data on samples and mutation prevalence (e.g. by tumour type).
+
+ Mutation annotation (prevalence)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on mutation prognostic data, such as information on patient cohort, the study settings and the results of the study.
+
+ Mutation annotation (prognostic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on the functional properties of mutant proteins including transcriptional activities, promotion of cell growth and tumorigenicity, dominant negative effects, capacity to induce apoptosis, cell-cycle arrest or checkpoints in human cells and so on.
+
+ Mutation annotation (functional)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The number of a codon, for instance, at which a mutation is located.
+
+
+ Codon number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific tumor including nature and origin of the sample, anatomic site, organ or tissue, tumor type, including morphology and/or histologic type, and so on.
+
+ Tumor annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Basic information about a server on the web, such as an SRS server.
+
+ Server metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a field in a database.
+
+
+
+ Database field name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a sequence cluster from the SYSTERS database.
+ SYSTERS cluster ID
+
+
+
+ Sequence cluster ID (SYSTERS)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning a biological ontology.
+
+
+ Ontology metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Raw SCOP domain classification data files.
+
+ These are the parsable data files provided by SCOP.
+ Raw SCOP domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Raw CATH domain classification data files.
+
+ These are the parsable data files provided by CATH.
+ Raw CATH domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on the types of small molecules or 'heterogens' (non-protein groups) that are represented in PDB files.
+
+ Heterogen annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Phylogenetic property values data.
+
+ Phylogenetic property values
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A collection of sequences output from a bootstrapping (resampling) procedure.
+
+ Bootstrapping is often performed in phylogenetic analysis.
+ Sequence set (bootstrapped)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A consensus phylogenetic tree derived from comparison of multiple trees.
+
+ Phylogenetic consensus tree
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A data schema for organising or transforming data of some type.
+
+ Schema
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A DTD (document type definition).
+
+ DTD
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An XML Schema.
+
+ XML Schema
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A relax-NG schema.
+
+ Relax-NG schema
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An XSLT stylesheet.
+
+ XSLT stylesheet
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a data type.
+
+
+
+ Data resource definition name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an OBO file format such as OBO-XML, plain and so on.
+
+
+
+ OBO file format name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for genetic elements in MIPS database.
+ MIPS genetic element identifier
+
+
+
+ Gene ID (MIPS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of protein sequence(s) or protein sequence database entries.
+
+ Sequence identifier (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of nucleotide sequence(s) or nucleotide sequence database entries.
+
+ Sequence identifier (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An accession number of an entry from the EMBL sequence database.
+ EMBL ID
+ EMBL accession number
+ EMBL identifier
+
+
+
+ EMBL accession
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a polypeptide in the UniProt database.
+ UniProt entry name
+ UniProt identifier
+ UniProtKB entry name
+ UniProtKB identifier
+
+
+
+ UniProt ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the GenBank sequence database.
+ GenBank ID
+ GenBank accession number
+ GenBank identifier
+
+
+
+ GenBank accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Secondary (internal) identifier of a Gramene database entry.
+ Gramene internal ID
+ Gramene internal identifier
+ Gramene secondary ID
+
+
+
+ Gramene secondary identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of an entry from a database of molecular sequence variation.
+
+
+
+ Sequence variation ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique (and typically persistent) identifier of a gene in a database, that is (typically) different to the gene name/symbol.
+ Gene accession
+ Gene code
+
+
+
+ Gene ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the AceView genes database.
+
+ Gene name (AceView)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: ECK
+ Identifier of an E. coli K-12 gene from EcoGene Database.
+ E. coli K-12 gene identifier
+ ECK accession
+
+
+
+ Gene ID (ECK)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for a gene approved by the HUGO Gene Nomenclature Committee.
+ HGNC ID
+
+
+
+ Gene ID (HGNC)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a gene, (typically) assigned by a person and/or according to a naming scheme. It may contain white space characters and is typically more intuitive and readable than a gene symbol. It (typically) may be used to identify similar genes in different species and to derive a gene symbol.
+ Allele name
+
+
+
+ Gene name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the NCBI genes database.
+
+ Gene name (NCBI)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A specification of a chemical structure in SMILES format.
+
+
+ SMILES string
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the STRING database of protein-protein interactions.
+
+
+
+ STRING ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific virus.
+
+ Virus annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on the taxonomy of a specific virus.
+
+ Virus annotation (taxonomy)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a biological reaction from the SABIO-RK reactions database.
+
+
+
+ Reaction ID (SABIO-RK)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific carbohydrate 3D structure(s).
+
+
+ Carbohydrate report
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A series of digits that are assigned consecutively to each sequence record processed by NCBI. The GI number bears no resemblance to the Accession number of the sequence record.
+ NCBI GI number
+
+
+
+ Nucleotide sequence GI number is shown in the VERSION field of the database record. Protein sequence GI number is shown in the CDS/db_xref field of a nucleotide database record, and the VERSION field of a protein database record.
+ GI number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier assigned to sequence records processed by NCBI, made of the accession number of the database record followed by a dot and a version number.
+ NCBI accession.version
+ accession.version
+
+
+
+ Nucleotide sequence version contains two letters followed by six digits, a dot, and a version number (or for older nucleotide sequence records, the format is one letter followed by five digits, a dot, and a version number). Protein sequence version contains three letters followed by five digits, a dot, and a version number.
+ NCBI version
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a cell line.
+
+
+
+ Cell line name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The exact name of a cell line.
+
+
+
+ Cell line name (exact)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The truncated name of a cell line.
+
+
+
+ Cell line name (truncated)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a cell line without any punctuation.
+
+
+
+ Cell line name (no punctuation)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The assonant name of a cell line.
+
+
+
+ Cell line name (assonant)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique, persistent identifier of an enzyme.
+ Enzyme accession
+
+
+
+ Enzyme ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an enzyme from the REBASE enzymes database.
+
+
+
+ REBASE enzyme number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DB[0-9]{5}
+ Unique identifier of a drug from the DrugBank database.
+
+
+
+ DrugBank ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier assigned to NCBI protein sequence records.
+ protein gi
+ protein gi number
+
+
+
+ Nucleotide sequence GI number is shown in the VERSION field of the database record. Protein sequence GI number is shown in the CDS/db_xref field of a nucleotide database record, and the VERSION field of a protein database record.
+ GI number (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A score derived from the alignment of two sequences, which is then normalised with respect to the scoring system.
+
+
+ Bit scores are normalised with respect to the scoring system and therefore can be used to compare alignment scores from different searches.
+ Bit score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+
+ Phase for translation of DNA (0, 1 or 2) relative to a fragment of the coding sequence.
+
+ Translation phase specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning or describing some core computational resource, as distinct from primary data. This includes metadata on the origin, source, history, ownership or location of some thing.
+ Provenance metadata
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Resource metadata
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any arbitrary identifier of an ontology.
+
+
+
+ Ontology identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a concept in an ontology.
+
+
+
+ Ontology concept name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a build of a particular genome.
+
+
+
+ Genome build identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a biological pathway or network.
+
+
+
+ Pathway or network name
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]{2,3}[0-9]{5}
+ Identifier of a pathway from the KEGG pathway database.
+ KEGG pathway ID
+
+
+
+ Pathway ID (KEGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+
+ Identifier of a pathway from the NCI-Nature pathway database.
+
+
+
+ Pathway ID (NCI-Nature)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a pathway from the ConsensusPathDB pathway database.
+
+
+
+ Pathway ID (ConsensusPathDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef database.
+ UniRef cluster id
+ UniRef entry accession
+
+
+
+ Sequence cluster ID (UniRef)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef100 database.
+ UniRef100 cluster id
+ UniRef100 entry accession
+
+
+
+ Sequence cluster ID (UniRef100)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef90 database.
+ UniRef90 cluster id
+ UniRef90 entry accession
+
+
+
+ Sequence cluster ID (UniRef90)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef50 database.
+ UniRef50 cluster id
+ UniRef50 entry accession
+
+
+
+ Sequence cluster ID (UniRef50)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning or derived from an ontology.
+ Ontological data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Ontology data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific RNA family or other group of classified RNA sequences.
+ RNA family annotation
+
+
+ RNA family report
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an RNA family, typically an entry from a RNA sequence classification database.
+
+
+
+ RNA family identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Stable accession number of an entry (RNA family) from the RFAM database.
+
+
+
+ RFAM accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing a type of protein family signature (sequence classifier) from the InterPro database.
+
+ Protein signature type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on protein domain-DNA/RNA interaction(s).
+
+ Domain-nucleic acid interaction report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ An informative report on protein domain-protein domain interaction(s).
+
+ Domain-domain interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data on indirect protein domain-protein domain interaction(s).
+
+ Domain-domain interaction (indirect)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a nucleotide or protein sequence database entry.
+
+
+
+ Sequence accession (hybrid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ Data concerning two-dimensional polygel electrophoresis.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ 2D PAGE data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ An informative report on two-dimensional gel electrophoresis experiments, gels or spots in a gel.
+
+
+ 2D PAGE report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A persistent, unique identifier of a biological pathway or network (typically a database entry).
+
+
+
+ Pathway or network accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of the (1D representations of) secondary structure of two or more molecules.
+
+
+ Secondary structure alignment
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an object from the ASTD database.
+
+
+
+ ASTD ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an exon from the ASTD database.
+
+
+
+ ASTD ID (exon)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an intron from the ASTD database.
+
+
+
+ ASTD ID (intron)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a polyA signal from the ASTD database.
+
+
+
+ ASTD ID (polya)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a transcription start site from the ASTD database.
+
+
+
+ ASTD ID (tss)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ An informative report on individual spot(s) from a two-dimensional (2D PAGE) gel.
+
+
+ 2D PAGE spot report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a spot from a two-dimensional (protein) gel.
+
+
+
+ Spot ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a spot from a two-dimensional (protein) gel in the SWISS-2DPAGE database.
+
+
+
+ Spot serial number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a spot from a two-dimensional (protein) gel from a HSC-2DPAGE database.
+
+
+
+ Spot ID (HSC-2DPAGE)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data on the interaction of a protein (or protein domain) with specific structural (3D) and/or sequence motifs.
+
+ Protein-motif interaction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a strain of an organism variant, typically a plant, virus or bacterium.
+
+
+
+ Strain identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of an item from the CABRI database.
+
+
+
+ CABRI accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Report of genotype experiment including case control, population, and family studies. These might use array based methods and re-sequencing methods.
+
+
+ Experiment report (genotyping)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of genotype experiment metadata.
+
+
+
+ Genotype experiment ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the EGA database.
+
+
+
+ EGA accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ IPI[0-9]{8}
+ Identifier of a protein entry catalogued in the International Protein Index (IPI) database.
+
+
+
+ IPI protein ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of a protein from the RefSeq database.
+ RefSeq protein ID
+
+
+
+ RefSeq accession (protein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry (promoter) from the EPD database.
+ EPD identifier
+
+
+
+ EPD ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the TAIR database.
+
+
+
+ TAIR accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an Arabidopsis thaliana gene from the TAIR database.
+
+
+
+ TAIR accession (At gene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UniSTS database.
+
+
+
+ UniSTS accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UNITE database.
+
+
+
+ UNITE accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UTR database.
+
+
+
+ UTR accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UPI[A-F0-9]{10}
+ Accession number of a UniParc (protein sequence) database entry.
+ UPI
+ UniParc ID
+
+
+
+ UniParc accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the Rouge or HUGE databases.
+
+
+
+ mFLJ/mKIAA number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific fungus.
+
+ Fungi annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific fungus anamorph.
+
+ Fungi annotation (anamorph)
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the Ensembl database.
+ Ensembl ID (protein)
+ Protein ID (Ensembl)
+
+
+
+ Ensembl protein ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific toxin.
+
+ Toxin annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on a membrane protein.
+
+ Protein report (membrane protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ An informative report on tentative or known protein-drug interaction(s).
+
+
+ Protein-drug interaction report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning a map of molecular sequence(s).
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Map data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning phylogeny, typically of molecular sequences, including reports of information concerning or derived from a phylogenetic tree, or from comparing two or more phylogenetic trees.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Phylogenetic data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning one or more protein molecules.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Protein data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning one or more nucleic acid molecules.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Nucleic acid data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning, extracted from, or derived from the analysis of a scientific text (or texts) such as a full text article from a scientific journal.
+ Article data
+ Scientific text data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation. It includes concepts that are best described as scientific text or closely concerned with or derived from text.
+ Text data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+ Typically a simple numerical or string value that controls the operation of a tool.
+
+ Parameter
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning a specific type of molecule.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Molecular data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+
+ An informative report on a specific molecule.
+
+ Molecule report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific organism.
+ Organism annotation
+
+
+ Organism report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about how a scientific experiment or analysis was carried out that results in a specific set of data or results used for further analysis or to test a specific hypothesis.
+ Experiment annotation
+ Experiment metadata
+ Experiment report
+
+
+ Protocol
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An attribute of a molecular sequence, possibly in reference to some other sequence.
+ Sequence parameter
+
+
+ Sequence attribute
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Output from a serial analysis of gene expression (SAGE), massively parallel signature sequencing (MPSS) or sequencing by synthesis (SBS) experiment. In all cases this is a list of short sequence tags and the number of times it is observed.
+ Sequencing-based expression profile
+ Sequence tag profile (with gene assignment)
+
+
+ SAGE, MPSS and SBS experiments are usually performed to study gene expression. The sequence tags are typically subsequently annotated (after a database search) with the mRNA (and therefore gene) the tag was extracted from.
+ This includes tag to gene assignments (tag mapping) of SAGE, MPSS and SBS data. Typically this is the sequencing-based expression profile annotated with gene identifiers.
+ Sequence tag profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning a mass spectrometry measurement.
+
+
+ Mass spectrometry data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw data from experimental methods for determining protein structure.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein structure raw data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a mutation.
+
+
+
+ Mutation identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning an alignment of two or more molecular sequences, structures or derived data.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. This includes entities derived from sequences and structures such as motifs and profiles.
+ Alignment data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning an index of data.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Data index data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Single letter amino acid identifier, e.g. G.
+
+
+
+ Amino acid name (single letter)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Three letter amino acid identifier, e.g. GLY.
+
+
+
+ Amino acid name (three letter)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Full name of an amino acid, e.g. Glycine.
+
+
+
+ Amino acid name (full name)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a toxin.
+
+
+
+ Toxin identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a toxin from the ArachnoServer database.
+
+
+
+ ArachnoServer ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A simple summary of expressed genes.
+
+ Expressed gene list
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a monomer from the BindingDB database.
+
+
+
+ BindingDB Monomer ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept from the GO ontology.
+
+ GO concept name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a 'biological process' concept from the Gene Ontology.
+
+
+
+ GO concept ID (biological process)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a 'molecular function' concept from the Gene Ontology.
+
+
+
+ GO concept ID (molecular function)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept for a cellular component from the GO ontology.
+
+ GO concept name (cellular component)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image arising from a Northern Blot experiment.
+
+
+ Northern blot image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a blot from a Northern Blot.
+
+
+
+ Blot ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a blot from a Northern Blot from the BlotBase database.
+
+
+
+ BlotBase blot ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw data on a biological hierarchy, describing the hierarchy proper, hierarchy components and possibly associated annotation.
+ Hierarchy annotation
+
+
+ Hierarchy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry from a database of biological hierarchies.
+
+ Hierarchy identifier
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the Brite database of biological hierarchies.
+
+
+
+ Brite hierarchy ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A type (represented as a string) of cancer.
+
+ Cancer type
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier for an organism used in the BRENDA database.
+
+
+
+ BRENDA organism ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a taxon using the controlled vocabulary of the UniGene database.
+ UniGene organism abbreviation
+
+
+
+ UniGene taxon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a taxon using the controlled vocabulary of the UTRdb database.
+
+
+
+ UTRdb taxon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a catalogue of biological resources.
+ Catalogue identifier
+
+
+
+ Catalogue ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a catalogue of biological resources from the CABRI database.
+
+
+
+ CABRI catalogue name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on protein secondary structure alignment-derived data or metadata.
+
+ Secondary structure alignment metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Was deprecated since 1.5, but not correctly (fully) obsoleted until 1.19.
+ 1.5
+
+
+ An informative report on the physical, chemical or other information concerning the interaction of two or more molecules (or parts of molecules).
+
+ Molecule interaction report
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Primary data about a specific biological pathway or network (the nodes and connections within the pathway or network).
+ Network
+ Pathway
+
+
+ Pathway or network
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning one or more small molecules.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Small molecule data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning a particular genotype, phenotype or a genotype / phenotype relation.
+
+ Genotype and phenotype data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image, hybridisation or some other data arising from a study of feature/molecule expression, typically profiling or quantification.
+ Gene expression data
+ Gene product profile
+ Gene product quantification data
+ Gene transcription profile
+ Gene transcription quantification data
+ Metabolite expression data
+ Microarray data
+ Non-coding RNA profile
+ Non-coding RNA quantification data
+ Protein expression data
+ RNA profile
+ RNA quantification data
+ RNA-seq data
+ Transcriptome profile
+ Transcriptome quantification data
+ mRNA profile
+ mRNA quantification data
+ Protein profile
+ Protein quantification data
+ Proteome profile
+ Proteome quantification data
+
+
+ Expression data
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ C[0-9]+
+ Unique identifier of a chemical compound from the KEGG database.
+ KEGG compound ID
+ KEGG compound identifier
+
+
+
+ Compound ID (KEGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name (not necessarily stable) of an entry (RNA family) from the RFAM database.
+
+
+
+ RFAM name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ R[0-9]+
+ Identifier of a biological reaction from the KEGG reactions database.
+
+
+
+ Reaction ID (KEGG)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ D[0-9]+
+ Unique identifier of a drug from the KEGG Drug database.
+
+
+
+ Drug ID (KEGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ENS[A-Z]*[FPTG][0-9]{11}
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl database.
+ Ensembl IDs
+
+
+
+ Ensembl ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [A-Z][0-9]+(\.[-[0-9]+])?
+ An identifier of a disease from the International Classification of Diseases (ICD) database.
+
+
+
+ ICD identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9A-Za-z]+:[0-9]+:[0-9]{1,5}(\.[0-9])?
+ Unique identifier of a sequence cluster from the CluSTr database.
+ CluSTr ID
+ CluSTr cluster ID
+
+
+
+ Sequence cluster ID (CluSTr)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ G[0-9]+
+ Unique identifier of a glycan ligand from the KEGG GLYCAN database (a subset of KEGG LIGAND).
+
+
+
+ KEGG Glycan ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+\.[A-Z]\.[0-9]+\.[0-9]+\.[0-9]+
+ A unique identifier of a family from the transport classification database (TCDB) of membrane transport proteins.
+ TC number
+
+
+
+ OBO file for regular expression.
+ TCDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ MINT\-[0-9]{1,5}
+ Unique identifier of an entry from the MINT database of protein-protein interactions.
+
+
+
+ MINT ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DIP[\:\-][0-9]{3}[EN]
+ Unique identifier of an entry from the DIP database of protein-protein interactions.
+
+
+
+ DIP ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A[0-9]{6}
+ Unique identifier of a protein listed in the UCSD-Nature Signaling Gateway Molecule Pages database.
+
+
+
+ Signaling Gateway protein ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein modification catalogued in a database.
+
+
+
+ Protein modification ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AA[0-9]{4}
+ Identifier of a protein modification catalogued in the RESID database.
+
+
+
+ RESID ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{4,7}
+ Identifier of an entry from the RGD database.
+
+
+
+ RGD ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AASequence:[0-9]{10}
+ Identifier of a protein sequence from the TAIR database.
+
+
+
+ TAIR accession (protein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ HMDB[0-9]{5}
+ Identifier of a small molecule metabolite from the Human Metabolome Database (HMDB).
+ HMDB ID
+
+
+
+ Compound ID (HMDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ LM(FA|GL|GP|SP|ST|PR|SL|PK)[0-9]{4}([0-9a-zA-Z]{4})?
+ Identifier of an entry from the LIPID MAPS database.
+ LM ID
+
+
+
+ LIPID MAPS ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PAp[0-9]{8}
+ PDBML:pdbx_PDB_strand_id
+ Identifier of a peptide from the PeptideAtlas peptide databases.
+
+
+
+ PeptideAtlas ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ Identifier of a report of molecular interactions from a database (typically).
+
+
+ Molecular interaction ID
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ A unique identifier of an interaction from the BioGRID database.
+
+
+
+ BioGRID interaction ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ S[0-9]{2}\.[0-9]{3}
+ Unique identifier of a peptidase enzyme from the MEROPS database.
+ MEROPS ID
+
+
+
+ Enzyme ID (MEROPS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a mobile genetic element.
+
+
+
+ Mobile genetic element ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ mge:[0-9]+
+ An identifier of a mobile genetic element from the Aclame database.
+
+
+
+ ACLAME ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PWY[a-zA-Z_0-9]{2}\-[0-9]{3}
+ Identifier of an entry from the Saccharomyces genome database (SGD).
+
+
+
+ SGD ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a book.
+
+
+
+ Book ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (ISBN)?(-13|-10)?[:]?[ ]?([0-9]{2,3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[ -]?([0-9]|X)
+ The International Standard Book Number (ISBN) is for identifying printed books.
+
+
+
+ ISBN
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ B[0-9]{5}
+ Identifier of a metabolite from the 3DMET database.
+ 3DMET ID
+
+
+
+ Compound ID (3DMET)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ([A-NR-Z][0-9][A-Z][A-Z0-9][A-Z0-9][0-9])_.*|([OPQ][0-9][A-Z0-9][A-Z0-9][A-Z0-9][0-9]_.*)|(GAG_.*)|(MULT_.*)|(PFRAG_.*)|(LIP_.*)|(CAT_.*)
+ A unique identifier of an interaction from the MatrixDB database.
+
+
+
+ MatrixDB interaction ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ A unique identifier for pathways, reactions, complexes and small molecules from the cPath (Pathway Commons) database.
+
+
+
+ These identifiers are unique within the cPath database, however, they are not stable between releases.
+ cPath ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ [0-9]+
+ Identifier of an assay from the PubChem database.
+
+
+
+ PubChem bioassay ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the PubChem database.
+ PubChem identifier
+
+
+
+ PubChem ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ M[0-9]{4}
+ Identifier of an enzyme reaction mechanism from the MACie database.
+ MACie entry number
+
+
+
+ Reaction ID (MACie)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ MI[0-9]{7}
+ Identifier for a gene from the miRBase database.
+ miRNA ID
+ miRNA identifier
+ miRNA name
+
+
+
+ Gene ID (miRBase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ZDB\-GENE\-[0-9]+\-[0-9]+
+ Identifier for a gene from the Zebrafish information network genome (ZFIN) database.
+
+
+
+ Gene ID (ZFIN)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{5}
+ Identifier of an enzyme-catalysed reaction from the Rhea database.
+
+
+
+ Reaction ID (Rhea)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UPA[0-9]{5}
+ Identifier of a biological pathway from the Unipathway database.
+ upaid
+
+
+
+ Pathway ID (Unipathway)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a small molecule from the ChEMBL database.
+ ChEMBL ID
+
+
+
+ Compound ID (ChEMBL)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+
+ Unique identifier of an entry from the Ligand-gated ion channel (LGICdb) database.
+
+
+
+ LGICdb identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a biological reaction (kinetics entry) from the SABIO-RK reactions database.
+
+
+
+ Reaction kinetics ID (SABIO-RK)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of an entry from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ PharmGKB ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of a pathway from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ Pathway ID (PharmGKB)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of a disease from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ Disease ID (PharmGKB)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of a drug from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ Drug ID (PharmGKB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAP[0-9]+
+ Identifier of a drug from the Therapeutic Target Database (TTD).
+
+
+
+ Drug ID (TTD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ TTDS[0-9]+
+ Identifier of a target protein from the Therapeutic Target Database (TTD).
+
+
+
+ Target ID (TTD)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique identifier of a type or group of cells.
+
+
+
+ Cell type identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ A unique identifier of a neuron from the NeuronDB database.
+
+
+
+ NeuronDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+
+ A unique identifier of a neuron from the NeuroMorpho database.
+
+
+
+ NeuroMorpho ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a chemical from the ChemIDplus database.
+ ChemIDplus ID
+
+
+
+ Compound ID (ChemIDplus)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SMP[0-9]{5}
+ Identifier of a pathway from the Small Molecule Pathway Database (SMPDB).
+
+
+
+ Pathway ID (SMPDB)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the BioNumbers database of key numbers and associated data in molecular biology.
+
+
+
+ BioNumbers ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ T3D[0-9]+
+ Unique identifier of a toxin from the Toxin and Toxin Target Database (T3DB) database.
+
+
+
+ T3DB ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a carbohydrate.
+
+
+
+ Carbohydrate identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the GlycomeDB database.
+
+
+
+ GlycomeDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+[0-9]+
+ Identifier of an entry from the LipidBank database.
+
+
+
+ LipidBank ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ cd[0-9]{5}
+ Identifier of a conserved domain from the Conserved Domain Database.
+
+
+
+ CDD ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{1,5}
+ An identifier of an entry from the MMDB database.
+ MMDB accession
+
+
+
+ MMDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Unique identifier of an entry from the iRefIndex database of protein-protein interactions.
+
+
+
+ iRefIndex ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Unique identifier of an entry from the ModelDB database.
+
+
+
+ ModelDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a signaling pathway from the Database of Quantitative Cellular Signaling (DQCS).
+
+
+
+ Pathway ID (DQCS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database (Homo sapiens division).
+
+ Ensembl ID (Homo sapiens)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Bos taurus' division).
+
+ Ensembl ID ('Bos taurus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Canis familiaris' division).
+
+ Ensembl ID ('Canis familiaris')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Cavia porcellus' division).
+
+ Ensembl ID ('Cavia porcellus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Ciona intestinalis' division).
+
+ Ensembl ID ('Ciona intestinalis')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Ciona savignyi' division).
+
+ Ensembl ID ('Ciona savignyi')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Danio rerio' division).
+
+ Ensembl ID ('Danio rerio')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Dasypus novemcinctus' division).
+
+ Ensembl ID ('Dasypus novemcinctus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Echinops telfairi' division).
+
+ Ensembl ID ('Echinops telfairi')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Erinaceus europaeus' division).
+
+ Ensembl ID ('Erinaceus europaeus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Felis catus' division).
+
+ Ensembl ID ('Felis catus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Gallus gallus' division).
+
+ Ensembl ID ('Gallus gallus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Gasterosteus aculeatus' division).
+
+ Ensembl ID ('Gasterosteus aculeatus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Homo sapiens' division).
+
+ Ensembl ID ('Homo sapiens')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Loxodonta africana' division).
+
+ Ensembl ID ('Loxodonta africana')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Macaca mulatta' division).
+
+ Ensembl ID ('Macaca mulatta')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Monodelphis domestica' division).
+
+ Ensembl ID ('Monodelphis domestica')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Mus musculus' division).
+
+ Ensembl ID ('Mus musculus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Myotis lucifugus' division).
+
+ Ensembl ID ('Myotis lucifugus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Ornithorhynchus anatinus' division).
+
+ Ensembl ID ("Ornithorhynchus anatinus")
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Oryctolagus cuniculus' division).
+
+ Ensembl ID ('Oryctolagus cuniculus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Oryzias latipes' division).
+
+ Ensembl ID ('Oryzias latipes')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Otolemur garnettii' division).
+
+ Ensembl ID ('Otolemur garnettii')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Pan troglodytes' division).
+
+ Ensembl ID ('Pan troglodytes')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Rattus norvegicus' division).
+
+ Ensembl ID ('Rattus norvegicus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Spermophilus tridecemlineatus' division).
+
+ Ensembl ID ('Spermophilus tridecemlineatus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Takifugu rubripes' division).
+
+ Ensembl ID ('Takifugu rubripes')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Tupaia belangeri' division).
+
+ Ensembl ID ('Tupaia belangeri')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Xenopus tropicalis' division).
+
+ Ensembl ID ('Xenopus tropicalis')
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a protein domain (or other node) from the CATH database.
+
+
+
+ CATH identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 2.10.10.10
+ A code number identifying a family from the CATH database.
+
+
+
+ CATH node ID (family)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an enzyme from the CAZy enzymes database.
+ CAZy ID
+
+
+
+ Enzyme ID (CAZy)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier assigned by the I.M.A.G.E. consortium to a clone (cloned molecular sequence).
+ I.M.A.G.E. cloneID
+ IMAGE cloneID
+
+
+
+ Clone ID (IMAGE)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a 'cellular component' concept from the Gene Ontology.
+ GO concept identifier (cellular compartment)
+
+
+
+ GO concept ID (cellular component)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a chromosome as used in the BioCyc database.
+
+
+
+ Chromosome name (BioCyc)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a gene expression profile from the CleanEx database.
+
+
+
+ CleanEx entry name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of (typically a list of) gene expression experiments catalogued in the CleanEx database.
+
+
+
+ CleanEx dataset code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information concerning a genome as a whole.
+
+
+ Genome report
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein complex from the CORUM database.
+ CORUM complex ID
+
+
+
+ Protein ID (CORUM)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a position-specific scoring matrix from the CDD database.
+
+
+
+ CDD PSSM-ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the CuticleDB database.
+ CuticleDB ID
+
+
+
+ Protein ID (CuticleDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a predicted transcription factor from the DBD database.
+
+
+
+ DBD ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ General annotation on an oligonucleotide probe, or a set of probes.
+ Oligonucleotide probe sets annotation
+
+
+ Oligonucleotide probe annotation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an oligonucleotide from a database.
+
+
+
+ Oligonucleotide ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an oligonucleotide probe from the dbProbe database.
+
+
+
+ dbProbe ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Physicochemical property data for one or more dinucleotides.
+
+
+ Dinucleotide property
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a dinucleotide property from the DiProDB database.
+
+
+
+ DiProDB ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ disordered structure in a protein.
+
+
+ Protein features report (disordered structure)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the DisProt database.
+ DisProt ID
+
+
+
+ Protein ID (DisProt)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Annotation on an embryo or concerning embryological development.
+
+ Embryo report
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a gene transcript from the Ensembl database.
+ Transcript ID (Ensembl)
+
+
+
+ Ensembl transcript ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on one or more small molecules that are enzyme inhibitors.
+
+ Inhibitor annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Moby:GeneAccessionList
+ An identifier of a promoter of a gene that is catalogued in a database.
+
+
+
+ Promoter ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an EST sequence.
+
+
+
+ EST accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an EST sequence from the COGEME database.
+
+
+
+ COGEME EST ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a unisequence from the COGEME database.
+
+
+
+ A unisequence is a single sequence assembled from ESTs.
+ COGEME unisequence ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry (protein family) from the GeneFarm database.
+ GeneFarm family ID
+
+
+
+ Protein family ID (GeneFarm)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a family of organism.
+
+
+
+ Family name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a genus of viruses.
+
+ Genus name (virus)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a family of viruses.
+
+ Family name (virus)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a SwissRegulon database.
+
+ Database name (SwissRegulon)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A feature identifier as used in the SwissRegulon database.
+
+
+
+ This can be name of a gene, the ID of a TFBS, or genomic coordinates in form "chr:start..end".
+ Sequence feature ID (SwissRegulon)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a gene in the NMPDR database.
+
+
+
+ A FIG ID consists of four parts: a prefix, genome id, locus type and id number.
+ FIG ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a gene in the Xenbase database.
+
+
+
+ Gene ID (Xenbase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a gene in the Genolist database.
+
+
+
+ Gene ID (Genolist)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the Genolist genes database.
+
+ Gene name (Genolist)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry (promoter) from the ABS database.
+ ABS identifier
+
+
+
+ ABS ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a transcription factor from the AraC-XylS database.
+
+
+
+ AraC-XylS ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name of an entry (gene) from the HUGO database.
+
+ Gene name (HUGO)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a locus from the PseudoCAP database.
+
+
+
+ Locus ID (PseudoCAP)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a locus from the UTR database.
+
+
+
+ Locus ID (UTR)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a monosaccharide from the MonosaccharideDB database.
+
+
+
+ MonosaccharideDB ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a subdivision of the Collagen Mutation Database (CMD) database.
+
+ Database name (CMD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a subdivision of the Osteogenesis database.
+
+ Database name (Osteogenesis)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a particular genome.
+
+
+
+ Genome identifier
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.26
+ An identifier of a particular genome.
+
+
+ GenomeReviews ID
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the GlycoMapsDB (Glycosciences.de) database.
+
+
+
+ GlycoMap ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A conformational energy map of the glycosidic linkages in a carbohydrate molecule.
+
+
+ Carbohydrate conformational map
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a transcription factor.
+
+
+
+ Transcription factor name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a membrane transport protein from the transport classification database (TCDB).
+
+
+
+ TCID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PF[0-9]{5}
+ Name of a domain from the Pfam database.
+
+
+
+ Pfam domain name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CL[0-9]{4}
+ Accession number of a Pfam clan.
+
+
+
+ Pfam clan ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for a gene from the VectorBase database.
+ VectorBase ID
+
+
+
+ Gene ID (VectorBase)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UTRSite database of regulatory motifs in eukaryotic UTRs.
+
+
+
+ UTRSite ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report about a specific or conserved pattern in a molecular sequence, such as its context in genes or proteins, its role, origin or method of construction, etc.
+ Sequence motif report
+ Sequence profile report
+
+
+ Sequence signature report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on a particular locus.
+
+ Locus annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Official name of a protein as used in the UniProt database.
+
+
+
+ Protein name (UniProt)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ One or more terms from one or more controlled vocabularies which are annotations on an entity.
+
+ The concepts are typically provided as a persistent identifier or some other link to the source ontologies. Evidence of the validity of the annotation might be included.
+ Term ID list
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a protein family from the HAMAP database.
+
+
+
+ HAMAP ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+ Basic information concerning an identifier of data (typically including the identifier itself). For example, a gene symbol with information concerning its provenance.
+
+ Identifier with metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation about a gene symbol.
+
+ Gene symbol annotation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a RNA transcript.
+
+
+
+ Transcript ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an RNA transcript from the H-InvDB database.
+
+
+
+ HIT ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a gene cluster in the H-InvDB database.
+
+
+
+ HIX ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an antibody from the HPA database.
+
+
+
+ HPA antibody id
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a human major histocompatibility complex (HLA) or other protein from the IMGT/HLA database.
+
+
+
+ IMGT/HLA ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a gene assigned by the J. Craig Venter Institute (JCVI).
+
+
+
+ Gene ID (JCVI)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a kinase protein.
+
+
+
+ Kinase name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a physical entity from the ConsensusPathDB database.
+
+
+
+ ConsensusPathDB entity ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a physical entity from the ConsensusPathDB database.
+
+
+
+ ConsensusPathDB entity name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The number of a strain of algae and protozoa from the CCAP database.
+
+
+
+ CCAP strain number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of stock from a catalogue of biological resources.
+
+
+
+ Stock number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A stock number from The Arabidopsis information resource (TAIR).
+
+
+
+ Stock number (TAIR)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the RNA editing database (REDIdb).
+
+
+
+ REDIdb ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a domain from the SMART database.
+
+
+
+ SMART domain name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry (family) from the PANTHER database.
+ Panther family ID
+
+
+
+ Protein family ID (PANTHER)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier for a virus from the RNAVirusDB database.
+
+
+
+ Could list (or reference) other taxa here from https://www.phenoscape.org/wiki/Taxonomic_Rank_Vocabulary.
+ RNAVirusDB ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of annotation on a (group of) viruses (catalogued in a database).
+
+
+
+ Virus ID
+ Virus identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a genome project assigned by NCBI.
+
+
+
+ NCBI Genome Project ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a whole genome assigned by the NCBI.
+
+
+
+ NCBI genome accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data concerning, extracted from, or derived from the analysis of a sequence profile, such as its name, length, technical details about the profile or its construction, the biological role or annotation, and so on.
+
+
+ Sequence profile data
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a membrane protein from the TopDB database.
+ TopDB ID
+
+
+
+ Protein ID (TopDB)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a two-dimensional (protein) gel.
+ Gel identifier
+
+
+
+ Gel ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a reference map gel from the SWISS-2DPAGE database.
+
+
+
+ Reference map name (SWISS-2DPAGE)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a peroxidase protein from the PeroxiBase database.
+ PeroxiBase ID
+
+
+
+ Protein ID (PeroxiBase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the SISYPHUS database of tertiary structure alignments.
+
+
+
+ SISYPHUS ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an open reading frame (catalogued in a database).
+
+
+
+ ORF ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of an open reading frame.
+
+
+
+ ORF identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the GlycosciencesDB database.
+ LInear Notation for Unique description of Carbohydrate Sequences ID
+
+
+
+
+ LINUCS ID
+ [1-9][0-9]*
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a ligand-gated ion channel protein from the LGICdb database.
+ LGICdb ID
+
+
+
+ Protein ID (LGICdb)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an EST sequence from the MaizeDB database.
+
+
+
+ MaizeDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a gene in the MfunGD database.
+
+
+
+ Gene ID (MfunGD)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a disease from the Orpha database.
+
+
+
+ Orpha number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the EcID database.
+
+
+
+ Protein ID (EcID)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a cDNA molecule catalogued in the RefSeq database.
+
+
+
+ Clone ID (RefSeq)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a cone snail toxin protein from the ConoServer database.
+
+
+
+ Protein ID (ConoServer)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a GeneSNP database entry.
+
+
+
+ GeneSNP ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a lipid.
+
+
+
+ Lipid identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A flat-file (textual) data archive.
+
+
+ Databank
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A web site providing data (web pages) on a common theme to a HTTP client.
+
+
+ Web portal
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for a gene from the VBASE2 database.
+ VBASE2 ID
+
+
+
+ Gene ID (VBASE2)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier for a virus from the DPVweb database.
+ DPVweb virus ID
+
+
+
+ DPVweb ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a pathway from the BioSystems pathway database.
+
+
+
+ Pathway ID (BioSystems)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data concerning a proteomics experiment.
+
+ Experimental data (proteomics)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An abstract of a scientific article.
+
+
+ Abstract
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a lipid structure.
+
+
+ Lipid structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the (3D) structure of a drug.
+
+
+ Drug structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the (3D) structure of a toxin.
+
+
+ Toxin structure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple matrix of numbers, where each value (or column of values) is derived from analysis of the corresponding position in a sequence alignment.
+ PSSM
+
+
+ Position-specific scoring matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A matrix of distances between molecular entities, where a value (distance) is (typically) derived from comparison of two entities and reflects their similarity.
+
+
+ Distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Distances (values representing similarity) between a group of molecular structures.
+
+
+ Structural distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Bibliographic data concerning scientific article(s).
+
+ Article metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A concept from a biological ontology.
+
+
+ This includes any fields from the concept definition such as concept name, definition, comments and so on.
+ Ontology concept
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A numerical measure of differences in the frequency of occurrence of synonymous codons in DNA sequences.
+
+
+ Codon usage bias
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Northern Blot experiments.
+
+
+ Northern blot report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map showing distance between genetic markers estimated by radiation-induced breaks in a chromosome.
+ RH map
+
+
+ The radiation method can break very closely linked markers providing a more detailed map. Most genetic markers and subsequences may be located to a defined map position and with more precise estimates of distance than a linkage map.
+ Radiation hybrid map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple list of data identifiers (such as database accessions), possibly with additional basic information on the addressed data.
+
+
+ ID list
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gene frequencies data that may be read during phylogenetic tree calculation.
+
+
+ Phylogenetic gene frequencies data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A set of sub-sequences displaying some type of polymorphism, typically indicating the sequence in which they occur, their position and other metadata.
+
+ Sequence set (polymorphic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An entry (resource) from the DRCAT bioinformatics resource catalogue.
+
+ DRCAT resource
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a multi-protein complex; two or more polypeptide chains in a stable, functional association with one another.
+
+
+ Protein complex
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a protein (3D) structural motif; any group of contiguous or non-contiguous amino acid residues but typically those forming a feature with a structural or functional role.
+
+
+ Protein structural motif
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific lipid 3D structure(s).
+
+
+ Lipid report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Image of one or more molecular secondary structures.
+
+ Secondary structure image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on general information, properties or features of one or more molecular secondary structures.
+
+ Secondary structure report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ DNA sequence-specific feature annotation (not in a feature table).
+
+ DNA features
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Features concerning RNA or regions of DNA that encode an RNA molecule.
+
+ RNA features report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Biological data that has been plotted as a graph of some type, or plotting instructions for rendering such a graph.
+ Graph data
+
+
+ Plot
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A protein sequence and associated metadata.
+ Sequence record (protein)
+
+
+ Protein sequence record
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A nucleic acid sequence and associated metadata.
+ Nucleotide sequence record
+ Sequence record (nucleic acid)
+ DNA sequence record
+ RNA sequence record
+
+
+ Nucleic acid sequence record
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A protein sequence and comprehensive metadata (such as a feature table), typically corresponding to a full entry from a molecular sequence database.
+
+
+ Protein sequence record (full)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A nucleic acid sequence and comprehensive metadata (such as a feature table), typically corresponding to a full entry from a molecular sequence database.
+
+
+ Nucleic acid sequence record (full)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a mathematical model, typically an entry from a database.
+
+
+
+ Biological model accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a type or group of cells.
+
+
+
+ Cell type name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a type or group of cells (catalogued in a database).
+ Cell type ID
+
+
+
+ Cell type accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of chemicals.
+ Chemical compound accession
+ Small molecule accession
+
+
+
+ Compound accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a drug.
+
+
+
+ Drug accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a toxin.
+
+
+
+ Toxin name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a toxin (catalogued in a database).
+
+
+
+ Toxin accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a monosaccharide (catalogued in a database).
+
+
+
+ Monosaccharide accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Common name of a drug.
+
+
+
+ Drug name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of carbohydrates.
+
+
+
+ Carbohydrate accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a specific molecule (catalogued in a database).
+
+
+
+ Molecule accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a data definition (catalogued in a database).
+
+
+
+ Data resource definition accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of a particular genome (in a database).
+
+
+
+ Genome accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of a map of a molecular sequence (deposited in a database).
+
+
+
+ Map accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of lipids.
+
+
+
+ Lipid accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a peptide deposited in a database.
+
+
+
+ Peptide ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a protein deposited in a database.
+ Protein accessions
+
+
+
+ Protein accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of annotation on a (group of) organisms (catalogued in a database).
+
+
+
+ Organism accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:BriefOccurrenceRecord
+ Moby:FirstEpithet
+ Moby:InfraspecificEpithet
+ Moby:OccurrenceRecord
+ Moby:Organism_Name
+ Moby:OrganismsLongName
+ Moby:OrganismsShortName
+ The name of an organism (or group of organisms).
+
+
+
+ Organism name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a protein family (that is deposited in a database).
+
+
+
+ Protein family accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of transcription factors or binding sites.
+
+
+
+ Transcription factor accession
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a strain of an organism variant, typically a plant, virus or bacterium.
+
+
+
+ Strain accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.26
+ true
+ An accession of annotation on a (group of) viruses (catalogued in a database).
+
+
+ Virus identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Metadata on sequence features.
+
+
+ Sequence features metadata
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a Gramene database entry.
+
+
+
+ Gramene identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of an entry from the DDBJ sequence database.
+ DDBJ ID
+ DDBJ accession number
+ DDBJ identifier
+
+
+
+ DDBJ accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of an entity from the ConsensusPathDB database.
+
+
+
+ ConsensusPathDB identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Data concerning, extracted from, or derived from the analysis of molecular sequence(s).
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Sequence data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning codon usage.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Codon usage
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+
+ Data derived from the analysis of a scientific text such as a full text article from a scientific journal.
+
+ Article report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report of information about molecular sequence(s), including basic information (metadata), and reports generated from molecular sequence analysis, including positional features and non-positional properties.
+ Sequence-derived report
+
+
+ Sequence report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the properties or features of one or more protein secondary structures.
+
+
+ Protein secondary structure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A Hopp and Woods plot of predicted antigenicity of a peptide or protein.
+
+
+ Hopp and Woods plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+
+ A melting curve of a double-stranded nucleic acid molecule (DNA or DNA/RNA).
+
+
+ Nucleic acid melting curve
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ A probability profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA).
+
+
+ Nucleic acid probability profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ A temperature profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA).
+
+
+ Nucleic acid temperature profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report typically including a map (diagram) of a gene regulatory network.
+
+
+ Gene regulatory network report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ An informative report on a two-dimensional (2D PAGE) gel.
+
+
+ 2D PAGE gel report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.14
+
+ General annotation on a set of oligonucleotide probes, such as the gene name with which the probe set is associated and which probes belong to the set.
+
+
+ Oligonucleotide probe sets annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An image from a microarray experiment which (typically) allows a visualisation of probe hybridisation and gene-expression data.
+
+ Microarray image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data (typically biological or biomedical) that has been rendered into an image, typically for display on screen.
+ Image data
+
+
+ Image
+ http://semanticscience.org/resource/SIO_000079
+ http://semanticscience.org/resource/SIO_000081
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of a molecular sequence, possibly with sequence features or properties shown.
+
+
+ Sequence image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on protein properties concerning hydropathy.
+ Protein hydropathy report
+
+
+ Protein hydropathy data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning a computational workflow.
+
+ Workflow data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A computational workflow.
+
+ Workflow
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning molecular secondary structure.
+
+ Secondary structure data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possibly in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw protein sequence (string of characters).
+
+
+ Protein sequence (raw)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possibly in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw nucleic acid sequence.
+
+
+ Nucleic acid sequence (raw)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ One or more protein sequences, possibly with associated annotation.
+ Amino acid sequence
+ Amino acid sequences
+ Protein sequences
+
+
+ Protein sequence
+ http://purl.org/biotop/biotop.owl#AminoAcidSequenceInformation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more nucleic acid sequences, possibly with associated annotation.
+ Nucleic acid sequences
+ Nucleotide sequence
+ Nucleotide sequences
+ DNA sequence
+
+
+ Nucleic acid sequence
+ http://purl.org/biotop/biotop.owl#NucleotideSequenceInformation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning a biochemical reaction, typically data and more general annotation on the kinetics of enzyme-catalysed reaction.
+ Enzyme kinetics annotation
+ Reaction annotation
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Reaction data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning small peptides.
+ Peptide data
+
+
+ Peptide property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Was deprecated since 1.5, but not correctly (fully) obsoleted until 1.19.
+ 1.5
+
+
+ An informative report concerning the classification of protein sequences or structures.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Protein classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data concerning specific or conserved pattern in molecular sequences.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Sequence motif data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning models representing a (typically multiple) sequence alignment.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Sequence profile data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning a specific biological pathway or network.
+
+ Pathway or network data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report concerning or derived from the analysis of a biological pathway or network, such as a map (diagram) or annotation.
+
+
+ Pathway or network report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A thermodynamic or kinetic property of a nucleic acid molecule.
+ Nucleic acid property (thermodynamic or kinetic)
+ Nucleic acid thermodynamic property
+
+
+ Nucleic acid thermodynamic data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Was deprecated since 1.5, but not correctly (fully) obsoleted until 1.19.
+ 1.5
+
+
+ Data concerning the classification of nucleic acid sequences or structures.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Nucleic acid classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on a classification of molecular sequences, structures or other entities.
+
+ This can include an entire classification, components such as classifiers, assignments of entities to a classification and so on.
+ Classification report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ key residues involved in protein folding.
+
+
+ Protein features report (key folding sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Geometry data for a protein structure, for example bond lengths, bond angles, torsion angles, chiralities, planarities etc.
+ Torsion angle data
+
+
+ Protein geometry data
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of protein structure.
+ Structure image (protein)
+
+
+ Protein structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Weights for sequence positions or characters in phylogenetic analysis where zero is defined as unweighted.
+
+
+ Phylogenetic character weights
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotation of one particular positional feature on a biomolecular (typically genome) sequence, suitable for import and display in a genome browser.
+ Genome annotation track
+ Genome track
+ Genome-browser track
+ Genomic track
+ Sequence annotation track
+
+
+ Annotation track
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ P43353|Q7M1G0|Q9C199|A5A6J6
+ [OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}
+ Accession number of a UniProt (protein sequence) database entry.
+ UniProt accession number
+ UniProt entry accession
+ UniProtKB accession
+ UniProtKB accession number
+ Swiss-Prot entry accession
+ TrEMBL entry accession
+
+
+
+ UniProt accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 16
+ [1-9][0-9]?
+ Identifier of a genetic code in the NCBI list of genetic codes.
+
+
+
+ NCBI genetic code ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a concept in an ontology of biological or bioinformatics concepts and relations.
+
+
+
+ Ontology concept identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept for a biological process from the GO ontology.
+
+ GO concept name (biological process)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept for a molecular function from the GO ontology.
+
+ GO concept name (molecular function)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the classification, identification and naming of organisms.
+ Taxonomic data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Taxonomy
+
+
+
+
+
+
+
+
+
+ beta13
+ EMBL/GENBANK/DDBJ coding feature protein identifier, issued by International collaborators.
+
+
+
+ This qualifier consists of a stable ID portion (3+5 format with 3 position letters and 5 numbers) plus a version number after the decimal point. When the protein sequence encoded by the CDS changes, only the version number of the /protein_id value is incremented; the stable part of the /protein_id remains unchanged and as a result will permanently be associated with a given protein; this qualifier is valid only on CDS features which translate into a valid protein.
+ Protein ID (EMBL/GenBank/DDBJ)
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+ A type of data that (typically) corresponds to entries from the primary biological databases and which is (typically) the primary input or output of a tool, i.e. the data the tool processes or generates, as distinct from metadata and identifiers which describe and identify such core data, parameters that control the behaviour of tools, reports of derivative data generated by tools and annotation.
+
+
+ Core data entities typically have a format and may be identified by an accession number.
+ Core data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Name or other identifier of molecular sequence feature(s).
+
+
+
+ Sequence feature identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ An identifier of a molecular tertiary structure, typically an entry from a structure database.
+
+
+
+ Structure identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ An identifier of an array of numerical values, such as a comparison matrix.
+
+
+
+ Matrix identifier
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ A report (typically a table) on character or word composition / frequency of protein sequence(s).
+
+
+ Protein sequence composition
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ A report (typically a table) on character or word composition / frequency of nucleic acid sequence(s).
+
+
+ Nucleic acid sequence composition (report)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A node from a classification of protein structural domain(s).
+
+ Protein domain classification node
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Duplicates http://edamontology.org/data_1002, hence deprecated.
+ 1.23
+
+ Unique numerical identifier of chemicals in the scientific literature, as assigned by the Chemical Abstracts Service.
+
+
+ CAS number
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+ Unique identifier of a drug conforming to the Anatomical Therapeutic Chemical (ATC) Classification System, a drug classification system controlled by the WHO Collaborating Centre for Drug Statistics Methodology (WHOCC).
+
+
+
+ ATC code
+
+
+
+
+
+
+
+
+ beta13
+ A unique, unambiguous, alphanumeric identifier of a chemical substance as catalogued by the Substance Registration System of the Food and Drug Administration (FDA).
+ Unique Ingredient Identifier
+
+
+
+ UNII
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Basic information concerning geographical location or time.
+
+ Geotemporal metadata
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Metadata concerning the software, hardware or other aspects of a computer system.
+
+
+ System metadata
+
+
+
+
+
+
+
+
+ beta13
+ 1.15
+
+ A name of a sequence feature, e.g. the name of a feature to be displayed to an end-user.
+
+
+ Sequence feature name
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Raw data such as measurements or other results from laboratory experiments, as generated from laboratory hardware.
+ Experimental measurement data
+ Experimentally measured data
+ Measured data
+ Measurement
+ Measurement data
+ Measurement metadata
+ Raw experimental data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Experimental measurement
+
+
+
+
+
+
+
+
+
+ beta13
+ Raw data (typically MIAME-compliant) for hybridisations from a microarray experiment.
+
+
+ Such data as found in Affymetrix CEL or GPR files.
+ Raw microarray data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ Data generated from processing and analysis of probe set data from a microarray experiment.
+ Gene annotation (expression)
+ Gene expression report
+ Microarray probe set data
+
+
+ Such data as found in Affymetrix .CHP files or data from other software such as RMA or dChip.
+ Processed microarray data
+
+
+
+
+
+
+
+
+
+ beta13
+ The final processed (normalised) data for a set of hybridisations in a microarray experiment.
+ Gene expression data matrix
+ Normalised microarray data
+
+
+ This combines data from all hybridisations.
+ Gene expression matrix
+
+
+
+
+
+
+
+
+ beta13
+ Annotation on a biological sample, for example experimental factors and their values.
+
+
+ This might include compound and dose in a dose response experiment.
+ Sample annotation
+
+
+
+
+
+
+
+
+ beta13
+ Annotation on the array itself used in a microarray experiment.
+
+
+ This might include gene identifiers, genomic coordinates, probe oligonucleotide sequences etc.
+ Microarray metadata
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ Annotation on laboratory and/or data processing protocols used in a microarray experiment.
+
+
+ This might describe e.g. the normalisation methods used to process the raw data.
+ Microarray protocol annotation
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Data concerning the hybridisations measured during a microarray experiment.
+
+
+ Microarray hybridisation data
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report of regions in a molecular sequence that are biased to certain characters.
+
+ Sequence features (compositionally-biased regions)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+ A report on features in a nucleic acid sequence that indicate changes to or differences between sequences.
+
+
+ Nucleic acid features (difference and change)
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+ The report may be based on analysis of nucleic acid sequence or structural data, or any annotation or information about specific nucleic acid 3D structure(s) or such structures in general.
+ A human-readable collection of information about regions within a nucleic acid sequence which form secondary or tertiary (3D) structures.
+ Nucleic acid features (structure)
+ Quadruplexes (report)
+ Stem loop (report)
+ d-loop (report)
+
+
+ Nucleic acid structure report
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ short repetitive subsequences (repeat sequences) in a protein sequence.
+
+
+ Protein features report (repeats)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ Report on the location of matches to profiles, motifs (conserved or functional patterns) or other signatures in one or more protein sequences.
+
+
+ Sequence motif matches (protein)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ Report on the location of matches to profiles, motifs (conserved or functional patterns) or other signatures in one or more nucleic acid sequences.
+
+
+ Sequence motif matches (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report on displacement loops in a mitochondrial DNA sequence.
+
+ A displacement loop is a region of mitochondrial DNA in which one of the strands is displaced by an RNA molecule.
+ Nucleic acid features (d-loop)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report on stem loops in a DNA sequence.
+
+ A stem loop is a hairpin structure; a double-helical structure formed when two complementary regions of a single strand of RNA or DNA molecule form base-pairs.
+ Nucleic acid features (stem loop)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ An informative report on features of messenger RNA (mRNA) molecules including precursor RNA, primary (unprocessed) transcript and fully processed molecules. This includes reports on a specific gene transcript, clone or EST.
+ Clone or EST (report)
+ Gene transcript annotation
+ Nucleic acid features (mRNA features)
+ Transcript (report)
+ mRNA (report)
+ mRNA features
+
+
+ This includes 5'untranslated region (5'UTR), coding sequences (CDS), exons, intervening sequences (intron) and 3'untranslated regions (3'UTR).
+ Gene transcript report
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ features of non-coding or functional RNA molecules, including tRNA and rRNA.
+
+
+ Non-coding RNA
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Features concerning transcription of DNA into RNA including the regulation of transcription.
+
+ This includes promoters, CAAT signals, TATA signals, -35 signals, -10 signals, GC signals, primer binding sites for initiation of transcription or reverse transcription, enhancer, attenuator, terminators and ribosome binding sites.
+ Transcriptional features (report)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report on predicted or actual immunoglobulin gene structure including constant, switch and variable regions and diversity, joining and variable segments.
+
+ Nucleic acid features (immunoglobulin gene structure)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'class' node from the SCOP database.
+
+ SCOP class
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'fold' node from the SCOP database.
+
+ SCOP fold
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'superfamily' node from the SCOP database.
+
+ SCOP superfamily
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'family' node from the SCOP database.
+
+ SCOP family
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'protein' node from the SCOP database.
+
+ SCOP protein
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'species' node from the SCOP database.
+
+ SCOP species
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ mass spectrometry experiments.
+
+
+ Mass spectrometry experiment
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Nucleic acid classification
+ A human-readable collection of information about a particular family of genes, typically a set of genes with similar sequence that originate from duplication of a common ancestor gene, or any other classification of nucleic acid sequences or structures that reflects gene structure.
+ Gene annotation (homology information)
+ Gene annotation (homology)
+ Gene family annotation
+ Gene homology (report)
+ Homology information
+
+
+ This includes reports on gene homologues between species.
+ Gene family report
+
+
+
+
+
+
+
+
+ beta13
+ An image of a protein.
+
+
+ Protein image
+
+
+
+
+
+
+
+
+ beta13
+ 1.24
+
+
+
+
+ An alignment of protein sequences and/or structures.
+
+ Protein alignment
+ true
+
+
+
+
+
+
+
+
+ 1.0
+ 1.8
+
+ sequencing experiment, including samples, sampling, preparation, sequencing, and analysis.
+
+
+ NGS experiment
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ An informative report about a DNA sequence assembly.
+ Assembly report
+
+
+ This might include an overall quality assessment of the assembly and summary statistics including counts, average length and number of bases for reads, matches and non-matches, contigs, reads in pairs etc.
+ Sequence assembly report
+
+
+
+
+
+
+
+
+ 1.1
+ An index of a genome sequence.
+
+
+ Many sequence alignment tasks involving many or very large sequences rely on a precomputed index of the sequence to accelerate the alignment.
+ Genome index
+
+
+
+
+
+
+
+
+ 1.1
+ 1.8
+
+ Report concerning genome-wide association study experiments.
+
+
+ GWAS report
+ true
+
+
+
+
+
+
+
+
+ 1.2
+ The position of a cytogenetic band in a genome.
+
+
+ Information might include start and end position in a chromosome sequence, chromosome identifier, name of band and so on.
+ Cytoband position
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ CL_[0-9]{7}
+ Cell type ontology concept ID.
+ CL ID
+
+
+
+ Cell type ontology ID
+
+
+
+
+
+
+
+
+ 1.2
+ Mathematical model of a network, that contains biochemical kinetics.
+
+
+ Kinetic model
+
+
+
+
+
+
+
+
+
+ 1.3
+ Identifier of a COSMIC database entry.
+ COSMIC identifier
+
+
+
+ COSMIC ID
+
+
+
+
+
+
+
+
+
+ 1.3
+ Identifier of a HGMD database entry.
+ HGMD identifier
+
+
+
+ HGMD ID
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Unique identifier of sequence assembly.
+ Sequence assembly version
+
+
+
+ Sequence assembly ID
+
+
+
+
+
+
+
+
+ 1.3
+ 1.5
+
+
+ A label (text token) describing a type of sequence feature such as gene, transcript, cds, exon, repeat, simple, misc, variation, somatic variation, structural variation, somatic structural variation, constrained or regulatory.
+
+ Sequence feature type
+ true
+
+
+
+
+
+
+
+
+ 1.3
+ 1.5
+
+
+ An informative report on gene homologues between species.
+
+ Gene homology (report)
+ true
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ ENSGT00390000003602
+ Unique identifier for a gene tree from the Ensembl database.
+ Ensembl ID (gene tree)
+
+
+
+ Ensembl gene tree ID
+
+
+
+
+
+
+
+
+ 1.3
+ A phylogenetic tree that is an estimate of the character's phylogeny.
+
+
+ Gene tree
+
+
+
+
+
+
+
+
+ 1.3
+ A phylogenetic tree that reflects phylogeny of the taxa from which the characters (used in calculating the tree) were sampled.
+
+
+ Species tree
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Name or other identifier of an entry from a biosample database.
+ Sample accession
+
+
+
+ Sample ID
+
+
+
+
+
+
+
+
+
+ 1.3
+ Identifier of an object from the MGI database.
+
+
+
+ MGI accession
+
+
+
+
+
+
+
+
+ 1.3
+ Name of a phenotype.
+ Phenotype
+ Phenotypes
+
+
+
+ Phenotype name
+
+
+
+
+
+
+
+
+ 1.4
+ A HMM transition matrix contains the probabilities of switching from one HMM state to another.
+ HMM transition matrix
+
+
+ Consider for example an HMM with two states (AT-rich and GC-rich). The transition matrix will hold the probabilities of switching from the AT-rich to the GC-rich state, and vice versa.
+ Transition matrix
+
+
+
+
+
+
+
+
+ 1.4
+ A HMM emission matrix holds the probabilities of choosing the four nucleotides (A, C, G and T) in each of the states of a HMM.
+ HMM emission matrix
+
+
+ Consider for example an HMM with two states (AT-rich and GC-rich). The emission matrix holds the probabilities of choosing each of the four nucleotides (A, C, G and T) in the AT-rich state and in the GC-rich state.
+ Emission matrix
+
+
+
+
+
+
+
+
+ 1.4
+ 1.15
+
+ A statistical Markov model of a system which is assumed to be a Markov process with unobserved (hidden) states.
+
+
+ Hidden Markov model
+ true
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ An identifier of a data format.
+
+
+ Format identifier
+
+
+
+
+
+
+
+
+ 1.5
+ Raw biological or biomedical image generated by some experimental technique.
+
+
+ Raw image
+ http://semanticscience.org/resource/SIO_000081
+
+
+
+
+
+
+
+
+ 1.5
+ Data concerning the intrinsic physical (e.g. structural) or chemical properties of one, more or all carbohydrates.
+ Carbohydrate data
+
+
+ Carbohydrate property
+
+
+
+
+
+
+
+
+ 1.5
+ 1.8
+
+ Report concerning proteomics experiments.
+
+
+ Proteomics experiment report
+ true
+
+
+
+
+
+
+
+
+ 1.5
+ 1.8
+
+ RNAi experiments.
+
+
+ RNAi report
+ true
+
+
+
+
+
+
+
+
+ 1.5
+ 1.8
+
+ biological computational model experiments (simulation), for example the minimum information required in order to permit its correct interpretation and reproduction.
+
+
+ Simulation experiment report
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ An imaging technique that uses magnetic fields and radiowaves to form images, typically to investigate the anatomy and physiology of the human body.
+ MRT image
+ Magnetic resonance imaging image
+ Magnetic resonance tomography image
+ NMRI image
+ Nuclear magnetic resonance imaging image
+
+
+ MRI image
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ An image from a cell migration track assay.
+
+
+ Cell migration track image
+
+
+
+
+
+
+
+
+ 1.7
+ Rate of association of a protein with another protein or some other molecule.
+ kon
+
+
+ Rate of association
+
+
+
+
+
+
+
+
+ 1.7
+ Multiple gene identifiers in a specific order.
+
+
+ Such data are often used for genome rearrangement tools and phylogenetic tree labeling.
+ Gene order
+
+
+
+
+
+
+
+
+ 1.7
+ The spectrum of frequencies of electromagnetic radiation emitted from a molecule as a result of some spectroscopy experiment.
+ Spectra
+
+
+ Spectrum
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Spectral information for a molecule from a nuclear magnetic resonance experiment.
+ NMR spectra
+
+
+ NMR spectrum
+
+
+
+
+
+
+
+
+ 1.8
+ 1.21
+
+ A sketch of a small molecule made with some specialised drawing package.
+
+
+ Chemical structure sketches are used for presentational purposes but also as inputs to various analysis software.
+ Chemical structure sketch
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ An informative report about a specific or conserved nucleic acid sequence pattern.
+
+
+ Nucleic acid signature
+
+
+
+
+
+
+
+
+ 1.8
+ A DNA sequence.
+ DNA sequences
+
+
+ DNA sequence
+
+
+
+
+
+
+
+
+ 1.8
+ An RNA sequence.
+ RNA sequences
+
+
+ RNA sequence
+
+
+
+
+
+
+
+
+ 1.8
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possibly in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw RNA sequence.
+
+
+ RNA sequence (raw)
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possibly in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw DNA sequence.
+
+
+ DNA sequence (raw)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ Data on gene sequence variations resulting from large-scale genotyping and DNA sequencing projects.
+ Gene sequence variations
+
+
+ Variations are stored along with a reference genome.
+ Sequence variations
+
+
+
+
+
+
+
+
+ 1.8
+ A list of publications such as scientific papers or books.
+
+
+ Bibliography
+
+
+
+
+
+
+
+
+ 1.8
+ A mapping of supplied textual terms or phrases to ontology concepts (URIs).
+
+
+ Ontology mapping
+
+
+
+
+
+
+
+
+ 1.9
+ Any data concerning a specific biological or biomedical image.
+ Image-associated data
+ Image-related data
+
+
+ This can include basic provenance and technical information about the image, scientific annotation and so on.
+ Image metadata
+
+
+
+
+
+
+
+
+ 1.9
+ A human-readable collection of information concerning a clinical trial.
+ Clinical trial information
+
+
+ Clinical trial report
+
+
+
+
+
+
+
+
+ 1.10
+ A report about a biosample.
+ Biosample report
+
+
+ Reference sample report
+
+
+
+
+
+
+
+
+ 1.10
+ Accession number of an entry from the Gene Expression Atlas.
+
+
+
+ Gene Expression Atlas Experiment ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ true
+ Identifier of an entry from a database of disease.
+
+
+
+ Disease identifier
+
+
+
+
+
+
+
+
+
+ 1.12
+ The name of some disease.
+
+
+
+ Disease name
+
+
+
+
+
+
+
+
+ 1.12
+ Some material that is used for educational (training) purposes.
+ OER
+ Open educational resource
+
+
+ Training material
+
+
+
+
+
+
+
+
+ 1.12
+ A training course available for use on the Web.
+ On-line course
+ MOOC
+ Massive open online course
+
+
+ Online course
+
+
+
+
+
+
+
+
+ 1.12
+ Any free or plain text, typically for human consumption and in English. Can instantiate also as a textual search query.
+ Free text
+ Plain text
+ Textual search query
+
+
+ Text
+
+
+
+
+
+
+
+
+
+ 1.14
+ Machine-readable biodiversity data.
+ Biodiversity information
+ OTU table
+
+
+ Biodiversity data
+
+
+
+
+
+
+
+
+ 1.14
+ A human-readable collection of information concerning biosafety data.
+ Biosafety information
+
+
+ Biosafety report
+
+
+
+
+
+
+
+
+ 1.14
+ A report about any kind of isolation of biological material.
+ Geographic location
+ Isolation source
+
+
+ Isolation report
+
+
+
+
+
+
+
+
+ 1.14
+ Information about the ability of an organism to cause disease in a corresponding host.
+ Pathogenicity
+
+
+ Pathogenicity report
+
+
+
+
+
+
+
+
+ 1.14
+ Information about the biosafety classification of an organism according to corresponding law.
+ Biosafety level
+
+
+ Biosafety classification
+
+
+
+
+
+
+
+
+ 1.14
+ A report about localisation of the isolation of biological material e.g. country or coordinates.
+
+
+ Geographic location
+
+
+
+
+
+
+
+
+ 1.14
+ A report about any kind of isolation source of biological material e.g. blood, water, soil.
+
+
+ Isolation source
+
+
+
+
+
+
+
+
+ 1.14
+ Experimentally determined parameter of the physiology of an organism, e.g. substrate spectrum.
+
+
+ Physiology parameter
+
+
+
+
+
+
+
+
+ 1.14
+ Experimentally determined parameter of the morphology of an organism, e.g. size & shape.
+
+
+ Morphology parameter
+
+
+
+
+
+
+
+
+ 1.14
+ Experimentally determined parameter for the cultivation of an organism.
+ Cultivation conditions
+ Carbon source
+ Culture media composition
+ Nitrogen source
+ Salinity
+ Temperature
+ pH value
+
+
+ Cultivation parameter
+
+
+
+
+
+
+
+
+ 1.15
+ Data concerning a sequencing experiment, that may be specified as an input to some tool.
+
+
+ Sequencing metadata name
+
+
+
+
+
+
+
+
+ 1.15
+ An identifier of a flow cell of a sequencing machine.
+
+
+ A flow cell is used to immobilise, amplify and sequence millions of molecules at once. In Illumina machines, a flowcell is composed of 8 "lanes" which allows 8 experiments in a single analysis.
+ Flow cell identifier
+
+
+
+
+
+
+
+
+ 1.15
+ An identifier of a lane within a flow cell of a sequencing machine, within which millions of sequences are immobilised, amplified and sequenced.
+
+
+ Lane identifier
+
+
+
+
+
+
+
+
+ 1.15
+ A number corresponding to the number of an analysis performed by a sequencing machine. For example, if it's the 13th analysis, the run is 13.
+
+
+ Run number
+
+
+
+
+
+
+
+
+ 1.15
+ Data concerning ecology; for example measurements and reports from the study of interactions among organisms and their environment.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Ecological data
+
+
+
+
+
+
+
+
+ 1.15
+ The mean species diversity in sites or habitats at a local scale.
+ α-diversity
+
+
+ Alpha diversity data
+
+
+
+
+
+
+
+
+ 1.15
+ The ratio between regional and local species diversity.
+ True beta diversity
+ β-diversity
+
+
+ Beta diversity data
+
+
+
+
+
+
+
+
+ 1.15
+ The total species diversity in a landscape.
+ ɣ-diversity
+
+
+ Gamma diversity data
+
+
+
+
+
+
+
+
+
+ 1.15
+ A plot in which community data (e.g. species abundance data) is summarised. Similar species and samples are plotted close together, and dissimilar species and samples are plotted far apart.
+
+
+ Ordination plot
+
+
+
+
+
+
+
+
+ 1.16
+ A ranked list of categories (usually ontology concepts), each associated with a statistical metric of over-/under-representation within the studied data.
+ Enrichment report
+ Over-representation report
+ Functional enrichment report
+
+
+ Over-representation data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ GO-term report
+ A ranked list of Gene Ontology concepts, each associated with a p-value, concerning or derived from the analysis of e.g. a set of genes or proteins.
+ GO-term enrichment report
+ Gene ontology concept over-representation report
+ Gene ontology enrichment report
+ Gene ontology term enrichment report
+
+
+ GO-term enrichment data
+
+
+
+
+
+
+
+
+ 1.16
+ Score for localization of one or more post-translational modifications in peptide sequence measured by mass spectrometry.
+ False localisation rate
+ PTM localisation
+ PTM score
+
+
+ Localisation score
+
+
+
+
+
+
+
+
+
+ 1.16
+ Identifier of a protein modification catalogued in the Unimod database.
+
+
+
+ Unimod ID
+
+
+
+
+
+
+
+
+ 1.16
+ Identifier for mass spectrometry proteomics data in the proteomexchange.org repository.
+
+
+
+ ProteomeXchange ID
+
+
+
+
+
+
+
+
+ 1.16
+ Groupings of expression profiles according to a clustering algorithm.
+ Clustered gene expression profiles
+
+
+ Clustered expression profiles
+
+
+
+
+
+
+
+
+
+ 1.16
+ An identifier of a concept from the BRENDA ontology.
+
+
+
+ BRENDA ontology concept ID
+
+
+
+
+
+
+
+
+
+ 1.16
+ A text (such as a scientific article), annotated with notes, data and metadata, such as recognised entities, concepts, and their relations.
+
+
+ Annotated text
+
+
+
+
+
+
+
+
+ 1.16
+ A structured query, in form of a script, that defines a database search task.
+
+
+ Query script
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Structural 3D model (volume map) from electron microscopy.
+
+
+ 3D EM Map
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Annotation on a structural 3D EM Map from electron microscopy. This might include one or several locations in the map of the known features of a particular macromolecule.
+
+
+ 3D EM Mask
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Raw DDD movie acquisition from electron microscopy.
+
+
+ EM Movie
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Raw acquisition from electron microscopy or average of an aligned DDD movie.
+
+
+ EM Micrograph
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.21
+ Data coming from molecular simulations, computer "experiments" on model molecules.
+
+
+ Typically formed by two separated but indivisible pieces of information: topology data (static) and trajectory data (dynamic).
+ Molecular simulation data
+
+
+
+
+
+
+
+
+
+ 1.21
+ Identifier of an entry from the RNA central database of annotated human miRNAs.
+
+
+
+ There are canonical and taxon-specific forms of RNAcentral ID. Canonical form e.g. urs_9or10digits identifies an RNA sequence (within the RNA central database) which may appear in multiple sequences. Taxon-specific form identifies a sequence in the specific taxon (e.g. urs_9or10digits_taxonID).
+ RNA central ID
+
+
+
+
+
+
+
+
+ 1.21
+ A human-readable systematic collection of patient (or population) health information in a digital format.
+ EHR
+ EMR
+ Electronic medical record
+
+
+ Electronic health record
+
+
+
+
+
+
+
+
+ 1.22
+ Data coming from molecular simulations, computer "experiments" on model molecules. Typically formed by two separated but indivisible pieces of information: topology data (static) and trajectory data (dynamic).
+
+
+ Simulation
+
+
+
+
+
+
+
+
+ 1.22
+ Dynamic information of a structure molecular system coming from a molecular simulation: XYZ 3D coordinates (sometimes with their associated velocities) for every atom along time.
+
+
+ Trajectory data
+
+
+
+
+
+
+
+
+ 1.22
+ Force field parameters: charges, masses, radii, bond lengths, bond dihedrals, etc. define the structural molecular system, and are essential for the proper description and simulation of a molecular system.
+
+
+ Forcefield parameters
+
+
+
+
+
+
+
+
+ 1.22
+ Static information of a structure molecular system that is needed for a molecular simulation: the list of atoms, their non-bonded parameters for Van der Waals and electrostatic interactions, and the complete connectivity in terms of bonds, angles and dihedrals.
+
+
+ Topology data
+
+
+
+
+
+
+
+
+ 1.22
+ Visualization of distribution of quantitative data, e.g. expression data, by histograms, violin plots and density plots.
+ Density plot
+
+
+ Histogram
+
+
+
+
+
+
+
+
+ 1.23
+ Report of the quality control review that was made of factors involved in a procedure.
+ QC metrics
+ QC report
+ Quality control metrics
+ Quality control report
+
+
+
+
+
+
+
+
+ 1.23
+ A table of unnormalized values representing summarised read counts per genomic region (e.g. gene, transcript, peak).
+ Read count matrix
+
+
+ Count matrix
+
+
+
+
+
+
+
+
+ 1.24
+ Alignment (superimposition) of DNA tertiary (3D) structures.
+ Structure alignment (DNA)
+
+
+ DNA structure alignment
+
+
+
+
+
+
+
+
+ 1.24
+ A score derived from the P-value to ensure correction for multiple tests. The Q-value provides an estimate of the positive False Discovery Rate (pFDR), i.e. the rate of false positives among all the cases reported positive: pFDR = FP / (FP + TP).
+ Adjusted P-value
+ FDR
+ Padj
+ pFDR
+
+
+ Q-values are widely used in high-throughput data analysis (e.g. detection of differentially expressed genes from transcriptome data).
+ Q-value
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A profile HMM is a variant of a Hidden Markov model that is derived specifically from a set of (aligned) biological sequences. Profile HMMs provide the basis for a position-specific scoring system, which can be used to align sequences and search databases for related sequences.
+
+
+ Profile HMM
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+
+ WP[0-9]+
+ Identifier of a pathway from the WikiPathways pathway database.
+ WikiPathways ID
+ WikiPathways pathway ID
+
+
+
+ Pathway ID (WikiPathways)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A ranked list of pathways, each associated with z-score, p-value or similar, concerning or derived from the analysis of e.g. a set of genes or proteins.
+ Pathway analysis results
+ Pathway enrichment report
+ Pathway over-representation report
+ Pathway report
+ Pathway term enrichment report
+
+
+ Pathway overrepresentation data
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ \d{4}-\d{4}-\d{4}-\d{3}(\d|X)
+ Identifier of a researcher registered with the ORCID database. Used to identify author IDs.
+
+
+
+
+ ORCID Identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ Chemical structure specified in Simplified Molecular Input Line Entry System (SMILES) line notation.
+
+
+ SMILES
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Chemical structure specified in IUPAC International Chemical Identifier (InChI) line notation.
+
+
+ InChI
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Chemical structure specified by Molecular Formula (MF), including a count of each element in a compound.
+
+
+ The general MF query format consists of a series of valid atomic symbols, with an optional number or range.
+ mf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The InChIKey (hashed InChI) is a fixed length (25 character) condensed digital representation of an InChI chemical structure specification. It uniquely identifies a chemical compound.
+
+
+ An InChIKey identifier is not human- nor machine-readable but is more suitable for web searches than an InChI chemical structure specification.
+ InChIKey
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SMILES ARbitrary Target Specification (SMARTS) format for chemical structure specification, which is a subset of the SMILES line notation.
+
+
+ smarts
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ Non-sequence characters may be used for example for gaps.
+ nucleotide
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#Nucleotide_sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a protein sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ Non-sequence characters may be used for gaps and translation stop.
+ protein
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#Amino_acid_sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for the consensus of two or more molecular sequences.
+
+
+ consensus
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure nucleotide
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence (characters ACGTU only) with possible unknown positions but without ambiguity or non-sequence characters .
+
+
+ unambiguous pure nucleotide
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ dna
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#DNA_sequence
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ rna
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#RNA_sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence (characters ACGT only) with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure dna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure dna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence (characters ACGU only) with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure rna sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure rna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for any protein sequence with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure protein
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for any protein sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure protein
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from UniGene.
+
+ A UniGene entry includes a set of transcript sequences assigned to the same transcription locus (gene or expressed pseudogene), with information on protein similarities, gene expression, cDNA clone reagents, and genomic location.
+ UniGene entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the COG database of clusters of (related) protein sequences.
+
+ COG sequence cluster format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for sequence positions (feature location) as used in DDBJ/EMBL/GenBank database.
+ Feature location
+
+
+ EMBL feature location
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for tandem repeats in a nucleotide sequence (format generated by the Sanger Centre quicktandem program).
+
+
+ quicktandem
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for inverted repeats in a nucleotide sequence (format generated by the Sanger Centre inverted program).
+
+
+ Sanger inverted repeats
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for tandem repeats in a sequence (an EMBOSS report format).
+
+
+ EMBOSS repeat
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a report on exon-intron structure generated by EMBOSS est2genome.
+
+
+ est2genome format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for restriction enzyme recognition sites used by EMBOSS restrict program.
+
+
+ restrict format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for restriction enzyme recognition sites used by EMBOSS restover program.
+
+
+ restover format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for restriction enzyme recognition sites used by REBASE database.
+
+
+ REBASE restriction sites
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using FASTA.
+
+
+ This includes (typically) score data, alignment data and a histogram (of observed and expected distribution of E values.)
+ FASTA search results format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using some variant of BLAST.
+
+
+ This includes score data, alignment data and summary table.
+ BLAST results
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using some variant of MSPCrunch.
+
+
+ mspcrunch
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using some variant of Smith Waterman.
+
+
+ Smith-Waterman format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of EMBASSY domain hits file (DHF) of hits (sequences) with domain classification information.
+
+
+ The hits are relatives to a SCOP or CATH family and are found from a search of a sequence database.
+ dhf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of EMBASSY ligand hits file (LHF) of database hits (sequences) with ligand classification information.
+
+
+ The hits are putative ligand-binding sequences and are found from a search of a sequence database.
+ lhf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Results format for searches of the InterPro database.
+
+
+ InterPro hits format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a search of the InterPro database showing matches of query protein sequence(s) to InterPro entries.
+
+
+ The report includes a classification of regions in a query protein sequence which are assigned to a known InterPro protein family or group.
+ InterPro protein view report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a search of the InterPro database showing matches between protein sequence(s) and signatures for an InterPro entry.
+
+
+ The table presents matches between query proteins (rows) and signature methods (columns) for this entry. Alternatively the sequence(s) might be from from the InterPro entry itself. The match position in the protein sequence and match status (true positive, false positive etc) are indicated.
+ InterPro match table format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dirichlet distribution HMMER format.
+
+
+ HMMER Dirichlet prior
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dirichlet distribution MEME format.
+
+
+ MEME Dirichlet prior
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a report from the HMMER package on the emission and transition counts of a hidden Markov model.
+
+
+ HMMER emission and transition
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a regular expression pattern from the Prosite database.
+
+
+ prosite-pattern
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of an EMBOSS sequence pattern.
+
+
+ EMBOSS sequence pattern
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A motif in the format generated by the MEME program.
+
+
+ meme-motif
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence profile (sequence classifier) format used in the PROSITE database.
+
+
+ prosite-profile
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (sequence classifier) in the format used in the JASPAR database.
+
+
+ JASPAR format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of the model of random sequences used by MEME.
+
+
+ MEME background Markov model
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a hidden Markov model representation used by the HMMER package.
+
+
+ HMMER format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA-style format for multiple sequences aligned by HMMER package to an HMM.
+
+
+ HMMER-aln
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of multiple sequences aligned by DIALIGN package.
+
+
+ DIALIGN format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBASSY 'domain alignment file' (DAF) format, containing a sequence alignment of protein domains belonging to the same SCOP or CATH family.
+
+
+ The format is clustal-like and includes annotation of domain family classification information.
+ daf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for alignment of molecular sequences to MEME profiles (position-dependent scoring matrices) as generated by the MAST tool from the MEME package.
+
+
+ Sequence-MEME profile alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used by the HMMER package for an alignment of a sequence against a hidden Markov model database.
+
+
+ HMMER profile alignment (sequences versus HMMs)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used by the HMMER package for of an alignment of a hidden Markov model against a sequence database.
+
+
+ HMMER profile alignment (HMM versus sequences)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of PHYLIP phylogenetic distance matrix data.
+
+
+ Data Type must include the distance matrix, probably as pairs of sequence identifiers with a distance (integer or float).
+ Phylip distance matrix
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dendrogram (tree file) format generated by ClustalW.
+
+
+ ClustalW dendrogram
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw data file format used by Phylip from which a phylogenetic tree is directly generated or plotted.
+
+
+ Phylip tree raw
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PHYLIP file format for continuous quantitative character data.
+
+
+ Phylip continuous quantitative characters
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of phylogenetic property data.
+
+ Phylogenetic property values format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PHYLIP file format for phylogenetics character frequency data.
+
+
+ Phylip character frequencies format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of PHYLIP discrete states data.
+
+
+ Phylip discrete states format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of PHYLIP cliques data.
+
+
+ Phylip cliques format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree data format used by the PHYLIP program.
+
+
+ Phylip tree format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of an entry from the TreeBASE database of phylogenetic data.
+
+
+ TreeBASE format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of an entry from the TreeFam database of phylogenetic data.
+
+
+ TreeFam format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for distances, such as Branch Score distance, between two or more phylogenetic trees as used by the Phylip package.
+
+
+ Phylip tree distance format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of an entry from the DSSP database (Dictionary of Secondary Structure in Proteins).
+
+
+ The DSSP database is built using the DSSP application which defines secondary structure, geometrical features and solvent exposure of proteins, given atomic coordinates in PDB format.
+ dssp
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of the HSSP database (Homology-derived Secondary Structure in Proteins).
+
+
+ hssp
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of RNA secondary structure in dot-bracket notation, originally generated by the Vienna RNA package/server.
+ Vienna RNA format
+ Vienna RNA secondary structure format
+
+
+ Dot-bracket format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of local RNA secondary structure components with free energy values, generated by the Vienna RNA package/server.
+
+
+ Vienna local RNA secondary structure format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of an entry (or part of an entry) from the PDB database.
+ PDB entry format
+
+
+ PDB database entry format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of PDB database in PDB format.
+ PDB format
+
+
+ PDB
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of PDB database in mmCIF format.
+
+
+ mmCIF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of PDB database in PDBML (XML) format.
+
+
+ PDBML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Format of a matrix of 3D-1D scores used by the EMBOSS Domainatrix applications.
+
+
+ Domainatrix 3D-1D scoring matrix format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Amino acid index format used by the AAindex database.
+
+
+ aaindex
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from IntEnz (The Integrated Relational Enzyme Database).
+
+ IntEnz is the master copy of the Enzyme Nomenclature, the recommendations of the NC-IUBMB on the Nomenclature and Classification of Enzyme-Catalysed Reactions.
+ IntEnz enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the BRENDA enzyme database.
+
+ BRENDA enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the KEGG REACTION database of biochemical reactions.
+
+ KEGG REACTION enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the KEGG ENZYME database.
+
+ KEGG ENZYME enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the proto section of the REBASE enzyme database.
+
+ REBASE proto enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the withrefm section of the REBASE enzyme database.
+
+ REBASE withrefm enzyme report format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of output of the Pcons Model Quality Assessment Program (MQAP).
+
+
+ Pcons ranks protein models by assessing their quality based on the occurrence of recurring common three-dimensional structural patterns. Pcons returns a score reflecting the overall global quality and a score for each individual residue in the protein reflecting the local residue quality.
+ Pcons report format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of output of the ProQ protein model quality predictor.
+
+
+ ProQ is a neural network-based predictor that predicts the quality of a protein model based on the number of structural features.
+ ProQ report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of SMART domain assignment data.
+
+ The SMART output file includes data on genetically mobile domains / analysis of domain architectures, including phyletic distributions, functional class, tertiary structures and functionally important residues.
+ SMART domain assignment report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the BIND database of protein interaction.
+
+ BIND entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the IntAct database of protein interaction.
+
+ IntAct entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the InterPro database of protein signatures (sequence classifiers) and classified sequences.
+
+ This includes signature metadata, sequence references and a reference to the signature itself. There is normally a header (entry accession numbers and name), abstract, taxonomy information, example proteins etc. Each entry also includes a match list which give a number of different views of the signature matches for the sequences in each InterPro entry.
+ InterPro entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the textual abstract of signatures in an InterPro entry and its protein matches.
+
+ References are included and a functional inference is made where possible.
+ InterPro entry abstract format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Gene3D protein secondary database.
+
+ Gene3D entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the PIRSF protein secondary database.
+
+ PIRSF entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the PRINTS protein secondary database.
+
+ PRINTS entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Panther library of protein families and subfamilies.
+
+ Panther Families and HMMs entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Pfam protein secondary database.
+
+ Pfam entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the SMART protein secondary database.
+
+ SMART entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Superfamily protein secondary database.
+
+ Superfamily entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the TIGRFam protein secondary database.
+
+ TIGRFam entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the ProDom protein domain classification database.
+
+ ProDom entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the FSSP database.
+
+ FSSP entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report format for the kinetics of enzyme-catalysed reaction(s) in a format generated by EMBOSS findkm. This includes Michaelis Menten plot, Hanes Woolf plot, Michaelis Menten constant (Km) and maximum velocity (Vmax).
+
+
+ findkm
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of Ensembl genome database.
+
+ Ensembl gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of DictyBase genome database.
+
+ DictyBase gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of Candida Genome database.
+
+ CGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of DragonDB genome database.
+
+ DragonDB gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of EcoCyc genome database.
+
+ EcoCyc gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of FlyBase genome database.
+
+ FlyBase gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of Gramene genome database.
+
+ Gramene gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of KEGG GENES genome database.
+
+ KEGG GENES gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Maize genetics and genomics database (MaizeGDB).
+
+ MaizeGDB gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Mouse Genome Database (MGD).
+
+ MGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Rat Genome Database (RGD).
+
+ RGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Saccharomyces Genome Database (SGD).
+
+ SGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Sanger GeneDB genome database.
+
+ GeneDB gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of The Arabidopsis Information Resource (TAIR) genome database.
+
+ TAIR gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the WormBase genomes database.
+
+ WormBase gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Zebrafish Information Network (ZFIN) genome database.
+
+ ZFIN gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the TIGR genome database.
+
+ TIGR gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the dbSNP database.
+
+ dbSNP polymorphism report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the OMIM database of genotypes and phenotypes.
+
+ OMIM entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a record from the HGVbase database of genotypes and phenotypes.
+
+ HGVbase entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a record from the HIVDB database of genotypes and phenotypes.
+
+ HIVDB entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the KEGG DISEASE database.
+
+ KEGG DISEASE entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format on PCR primers and hybridisation oligos as generated by Whitehead primer3 program.
+
+
+ Primer3 primer
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format of raw sequence read data from an Applied Biosystems sequencing machine.
+
+
+ ABI
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of MIRA sequence trace information file.
+
+
+ mira
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ caf
+
+ Common Assembly Format (CAF). A sequence assembly format including contigs, base-call qualities, and other metadata.
+
+
+ CAF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ Sequence assembly project file EXP format.
+ Affymetrix EXP format
+
+
+ EXP
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Staden Chromatogram Files format (SCF) of base-called sequence reads, qualities, and other metadata.
+
+
+ SCF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ PHD sequence trace format to store serialised chromatogram data (reads).
+
+
+ PHD
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of Affymetrix data file of raw image data.
+ Affymetrix image data file format
+
+
+ dat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of Affymetrix data file of information about (raw) expression levels of the individual probes.
+ Affymetrix probe raw data format
+
+
+ cel
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of affymetrix gene cluster files (hc-genes.txt, hc-chips.txt) from hierarchical clustering.
+
+
+ affymetrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the ArrayExpress microarrays database.
+
+ ArrayExpress entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Affymetrix data file format for information about experimental conditions and protocols.
+ Affymetrix experimental conditions data file format
+
+
+ affymetrix-exp
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ chp
+ Format of Affymetrix data file of information about (normalised) expression levels of the individual probes.
+ Affymetrix probe normalised data format
+
+
+ CHP
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the Electron Microscopy DataBase (EMDB).
+
+ EMDB entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG PATHWAY database of pathway maps for molecular interactions and reaction networks.
+
+ KEGG PATHWAY entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the MetaCyc metabolic pathways database.
+
+ MetaCyc entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of a report from the HumanCyc metabolic pathways database.
+
+ HumanCyc entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the INOH signal transduction pathways database.
+
+ INOH entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the PATIKA biological pathways database.
+
+ PATIKA entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the reactome biological pathways database.
+
+ Reactome entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the aMAZE biological pathways and molecular interactions database.
+
+ aMAZE entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the CPDB database.
+
+ CPDB entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the Panther Pathways database.
+
+ Panther Pathways entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of Taverna workflows.
+
+
+ Taverna workflow format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of mathematical models from the BioModel database.
+
+ Models are annotated and linked to relevant data resources, such as publications, databases of compounds and pathways, controlled vocabularies, etc.
+ BioModel mathematical model format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG LIGAND chemical database.
+
+ KEGG LIGAND entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG COMPOUND database.
+
+ KEGG COMPOUND entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG PLANT database.
+
+ KEGG PLANT entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG GLYCAN database.
+
+ KEGG GLYCAN entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from PubChem.
+
+ PubChem entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from a database of chemical structures and property predictions.
+
+ ChemSpider entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from Chemical Entities of Biological Interest (ChEBI).
+
+ ChEBI includes an ontological classification defining relations between entities or classes of entities.
+ ChEBI entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the MSDchem ligand dictionary.
+
+ MSDchem ligand dictionary entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of an entry from the HET group dictionary (HET groups from PDB files).
+
+
+ HET group dictionary entry format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG DRUG database.
+
+ KEGG DRUG entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of bibliographic reference as used by the PubMed database.
+
+
+ PubMed citation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for abstracts of scientific articles from the Medline database.
+
+
+ Bibliographic reference information including citation information is included
+ Medline Display Format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CiteXplore 'core' citation format including title, journal, authors and abstract.
+
+
+ CiteXplore-core
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CiteXplore 'all' citation format includes all known details such as Mesh terms and cross-references.
+
+
+ CiteXplore-all
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Article format of the PubMed Central database.
+
+
+ pmc
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of iHOP (Information Hyperlinked over Proteins) text-mining result.
+
+
+ iHOP format
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ OSCAR format of annotated chemical text.
+
+
+ OSCAR (Open-Source Chemistry Analysis Routines) software performs chemistry-specific parsing of chemical documents. It attempts to identify chemical names, ontology concepts, and chemical data from a document.
+ OSCAR format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Format of an ATOM record (describing data for an individual atom) from a PDB file.
+
+ PDB atom record format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of CATH domain classification information for a polypeptide chain.
+
+ The report (for example http://www.cathdb.info/chain/1cukA) includes chain identifiers, domain identifiers and CATH codes for domains in a given protein chain.
+ CATH chain report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of CATH domain classification information for a protein PDB file.
+
+ The report (for example http://www.cathdb.info/pdb/1cuk) includes chain identifiers, domain identifiers and CATH codes for domains in a given PDB file.
+ CATH PDB report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry (gene) format of the NCBI database.
+
+ NCBI gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Moby:GI_Gene
+ Report format for biological functions associated with a gene name and its alternative names (synonyms, homonyms), as generated by the GeneIlluminator service.
+
+ This includes a gene name and abbreviation of the name which may be in a name space indicating the gene status and relevant organisation.
+ GeneIlluminator gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Moby:BacMapGeneCard
+ Format of a report on the DNA and protein sequences for a given gene label from a bacterial chromosome maps from the BacMap database.
+
+ BacMap gene card format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on Escherichia coli genes, proteins and molecules from the CyberCell Database (CCDB).
+
+ ColiCard report format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Map of a plasmid (circular DNA) in PlasMapper TextMap format.
+
+
+ PlasMapper TextMap
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree Newick (text) format.
+ nh
+
+
+ newick
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree TreeCon (text) format.
+
+
+ TreeCon format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree Nexus (text) format.
+
+
+ Nexus format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A defined way or layout of representing and structuring data in a computer file, blob, string, message, or elsewhere.
+ Data format
+ Data model
+ Exchange format
+ File format
+
+
+ The main focus in EDAM lies on formats as means of structuring data exchanged between different tools or resources. The serialisation, compression, or encoding of concrete data formats/models is not in scope of EDAM. Format 'is format of' Data.
+ Format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Data model
+ A defined data format has its implicit or explicit data model, and EDAM does not distinguish the two. Some data models, however, do not have any standard way of serialisation into an exchange format, and those are thus not considered formats in EDAM. (Remark: even broader - or closely related - term to 'Data model' would be an 'Information model'.)
+
+
+
+
+ File format
+ File format denotes only formats of a computer file, but the same formats apply also to data blobs or exchanged messages.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data format for an individual atom.
+
+ Atomic data format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a molecular sequence record.
+
+
+ Sequence record format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for molecular sequence feature information.
+
+
+ Sequence feature annotation format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for molecular sequence alignment information.
+
+
+ Alignment format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ACEDB sequence format.
+
+
+ acedb
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Clustalw output format.
+
+ clustal sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Codata entry format.
+
+
+ codata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fasta format variant with database name before ID.
+
+
+ dbid
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBL entry format.
+ EMBL
+ EMBL sequence format
+
+
+ EMBL format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Staden experiment file format.
+
+
+ Staden experiment format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA format including NCBI-style IDs.
+ FASTA format
+ FASTA sequence format
+
+
+ FASTA
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ fastq
+ fq
+ FASTQ short read format ignoring quality scores.
+ FASTAQ
+ fq
+
+
+ FASTQ
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTQ Illumina 1.3 short read format.
+
+
+ FASTQ-illumina
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTQ short read format with phred quality.
+
+
+ FASTQ-sanger
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTQ Solexa/Illumina 1.0 short read format.
+
+
+ FASTQ-solexa
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fitch program format.
+
+
+ fitch program
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GCG sequence file format.
+ GCG SSF
+
+
+ GCG SSF (single sequence file) file format.
+ GCG
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Genbank entry format.
+ GenBank
+
+
+ GenBank format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Genpept protein entry format.
+
+
+ Currently identical to refseqp format
+ genpept
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GFF feature file format with sequence in the header.
+
+
+ GFF2-seq
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GFF3 feature file format with sequence.
+
+
+ GFF3-seq
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA sequence format including NCBI-style GIs.
+
+
+ giFASTA format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Hennig86 output sequence format.
+
+
+ hennig86
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Intelligenetics sequence format.
+
+
+ ig
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Intelligenetics sequence format (strict version).
+
+
+ igstrict
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Jackknifer interleaved and non-interleaved sequence format.
+
+
+ jackknifer
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mase program sequence format.
+
+
+ mase format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mega interleaved and non-interleaved sequence format.
+
+
+ mega-seq
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GCG MSF (multiple sequence file) file format.
+
+
+ GCG MSF
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ pir
+ NBRF/PIR entry sequence format.
+ nbrf
+ pir
+
+
+ nbrf/pir
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nexus/paup interleaved sequence format.
+
+
+ nexus-seq
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB sequence format (ATOM lines).
+
+
+ pdb format in EMBOSS.
+ pdbatom
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB nucleotide sequence format (ATOM lines).
+
+
+ pdbnuc format in EMBOSS.
+ pdbatomnuc
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB nucleotide sequence format (SEQRES lines).
+
+
+ pdbnucseq format in EMBOSS.
+ pdbseqresnuc
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB sequence format (SEQRES lines).
+
+
+ pdbseq format in EMBOSS.
+ pdbseqres
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Plain old FASTA sequence format (unspecified format for IDs).
+
+
+ Pearson format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Phylip interleaved sequence format.
+
+ phylip sequence format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ PHYLIP non-interleaved sequence format.
+
+ phylipnon sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw sequence format with no non-sequence characters.
+
+
+ raw
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Refseq protein entry sequence format.
+
+
+ Currently identical to genpept format
+ refseqp
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Selex sequence format.
+
+ selex sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+
+ Staden suite sequence format.
+
+
+ Staden format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ Stockholm multiple sequence alignment format (used by Pfam and Rfam).
+
+
+ Stockholm format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA strider output sequence format.
+
+
+ strider format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UniProtKB entry sequence format.
+ SwissProt format
+ UniProt format
+
+
+ UniProtKB format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ txt
+ Plain text sequence format (essentially unformatted).
+
+
+ plain text format (unformatted)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Treecon output sequence format.
+
+ treecon sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ NCBI ASN.1-based sequence format.
+
+
+ ASN.1 sequence format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAS sequence (XML) format (any type).
+ das sequence format
+
+
+ DAS format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAS sequence (XML) format (nucleotide-only).
+
+
+ The use of this format is deprecated.
+ dasdna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS debugging trace sequence format of full internal data content.
+
+
+ debug-seq
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Jackknifer output sequence non-interleaved format.
+
+
+ jackknifernon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Mega non-interleaved output sequence format.
+
+ meganon sequence format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ NCBI FASTA sequence format with NCBI-style IDs.
+
+
+ There are several variants of this.
+ NCBI format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nexus/paup non-interleaved sequence format.
+
+
+ nexusnon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ General Feature Format (GFF) of sequence features.
+
+
+ GFF2
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ Generic Feature Format version 3 (GFF3) of sequence features.
+
+
+ GFF3
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ PIR feature format.
+
+
+ pir
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Swiss-Prot feature format.
+
+ swiss feature
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAS GFF (XML) feature format.
+ DASGFF feature
+ das feature
+
+
+ DASGFF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS debugging trace feature format of full internal data content.
+
+
+ debug-feat
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBL feature format.
+
+ EMBL feature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Genbank feature format.
+
+ GenBank feature
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ClustalW format for (aligned) sequences.
+ clustal
+
+
+ ClustalW format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS alignment format for debugging trace of full internal data content.
+
+
+ debug
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fasta format for (aligned) sequences.
+
+
+ FASTA-aln
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX0 alignment format.
+
+
+ markx0
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX1 alignment format.
+
+
+ markx1
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX10 alignment format.
+
+
+ markx10
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX2 alignment format.
+
+
+ markx2
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX3 alignment format.
+
+
+ markx3
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment format for start and end of matches between sequence pairs.
+
+
+ match
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mega format for (typically aligned) sequences.
+
+
+ mega
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mega non-interleaved format for (typically aligned) sequences.
+
+
+ meganon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ MSF format for (aligned) sequences.
+
+ msf alignment format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Nexus/paup format for (aligned) sequences.
+
+ nexus alignment format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Nexus/paup non-interleaved format for (aligned) sequences.
+
+ nexusnon alignment format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS simple sequence pairwise alignment format.
+
+
+ pair
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.bioperl.org/wiki/PHYLIP_multiple_alignment_format
+ Phylip format for (aligned) sequences.
+ PHYLIP
+ PHYLIP interleaved format
+ ph
+ phy
+
+
+ PHYLIP format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.bioperl.org/wiki/PHYLIP_multiple_alignment_format
+ Phylip non-interleaved format for (aligned) sequences.
+ PHYLIP sequential format
+ phylipnon
+
+
+ PHYLIP sequential
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment format for score values for pairs of sequences.
+
+
+ scores format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SELEX format for (aligned) sequences.
+
+
+ selex
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS simple multiple alignment format.
+
+
+ EMBOSS simple format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Simple multiple sequence (alignment) format for SRS.
+
+
+ srs format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Simple sequence pair (alignment) format for SRS.
+
+
+ srspair
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ T-Coffee program alignment format.
+
+
+ T-Coffee format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Treecon format for (aligned) sequences.
+
+
+ TreeCon-seq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a phylogenetic tree.
+
+
+ Phylogenetic tree format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a biological pathway or network.
+
+
+ Biological pathway or network format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a sequence-profile alignment.
+
+
+ Sequence-profile alignment format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data format for a sequence-HMM profile alignment.
+
+ Sequence-profile alignment (HMM) format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for an amino acid index.
+
+
+ Amino acid index format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a full-text scientific article.
+ Literature format
+
+
+ Article format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format of a report from text mining.
+
+
+ Text mining report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for reports on enzyme kinetics.
+
+
+ Enzyme kinetics report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on a chemical compound.
+ Chemical compound annotation format
+ Chemical structure format
+ Small molecule report format
+ Small molecule structure format
+
+
+ Chemical data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on a particular locus, gene, gene system or groups of genes.
+ Gene features format
+
+
+ Gene annotation format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a workflow.
+ Programming language
+ Script format
+
+
+ Workflow format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a molecular tertiary structure.
+
+
+ Tertiary structure format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.2
+
+
+ Data format for a biological model.
+
+ Biological model format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Text format of a chemical formula.
+
+
+ Chemical formula format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of raw (unplotted) phylogenetic data.
+
+
+ Phylogenetic character data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic continuous quantitative character data.
+
+
+ Phylogenetic continuous quantitative character format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic discrete states data.
+
+
+ Phylogenetic discrete states format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic cliques data.
+
+
+ Phylogenetic tree report (cliques) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic invariants data.
+
+
+ Phylogenetic tree report (invariants) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation format for electron microscopy models.
+
+ Electron microscopy model format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for phylogenetic tree distance data.
+
+
+ Phylogenetic tree report (tree distances) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Format for sequence polymorphism data.
+
+ Polymorphism report format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for reports on a protein family.
+
+
+ Protein family report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for molecular interaction data.
+ Molecular interaction format
+
+
+ Protein interaction format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for sequence assembly data.
+
+
+ Sequence assembly format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for information about a microarray experimental per se (not the data generated from that experiment).
+
+
+ Microarray experiment data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for sequence trace data (i.e. including base call information).
+
+
+ Sequence trace format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a file of gene expression data, e.g. a gene expression matrix or profile.
+ Gene expression data format
+
+
+ Gene expression report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on genotype / phenotype information.
+
+ Genotype and phenotype annotation format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a map of (typically one) molecular sequence annotated with features.
+
+
+ Map format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on PCR primers or hybridisation oligos in a nucleic acid sequence.
+
+
+ Nucleic acid features (primers) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report of general information about a specific protein.
+
+
+ Protein report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report of general information about a specific enzyme.
+
+ Protein report (enzyme) format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a matrix of 3D-1D scores (amino acid environment probabilities).
+
+
+ 3D-1D scoring matrix format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on the quality of a protein three-dimensional model.
+
+
+ Protein structure report (quality evaluation) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on sequence hits and associated data from searching a sequence database.
+
+
+ Database hits (sequence) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a matrix of genetic distances between molecular sequences.
+
+
+ Sequence distance matrix format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a sequence motif.
+
+
+ Sequence motif format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a sequence profile.
+
+
+ Sequence profile format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a hidden Markov model.
+
+
+ Hidden Markov model format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format of a dirichlet distribution.
+
+
+ Dirichlet distribution format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for the emission and transition counts of a hidden Markov model.
+
+
+ HMM emission and transition counts format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for secondary structure (predicted or real) of an RNA molecule.
+
+
+ RNA secondary structure format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for secondary structure (predicted or real) of a protein molecule.
+
+
+ Protein secondary structure format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used to specify range(s) of sequence positions.
+
+
+ Sequence range format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for molecular sequence with possible unknown positions but without non-sequence characters.
+
+
+ pure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions but possibly with non-sequence characters.
+
+
+ unpure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions but without ambiguity characters.
+
+
+ unambiguous sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions and possible ambiguity characters.
+
+
+ ambiguous
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for map of repeats in molecular (typically nucleotide) sequences.
+
+
+ Sequence features (repeats) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for report on restriction enzyme recognition sites in nucleotide sequences.
+
+
+ Nucleic acid features (restriction sites) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.10
+
+ Format used for report on coding regions in nucleotide sequences.
+
+
+ Gene features (coding region) format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for clusters of molecular sequences.
+
+
+ Sequence cluster format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used for clusters of protein sequences.
+
+
+ Sequence cluster format (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used for clusters of nucleotide sequences.
+
+
+ Sequence cluster format (nucleic acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Format used for clusters of genes.
+
+ Gene cluster format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling EMBL entry format.
+
+
+ This concept may be used for the many non-standard EMBL-like text formats.
+ EMBL-like (text)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling FASTQ short read format.
+
+
+ This concept may be used for non-standard FASTQ short read-like formats.
+ FASTQ-like format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ XML format for EMBL entries.
+
+ https://fairsharing.org/bsg-s001452/
+
+
+ true
+ EMBLXML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Specific XML format for EMBL entries (only uses certain sections).
+
+ https://fairsharing.org/bsg-s001452/
+
+
+ true
+ cdsxml
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ INSDSeq provides the elements of a sequence as presented in the GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of additional structure.
+
+
+ INSD XML
+ INSDC XML
+
+ INSDSeq
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Geneseq sequence format.
+
+
+ geneseq
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text sequence format resembling uniprotkb entry format.
+
+
+ UniProt-like (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ UniProt entry sequence format.
+
+
+ UniProt format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ ipi sequence format.
+
+ ipi
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Abstract format used by MedLine database.
+
+
+ medline
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for ontologies.
+
+
+ Ontology format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A serialisation format conforming to the Open Biomedical Ontologies (OBO) model.
+
+
+ OBO format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A serialisation format conforming to the Web Ontology Language (OWL) model.
+
+
+ OWL format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling FASTA format.
+
+
+ This concept may also be used for the many non-standard FASTA-like formats.
+ FASTA-like (text)
+ http://filext.com/file-extension/FASTA
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data format for a molecular sequence record, typically corresponding to a full entry from a molecular sequence database.
+
+
+ Sequence record full format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data format for a molecular sequence record 'lite', typically molecular sequence and minimal metadata, such as an identifier of the sequence and/or a comment.
+
+
+ Sequence record lite format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An XML format for EMBL entries.
+
+
+ This is a placeholder for other more specific concepts. It should not normally be used for annotation.
+ EMBL format (XML)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling GenBank entry (plain text) format.
+
+
+ This concept may be used for the non-standard GenBank-like text formats.
+ GenBank-like format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for a sequence feature table.
+
+
+ Sequence feature table format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Format of a report on organism strain data / cell line.
+
+ Strain data format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format for a report of strain data as used for CIP database entries.
+
+ CIP strain data format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ PHYLIP file format for phylogenetic property data.
+
+ phylip property values
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format (HTML) for the STRING database of protein interaction.
+
+ STRING entry format (HTML)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format (XML) for the STRING database of protein interaction.
+
+
+ STRING entry format (XML)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GFF feature format (of indeterminate version).
+
+
+ GFF
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ Gene Transfer Format (GTF), a restricted version of GFF.
+
+
+ GTF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA format wrapped in HTML elements.
+
+
+ FASTA-HTML
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBL entry format wrapped in HTML elements.
+
+
+ EMBL-HTML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the BioCyc enzyme database.
+
+ BioCyc enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the Enzyme nomenclature database (ENZYME).
+
+ ENZYME enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on a gene from the PseudoCAP database.
+
+ PseudoCAP gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on a gene from the GeneCards database.
+
+ GeneCards gene report format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Textual format.
+ Plain text format
+ txt
+
+
+ Data in text format can be compressed into binary format, or can be a value of an XML element or attribute. Markup formats are not considered textual (or more precisely, not plain-textual).
+ Textual format
+ http://filext.com/file-extension/TXT
+ http://www.iana.org/assignments/media-types/media-types.xhtml#text
+ http://www.iana.org/assignments/media-types/text/plain
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ HTML format.
+ Hypertext Markup Language
+
+
+ HTML
+ http://filext.com/file-extension/HTML
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ xml
+
+
+
+ eXtensible Markup Language (XML) format.
+ eXtensible Markup Language
+
+
+ Data in XML format can be serialised into text, or binary format.
+ XML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Binary format.
+
+
+ Only specific native binary formats are listed under 'Binary format' in EDAM. Generic binary formats - such as any data being zipped, or any XML data being serialised into the Efficient XML Interchange (EXI) format - are not modelled in EDAM. Refer to http://wsio.org/compression_004.
+ Binary format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Typical textual representation of a URI.
+
+ URI format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the NCI-Nature pathways database.
+
+ NCI-Nature pathway entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A placeholder concept for visual navigation by dividing data formats by the content of the data that is represented.
+ Format (typed)
+
+
+ This concept exists only to assist EDAM maintenance and navigation in graphical browsers. It does not add semantic information. The concept branch under 'Format (typed)' provides an alternative organisation of the concepts nested under the other top-level branches ('Binary', 'HTML', 'RDF', 'Text' and 'XML'. All concepts under here are already included under those branches.
+ Format (by type of data)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+
+
+
+ Any ontology allowed, none mandatory. Preferably with URIs but URIs are not mandatory. Non-ontology terms are also allowed as the last resort in case of a lack of suitable ontology.
+
+ 'BioXSD' belongs to the 'BioXSD|GTrack' ecosystem of generic formats. 'BioXSD in XML' is the XML format based on the common, unified 'BioXSD data model', a.k.a. 'BioXSD|BioJSON|BioYAML'.
+
+
+ BioXSD-schema-based XML format of sequence-based data and some other common data - sequence records, alignments, feature records, references to resources, and more - optimised for integrative bioinformatics, Web services, and object-oriented programming.
+ BioJSON
+ BioXSD
+ BioXSD XML
+ BioXSD XML format
+ BioXSD data model
+ BioXSD format
+ BioXSD in XML
+ BioXSD in XML format
+ BioXSD+XML
+ BioXSD/GTrack
+ BioXSD|GTrack
+ BioYAML
+
+
+ BioXSD (XML)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A serialisation format conforming to the Resource Description Framework (RDF) model.
+ Resource Description Framework format
+ RDF
+ Resource Description Framework
+
+
+ RDF format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Genbank entry format wrapped in HTML elements.
+
+
+ GenBank-HTML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on protein features (domain composition).
+
+ Protein features (domains) format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling EMBL entry (plain text) format.
+
+
+ This concept may be used for the many non-standard EMBL-like formats.
+ EMBL-like format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling FASTQ short read format.
+
+
+ This concept may be used for non-standard FASTQ short read-like formats.
+ FASTQ-like format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling FASTA format.
+
+
+ This concept may be used for the many non-standard FASTA-like formats.
+ FASTA-like
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A sequence format resembling uniprotkb entry format.
+
+
+ uniprotkb-like format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for a sequence feature table.
+
+
+ Sequence feature table format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ OBO ontology text format.
+
+
+ OBO
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ OBO ontology XML format.
+
+
+ OBO-XML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for a molecular sequence record (text).
+
+
+ Sequence record format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for a molecular sequence record (XML).
+
+
+ Sequence record format (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ XML format for a sequence feature table.
+
+
+ Sequence feature table format (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for molecular sequence alignment information.
+
+
+ Alignment format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ XML format for molecular sequence alignment information.
+
+
+ Alignment format (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for a phylogenetic tree.
+
+
+ Phylogenetic tree format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ XML format for a phylogenetic tree.
+
+
+ Phylogenetic tree format (XML)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An XML format resembling EMBL entry format.
+
+
+ This concept may be used for any non-standard EMBL-like XML formats.
+ EMBL-like (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling GenBank entry (plain text) format.
+
+
+ This concept may be used for the non-standard GenBank-like formats.
+ GenBank-like format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the STRING database of protein interaction.
+
+ STRING entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for sequence assembly data.
+
+
+ Sequence assembly format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Text format (representation) of amino acid residues.
+
+ Amino acid identifier format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence without any unknown positions or ambiguity characters.
+
+
+ completely unambiguous
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence (characters ACGTU only) without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure nucleotide
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence (characters ACGT only) without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure dna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence (characters ACGU only) without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure rna sequence
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a raw molecular sequence (i.e. the alphabet used).
+
+
+ Raw sequence format
+ http://www.onto-med.de/ontologies/gfo.owl#Symbol_sequence
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ BAM format, the binary, BGZF-formatted compressed version of SAM format for alignment of nucleotide sequences (e.g. sequencing reads) to (a) reference sequence(s). May contain base-call and alignment qualities and other data.
+
+
+ BAM
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Sequence Alignment/Map (SAM) format for alignment of nucleotide sequences (e.g. sequencing reads) to (a) reference sequence(s). May contain base-call and alignment qualities and other data.
+
+
+ The format supports short and long reads (up to 128Mbp) produced by different sequencing platforms and is used to hold mapped data within the GATK and across the Broad Institute, the Sanger Centre, and throughout the 1000 Genomes project.
+ SAM
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Systems Biology Markup Language (SBML), the standard XML format for models of biological processes such as for example metabolism, cell signaling, and gene regulation.
+
+
+ SBML
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for any protein sequence without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure protein
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a bibliographic reference.
+
+
+ Bibliographic reference format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a sequence annotation track.
+
+
+ Sequence annotation track format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for molecular sequence alignment information that can hold sequence alignment(s) of only 2 sequences.
+
+
+ Alignment format (pair only)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of sequence variation annotation.
+
+
+ Sequence variation annotation format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some variant of Pearson MARKX alignment format.
+
+
+ markx0 variant
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some variant of Mega format for (typically aligned) sequences.
+
+
+ mega variant
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some variant of Phylip format for (aligned) sequences.
+
+
+ Phylip format variant
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AB1 binary format of raw DNA sequence reads (output of Applied Biosystems' sequencing analysis software). Contains an electropherogram and the DNA base sequence.
+
+
+ AB1 uses the generic binary Applied Biosystems, Inc. Format (ABIF).
+ AB1
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ ACE sequence assembly format including contigs, base-call qualities, and other metadata (version Aug 1998 and onwards).
+
+
+ ACE
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Browser Extensible Data (BED) format of sequence annotation track, typically to be displayed in a genome browser.
+
+
+ BED detail format includes 2 additional columns (http://genome.ucsc.edu/FAQ/FAQformat#format1.7) and BED 15 includes 3 additional columns for experiment scores (http://genomewiki.ucsc.edu/index.php/Microarray_track).
+ BED
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ bigBed format for large sequence annotation tracks, similar to textual BED format.
+
+
+ bigBed
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ wig
+
+ Wiggle format (WIG) of a sequence annotation track that consists of a value for each sequence position. Typically to be displayed in a genome browser.
+
+
+ WIG
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ bigWig format for large sequence annotation tracks that consist of a value for each sequence position. Similar to textual WIG format.
+
+
+ bigWig
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ PSL format of alignments, typically generated by BLAT or psLayout. Can be displayed in a genome browser like a sequence annotation track.
+
+
+ PSL
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Multiple Alignment Format (MAF) supporting alignments of whole genomes with rearrangements, directions, multiple pieces to the alignment, and so forth.
+
+
+ Typically generated by Multiz and TBA aligners; can be displayed in a genome browser like a sequence annotation track. This should not be confused with MIRA Assembly Format or Mutation Annotation Format.
+ MAF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ 2bit binary format of nucleotide sequences using 2 bits per nucleotide. In addition encodes unknown nucleotides and lower-case 'masking'.
+
+
+ 2bit
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ .nib (nibble) binary format of a nucleotide sequence using 4 bits per nucleotide (including unknown) and its lower-case 'masking'.
+
+
+ .nib
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ gp
+
+ genePred table format for gene prediction tracks.
+
+
+ genePred format has 3 main variations (http://genome.ucsc.edu/FAQ/FAQformat#format9 http://www.broadinstitute.org/software/igv/genePred). They reflect UCSC Browser DB tables.
+ genePred
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Personal Genome SNP (pgSnp) format for sequence variation tracks (indels and polymorphisms), supported by the UCSC Genome Browser.
+
+
+ pgSnp
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ axt format of alignments, typically produced from BLASTZ.
+
+
+ axt
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ lav
+
+ LAV format of alignments generated by BLASTZ and LASTZ.
+
+
+ LAV
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Pileup format of alignment of sequences (e.g. sequencing reads) to (a) reference sequence(s). Contains aligned bases per base of the reference sequence(s).
+
+
+ Pileup
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Variant Call Format (VCF) is a tabular format for storing genomic sequence variations.
+ 1000 Genomes Project has its own specification for encoding structural variations in VCF (https://www.internationalgenome.org/wiki/Analysis/Variant%20Call%20Format/VCF%20(Variant%20Call%20Format)%20version%204.0/encoding-structural-variants). This is based on VCF version 4.0 and not directly compatible with VCF version 4.3.
+ vcf
+ vcf.gz
+
+
+ VCF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Sequence Read Format (SRF) of sequence trace data. Supports submission to the NCBI Short Read Archive.
+
+
+ SRF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ ZTR format for storing chromatogram data from DNA sequencing instruments.
+
+
+ ZTR
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Genome Variation Format (GVF). A GFF3-compatible format with defined header and attribute tags for sequence variation.
+
+
+ GVF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ bcf
+ bcf.gz
+ BCF is the binary version of Variant Call Format (VCF) for sequence variation (indels, polymorphisms, structural variation).
+
+
+ BCF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Format of a matrix (array) of numerical values.
+
+
+ Matrix format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Format of data concerning the classification of the sequences and/or structures of protein structural domain(s).
+
+
+ Protein domain classification format
+
+
+
+
+
+
+
+
+ beta13
+ Format of raw SCOP domain classification data files.
+
+
+ These are the parsable data files provided by SCOP.
+ Raw SCOP domain classification format
+
+
+
+
+
+
+
+
+ beta13
+ Format of raw CATH domain classification data files.
+
+
+ These are the parsable data files provided by CATH.
+ Raw CATH domain classification format
+
+
+
+
+
+
+
+
+ beta13
+ Format of summary of domain classification information for a CATH domain.
+
+
+ The report (for example http://www.cathdb.info/domain/1cukA01) includes CATH codes for levels in the hierarchy for the domain, level descriptions and relevant data and links.
+ CATH domain report format
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ Systems Biology Result Markup Language (SBRML), the standard XML format for simulated or calculated results (e.g. trajectories) of systems biology models.
+
+
+ SBRML
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ BioPAX is an exchange format for pathway data, with its data model defined in OWL.
+
+
+ BioPAX
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ EBI Application Result XML is a format returned by sequence similarity search Web services at EBI.
+
+
+ EBI Application Result XML
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ XML Molecular Interaction Format (MIF), standardised by HUPO PSI MI.
+ MIF
+
+
+ PSI MI XML (MIF)
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ phyloXML is a standardised XML format for phylogenetic trees, networks, and associated data.
+
+
+ phyloXML
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ NeXML is a standardised XML format for rich phyloinformatic data.
+
+
+ NeXML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ MAGE-ML XML format for microarray expression data, standardised by MGED (now FGED).
+
+
+ MAGE-ML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ MAGE-TAB textual format for microarray expression data, standardised by MGED (now FGED).
+
+
+ MAGE-TAB
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ GCDML XML format for genome and metagenome metadata according to MIGS/MIMS/MIMARKS information standards, standardised by the Genomic Standards Consortium (GSC).
+
+
+ GCDML
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+
+
+
+
+ 'GTrack' belongs to the 'BioXSD|GTrack' ecosystem of generic formats, and in particular to its subset, the 'GTrack ecosystem' (GTrack, GSuite, BTrack). 'GTrack' is the tabular format for representing features of sequences and genomes.
+
+
+
+ GTrack is a generic and optimised tabular format for genome or sequence feature tracks. GTrack unifies the power of other track formats (e.g. GFF3, BED, WIG), and while optimised in size, adds more flexibility, customisation, and automation ("machine understandability").
+ BioXSD/GTrack GTrack
+ BioXSD|GTrack GTrack
+ GTrack ecosystem of formats
+ GTrack format
+ GTrack|BTrack|GSuite GTrack
+ GTrack|GSuite|BTrack GTrack
+
+
+ GTrack
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+ true
+ Data format for a report of information derived from a biological pathway or network.
+
+
+ Biological pathway or network report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+ true
+ Data format for annotation on a laboratory experiment.
+
+
+ Experiment annotation format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ Cytoband format for chromosome cytobands.
+
+
+ Reflects a UCSC Browser DB table.
+ Cytoband format
+
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ CopasiML, the native format of COPASI.
+
+
+ CopasiML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+
+
+ CellML, the format for mathematical models of biological and other networks.
+
+
+ CellML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+
+
+
+ Tabular Molecular Interaction format (MITAB), standardised by HUPO PSI MI.
+
+
+ PSI MI TAB (MITAB)
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ Protein affinity format (PSI-PAR), standardised by HUPO PSI MI. It is compatible with PSI MI XML (MIF) and uses the same XML Schema.
+
+
+ PSI-PAR
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ mzML format for raw spectrometer output data, standardised by HUPO PSI MSS.
+
+
+ mzML is the successor and unifier of the mzData format developed by PSI and mzXML developed at the Seattle Proteome Center.
+ mzML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ true
+ Format for mass spectra and derived data, including peptide sequences etc.
+
+
+ Mass spectrometry data format
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ TraML (Transition Markup Language) is the format for mass spectrometry transitions, standardised by HUPO PSI MSS.
+
+
+ TraML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ mzIdentML is the exchange format for peptides and proteins identified from mass spectra, standardised by HUPO PSI PI. It can be used for outputs of proteomics search engines.
+
+
+ mzIdentML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ mzQuantML is the format for quantitation values associated with peptides, proteins and small molecules from mass spectra, standardised by HUPO PSI PI. It can be used for outputs of quantitation software for proteomics.
+
+
+ mzQuantML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ GelML is the format for describing the process of gel electrophoresis, standardised by HUPO PSI PS.
+
+
+ GelML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ spML is the format for describing proteomics sample processing, other than using gels, prior to mass spectrometric protein identification, standardised by HUPO PSI PS. It may also be applicable for metabolomics.
+
+
+ spML
+
+
+
+
+
+
+
+
+
+ 1.2
+ A human-readable encoding for the Web Ontology Language (OWL).
+
+
+ OWL Functional Syntax
+
+
+
+
+
+
+
+
+
+ 1.2
+ A syntax for writing OWL class expressions.
+
+
+ This format was influenced by the OWL Abstract Syntax and the DL style syntax.
+ Manchester OWL Syntax
+
+
+
+
+
+
+
+
+
+ 1.2
+ A superset of the "Description-Logic Knowledge Representation System Specification from the KRSS Group of the ARPA Knowledge Sharing Effort".
+
+
+ This format is used in Protege 4.
+ KRSS2 Syntax
+
+
+
+
+
+
+
+
+
+ 1.2
+ The Terse RDF Triple Language (Turtle) is a human-friendly serialisation format for RDF (Resource Description Framework) graphs.
+
+
+ The SPARQL Query Language incorporates a very similar syntax.
+ Turtle
+
+
+
+
+
+
+
+
+
+ 1.2
+ nt
+ A plain text serialisation format for RDF (Resource Description Framework) graphs, and a subset of the Turtle (Terse RDF Triple Language) format.
+
+
+ N-Triples should not be confused with Notation 3 which is a superset of Turtle.
+ N-Triples
+
+
+
+
+
+
+
+
+
+ 1.2
+ n3
+ A shorthand non-XML serialisation of Resource Description Framework model, designed with human-readability in mind.
+ N3
+
+
+ Notation3
+
+
+
+
+
+
+
+
+
+ 1.2
+ rdf
+
+ Resource Description Framework (RDF) XML format.
+
+
+ RDF/XML can be used as a standard serialisation syntax for OWL DL, but not for OWL Full.
+ RDF/XML
+ http://www.ebi.ac.uk/SWO/data/SWO_3000006
+
+
+
+
+
+
+
+
+
+ 1.2
+ OWL ontology XML serialisation format.
+ OWL
+
+
+ OWL/XML
+
+
+
+
+
+
+
+
+
+ 1.3
+
+
+ The A2M format is used as the primary format for multiple alignments of protein or nucleic-acid sequences in the SAM suite of tools. It is a small modification of FASTA format for sequences and is compatible with most tools that read FASTA.
+
+
+ A2M
+
+
+
+
+
+
+
+
+
+ 1.3
+
+
+ Standard flowgram format (SFF) is a binary file format used to encode results of pyrosequencing from the 454 Life Sciences platform for high-throughput sequencing.
+ Standard flowgram format
+
+
+ SFF
+
+
+
+
+
+
+
+
+ 1.3
+
+ The MAP file describes SNPs and is used by the Plink package.
+ Plink MAP
+
+
+ MAP
+
+
+
+
+
+
+
+
+ 1.3
+
+ The PED file describes individuals and genetic data and is used by the Plink package.
+ Plink PED
+
+
+ PED
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Data format for metadata on an individual and their genetic data.
+
+
+ Individual genetic data format
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ The PED/MAP file describes data used by the Plink package.
+ Plink PED/MAP
+
+
+ PED/MAP
+
+
+
+
+
+
+
+
+
+ 1.3
+
+
+ File format of a CT (Connectivity Table) file from the RNAstructure package.
+ Connect format
+ Connectivity Table file format
+
+
+ CT
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ XRNA old input style format.
+
+
+ SS
+
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ RNA Markup Language.
+
+
+ RNAML
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ Format for the Genetic Data Environment (GDE).
+
+
+ GDE
+
+
+
+
+
+
+
+
+ 1.3
+
+ A multiple alignment in vertical format, as used in the AMPS (Alignment of Multiple Protein Sequences) package.
+ Block file format
+
+
+ BLC
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Format of a data index of some type.
+
+
+ Data index format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ BAM indexing format.
+
+
+ BAI
+
+
+
+
+
+
+
+
+ 1.3
+
+ HMMER profile HMM file for HMMER versions 2.x.
+
+
+ HMMER2
+
+
+
+
+
+
+
+
+ 1.3
+
+ HMMER profile HMM file for HMMER versions 3.x.
+
+
+ HMMER3
+
+
+
+
+
+
+
+
+ 1.3
+
+ PO is the output format of Partial Order Alignment program (POA) performing Multiple Sequence Alignment (MSA).
+
+
+ PO
+
+
+
+
+
+
+
+
+
+ 1.3
+ XML format as produced by the NCBI Blast package.
+
+
+ BLAST XML results format
+
+
+
+
+
+
+
+
+
+ 1.7
+ http://www.ebi.ac.uk/ena/software/cram-usage#format_specification http://samtools.github.io/hts-specs/CRAMv2.1.pdf
+ http://www.ebi.ac.uk/ena/software/cram-usage#format_specification http://samtools.github.io/hts-specs/CRAMv2.1.pdf
+ Reference-based compression of alignment format.
+
+
+ CRAM
+
+
+
+
+
+
+
+
+
+ 1.7
+ json
+
+
+
+ JavaScript Object Notation format; a lightweight, text-based format to represent tree-structured data using key-value pairs.
+ JavaScript Object Notation
+
+
+ JSON
+
+
+
+
+
+
+
+
+
+ 1.7
+ Encapsulated PostScript format.
+
+
+ EPS
+
+
+
+
+
+
+
+
+ 1.7
+ Graphics Interchange Format.
+
+
+ GIF
+
+
+
+
+
+
+
+
+
+ 1.7
+ Microsoft Excel spreadsheet format.
+ Microsoft Excel format
+
+
+ xls
+
+
+
+
+
+
+
+
+ 1.7
+ tab
+ tsv
+
+
+
+ Tabular data represented as tab-separated values in a text file.
+ Tab-delimited
+ Tab-separated values
+ tab
+
+
+ TSV
+
+
+
+
+
+
+
+
+ 1.7
+ 1.10
+
+ Format of a file of gene expression data, e.g. a gene expression matrix or profile.
+
+
+ Gene expression data format
+ true
+
+
+
+
+
+
+
+
+
+ 1.7
+ Format of the Cytoscape input file, in which gene expression ratios or values are specified over one or more experiments.
+
+
+ Cytoscape input file format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ https://github.com/BenLangmead/bowtie/blob/master/MANUAL
+ Bowtie format for indexed reference genome for "small" genomes.
+ Bowtie index format
+
+
+ ebwt
+
+
+
+
+
+
+
+
+ 1.7
+ http://www.molbiol.ox.ac.uk/tutorials/Seqlab_GCG.pdf
+ Rich sequence format.
+ GCG RSF
+
+
+ RSF-format files contain one or more sequences that may or may not be related. In addition to the sequence data, each sequence can be annotated with descriptive sequence information (from the GCG manual).
+ RSF
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Some format based on the GCG format.
+
+
+ GCG format variant
+
+
+
+
+
+
+
+
+
+ 1.7
+ http://rothlab.ucdavis.edu/genhelp/chapter_2_using_sequences.html#_Creating_and_Editing_Single_Sequenc
+ Bioinformatics Sequence Markup Language format.
+
+
+ BSML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ https://github.com/BenLangmead/bowtie/blob/master/MANUAL
+ Bowtie format for indexed reference genome for "large" genomes.
+ Bowtie long index format
+
+
+ ebwtl
+
+
+
+
+
+
+
+
+
+ 1.8
+
+ Ensembl standard format for variation data.
+
+
+ Ensembl variation file format
+
+
+
+
+
+
+
+
+
+ 1.8
+ Microsoft Word format.
+ Microsoft Word format
+ doc
+
+
+ docx
+
+
+
+
+
+
+
+
+ 1.8
+ true
+ Format of documents including word processor, spreadsheet and presentation.
+
+
+ Document format
+
+
+
+
+
+
+
+
+
+ 1.8
+ Portable Document Format.
+
+
+ PDF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.9
+ true
+ Format used for images and image metadata.
+
+
+ Image format
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ Medical image format corresponding to the Digital Imaging and Communications in Medicine (DICOM) standard.
+
+
+ DICOM format
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ nii
+ An open file format from the Neuroimaging Informatics Technology Initiative (NIfTI) commonly used to store brain imaging data obtained using Magnetic Resonance Imaging (MRI) methods.
+ NIFTI format
+ NIfTI-1 format
+
+
+ nii
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ Text-based tagged file format for medical images generated using the MetaImage software package.
+ MetaImage format
+
+
+ mhd
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ Nearly Raw Raster Data format designed to support scientific visualisation and image processing involving N-dimensional raster data.
+
+
+ nrrd
+
+
+
+
+
+
+
+
+ 1.9
+ File format used for scripts written in the R programming language for execution within the R software environment, typically for statistical computation and graphics.
+
+
+ R file format
+
+
+
+
+
+
+
+
+ 1.9
+ File format used for scripts for the Statistical Package for the Social Sciences.
+
+
+ SPSS
+
+
+
+
+
+
+
+
+ 1.9
+
+ eml
+ mht
+ mhtml
+
+ MHTML is not strictly an HTML format, it is encoded as an HTML email message (although with multipart/related instead of multipart/alternative). It, however, contains the main HTML block as its core, and thus it is for practical reasons included in EDAM as a specialisation of 'HTML'.
+
+
+ MIME HTML format for Web pages, which can include external resources, including images, Flash animations and so on.
+ HTML email format
+ HTML email message format
+ MHT
+ MHT format
+ MHTML format
+ MIME HTML
+ MIME HTML format
+ eml
+ MIME multipart
+ MIME multipart format
+ MIME multipart message
+ MIME multipart message format
+
+
+ MHTML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.10
+ Proprietary file format for (raw) BeadArray data used by genomewide profiling platforms from Illumina Inc. This format is output directly from the scanner and stores summary intensities for each probe-type on an array.
+
+
+ IDAT
+
+
+
+
+
+
+
+
+
+ 1.10
+
+ Joint Photographic Experts Group file format for lossy graphics files.
+ JPEG
+ jpeg
+
+
+ Sequence of segments with markers. Begins with a byte of 0xFF, followed by the marker type.
+ JPG
+
+
+
+
+
+
+
+
+
+ 1.10
+ Reporter Code Count-A data file (.csv) output by the Nanostring nCounter Digital Analyzer, which contains gene sample information, probe information and probe counts.
+
+
+ rcc
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ ARFF (Attribute-Relation File Format) is an ASCII text file format that describes a list of instances sharing a set of attributes.
+
+
+ This file format is for machine learning.
+ arff
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ AFG is a single text-based file assembly format that holds read and consensus information together.
+
+
+ afg
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ The bedGraph format allows display of continuous-valued data in track format. This display type is useful for probability scores and transcriptome data.
+
+
+ Holds a tab-delimited chromosome /start /end / datavalue dataset.
+ bedgraph
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Browser Extensible Data (BED) format of sequence annotation track that strictly does not contain non-standard fields beyond the first 3 columns.
+
+
+ Galaxy allows BED files to contain non-standard fields beyond the first 3 columns, some other implementations do not.
+ bedstrict
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ BED file format where each feature is described by chromosome, start, end, name, score, and strand.
+
+
+ Tab delimited data in strict BED format - no non-standard columns allowed; column count forced to 6
+ bed6
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ A BED file where each feature is described by all twelve columns.
+
+
+ Tab delimited data in strict BED format - no non-standard columns allowed; column count forced to 12
+ bed12
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Tabular format of chromosome names and sizes used by Galaxy.
+
+
+ Galaxy allows BED files to contain non-standard fields beyond the first 3 columns, some other implementations do not.
+ chrominfo
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Custom Sequence annotation track format used by Galaxy.
+
+
+ Used for tracks/track views within galaxy.
+ customtrack
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Color space FASTA format sequence variant.
+
+
+ FASTA format extended for color space information.
+ csfasta
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ HDF5 is a data model, library, and file format for storing and managing data, based on Hierarchical Data Format (HDF).
+ h5
+
+
+ An HDF5 file appears to the user as a directed graph. The nodes of this graph are the higher-level HDF5 objects that are exposed by the HDF5 APIs: Groups, Datasets, Named datatypes. Currently supported by the Python MDTraj package.
+ HDF5 is the new version, according to the HDF group, a completely different technology (https://support.hdfgroup.org/products/hdf4/) compared to HDF.
+ HDF5
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ A versatile bitmap format.
+
+
+ The TIFF format is perhaps the most versatile and diverse bitmap format in existence. Its extensible nature and support for numerous data compression schemes allow developers to customize the TIFF format to fit any peculiar data storage needs.
+ TIFF
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Standard bitmap storage format in the Microsoft Windows environment.
+
+
+ Although it is based on Windows internal bitmap data structures, it is supported by many non-Windows and non-PC applications.
+ BMP
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ IM is a format used by LabEye and other applications based on the IFUNC image processing library.
+
+
+ IFUNC library reads and writes most uncompressed interchange versions of this format.
+ im
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ pcd
+ Photo CD format, which is the highest resolution format for images on a CD.
+
+
+ PCD was developed by Kodak. A PCD file contains five different resolutions (ranging from low to high) of a slide or film negative. Because of this, PCD is often used by many photographers and graphics professionals for high-end printed applications.
+ pcd
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ PCX is an image file format that uses a simple form of run-length encoding. It is lossless.
+
+
+ pcx
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ The PPM format is a lowest common denominator color image file format.
+
+
+ ppm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ PSD (Photoshop Document) is a proprietary file that allows the user to work with the images' individual layers even after the file has been saved.
+
+
+ psd
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ X BitMap is a plain text binary image format used by the X Window System used for storing cursor and icon bitmaps used in the X GUI.
+
+
+ The XBM format was replaced by XPM for X11 in 1989.
+ xbm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ X PixMap (XPM) is an image file format used by the X Window System, it is intended primarily for creating icon pixmaps, and supports transparent pixels.
+
+
+ Sequence of segments with markers. Begins with byte of 0xFF and follows by marker type.
+ xpm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ RGB file format is the native raster graphics file format for Silicon Graphics workstations.
+
+
+ rgb
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ The PBM format is a lowest common denominator monochrome file format. It serves as the common language of a large family of bitmap image conversion filters.
+
+
+ pbm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ The PGM format is a lowest common denominator grayscale file format.
+
+
+ It is designed to be extremely easy to learn and write programs for.
+ pgm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ png
+ PNG is a file format for image compression.
+
+
+ It is expected to replace the Graphics Interchange Format (GIF).
+ PNG
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Scalable Vector Graphics (SVG) is an XML-based vector image format for two-dimensional graphics with support for interactivity and animation.
+ Scalable Vector Graphics
+
+
+ The SVG specification is an open standard developed by the World Wide Web Consortium (W3C) since 1999.
+ SVG
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Sun Raster is a raster graphics file format used on SunOS by Sun Microsystems.
+
+
+ The SVG specification is an open standard developed by the World Wide Web Consortium (W3C) since 1999.
+ rast
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.11
+ true
+ Textual report format for sequence quality for reports from sequencing machines.
+
+
+ Sequence quality report format (text)
+
+
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences).
+
+
+ Phred quality scores are defined as a property which is logarithmically related to the base-calling error probabilities.
+ qual
+
+
+
+
+
+
+
+
+
+ 1.11
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) for Solexa/Illumina 1.0 format.
+
+
+ Solexa/Illumina 1.0 format can encode a Solexa/Illumina quality score from -5 to 62 using ASCII 59 to 126 (although in raw read data Solexa scores from -5 to 40 only are expected)
+ qualsolexa
+
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) from Illumina 1.5 and before Illumina 1.8.
+
+
+ Starting in Illumina 1.5 and before Illumina 1.8, the Phred scores 0 to 2 have a slightly different meaning. The values 0 and 1 are no longer used and the value 2, encoded by ASCII 66 "B", is used also at the end of reads as a Read Segment Quality Control Indicator.
+ qualillumina
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) for SOLiD data.
+
+
+ For SOLiD data, the sequence is in color space, except the first position. The quality values are those of the Sanger format.
+ qualsolid
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) from 454 sequencers.
+
+
+ qual454
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Human ENCODE peak format.
+
+
+ Format that covers both the broad peak format and narrow peak format from ENCODE.
+ ENCODE peak format
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Human ENCODE narrow peak format.
+
+
+ Format that covers both the broad peak format and narrow peak format from ENCODE.
+ ENCODE narrow peak format
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Human ENCODE broad peak format.
+
+
+ ENCODE broad peak format
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ bgz
+ Blocked GNU Zip format.
+
+
+ BAM files are compressed using a variant of GZIP (GNU ZIP), into a format called BGZF (Blocked GNU Zip Format).
+ bgzip
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ TAB-delimited genome position file index format.
+
+
+ tabix
+
+
+
+
+
+
+
+
+ 1.11
+ true
+ Data format for graph data.
+
+
+ Graph format
+
+
+
+
+
+
+
+
+ 1.11
+
+ XML-based format used to store graph descriptions within Galaxy.
+
+
+ xgmml
+
+
+
+
+
+
+
+
+ 1.11
+
+ SIF (simple interaction file) Format - a network/pathway format used for instance in cytoscape.
+
+
+ sif
+
+
+
+
+
+
+
+
+
+ 1.11
+ MS Excel spreadsheet format consisting of a set of XML documents stored in a ZIP-compressed file.
+
+
+ xlsx
+
+
+
+
+
+
+
+
+ 1.11
+
+ Data format used by the SQLite database.
+
+
+ SQLite format
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Data format used by the SQLite database conformant to the Gemini schema.
+
+
+ Gemini SQLite format
+
+
+
+
+
+
+
+
+ 1.11
+ Duplicate of http://edamontology.org/format_3326
+ 1.20
+
+
+ Format of a data index of some type.
+
+
+ Index format
+ true
+
+
+
+
+
+
+
+
+
+ 1.11
+ An index of a genome database, indexed for use by the snpeff tool.
+
+
+ snpeffdb
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Binary format used by MATLAB files to store workspace variables.
+ .mat file format
+ MAT file format
+ MATLAB file format
+
+
+ MAT
+
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Network Common Data Form (NetCDF) library is supported by AMBER MD package from version 9.
+ Format used by netCDF software library for writing and reading chromatography-MS data files. Also used to store trajectory atom coordinates information, such as the ones obtained by Molecular Dynamics simulations.
+ ANDI-MS
+
+
+ netCDF
+
+
+
+
+
+
+
+
+ 1.12
+ mgf
+ Mascot Generic Format. Encodes multiple MS/MS spectra in a single file.
+
+
+ Files include *m*/*z*, intensity pairs separated by headers; headers can contain a bit more information, including search engine instructions.
+ MGF
+
+
+
+
+
+
+
+
+ 1.12
+ Spectral data format file where each spectrum is written to a separate file.
+
+
+ Each file contains one header line for the known or assumed charge and the mass of the precursor peptide ion, calculated from the measured *m*/*z* and the charge. This one line was then followed by all the *m*/*z*, intensity pairs that represent the spectrum.
+ dta
+
+
+
+
+
+
+
+
+ 1.12
+ Spectral data file similar to dta.
+
+
+ Differ from .dta only in subtleties of the header line format and content and support the added feature of being able to.
+ pkl
+
+
+
+
+
+
+
+
+ 1.12
+ https://dx.doi.org/10.1038%2Fnbt1031
+ Common file format for proteomics mass spectrometric data developed at the Seattle Proteome Center/Institute for Systems Biology.
+
+
+ mzXML
+
+
+
+
+
+
+
+
+
+ 1.12
+ http://sashimi.sourceforge.net/schema_revision/pepXML/pepXML_v118.xsd
+ Open data format for the storage, exchange, and processing of peptide sequence assignments of MS/MS scans, intended to provide a common data output format for many different MS/MS search engines and subsequent peptide-level analyses.
+
+
+ pepXML
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Graphical Pathway Markup Language (GPML) is an XML format used for exchanging biological pathways.
+
+
+ GPML
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ oxlicg
+
+
+
+ A list of k-mers and their occurrences in a dataset. Can also be used as an implicit De Bruijn graph.
+ K-mer countgraph
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ mzTab is a tab-delimited format for mass spectrometry-based proteomics and metabolomics results.
+
+
+ mzTab
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+ imzml
+
+ imzML metadata is a data format for mass spectrometry imaging metadata.
+
+
+ imzML data are recorded in 2 files: '.imzXML' is a metadata XML file based on mzML by HUPO-PSI, and '.ibd' is a binary file containing the mass spectra. This entry is for the metadata XML file
+ imzML metadata file
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ qcML is an XML format for quality-related data of mass spectrometry and other high-throughput measurements.
+
+
+ The focus of qcML is towards mass spectrometry based proteomics, but the format is suitable for metabolomics and sequencing as well.
+ qcML
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ PRIDE XML is an XML format for mass spectra, peptide and protein identifications, and metadata about a corresponding measurement, sample, experiment.
+
+
+ PRIDE XML
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Simulation Experiment Description Markup Language (SED-ML) is an XML format for encoding simulation setups, according to the MIASE (Minimum Information About a Simulation Experiment) requirements.
+
+
+ SED-ML
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Open Modeling EXchange format (OMEX) is a ZIPped format for encapsulating all information necessary for a modeling and simulation project in systems biology.
+
+
+ An OMEX file is a ZIP container that includes a manifest file, listing the content of the archive, an optional metadata file adding information about the archive and its content, and the files describing the model. OMEX is one of the standardised formats within COMBINE (Computational Modeling in Biology Network).
+ COMBINE OMEX
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ The Investigation / Study / Assay (ISA) tab-delimited (TAB) format incorporates metadata from experiments employing a combination of technologies.
+
+
+ ISA-TAB is based on MAGE-TAB. Other than tabular, the ISA model can also be represented in RDF, and in JSON (compliant with a set of defined JSON Schemata).
+ ISA-TAB
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ SBtab is a tabular format for biochemical network models.
+
+
+ SBtab
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Biological Connection Markup Language (BCML) is an XML format for biological pathways.
+
+
+ BCML
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Biological Dynamics Markup Language (BDML) is an XML format for quantitative data describing biological dynamics.
+
+
+ BDML
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Biological Expression Language (BEL) is a textual format for representing scientific findings in life sciences in a computable form.
+
+
+ BEL
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ SBGN-ML is an XML format for Systems Biology Graphical Notation (SBGN) diagrams of biological pathways or networks.
+
+
+ SBGN-ML
+
+
+
+
+
+
+
+
+
+ 1.13
+
+ agp
+
+ AGP is a tabular format for a sequence assembly (a contig, a scaffold/supercontig, or a chromosome).
+
+
+ AGP
+
+
+
+
+
+
+
+
+ 1.13
+ PostScript format.
+ PostScript
+
+
+ PS
+
+
+
+
+
+
+
+
+ 1.13
+
+ sra
+ SRA archive format (SRA) is the archive format used for input to the NCBI Sequence Read Archive.
+ SRA
+ SRA archive format
+
+
+ SRA format
+
+
+
+
+
+
+
+
+ 1.13
+
+ VDB ('vertical database') is the native format used for export from the NCBI Sequence Read Archive.
+ SRA native format
+
+
+ VDB
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+ Index file format used by the samtools package to index TAB-delimited genome position files.
+
+
+ Tabix index file format
+
+
+
+
+
+
+
+
+ 1.13
+ A five-column, tab-delimited table of feature locations and qualifiers for importing annotation into an existing Sequin submission (an NCBI tool for submitting and updating GenBank entries).
+
+
+ Sequin format
+
+
+
+
+
+
+
+
+ 1.14
+ Proprietary mass-spectrometry format of Thermo Scientific's ProteomeDiscoverer software.
+ Magellan storage file format
+
+
+ This format corresponds to an SQLite database, and you can look into the files with e.g. SQLiteStudio3. There are also some readers (http://doi.org/10.1021/pr2005154) and converters (http://doi.org/10.1016/j.jprot.2015.06.015) for this format available, which re-engineered the database schema, but there is no official DB schema specification of Thermo Scientific for the format.
+ MSF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.14
+ true
+ Data format for biodiversity data.
+
+
+ Biodiversity data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Exchange format of the Access to Biological Collections Data (ABCD) Schema; a standard for the access to and exchange of data about specimens and observations (primary biodiversity data).
+ ABCD
+
+
+ ABCD format
+
+
+
+
+
+
+
+
+
+ 1.14
+ Tab-delimited text files of GenePattern that contain a column for each sample, a row for each gene, and an expression value for each gene in each sample.
+ GCT format
+ Res format
+
+
+ GCT/Res format
+
+
+
+
+
+
+
+
+
+ 1.14
+ wiff
+ Mass spectrum file format from QSTAR and QTRAP instruments (ABI/Sciex).
+ wiff
+
+
+ WIFF format
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Output format used by X! series search engines that is based on the XML language BIOML.
+
+
+ X!Tandem XML
+
+
+
+
+
+
+
+
+
+ 1.14
+ Proprietary file format for mass spectrometry data from Thermo Scientific.
+
+
+ Proprietary format for which documentation is not available.
+ Thermo RAW
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ "Raw" result file from Mascot database search.
+
+
+ Mascot .dat file
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Format of peak list files from Andromeda search engine (MaxQuant) that consist of arbitrarily many spectra.
+ MaxQuant APL
+
+
+ MaxQuant APL peaklist format
+
+
+
+
+
+
+
+
+ 1.14
+
+ Synthetic Biology Open Language (SBOL) is an XML format for the specification and exchange of biological design information in synthetic biology.
+
+
+ SBOL introduces a standardised format for the electronic exchange of information on the structural and functional aspects of biological designs.
+ SBOL
+
+
+
+
+
+
+
+
+ 1.14
+
+ PMML uses XML to represent mining models. The structure of the models is described by an XML Schema.
+
+
+ One or more mining models can be contained in a PMML document.
+ PMML
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Image file format used by the Open Microscopy Environment (OME).
+
+
+ An OME-TIFF dataset consists of one or more files in standard TIFF or BigTIFF format, with the file extension .ome.tif or .ome.tiff, and an identical (or in the case of multiple files, nearly identical) string of OME-XML metadata embedded in the ImageDescription tag of each file's first IFD (Image File Directory). BigTIFF file extensions are also permitted, with the file extension .ome.tf2, .ome.tf8 or .ome.btf, but note these file extensions are an addition to the original specification, and software using an older version of the specification may not be able to handle these file extensions.
+ OME develops open-source software and data format standards for the storage and manipulation of biological microscopy data. It is a joint project between universities, research establishments, industry and the software development community.
+ OME-TIFF
+
+
+
+
+
+
+
+
+ 1.14
+
+ The LocARNA PP format combines sequence or alignment information and (respectively, single or consensus) ensemble probabilities into a PP 2.0 record.
+
+
+ Format for multiple aligned or single sequences together with the probabilistic description of the (consensus) RNA secondary structure ensemble by probabilities of base pairs, base pair stackings, and base pairs and unpaired bases in the loop of base pairs.
+ LocARNA PP
+
+
+
+
+
+
+
+
+ 1.14
+
+ Input format used by the Database of Genotypes and Phenotypes (dbGaP).
+
+
+ The Database of Genotypes and Phenotypes (dbGaP) is a National Institutes of Health (NIH) sponsored repository charged to archive, curate and distribute information produced by studies investigating the interaction of genotype and phenotype.
+ dbGaP format
+
+
+
+
+
+
+
+
+
+
+ 1.15
+
+ biom
+ The BIological Observation Matrix (BIOM) is a format for representing biological sample by observation contingency tables in broad areas of comparative omics. The primary use of this format is to represent OTU tables and metagenome tables.
+ BIological Observation Matrix format
+ biom
+
+
+ BIOM is a recognised standard for the Earth Microbiome Project, and is a project supported by Genomics Standards Consortium. Supported in QIIME, Mothur, MEGAN, etc.
+ BIOM format
+
+
+
+
+
+
+
+
+
+ 1.15
+
+
+ A format for storage, exchange, and processing of protein identifications created from ms/ms-derived peptide sequence data.
+
+
+ No human-consumable information about this format is available (see http://tools.proteomecenter.org/wiki/index.php?title=Formats:protXML).
+ protXML
+ http://doi.org/10.1038/msb4100024
+ http://sashimi.sourceforge.net/schema_revision/protXML/protXML_v3.xsd
+
+
+
+
+
+
+
+
+
+
+ 1.15
+ true
+ A linked data format enables publishing structured data as linked data (Linked Data), so that the data can be interlinked and become more useful through semantic queries.
+ Semantic Web format
+
+
+ Linked data format
+
+
+
+
+
+
+
+
+
+
+
+ 1.15
+
+ jsonld
+
+
+ JSON-LD, or JavaScript Object Notation for Linked Data, is a method of encoding Linked Data using JSON.
+ JavaScript Object Notation for Linked Data
+ jsonld
+
+
+ JSON-LD
+
+
+
+
+
+
+
+
+
+ 1.15
+
+ yaml
+ yml
+
+ YAML (YAML Ain't Markup Language) is a human-readable tree-structured data serialisation language.
+ YAML Ain't Markup Language
+ yml
+
+
+ Data in YAML format can be serialised into text, or binary format.
+ YAML version 1.2 is a superset of JSON; prior versions were "not strictly compatible".
+ YAML
+
+
+
+
+
+
+
+
+
+ 1.16
+ Tabular data represented as values in a text file delimited by some character.
+ Delimiter-separated values
+ Tabular format
+
+
+ DSV
+
+
+
+
+
+
+
+
+
+ 1.16
+ csv
+
+
+
+ Tabular data represented as comma-separated values in a text file.
+ Comma-separated values
+
+
+ CSV
+
+
+
+
+
+
+
+
+
+ 1.16
+ out
+ "Raw" result file from SEQUEST database search.
+
+
+ SEQUEST .out file
+
+
+
+
+
+
+
+
+
+ 1.16
+ http://ftp.mi.fu-berlin.de/pub/OpenMS/release1.9-documentation/html/classOpenMS_1_1IdXMLFile.html
+ http://open-ms.sourceforge.net/schemas/
+ XML file format for files containing information about peptide identifications from mass spectrometry data analysis carried out with OpenMS.
+
+
+ idXML
+
+
+
+
+
+
+
+
+ 1.16
+ Data table formatted such that it can be passed/streamed within the KNIME platform.
+
+
+ KNIME datatable format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ UniProtKB XML sequence features format is an XML format available for downloading UniProt entries.
+ UniProt XML
+ UniProt XML format
+ UniProtKB XML format
+
+
+ UniProtKB XML
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ UniProtKB RDF sequence features format is an RDF format available for downloading UniProt entries (in RDF/XML).
+ UniProt RDF
+ UniProt RDF format
+ UniProt RDF/XML
+ UniProt RDF/XML format
+ UniProtKB RDF format
+ UniProtKB RDF/XML
+ UniProtKB RDF/XML format
+
+
+ UniProtKB RDF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+ Work in progress. 'BioXSD' belongs to the 'BioXSD|GTrack' ecosystem of generic formats. 'BioJSON' is the JSON format based on the common, unified 'BioXSD data model', a.k.a. 'BioXSD|BioJSON|BioYAML'.
+
+ BioJSON is a BioXSD-schema-based JSON format of sequence-based data and some other common data - sequence records, alignments, feature records, references to resources, and more - optimised for integrative bioinformatics, web applications and APIs, and object-oriented programming.
+ BioJSON (BioXSD data model)
+ BioJSON format (BioXSD)
+ BioXSD BioJSON
+ BioXSD BioJSON format
+ BioXSD JSON
+ BioXSD JSON format
+ BioXSD in JSON
+ BioXSD in JSON format
+ BioXSD+JSON
+ BioXSD/GTrack BioJSON
+ BioXSD|BioJSON|BioYAML BioJSON
+ BioXSD|GTrack BioJSON
+
+
+ BioJSON (BioXSD)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+ Work in progress. 'BioXSD' belongs to the 'BioXSD|GTrack' ecosystem of generic formats. 'BioYAML' is the YAML format based on the common, unified 'BioXSD data model', a.k.a. 'BioXSD|BioJSON|BioYAML'.
+
+ BioYAML is a BioXSD-schema-based YAML format of sequence-based data and some other common data - sequence records, alignments, feature records, references to resources, and more - optimised for integrative bioinformatics, web APIs, human readability and editing, and object-oriented programming.
+ BioXSD BioYAML
+ BioXSD BioYAML format
+ BioXSD YAML
+ BioXSD YAML format
+ BioXSD in YAML
+ BioXSD in YAML format
+ BioXSD+YAML
+ BioXSD/GTrack BioYAML
+ BioXSD|BioJSON|BioYAML BioYAML
+ BioXSD|GTrack BioYAML
+ BioYAML (BioXSD data model)
+ BioYAML (BioXSD)
+ BioYAML format
+ BioYAML format (BioXSD)
+
+
+ BioYAML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ BioJSON is a JSON format of single multiple sequence alignments, with their annotations, features, and custom visualisation and application settings for the Jalview workbench.
+ BioJSON format (Jalview)
+ JSON (Jalview)
+ JSON format (Jalview)
+ Jalview BioJSON
+ Jalview BioJSON format
+ Jalview JSON
+ Jalview JSON format
+
+
+ BioJSON (Jalview)
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+ 'GSuite' belongs to the 'BioXSD|GTrack' ecosystem of generic formats, and in particular to its subset, the 'GTrack ecosystem' (GTrack, GSuite, BTrack). 'GSuite' is the tabular format for an annotated collection of individual GTrack files.
+
+
+ GSuite is a tabular format for collections of genome or sequence feature tracks, suitable for integrative multi-track analysis. GSuite contains links to genome/sequence tracks, with additional metadata.
+ BioXSD/GTrack GSuite
+ BioXSD|GTrack GSuite
+ GSuite (GTrack ecosystem of formats)
+ GSuite format
+ GTrack|BTrack|GSuite GSuite
+ GTrack|GSuite|BTrack GSuite
+
+
+ GSuite
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ 'BTrack' belongs to the 'BioXSD|GTrack' ecosystem of generic formats, and in particular to its subset, the 'GTrack ecosystem' (GTrack, GSuite, BTrack). 'BTrack' is the binary, optionally compressed HDF5-based version of the GTrack and GSuite formats.
+ BTrack is an HDF5-based binary format for genome or sequence feature tracks and their collections, suitable for integrative multi-track analysis. BTrack is a binary, compressed alternative to the GTrack and GSuite formats.
+ BTrack (GTrack ecosystem of formats)
+ BTrack format
+ BioXSD/GTrack BTrack
+ BioXSD|GTrack BTrack
+ GTrack|BTrack|GSuite BTrack
+ GTrack|GSuite|BTrack BTrack
+
+
+ BTrack
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+
+ Multi-Crop Passport Descriptors is a format available in 2 successive versions, V.1 (FAO/IPGRI 2001) and V.2 (FAO/Bioversity 2012).
+
+
+
+
+ The FAO/Bioversity/IPGRI Multi-Crop Passport Descriptors (MCPD) is an international standard format for exchange of germplasm information.
+ Bioversity MCPD
+ FAO MCPD
+ IPGRI MCPD
+ MCPD V.1
+ MCPD V.2
+ MCPD format
+ Multi-Crop Passport Descriptors
+ Multi-Crop Passport Descriptors format
+
+
+ MCPD
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ true
+ Data format of an annotated text, e.g. with recognised entities, concepts, and relations.
+
+
+ Annotated text format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ JSON format of annotated scientific text used by PubAnnotations and other tools.
+
+
+ PubAnnotation format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ BioC is a standardised XML format for sharing and integrating text data and annotations.
+
+
+ BioC
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ Native textual export format of annotated scientific text from PubTator.
+
+
+ PubTator format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ A format of text annotation using the linked-data Open Annotation Data Model, serialised typically in RDF or JSON-LD.
+
+
+ Open Annotation format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+
+
+
+
+
+
+
+ A family of similar formats of text annotation, used by BRAT and other tools, known as BioNLP Shared Task format (BioNLP 2009 Shared Task on Event Extraction, BioNLP Shared Task 2011, BioNLP Shared Task 2013), BRAT format, BRAT standoff format, and similar.
+ BRAT format
+ BRAT standoff format
+
+
+ BioNLP Shared Task format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ true
+ A query language (format) for structured database queries.
+ Query format
+
+
+ Query language
+
+
+
+
+
+
+
+
+ 1.16
+ sql
+
+
+
+ SQL (Structured Query Language) is the de-facto standard query language (format of queries) for querying and manipulating data in relational databases.
+ Structured Query Language
+
+
+ SQL
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ xq
+ xquery
+ xqy
+
+ XQuery (XML Query) is a query language (format of queries) for querying and manipulating structured and unstructured data, usually in the form of XML, text, and with vendor-specific extensions for other data formats (JSON, binary, etc.).
+ XML Query
+ xq
+ xqy
+
+
+ XQuery
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ SPARQL (SPARQL Protocol and RDF Query Language) is a semantic query language for querying and manipulating data stored in Resource Description Framework (RDF) format.
+ SPARQL Protocol and RDF Query Language
+
+
+ SPARQL
+
+
+
+
+
+
+
+
+
+ 1.17
+ XML format for XML Schema.
+
+
+ xsd
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ XMFA format stands for eXtended Multi-FastA format and is used to store collinear sub-alignments that constitute a single genome alignment.
+ eXtended Multi-FastA format
+
+
+
+ XMFA
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ The GEN file format contains genetic data and describes SNPs.
+ Genotype file format
+
+
+ GEN
+
+
+
+
+
+
+
+
+ 1.20
+
+ The SAMPLE file format contains information about each individual i.e. individual IDs, covariates, phenotypes and missing data proportions, from a GWAS study.
+
+
+ SAMPLE file format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ SDF is one of a family of chemical-data file formats developed by MDL Information Systems; it is intended especially for structural information.
+
+
+ SDF
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ An MDL Molfile is a file format for holding information about the atoms, bonds, connectivity and coordinates of a molecule.
+
+
+ Molfile
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Complete, portable representation of a SYBYL molecule. ASCII file which contains all the information needed to reconstruct a SYBYL molecule.
+
+
+ Mol2
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Format for the LaTeX document preparation system.
+ LaTeX format
+
+
+ Uses the TeX typesetting program format.
+ latex
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Tab-delimited text file format used by Eland - the read-mapping program distributed by Illumina with its sequencing analysis pipeline - which maps short Solexa sequence reads to the human reference genome.
+ ELAND
+ eland
+
+
+ ELAND format
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ Phylip multiple alignment sequence format, less stringent than PHYLIP format.
+ PHYLIP Interleaved format
+
+
+ It differs from Phylip Format (format_1997) in the permitted length of the sequence ID. There are no length restrictions on the ID, but whitespace is not allowed in the sequence ID/Name because one space separates the longest ID and the beginning of the sequence. Sequence IDs must be padded to the longest ID length.
+ Relaxed PHYLIP Interleaved
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ Phylip multiple alignment sequence format, less stringent than PHYLIP sequential format (format_1998).
+ Relaxed PHYLIP non-interleaved
+ Relaxed PHYLIP non-interleaved format
+ Relaxed PHYLIP sequential format
+
+
+ It differs from Phylip sequential format (format_1997) in the permitted length of the sequence ID. There are no length restrictions on the ID, but whitespace is not allowed in the sequence ID/Name because one space separates the longest ID and the beginning of the sequence. Sequence IDs must be padded to the longest ID length.
+ Relaxed PHYLIP Sequential
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Default XML format of VisANT, containing all the network information.
+ VisANT xml
+ VisANT xml format
+
+
+ VisML
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ GML (Graph Modeling Language) is a text file format supporting network data with a very easy syntax. It is used by Graphlet, Pajek, yEd, LEDA and NetworkX.
+ GML format
+
+
+ GML
+
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ FASTG is a format for faithfully representing genome assemblies in the face of allelic polymorphism and assembly uncertainty.
+ FASTG assembly graph format
+
+
+ It is called FASTG, like FASTA, but the G stands for "graph".
+ FASTG
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.20
+ true
+ Data format for raw data from a nuclear magnetic resonance (NMR) spectroscopy experiment.
+ NMR peak assignment data format
+ NMR processed data format
+ NMR raw data format
+ Nuclear magnetic resonance spectroscopy data format
+ Processed NMR data format
+ Raw NMR data format
+
+
+ NMR data format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ nmrML is an MSI supported XML-based open access format for metabolomics NMR raw and processed spectral data. It is accompanied by an nmrCV (controlled vocabulary) to allow ontology-based annotations.
+
+
+ nmrML
+
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ proBAM is an adaptation of BAM (format_2572), which was extended to meet specific requirements entailed by proteomics data.
+
+
+ proBAM
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ proBED is an adaptation of BED (format_3003), which was extended to meet specific requirements entailed by proteomics data.
+
+
+ proBED
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.20
+ true
+ Data format for raw microarray data.
+ Microarray data format
+
+
+ Raw microarray data format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ GenePix Results (GPR) text file format developed by Axon Instruments that is used to save GenePix Results data.
+
+
+ GPR
+
+
+
+
+
+
+
+
+
+ 1.20
+ Binary format used by the ARB software suite.
+ ARB binary format
+
+
+ ARB
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://ftp.mi.fu-berlin.de/pub/OpenMS/release1.9-documentation/html/classOpenMS_1_1ConsensusXMLFile.html
+ OpenMS format for grouping features in one map or across several maps.
+
+
+ consensusXML
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://ftp.mi.fu-berlin.de/pub/OpenMS/release1.9-documentation/html/classOpenMS_1_1FeatureXMLFile.html
+ OpenMS format for quantitation results (LC/MS features).
+
+
+ featureXML
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://www.psidev.info/mzdata-1_0_5-docs
+ Now deprecated data format of the HUPO Proteomics Standards Initiative. Replaced by mzML (format_3244).
+
+
+ mzData
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://cruxtoolkit.sourceforge.net/tide-search.html
+ Format supported by the Tide tool for identifying peptides from tandem mass spectra.
+
+
+ TIDE TXT
+
+
+
+
+
+
+
+
+
+ 1.20
+ ftp://ftp.ncbi.nlm.nih.gov/blast/documents/NEWXML/ProposedBLASTXMLChanges.pdf
+ ftp://ftp.ncbi.nlm.nih.gov/blast/documents/NEWXML/xml2.pdf
+ http://www.ncbi.nlm.nih.gov/data_specs/schema/NCBI_BlastOutput2.mod.xsd
+ XML format as produced by the NCBI Blast package v2.
+
+
+ BLAST XML v2 results format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ Microsoft Powerpoint format.
+
+
+ pptx
+
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ ibd
+
+ ibd is a data format for mass spectrometry imaging data.
+
+
+ imzML data is recorded in 2 files: '.imzXML' is a metadata XML file based on mzML by HUPO-PSI, and '.ibd' is a binary file containing the mass spectra.
+ ibd
+
+
+
+
+
+
+
+
+ 1.21
+ Data format used in Natural Language Processing.
+ Natural Language Processing format
+
+
+ NLP format
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ XML input file format for BEAST Software (Bayesian Evolutionary Analysis Sampling Trees).
+
+
+ BEAST
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ Chado-XML format is a direct mapping of the Chado relational schema into XML.
+
+
+ Chado-XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ An alignment format generated by PRANK/PRANKSTER consisting of four elements: newick, nodes, selection and model.
+
+
+ HSAML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ Output xml file from the InterProScan sequence analysis application.
+
+
+ InterProScan XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ The KEGG Markup Language (KGML) is an exchange format of the KEGG pathway maps, which is converted from internally used KGML+ (KGML+SVG) format.
+ KEGG Markup Language
+
+
+ KGML
+
+
+
+
+
+
+
+
+
+ 1.21
+ XML format for collected entries from bibliographic databases MEDLINE and PubMed.
+ MEDLINE XML
+
+
+ PubMed XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ A set of XML compliant markup components for describing multiple sequence alignments.
+
+
+ MSAML
+
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ OrthoXML is designed broadly to allow the storage and comparison of orthology data from any ortholog database. It establishes a structure for describing orthology relationships while still allowing flexibility for database-specific information to be encapsulated in the same format.
+
+
+ OrthoXML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ Tree structure of Protein Sequence Database Markup Language generated using Matra software.
+
+
+ PSDML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ SeqXML is an XML Schema to describe biological sequences, developed by the Stockholm Bioinformatics Centre.
+
+
+ SeqXML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ XML format for the UniParc database.
+
+
+ UniParc XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ XML format for the UniRef reference clusters.
+
+
+ UniRef XML
+
+
+
+
+
+
+
+
+
+
+ 1.21
+
+
+
+
+ cwl
+
+
+
+ Common Workflow Language (CWL) format for description of command-line tools and workflows.
+ Common Workflow Language
+ CommonWL
+
+
+ CWL
+
+
+
+
+
+
+
+
+
+ 1.21
+ Proprietary file format for mass spectrometry data from Waters.
+
+
+ Proprietary format for which documentation is not available, but used by multiple tools.
+ Waters RAW
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ A standardized file format for data exchange in mass spectrometry, initially developed for infrared spectrometry.
+
+
+ JCAMP-DX is an ASCII based format and therefore not very compact even though it includes standards for file compression.
+ JCAMP-DX
+
+
+
+
+
+
+
+
+
+ 1.21
+ An NLP format used for annotated textual documents.
+
+
+ NLP annotation format
+
+
+
+
+
+
+
+
+ 1.21
+ NLP format used by a specific type of corpus (collection of texts).
+
+
+ NLP corpus format
+
+
+
+
+
+
+
+
+
+
+ 1.21
+
+
+
+ mirGFF3 is a common format for microRNA data resulting from small-RNA RNA-Seq workflows.
+ miRTop format
+
+
+ mirGFF3 is a specialisation of GFF3; produced by small-RNA-Seq analysis workflows, usable and convertible with the miRTop API (https://mirtop.readthedocs.io/en/latest/), and consumable by tools for downstream analysis.
+ mirGFF3
+
+
+
+
+
+
+
+
+ 1.21
+ A "placeholder" concept for formats of annotated RNA data, including e.g. microRNA and RNA-Seq data.
+ RNA data format
+ miRNA data format
+ microRNA data format
+
+
+ RNA annotation format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ File format to store trajectory information for a 3D structure.
+ CG trajectory formats
+ MD trajectory formats
+ NA trajectory formats
+ Protein trajectory formats
+
+
+ Formats differ on what they are able to store (coordinates, velocities, topologies) and how they are storing it (raw, compressed, textual, binary).
+ Trajectory format
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ Binary file format to store trajectory information for a 3D structure.
+
+
+ Trajectory format (binary)
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ Textual file format to store trajectory information for a 3D structure.
+
+
+ Trajectory format (text)
+
+
+
+
+
+
+
+
+
+ 1.22
+ HDF is the name of a set of file formats and libraries designed to store and organize large amounts of numerical data, originally developed at the National Center for Supercomputing Applications at the University of Illinois.
+
+
+ HDF is currently supported by many commercial and non-commercial software platforms such as Java, MATLAB/Scilab, Octave, Python and R.
+ HDF
+
+
+
+
+
+
+
+
+
+ 1.22
+ PCAZip format is a binary compressed file to store atom coordinates based on Essential Dynamics (ED) and Principal Component Analysis (PCA).
+
+
+ The compression is made by projecting the Cartesian snapshots collected along the trajectory into an orthogonal space defined by the most relevant eigenvectors obtained by diagonalization of the covariance matrix (PCA). In the compression/decompression process, part of the original information is lost, depending on the final number of eigenvectors chosen. However, with a reasonable choice of the set of eigenvectors the compression typically reduces the trajectory file to less than one tenth of its original size with very acceptable loss of information. Compression with PCAZip can only be applied to unsolvated structures.
+ PCAzip
+
+
+
+
+
+
+
+
+
+ 1.22
+ Portable binary format for trajectories produced by GROMACS package.
+
+
+ XTC uses the External Data Representation (xdr) routines for writing and reading data which were created for the Unix Network File System (NFS). XTC files use a reduced precision (lossy) algorithm which works by multiplying the coordinates by a scaling factor (typically 1000), thus converting them to pm (GROMACS standard distance unit is nm). This allows an integer rounding of the values. Several other tricks are performed, such as making use of atom proximity information: atoms close in sequence are usually close in space (e.g. water molecules). That makes XTC format the most efficient in terms of disk usage, in most cases reducing by a factor of 2 the size of any other binary trajectory format.
+ XTC
+
+
+
+
+
+
+
+
+
+ 1.22
+ Trajectory Next Generation (TNG) is a format for storage of molecular simulation data. It is designed and implemented by the GROMACS development group, and it is intended to replace the XTC format.
+ Trajectory Next Generation format
+
+
+ Fully architecture-independent format, regarding both endianness and the ability to mix single/double precision trajectories and I/O libraries. Self-sufficient, it should not require any other files for reading, and all the data should be contained in a single file for easy transport. Temporal compression of data, improving the compression rate of the previous XTC format. Possibility to store meta-data with information about the simulation. Direct access to a particular frame. Efficient parallel I/O.
+ TNG
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ The XYZ chemical file format is widely supported by many programs, although many slightly different XYZ file formats coexist (Tinker XYZ, UniChem XYZ, etc.). Basic information stored for each atom in the system are x, y and z coordinates and atom element/atomic number.
+
+
+ XYZ files are structured in this way: First line contains the number of atoms in the file. Second line contains a title, comment, or filename. Remaining lines contain atom information. Each line starts with the element symbol, followed by x, y and z coordinates in angstroms separated by whitespace. Multiple molecules or frames can be contained within one file, so it supports trajectory storage. XYZ files can be directly represented by a molecular viewer, as they contain all the basic information needed to build the 3D model.
+ XYZ
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER trajectory (also called mdcrd), with 10 coordinates per line and format F8.3 (fixed point notation with field width 8 and 3 decimal places).
+ AMBER trajectory format
+ inpcrd
+
+
+ mdcrd
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ Format of topology files; containing the static information of a structure molecular system that is needed for a molecular simulation.
+ CG topology format
+ MD topology format
+ NA topology format
+ Protein topology format
+
+
+ Many different file formats exist describing structural molecular topology. Typically, each MD package or simulation software works with their own implementation (e.g. GROMACS top, CHARMM psf, AMBER prmtop).
+ Topology format
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ GROMACS MD package top textual files define an entire structure system topology, either directly, or by including itp files.
+
+
+ There is currently no tool available for conversion between GROMACS topology format and other formats, due to the internal differences in both approaches. There is, however, a method to convert small molecules parameterized with AMBER force-field into GROMACS format, allowing simulations of these systems with GROMACS MD package.
+ GROMACS top
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER Prmtop file (version 7) is a structure topology text file divided in several sections designed to be parsed easily using simple Fortran code. Each section contains particular topology information, such as atom name, charge, mass, angles, dihedrals, etc.
+ AMBER Parm
+ AMBER Parm7
+ Parm7
+ Prmtop
+ Prmtop7
+
+
+ It can be modified manually, but as the size of the system increases, the hand-editing becomes increasingly complex. AMBER Parameter-Topology file format is used extensively by the AMBER software suite and is referred to as the Prmtop file for short.
+ version 7 is written to distinguish it from old versions of AMBER Prmtop. Similarly to HDF5, it is a completely different format, according to AMBER group: a drastic change to the file format occurred with the 2004 release of Amber 7 (http://ambermd.org/prmtop.pdf)
+ AMBER top
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ X-Plor Protein Structure Files (PSF) are structure topology files used by NAMD and CHARMM molecular simulations programs. PSF files contain six main sections of interest: atoms, bonds, angles, dihedrals, improper dihedrals (force terms used to maintain planarity) and cross-terms.
+
+
+ The high similarity in the functional form of the two potential energy functions used by AMBER and CHARMM force-fields gives rise to the possible use of one force-field within the other MD engine. Therefore, the conversion of PSF files to AMBER Prmtop format is possible with the use of AMBER chamber (CHARMM - AMBER) program.
+ PSF
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ GROMACS itp files (include topology) contain structure topology information, and are typically included in GROMACS topology files (GROMACS top). Itp files are used to define individual (or multiple) components of a topology as a separate file. This is particularly useful if there is a molecule that is used frequently, and also reduces the size of the system topology file, splitting it in different parts.
+
+
+ GROMACS itp files are used also to define position restrictions on the molecule, or to define the force field parameters for a particular ligand.
+ GROMACS itp
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Format of force field parameter files, which store the set of parameters (charges, masses, radii, bond lengths, bond dihedrals, etc.) that are essential for the proper description and simulation of a molecular system.
+ Many different file formats exist describing force field parameters. Typically, each MD package or simulation software works with their own implementation (e.g. GROMACS itp, CHARMM rtf, AMBER off / frcmod).
+ FF parameter format
+
+
+
+
+
+
+
+
+
+ 1.22
+ Scripps Research Institute BinPos format is a binary formatted file to store atom coordinates.
+ Scripps Research Institute BinPos
+
+
+ It is basically a translation of the ASCII atom coordinate format to binary code. The only additional information stored is a magic number that identifies the BinPos format and the number of atoms per snapshot. The remainder is the chain of coordinates binary encoded. A drawback of this format is its architecture dependency. Integers and floats codification depends on the architecture, thus it needs to be converted if working in different platforms (little endian, big endian).
+ BinPos
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER coordinate/restart file with 6 coordinates per line and decimal format F12.7 (fixed point notation with field width 12 and 7 decimal places).
+ restrt
+ rst7
+
+
+ RST
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Format of CHARMM Residue Topology Files (RTF), which define groups by including the atoms, the properties of the group, and bond and charge information.
+
+
+ There is currently no tool available for conversion between GROMACS topology format and other formats, due to the internal differences in both approaches. There is, however, a method to convert small molecules parameterized with AMBER force-field into GROMACS format, allowing simulations of these systems with GROMACS MD package.
+ CHARMM rtf
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER frcmod (Force field Modification) is a file format to store any modification to the standard force field needed for a particular molecule to be properly represented in the simulation.
+
+
+ AMBER frcmod
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER Object File Format library files (OFF library files) store residue libraries (forcefield residue parameters).
+ AMBER Object File Format
+ AMBER lib
+ AMBER off
+
+
+
+
+
+
+
+
+
+ 1.22
+ NMReData is a text based data standard for processed NMR data. It relies on SDF molecule data and allows storing assignments of NMR peaks to molecule features. The NMR-extracted data (or "NMReDATA") includes: Chemical shift, scalar coupling, 2D correlation, assignment, etc.
+
+
+ NMReData is a text based data standard for processed NMR data. It relies on SDF molecule data and allows storing assignments of NMR peaks to molecule features. The NMR-extracted data (or "NMReDATA") includes: Chemical shift, scalar coupling, 2D correlation, assignment, etc. Find more in the paper at https://doi.org/10.1002/mrc.4527.
+ NMReDATA
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+
+
+
+ BpForms is a string format for concretely representing the primary structures of biopolymers, including DNA, RNA, and proteins that include non-canonical nucleic and amino acids. See https://www.bpforms.org for more information.
+
+
+ BpForms
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ The first 4 bytes of any trr file containing 1993. See https://github.com/galaxyproject/galaxy/pull/6597/files#diff-409951594551183dbf886e24de6cb129R760
+ Format of trr files that contain the trajectory of a simulation experiment used by GROMACS.
+ trr
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+
+
+
+
+ msh
+
+
+
+ Mash sketch is a format for sequence / sequence checksum information. To make a sketch, each k-mer in a sequence is hashed, which creates a pseudo-random identifier. By sorting these hashes, a small subset from the top of the sorted list can represent the entire sequence.
+ Mash sketch
+ min-hash sketch
+
+
+ msh
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.23
+
+
+
+ loom
+ The Loom file format is based on HDF5, a standard for storing large numerical datasets. The Loom format is designed to efficiently hold large omics datasets. Typically, such data takes the form of a large matrix of numbers, along with metadata for the rows and columns.
+ Loom
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.23
+
+
+
+ zarray
+ zgroup
+ The Zarr format is an implementation of chunked, compressed, N-dimensional arrays for storing data.
+ Zarr
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.23
+
+
+ mtx
+
+ The Matrix Market matrix (MTX) format stores numerical or pattern matrices in a dense (array format) or sparse (coordinate format) representation.
+ MTX
+
+
+
+
+
+
+
+
+
+
+ 1.24
+
+
+
+
+
+ text/plain
+
+
+ BcForms is a format for abstractly describing the molecular structure (atoms and bonds) of macromolecular complexes as a collection of subunits and crosslinks. Each subunit can be described with BpForms (http://edamontology.org/format_3909) or SMILES (http://edamontology.org/data_2301). BcForms uses an ontology of crosslinks to abstract the chemical details of crosslinks from the descriptions of complexes (see https://bpforms.org/crosslink.html).
+ BcForms is related to http://edamontology.org/format_3909. (BcForms uses BpForms to describe subunits which are DNA, RNA, or protein polymers.) However, that format isn't the parent of BcForms. BcForms is similarly related to SMILES (http://edamontology.org/data_2301).
+ BcForms
+
+
+
+
+
+
+
+
+
+ 1.24
+
+ nq
+ N-Quads is a line-based, plain text format for encoding an RDF dataset. It includes information about the graph each triple belongs to.
+
+
+ N-Quads should not be confused with N-Triples which does not contain graph information.
+ N-Quads
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ json
+ application/json
+
+ Vega is a visualization grammar, a declarative language for creating, saving, and sharing interactive visualization designs. With Vega, you can describe the visual appearance and interactive behavior of a visualization in a JSON format, and generate web-based views using Canvas or SVG.
+
+
+ Vega
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ json
+ application/json
+
+ Vega-Lite is a high-level grammar of interactive graphics. It provides a concise JSON syntax for rapidly generating visualizations to support analysis. Vega-Lite specifications can be compiled to Vega specifications.
+
+
+ Vega-lite
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ application/xml
+
+ A model description language for computational neuroscience.
+
+
+ NeuroML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ bngl
+ application/xml
+ text/plain
+
+ BioNetGen is a format for the specification and simulation of rule-based models of biochemical systems, including signal transduction, metabolic, and genetic regulatory networks.
+ BioNetGen Language
+
+
+ BNGL
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ A Docker image is a file, comprised of multiple layers, that is used to execute code in a Docker container. An image is essentially built from the instructions for a complete and executable version of an application, which relies on the host OS kernel.
+
+
+ Docker image
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+ gfa
+
+ Graphical Fragment Assembly captures sequence graphs as the product of an assembly, a representation of variation in genomes, splice graphs in genes, or even overlap between reads from long-read sequencing technology.
+ Graphical Fragment Assembly (GFA) 1.0
+
+
+
+ GFA 1
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+ gfa
+
+ Graphical Fragment Assembly captures sequence graphs as the product of an assembly, a representation of variation in genomes, splice graphs in genes, or even overlap between reads from long-read sequencing technology. GFA2 is an update of GFA1 which is not compatible with GFA1.
+ Graphical Fragment Assembly (GFA) 2.0
+
+
+
+ GFA 2
+
+
+
+
+
+
+
+
+ 1.25
+
+
+ xlsx
+ application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
+
+ ObjTables is a toolkit for creating re-usable datasets that are both human and machine-readable, combining the ease of spreadsheets (e.g., Excel workbooks) with the rigor of schemas (classes, their attributes, the type of each attribute, and the possible relationships between instances of classes). ObjTables consists of a format for describing schemas for spreadsheets, numerous data types for science, a syntax for indicating the class and attribute represented by each table and column in a workbook, and software for using schemas to rigorously validate, merge, split, compare, and revision datasets.
+
+
+ ObjTables
+
+
+
+
+
+
+
+
+
+ 1.25
+ contig
+ The CONTIG format used for output of the SOAPdenovo alignment program. It contains contig sequences generated without using mate pair information.
+
+
+ CONTIG
+
+
+
+
+
+
+
+
+
+ 1.25
+ wego
+ WEGO native format used by the Web Gene Ontology Annotation Plot application. Tab-delimited format with gene names and others GO IDs (columns) with one annotation record per line.
+
+
+ WEGO
+
+
+
+
+
+
+
+
+
+ 1.25
+ rpkm
+ Tab-delimited format for gene expression levels table, calculated as Reads Per Kilobase per Million (RPKM) mapped reads.
+ Gene expression levels table format
+
+
+ For example a 1kb transcript with 1000 alignments in a sample of 10 million reads (out of which 8 million reads can be mapped) will have RPKM = 1000/(1 * 8) = 125
+ RPKM
+
+
+
+
+
+
+
+
+ 1.25
+ tar
+ TAR archive file format generated by the Unix-based utility tar.
+ TAR
+ Tarball
+ tar
+
+
+ For example a 1kb transcript with 1000 alignments in a sample of 10 million reads (out of which 8 million reads can be mapped) will have RPKM = 1000/(1 * 8) = 125
+ TAR format
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ chain
+ The CHAIN format describes a pairwise alignment that allows gaps in both sequences simultaneously and is used by the UCSC Genome Browser.
+
+
+ CHAIN
+ https://genome.ucsc.edu/goldenPath/help/chain.html
+
+
+
+
+
+
+
+
+
+ 1.25
+ net
+ The NET file format is used to describe the data that underlie the net alignment annotations in the UCSC Genome Browser.
+
+
+ NET
+ https://genome.ucsc.edu/goldenPath/help/net.html
+
+
+
+
+
+
+
+
+
+ 1.25
+ qmap
+ Format of QMAP files generated for methylation data from an internal BGI pipeline.
+
+
+ QMAP
+
+
+
+
+
+
+
+
+
+ 1.25
+ ga
+ An emerging format for high-level Galaxy workflow description.
+ Galaxy workflow format
+ GalaxyWF
+ ga
+
+
+ gxformat2
+ https://github.com/galaxyproject/gxformat2
+
+
+
+
+
+
+
+
+
+ 1.25
+ wmv
+ The proprietary native video format of various Microsoft programs such as Windows Media Player.
+ Windows Media Video format
+ Windows movie file format
+
+
+ WMV
+
+
+
+
+
+
+
+
+
+ 1.25
+ zip
+ ZIP is an archive file format that supports lossless data compression.
+ ZIP
+
+
+ A ZIP file may contain one or more files or directories that may have been compressed.
+ ZIP format
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ lsm
+ Zeiss' proprietary image format based on TIFF.
+
+
+ LSM files are the default data export for the Zeiss LSM series confocal microscopes (e.g. LSM 510, LSM 710). In addition to the image data, LSM files contain most imaging settings.
+ LSM
+
+
+
+
+
+
+
+
+ 1.25
+ gz
+ gzip
+ GNU zip compressed file format common to Unix-based operating systems.
+ GNU Zip
+ gz
+ gzip
+
+
+ GZIP format
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ avi
+ Audio Video Interleaved (AVI) format is a multimedia container format for AVI files, that allows synchronous audio-with-video playback.
+ Audio Video Interleaved
+
+
+ AVI
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ trackdb
+ A declaration file format for UCSC browsers track dataset display characteristics.
+
+
+ TrackDB
+
+
+
+
+
+
+
+
+
+ 1.25
+ cigar
+ Compact Idiosyncratic Gapped Alignment Report format is a compressed (run-length encoded) pairwise alignment format. It is useful for representing long (e.g. genomic) pairwise alignments.
+ CIGAR
+
+
+ CIGAR format
+ http://wiki.bits.vib.be/index.php/CIGAR/
+
+
+
+
+
+
+
+
+
+ 1.25
+ stl
+ STL is a file format native to the stereolithography CAD software created by 3D Systems. The format is used to save and share surface-rendered 3D images and also for 3D printing.
+ stl
+
+
+ Stereolithography format
+
+
+
+
+
+
+
+
+
+ 1.25
+ u3d
+ U3D (Universal 3D) is a compressed file format and data structure for 3D computer graphics. It contains 3D model information such as triangle meshes, lighting, shading, motion data, lines and points with color and structure.
+ Universal 3D
+ Universal 3D format
+
+
+ U3D
+
+
+
+
+
+
+
+
+
+ 1.25
+ tex
+ Bitmap image format used for storing textures.
+
+
+ Texture files can create the appearance of different surfaces and can be applied to both 2D and 3D objects. Note the file extension .tex is also used for LaTeX documents which are a completely different format and they are NOT interchangeable.
+ Texture file format
+
+
+
+
+
+
+
+
+
+ 1.25
+ py
+ Format for scripts written in Python - a widely used high-level programming language for general-purpose programming.
+ Python
+ Python program
+ py
+
+
+ Python script
+
+
+
+
+
+
+
+
+
+ 1.25
+ mp4
+ A digital multimedia container format most commonly used to store video and audio.
+ MP4
+
+
+ MPEG-4
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ pl
+ Format for scripts written in Perl - a family of high-level, general-purpose, interpreted, dynamic programming languages.
+ Perl
+ Perl program
+ pl
+
+
+ Perl script
+
+
+
+
+
+
+
+
+
+ 1.25
+ r
+ Format for scripts written in the R language - an open source programming language and software environment for statistical computing and graphics that is supported by the R Foundation for Statistical Computing.
+ R
+ R program
+
+
+ R script
+
+
+
+
+
+
+
+
+
+ 1.25
+ rmd
+ A file format for making dynamic documents (R Markdown scripts) with the R language.
+
+
+ R markdown
+ https://rmarkdown.rstudio.com/articles_intro.html
+
+
+
+
+
+
+
+
+ 1.25
+ This duplicates an existing concept (http://edamontology.org/format_3549).
+ 1.26
+
+ An open file format from the Neuroimaging Informatics Technology Initiative (NIfTI) commonly used to store brain imaging data obtained using Magnetic Resonance Imaging (MRI) methods.
+
+
+ NIFTI format
+ true
+
+
+
+
+
+
+
+
+ 1.25
+ pickle
+ Format used by Python pickle module for serializing and de-serializing a Python object structure.
+
+
+ pickle
+ https://docs.python.org/2/library/pickle.html
+
+
+
+
+
+
+
+
+ 1.25
+ npy
+ The standard binary file format used by NumPy - a fundamental package for scientific computing with Python - for persisting a single arbitrary NumPy array on disk. The format stores all of the shape and dtype information necessary to reconstruct the array correctly.
+ NumPy
+ npy
+
+
+ NumPy format
+
+
+
+
+
+
+
+
+ 1.25
+ repz
+ Format of repertoire (archive) files that can be read by SimToolbox (a MATLAB toolbox for structured illumination fluorescence microscopy) or alternatively extracted with zip file archiver software.
+
+
+ SimTools repertoire file format
+ https://pdfs.semanticscholar.org/5f25/f1cc6cdf2225fe22dc6fd4fc0296d486a85c.pdf
+
+
+
+
+
+
+
+
+ 1.25
+ cfg
+ A configuration file used by various programs to store settings that are specific to their respective software.
+
+
+ Configuration file format
+
+
+
+
+
+
+
+
+ 1.25
+ zst
+ Format used by the Zstandard real-time compression algorithm.
+ Zstandard compression format
+ Zstandard-compressed file format
+ zst
+
+
+ Zstandard format
+ https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md
+
+
+
+
+
+
+
+
+
+ 1.25
+ m
+ The file format for MATLAB scripts or functions.
+ MATLAB
+ m
+
+
+ MATLAB script
+
+
+
+
+
+
+
+ 1.26
+ PEtab
+
+
+ A data format for specifying parameter estimation problems in systems biology.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+
+ g.vcf
+ g.vcf.gz
+ Genomic Variant Call Format (gVCF) is a version of VCF that includes not only the positions that are variant when compared to a reference genome, but also the non-variant positions as ranges, including metrics of confidence that the positions in the range are actually non-variant e.g. minimum read-depth and genotype quality.
+ g.vcf
+ g.vcf.gz
+
+
+ gVCF
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+
+
+
+
+
+
+
+ cml
+ Chemical Markup Language (CML) is an XML-based format for encoding detailed information about a wide range of chemical concepts.
+ ChemML
+
+
+ cml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+
+
+
+
+
+
+ cif
+ Crystallographic Information File (CIF) is a data exchange standard file format for Crystallographic Information and related Structural Science data.
+
+
+ cif
+
+
+
+
+
+
+
+
+ 1.26
+
+
+
+
+
+
+
+
+
+
+
+ json
+
+ Format for describing the capabilities of a biosimulation tool including the modeling frameworks, simulation algorithms, and modeling formats that it supports, as well as metadata such as a list of the interfaces, programming languages, and operating systems supported by the tool; a link to download the tool; a list of the authors of the tool; and the license to the tool.
+
+
+ BioSimulators format for the specifications of biosimulation tools
+
+
+
+
+
+
+
+
+ 1.26
+
+
+
+ Outlines the syntax and semantics of the input and output arguments for command-line interfaces for biosimulation tools.
+
+
+ BioSimulators standard for command-line interfaces for biosimulation tools
+
+
+
+
+
+
+
+
+
+ 1.26
+ Data format derived from the standard PDB format, which enables users to incorporate parameters for charge and radius to the existing PDB data file.
+
+
+ PQR
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Data format used in AutoDock 4 for storing atomic coordinates, partial atomic charges and AutoDock atom types for both receptors and ligands.
+
+
+
+ PDBQT
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ msp
+ MSP is a data format for mass spectrometry data.
+
+
+ NIST Text file format for storing MS∕MS spectra (m∕z and intensity of mass peaks) along with additional annotations for each spectrum. A single MSP file can thus contain single or multiple spectra. This format is frequently used to share spectra libraries.
+ MSP
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Function
+ A function that processes a set of inputs and results in a set of outputs, or associates arguments (inputs) with values (outputs).
+ Computational method
+ Computational operation
+ Computational procedure
+ Computational subroutine
+ Function (programming)
+ Lambda abstraction
+ Mathematical function
+ Mathematical operation
+ Computational tool
+ Process
+ sumo:Function
+
+
+ Special cases are: a) An operation that consumes no input (has no input arguments). Such operation is either a constant function, or an operation depending only on the underlying state. b) An operation that may modify the underlying state but has no output. c) The singular-case operation with no input or output, that still may modify the underlying state.
+ Operation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Function
+ Operation is a function that is computational. It typically has input(s) and output(s), which are always data.
+
+
+
+
+ Computational tool
+ Computational tool provides one or more operations.
+
+
+
+
+ Process
+ Process can have a function (as its quality/attribute), and can also perform an operation with inputs and outputs.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search or query a data resource and retrieve entries and / or annotation.
+ Database retrieval
+ Query
+
+
+ Query and retrieval
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Search database to retrieve all relevant references to a particular entity or entry.
+
+ Data retrieval (database cross-reference)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Annotate an entity (typically a biological or biomedical database entity) with terms from a controlled vocabulary.
+
+
+ This is a broad concept and is used as a placeholder for other, more specific concepts.
+ Annotation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Generate an index of (typically a file of) biological data.
+ Data indexing
+ Database indexing
+
+
+ Indexing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Analyse an index of biological data.
+
+ Data index analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Retrieve basic information about a molecular sequence.
+
+ Annotation retrieval (sequence)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a molecular sequence by some means.
+ Sequence generation (nucleic acid)
+ Sequence generation (protein)
+
+
+ Sequence generation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Edit or change a molecular sequence, either randomly or specifically.
+
+
+ Sequence editing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Merge two or more (typically overlapping) molecular sequences.
+ Sequence splicing
+ Paired-end merging
+ Paired-end stitching
+ Read merging
+ Read stitching
+
+
+ Sequence merging
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Convert a molecular sequence from one type to another.
+
+
+ Sequence conversion
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate sequence complexity, for example to find low-complexity regions in sequences.
+
+
+ Sequence complexity calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate sequence ambiguity, for example identify regions in protein or nucleotide sequences with many ambiguity codes.
+
+
+ Sequence ambiguity calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate character or word composition or frequency of a molecular sequence.
+
+
+ Sequence composition calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find and/or analyse repeat sequences in (typically nucleotide) sequences.
+
+
+ Repeat sequences include tandem repeats, inverted or palindromic repeats, DNA microsatellites (Simple Sequence Repeats or SSRs), interspersed repeats, maximal duplications and reverse, complemented and reverse complemented repeats etc. Repeat units can be exact or imperfect, in tandem or dispersed, of specified or unspecified length.
+ Repeat sequence analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Discover new motifs or conserved patterns in sequences or sequence alignments (de-novo discovery).
+ Motif discovery
+
+
+ Motifs and patterns might be conserved or over-represented (occur with improbable frequency).
+ Sequence motif discovery
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find (scan for) known motifs, patterns and regular expressions in molecular sequence(s).
+ Motif scanning
+ Sequence signature detection
+ Sequence signature recognition
+ Motif detection
+ Motif recognition
+ Motif search
+ Sequence motif detection
+ Sequence motif search
+ Sequence profile search
+
+
+ Sequence motif recognition
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find motifs shared by molecular sequences.
+
+
+ Sequence motif comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Analyse the sequence, conformational or physicochemical properties of transcription regulatory elements in DNA sequences.
+
+ For example transcription factor binding sites (TFBS) analysis to predict accessibility of DNA to binding factors.
+ Transcription regulatory sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Identify common, conserved (homologous) or synonymous transcriptional regulatory motifs (transcription factor binding sites).
+
+
+ Conserved transcription regulatory sequence identification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+ Extract, calculate or predict non-positional (physical or chemical) properties of a protein from processing a protein (3D) structure.
+
+
+ Protein property calculation (from structure)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse flexibility and motion in protein structure.
+ CG analysis
+ MD analysis
+ Protein Dynamics Analysis
+ Trajectory analysis
+ Nucleic Acid Dynamics Analysis
+ Protein flexibility and motion analysis
+ Protein flexibility prediction
+ Protein motion prediction
+
+
+ Use this concept for analysis of flexible and rigid residues, local chain deformability, regions undergoing conformational change, molecular vibrations or fluctuational dynamics, domain motions or other large-scale structural transitions in a protein structure.
+ Simulation analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or screen for 3D structural motifs in protein structure(s).
+ Protein structural feature identification
+ Protein structural motif recognition
+
+
+ This includes conserved substructures and conserved geometry, such as spatial arrangement of secondary structure or protein backbone. Methods might use structure alignment, structural templates, searches for similar electrostatic potential and molecular surface shape, surface-mapping of phylogenetic information etc.
+ Structural motif discovery
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify structural domains in a protein structure from first principles (for example calculations on structural compactness).
+
+
+ Protein domain recognition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the architecture (spatial arrangement of secondary structure) of protein structure(s).
+
+
+ Protein architecture analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: SymShellFiveXML
+ WHATIF: SymShellOneXML
+ WHATIF: SymShellTenXML
+ WHATIF: SymShellTwoXML
+ WHATIF:ListContactsNormal
+ WHATIF:ListContactsRelaxed
+ WHATIF:ListSideChainContactsNormal
+ WHATIF:ListSideChainContactsRelaxed
+ Calculate or extract inter-atomic, inter-residue or residue-atom contacts, distances and interactions in protein structure(s).
+
+
+ Residue interaction calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:CysteineTorsions
+ WHATIF:ResidueTorsions
+ WHATIF:ResidueTorsionsBB
+ WHATIF:ShowTauAngle
+ Calculate, visualise or analyse phi/psi angles of a protein structure.
+ Backbone torsion angle calculation
+ Cysteine torsion angle calculation
+ Tau angle calculation
+ Torsion angle calculation
+
+
+ Protein geometry calculation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Extract, calculate or predict non-positional (physical or chemical) properties of a protein, including any non-positional properties of the molecular sequence, from processing a protein sequence or 3D structure.
+ Protein property rendering
+ Protein property calculation (from sequence)
+ Protein property calculation (from structure)
+ Protein structural property calculation
+ Structural property calculation
+
+
+ This includes methods to render and visualise the properties of a protein sequence, and a residue-level search for properties such as solvent accessibility, hydropathy, secondary structure, ligand-binding etc.
+ Protein property calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Immunogen design
+ Predict antigenicity, allergenicity / immunogenicity, allergic cross-reactivity etc of peptides and proteins.
+ Antigenicity prediction
+ Immunogenicity prediction
+ B cell peptide immunogenicity prediction
+ Hopp and Woods plotting
+ MHC peptide immunogenicity prediction
+
+
+ Immunological responses are cellular or humoral. In vaccine design to induce a cellular immune response, methods must search for antigens that can be recognized by the major histocompatibility complex (MHC) molecules present in T lymphocytes. If a humoral response is required, antigens for B cells must be identified.
+ This includes methods that generate a graphical rendering of antigenicity of a protein, such as a Hopp and Woods plot.
+ This is usually done in the development of peptide-specific antibodies or multi-epitope vaccines. Methods might use sequence data (for example motifs) and / or structural data.
+ Peptide immunogenicity prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict, recognise and identify positional features in molecular sequences such as key functional sites or regions.
+ Sequence feature prediction
+ Sequence feature recognition
+ Motif database search
+ SO:0000110
+
+
+ Look at "Protein feature detection" (http://edamontology.org/operation_3092) and "Nucleic acid feature detection" (http://edamontology.org/operation_0415) in case more specific terms are needed.
+ Sequence feature detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Extract a sequence feature table from a sequence database entry.
+
+ Data retrieval (feature table)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query the features (in a feature table) of molecular sequence(s).
+
+ Feature table query
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the feature tables of two or more molecular sequences.
+ Feature comparison
+ Feature table comparison
+
+
+ Sequence feature comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Display basic information about a sequence alignment.
+
+ Data retrieval (sequence alignment)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a molecular sequence alignment.
+
+
+ Sequence alignment analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare (typically by aligning) two molecular sequence alignments.
+
+
+ See also 'Sequence profile alignment'.
+ Sequence alignment comparison
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Convert a molecular sequence alignment from one type to another (for example amino acid to coding nucleotide sequence).
+
+
+ Sequence alignment conversion
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) physicochemical property data of nucleic acids.
+
+ Nucleic acid property processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate or predict physical or chemical properties of nucleic acid molecules, including any non-positional properties of the molecular sequence.
+
+
+ Nucleic acid property calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict splicing alternatives or transcript isoforms from analysis of sequence data.
+ Alternative splicing analysis
+ Alternative splicing detection
+ Differential splicing analysis
+ Splice transcript prediction
+
+
+ Alternative splicing prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect frameshifts in DNA sequences, including frameshift sites and signals, and frameshift errors from sequencing projects.
+ Frameshift error detection
+
+
+ Methods include sequence alignment (if related sequences are available) and word-based sequence comparison.
+ Frameshift detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect vector sequences in nucleotide sequence, typically by comparison to a set of known vector sequences.
+
+
+ Vector sequence detection
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict secondary structure of protein sequences.
+ Secondary structure prediction (protein)
+
+
+ Methods might use amino acid composition, local sequence information, multiple sequence alignments, physicochemical features, estimated energy content, statistical algorithms, hidden Markov models, support vector machines, kernel machines, neural networks etc.
+ Protein secondary structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict super-secondary structure of protein sequence(s).
+
+
+ Super-secondary structures include leucine zippers, coiled coils, Helix-Turn-Helix etc.
+ Protein super-secondary structure prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict and/or classify transmembrane proteins or transmembrane (helical) domains or regions in protein sequences.
+
+
+ Transmembrane protein prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse transmembrane protein(s), typically by processing sequence and / or structural data, and write an informative report for example about the protein and its transmembrane domains / regions.
+
+
+ Use this (or child) concept for analysis of transmembrane domains (buried and exposed faces), transmembrane helices, helix topology, orientation, inter-helical contacts, membrane dipping (re-entrant) loops and other secondary structure etc. Methods might use pattern discovery, hidden Markov models, sequence alignment, structural profiles, amino acid property analysis, comparison to known domains or some combination (hybrid methods).
+ Transmembrane protein analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ This is an "organisational class" not very useful for annotation per se.
+ 1.19
+
+
+
+
+ Predict tertiary structure of a molecular (biopolymer) sequence.
+
+ Structure prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict contacts, non-covalent interactions and distance (constraints) between amino acids in protein sequences.
+ Residue interaction prediction
+ Contact map prediction
+ Protein contact map prediction
+
+
+ Methods usually involve multiple sequence alignment analysis.
+ Residue contact prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Analyse experimental protein-protein interaction data from for example yeast two-hybrid analysis, protein microarrays, immunoaffinity chromatography followed by mass spectrometry, phage display etc.
+
+
+ Protein interaction raw data analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify or predict protein-protein interactions, interfaces, binding sites etc in protein sequences.
+
+
+ Protein-protein interaction prediction (from protein sequence)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify or predict protein-protein interactions, interfaces, binding sites etc in protein structures.
+
+
+ Protein-protein interaction prediction (from protein structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a network of protein interactions.
+ Protein interaction network comparison
+
+
+ Protein interaction network analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+ Compare two or more biological pathways or networks.
+
+ Pathway or network comparison
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict RNA secondary structure (for example knots, pseudoknots, alternative structures etc).
+ RNA shape prediction
+
+
+ Methods might use RNA motifs, predicted intermolecular contacts, or RNA sequence-structure compatibility (inverse RNA folding).
+ RNA secondary structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse some aspect of RNA/DNA folding, typically by processing sequence and/or structural data. For example, compute folding energies such as minimum folding energies for DNA or RNA sequences or energy landscape of RNA mutants.
+ Nucleic acid folding
+ Nucleic acid folding modelling
+ Nucleic acid folding prediction
+ Nucleic acid folding energy calculation
+
+
+ Nucleic acid folding analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on restriction enzymes or restriction enzyme sites.
+
+ Data retrieval (restriction enzyme annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Identify genetic markers in DNA sequences.
+
+ A genetic marker is any DNA sequence of known chromosomal location that is associated with and specific to a particular gene or trait. This includes short sequences surrounding a SNP, Sequence-Tagged Sites (STS) which are well suited for PCR amplification, a longer minisatellite sequence etc.
+ Genetic marker identification
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a genetic (linkage) map of a DNA sequence (typically a chromosome) showing the relative positions of genetic markers based on estimation of non-physical distances.
+ Functional mapping
+ Genetic cartography
+ Genetic map construction
+ Genetic map generation
+ Linkage mapping
+ QTL mapping
+
+
+ Mapping involves ordering genetic loci along a chromosome and estimating the physical distance between loci. A genetic map shows the relative (not physical) position of known genes and genetic markers.
+ This includes mapping of the genetic architecture of dynamic complex traits (functional mapping), e.g. by characterisation of the underlying quantitative trait loci (QTLs) or nucleotides (QTNs).
+ Genetic mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse genetic linkage.
+
+
+ For example, estimate how close two genes are on a chromosome by calculating how often they are transmitted together to an offspring, ascertain whether two genes are linked and parental linkage, calculate linkage map distance etc.
+ Linkage analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate codon usage statistics and create a codon usage table.
+ Codon usage table construction
+
+
+ Codon usage table generation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more codon usage tables.
+
+
+ Codon usage table comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse codon usage in molecular sequences or process codon usage data (e.g. a codon usage table).
+ Codon usage data analysis
+ Codon usage table analysis
+
+
+ Codon usage analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify and plot third base position variability in a nucleotide sequence.
+
+
+ Base position variability plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find exact character or word matches between molecular sequences without full sequence alignment.
+
+
+ Sequence word comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a sequence distance matrix or otherwise estimate genetic distances between molecular sequences.
+ Phylogenetic distance matrix generation
+ Sequence distance calculation
+ Sequence distance matrix construction
+
+
+ Sequence distance matrix generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more molecular sequences, identify and remove redundant sequences based on some criteria.
+
+
+ Sequence redundancy removal
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Build clusters of similar sequences, typically using scores from pair-wise alignment or other comparison of the sequences.
+ Sequence cluster construction
+ Sequence cluster generation
+
+
+ The clusters may be output or used internally for some other purpose.
+ Sequence clustering
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Includes methods that align sequence profiles (representing sequence alignments): methods might perform one-to-one, one-to-many or many-to-many comparisons. See also 'Sequence alignment comparison'.
+ Align (identify equivalent sites within) molecular sequences.
+ Sequence alignment construction
+ Sequence alignment generation
+ Consensus-based sequence alignment
+ Constrained sequence alignment
+ Multiple sequence alignment (constrained)
+ Sequence alignment (constrained)
+
+
+ See also "Read mapping"
+ Sequence alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Align two or more molecular sequences of different types (for example genomic DNA to EST, cDNA or mRNA).
+
+ Hybrid sequence alignment construction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align molecular sequences using sequence and structural information.
+ Sequence alignment (structure-based)
+
+
+ Structure-based sequence alignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Includes methods that align structural (3D) profiles or templates (representing structures or structure alignments) - including methods that perform one-to-one, one-to-many or many-to-many comparisons.
+ Align (superimpose) molecular tertiary structures.
+ Structural alignment
+ 3D profile alignment
+ 3D profile-to-3D profile alignment
+ Structural profile alignment
+
+
+ Structure alignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate some type of sequence profile (for example a hidden Markov model) from a sequence alignment.
+ Sequence profile construction
+
+
+ Sequence profile generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate some type of structural (3D) profile or template from a structure or structure alignment.
+ Structural profile construction
+ Structural profile generation
+
+
+ 3D profile generation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align sequence profiles (representing sequence alignments).
+
+
+ Profile-profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align structural (3D) profiles or templates (representing structures or structure alignments).
+
+
+ 3D profile-to-3D profile alignment
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align molecular sequence(s) to sequence profile(s), or profiles to other profiles. A profile typically represents a sequence alignment.
+ Profile-profile alignment
+ Profile-to-profile alignment
+ Sequence-profile alignment
+ Sequence-to-profile alignment
+
+
+ A sequence profile typically represents a sequence alignment. Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Sequence profile alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align molecular sequence(s) to structural (3D) profile(s) or template(s) (representing a structure or structure alignment).
+
+
+ Sequence-to-3D-profile alignment
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ This includes sequence-to-3D-profile alignment methods, which align molecular sequence(s) to structural (3D) profile(s) or template(s) (representing a structure or structure alignment) - methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Align molecular sequence to structure in 3D space (threading).
+ Sequence-structure alignment
+ Sequence-3D profile alignment
+ Sequence-to-3D-profile alignment
+
+
+ Use this concept for methods that evaluate sequence-structure compatibility by assessing residue interactions in 3D. Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Protein threading
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Recognize (predict and identify) known protein structural domains or folds in protein sequence(s) which (typically) are not accompanied by any significant sequence similarity to known structures.
+ Domain prediction
+ Fold prediction
+ Protein domain prediction
+ Protein fold prediction
+ Protein fold recognition
+
+
+ Methods use some type of mapping between sequence and fold, for example secondary structure prediction and alignment, profile comparison, sequence properties, homologous sequence search, kernel machines etc. Domains and folds might be taken from SCOP or CATH.
+ Fold recognition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Too fine-grained, the operation (Data retrieval) hasn't changed, just what is retrieved.
+ 1.17
+
+ Search for and retrieve data concerning or describing some core data, as distinct from the primary data that is being described.
+
+
+ This includes documentation, general information and other metadata on entities such as databases, database entries and tools.
+ Metadata retrieval
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Query scientific literature, in search for articles, article data, concepts, named entities, or for statistics.
+
+
+ Literature search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text analysis
+ Process and analyse text (typically scientific literature) to extract information from it.
+ Literature mining
+ Text analytics
+ Text data mining
+ Article analysis
+ Literature analysis
+
+
+ Text mining
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform in-silico (virtual) PCR.
+
+
+ Virtual PCR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Design or predict oligonucleotide primers for PCR and DNA amplification etc.
+ PCR primer prediction
+ Primer design
+ PCR primer design (based on gene structure)
+ PCR primer design (for conserved primers)
+ PCR primer design (for gene transcription profiling)
+ PCR primer design (for genotyping polymorphisms)
+ PCR primer design (for large scale sequencing)
+ PCR primer design (for methylation PCRs)
+ Primer quality estimation
+
+
+ Primer design involves predicting or selecting primers that are specific to a provided PCR template. Primers can be designed with certain properties such as size of product desired, primer size etc. The output might be a minimal or overlapping primer set.
+ This includes predicting primers based on gene structure, promoters, exon-exon junctions, predicting primers that are conserved across multiple genomes or species, primers for gene transcription profiling, for genotyping polymorphisms, for example single nucleotide polymorphisms (SNPs), for large scale sequencing, or for methylation PCRs.
+ PCR primer design
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict and/or optimize oligonucleotide probes for DNA microarrays, for example for transcription profiling of genes, or for genomes and gene families.
+ Microarray probe prediction
+
+
+ Microarray probe design
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Combine (align and merge) overlapping fragments of a DNA sequence to reconstruct the original sequence.
+ Metagenomic assembly
+ Sequence assembly editing
+
+
+ For example, assemble overlapping reads from paired-end sequencers into contigs (a contiguous sequence corresponding to read overlaps). Or assemble contigs, for example ESTs and genomic DNA fragments, depending on the detected fragment overlaps.
+ Sequence assembly
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+ Standardize or normalize microarray data.
+
+
+ Microarray data standardisation and normalisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) SAGE, MPSS or SBS experimental data.
+
+ Sequencing-based expression profile data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform cluster analysis of expression data to identify groups with similar expression profiles, for example by clustering.
+ Gene expression clustering
+ Gene expression profile clustering
+
+
+ Expression profile clustering
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The measurement of the activity (expression) of multiple genes in a cell, tissue, sample etc., in order to get an impression of biological function.
+ Feature expression analysis
+ Functional profiling
+ Gene expression profile construction
+ Gene expression profile generation
+ Gene expression quantification
+ Gene transcription profiling
+ Non-coding RNA profiling
+ Protein profiling
+ RNA profiling
+ mRNA profiling
+
+
+ Gene expression profiling generates some sort of gene expression profile, for example from microarray data.
+ Gene expression profiling
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Comparison of expression profiles.
+ Gene expression comparison
+ Gene expression profile comparison
+
+
+ Expression profile comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Interpret (in functional terms) and annotate gene expression data.
+
+
+ Functional profiling
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse EST or cDNA sequences.
+
+ For example, identify full-length cDNAs from EST sequences or detect potential EST antisense transcripts.
+ EST and cDNA sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identify and select targets for protein structural determination.
+
+ Methods will typically navigate a graph of protein families of known structure.
+ Structural genomics target selection
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Includes secondary structure assignment from circular dichroism (CD) spectroscopic data, and from protein coordinate data.
+ Assign secondary structure from protein coordinate or experimental data.
+
+
+ Protein secondary structure assignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign a protein tertiary structure (3D coordinates), or other aspects of protein structure, from raw experimental data.
+ NOE assignment
+ Structure calculation
+
+
+ Protein structure assignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: CorrectedPDBasXML
+ WHATIF: UseFileDB
+ WHATIF: UseResidueDB
+ Evaluate the quality or correctness of a protein three-dimensional model.
+ Protein model validation
+ Residue validation
+
+
+ Model validation might involve checks for atomic packing, steric clashes (bumps), volume irregularities, agreement with electron density maps, number of amino acid residues, percentage of residues with missing or bad atoms, irregular Ramachandran Z-scores, irregular Chi-1 / Chi-2 normality scores, RMS-Z score on bonds and angles etc.
+ The PDB file format has had difficulties, inconsistencies and errors. Corrections can include identifying a meaningful sequence, removal of alternate atoms, correction of nomenclature problems, removal of incomplete residues and spurious waters, addition or removal of water, modelling of missing side chains, optimisation of cysteine bonds, regularisation of bond lengths, bond angles and planarities etc.
+ This includes methods that calculate poor quality residues. The scoring function to identify poor quality residues may consider residues with bad atoms or atoms with high B-factor, residues in the N- or C-terminal position, adjacent to an unstructured residue, non-canonical residues, glycine and proline (or adjacent to such residues).
+ Protein structure validation
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: CorrectedPDBasXML
+ Refine (after evaluation) a model of a molecular structure (typically a protein structure) to reduce steric clashes, volume irregularities etc.
+ Protein model refinement
+
+
+ Molecular model refinement
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree.
+ Phylogenetic tree construction
+ Phylogenetic reconstruction
+ Phylogenetic tree generation
+
+
+ Phylogenetic trees are usually constructed from a set of sequences from which an alignment (or data matrix) is calculated.
+ Phylogenetic inference
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse an existing phylogenetic tree or trees, typically to detect features or make predictions.
+ Phylogenetic tree analysis
+ Phylogenetic modelling
+
+
+ Phylogenetic modelling is the modelling of trait evolution and prediction of trait values using phylogeny as a basis.
+ Phylogenetic analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees.
+
+
+ For example, to produce a consensus tree, subtrees, supertrees, calculate distances between trees or test topological similarity between trees (e.g. a congruence index) etc.
+ Phylogenetic tree comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Edit a phylogenetic tree.
+
+
+ Phylogenetic tree editing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Comparison of a DNA sequence to orthologous sequences in different species and inference of a phylogenetic tree, in order to identify regulatory elements such as transcription factor binding sites (TFBS).
+ Phylogenetic shadowing
+
+
+ Phylogenetic shadowing is a type of footprinting where many closely related species are used. A phylogenetic 'shadow' represents the additive differences between individual sequences. By masking or 'shadowing' variable positions a conserved sequence is produced with few or none of the variations, which is then compared to the sequences of interest to identify significant regions of conservation.
+ Phylogenetic footprinting
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+ Simulate the folding of a protein.
+
+
+ Protein folding simulation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict the folding pathway(s) or non-native structural intermediates of a protein.
+
+
+ Protein folding pathway prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Map and model the effects of single nucleotide polymorphisms (SNPs) on protein structure(s).
+
+
+ Protein SNP mapping
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the effect of point mutation on a protein structure, in terms of structural effects and protein folding, stability and function.
+ Variant functional prediction
+ Protein SNP mapping
+ Protein mutation modelling
+ Protein stability change prediction
+
+
+ Protein SNP mapping maps and models the effects of single nucleotide polymorphisms (SNPs) on protein structure(s). Methods might predict silent or pathological mutations.
+ Variant effect prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Design molecules that elicit an immune response (immunogens).
+
+
+ Immunogen design
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Predict and optimise zinc finger protein domains for DNA/RNA binding (for example for transcription factors and nucleases).
+
+
+ Zinc finger prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate Km, Vmax and derived data for an enzyme reaction.
+
+
+ Enzyme kinetics calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Reformat a file of data (or equivalent entity in memory).
+ File format conversion
+ File formatting
+ File reformatting
+ Format conversion
+ Reformatting
+
+
+ Formatting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Test and validate the format and content of a data file.
+ File format validation
+
+
+ Format validation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Visualise, plot or render (graphically) biomolecular data such as molecular sequences or structures.
+ Data visualisation
+ Rendering
+ Molecular visualisation
+ Plotting
+
+
+ This includes methods to render and visualise molecules.
+ Visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a sequence database by sequence comparison and retrieve similar sequences, or sequences matching a given sequence motif or pattern, such as a Prosite pattern or regular expression.
+
+
+ This excludes direct retrieval methods (e.g. the dbfetch program).
+ Sequence database search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a tertiary structure database, typically by sequence and/or structure comparison, or some other means, and retrieve structures and associated data.
+
+
+ Structure database search
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Search a secondary protein database (of classification information) to assign a protein sequence(s) to a known protein family or group.
+
+
+ Protein secondary database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Screen a sequence against a motif or pattern database.
+
+ Motif database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Search a database of sequence profiles with a query sequence.
+
+ Sequence profile database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Search a database of transmembrane proteins, for example for sequence or structural similarities.
+
+ Transmembrane protein database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a database and retrieve sequences with a given entry code or accession number.
+
+ Sequence retrieval (by code)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a database and retrieve sequences containing a given keyword.
+
+ Sequence retrieval (by keyword)
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a sequence database and retrieve sequences that are similar to a query sequence.
+ Sequence database search (by sequence)
+ Structure database search (by sequence)
+
+
+ Sequence similarity search
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Search a sequence database and retrieve sequences matching a given sequence motif or pattern, such as a Prosite pattern or regular expression.
+
+
+ Sequence database search (by motif or pattern)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database and retrieve sequences of a given amino acid composition.
+
+ Sequence database search (by amino acid composition)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a sequence database and retrieve sequences with a specified property, typically a physicochemical or compositional property.
+
+
+ Sequence database search (by property)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database and retrieve sequences that are similar to a query sequence using a word-based method.
+
+ Word-based methods (for example BLAST, gapped BLAST, MEGABLAST, WU-BLAST etc.) are usually quicker than alignment-based methods. They may or may not handle gaps.
+ Sequence database search (by sequence using word-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database and retrieve sequences that are similar to a query sequence using a sequence profile-based method, or with a supplied profile as query.
+
+ This includes tools based on PSI-BLAST.
+ Sequence database search (by sequence using profile-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database for sequences that are similar to a query sequence using a local alignment-based method.
+
+ This includes tools based on the Smith-Waterman algorithm or FASTA.
+ Sequence database search (by sequence using local alignment-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search sequence(s) or a sequence database for sequences that are similar to a query sequence using a global alignment-based method.
+
+ This includes tools based on the Needleman and Wunsch algorithm.
+ Sequence database search (by sequence using global alignment-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a DNA database (for example a database of conserved sequence tags) for matches to Sequence-Tagged Site (STS) primer sequences.
+
+ STSs are genetic markers that are easily detected by the polymerase chain reaction (PCR) using specific primers.
+ Sequence database search (by sequence for primer sequences)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Search sequence(s) or a sequence database for sequences which match a set of peptide masses, for example a peptide mass fingerprint from mass spectrometry.
+
+
+ Sequence database search (by molecular weight)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search sequence(s) or a sequence database for sequences of a given isoelectric point.
+
+ Sequence database search (by isoelectric point)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a tertiary structure database and retrieve entries with a given entry code or accession number.
+
+ Structure retrieval (by code)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a tertiary structure database and retrieve entries containing a given keyword.
+
+ Structure retrieval (by keyword)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Search a tertiary structure database and retrieve structures with a sequence similar to a query sequence.
+
+
+ Structure database search (by sequence)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a database of molecular structure and retrieve structures that are similar to a query structure.
+ Structure database search (by structure)
+ Structure retrieval by structure
+
+
+ Structural similarity search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotate a molecular sequence record with terms from a controlled vocabulary.
+
+
+ Sequence annotation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotate a genome sequence with terms from a controlled vocabulary.
+ Functional genome annotation
+ Metagenome annotation
+ Structural genome annotation
+
+
+ Genome annotation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate the reverse and / or complement of a nucleotide sequence.
+ Nucleic acid sequence reverse and complement
+ Reverse / complement
+ Reverse and complement
+
+
+ Reverse complement
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a random sequence, for example, with a specific character composition.
+
+
+ Random sequence generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate digest fragments for a nucleotide sequence containing restriction sites.
+ Nucleic acid restriction digest
+
+
+ Restriction digest
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ This is often followed by calculation of protein fragment masses (http://edamontology.org/operation_0398).
+ Cleave a protein sequence into peptide fragments (corresponding to enzymatic or chemical cleavage).
+
+
+ Protein sequence cleavage
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mutate a molecular sequence a specified amount or shuffle it to produce a randomised sequence with the same overall composition.
+
+
+ Sequence mutation and randomisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mask characters in a molecular sequence (replacing those characters with a mask character).
+
+
+ For example, SNPs or repeats in a DNA sequence might be masked.
+ Sequence masking
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Cut (remove) characters or a region from a molecular sequence.
+
+
+ Sequence cutting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Create (or remove) restriction sites in sequences, for example using silent mutations.
+
+
+ Restriction site creation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Translate a DNA sequence into protein.
+
+
+ DNA translation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Transcribe a nucleotide sequence into mRNA sequence(s).
+
+
+ DNA transcription
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Calculate base frequency or word composition of a nucleotide sequence.
+
+
+ Sequence composition calculation (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Calculate amino acid frequency or word composition of a protein sequence.
+
+
+ Sequence composition calculation (protein)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find (and possibly render) short repetitive subsequences (repeat sequences) in (typically nucleotide) sequences.
+
+
+ Repeat sequence detection
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse repeat sequence organisation such as periodicity.
+
+
+ Repeat sequence organisation analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Analyse the hydrophobic, hydrophilic or charge properties of a protein structure.
+
+
+ Protein hydropathy calculation (from structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:AtomAccessibilitySolvent
+ WHATIF:AtomAccessibilitySolventPlus
+ Calculate solvent accessible or buried surface areas in protein or other molecular structures.
+ Protein solvent accessibility calculation
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Accessible surface calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify clusters of hydrophobic or charged residues in a protein structure.
+
+
+ Protein hydropathy cluster calculation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate whether a protein structure has an unusually large net charge (dipole moment).
+
+
+ Protein dipole moment calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:AtomAccessibilityMolecular
+ WHATIF:AtomAccessibilityMolecularPlus
+ WHATIF:ResidueAccessibilityMolecular
+ WHATIF:ResidueAccessibilitySolvent
+ WHATIF:ResidueAccessibilityVacuum
+ WHATIF:ResidueAccessibilityVacuumMolecular
+ WHATIF:TotAccessibilityMolecular
+ WHATIF:TotAccessibilitySolvent
+ Calculate the molecular surface area in proteins and other macromolecules.
+ Protein atom surface calculation
+ Protein residue surface calculation
+ Protein surface and interior calculation
+ Protein surface calculation
+
+
+ Molecular surface calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify or predict catalytic residues, active sites or other ligand-binding sites in protein structures.
+
+
+ Protein binding site prediction (from structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the interaction of protein with nucleic acids, e.g. RNA or DNA-binding sites, interfaces etc.
+ Protein-nucleic acid binding site analysis
+ Protein-DNA interaction analysis
+ Protein-RNA interaction analysis
+
+
+ Protein-nucleic acid interaction analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Decompose a structure into compact or globular fragments (protein peeling).
+
+
+ Protein peeling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a matrix of distance between residues (for example the C-alpha atoms) in a protein structure.
+
+
+ Protein distance matrix calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a residue contact map (typically all-versus-all inter-residue contacts) for a protein structure.
+ Protein contact map calculation
+
+
+ Contact map calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate clusters of contacting residues in protein structures.
+
+
+ This includes for example clusters of hydrophobic or charged residues, or clusters of contacting residues which have a key structural or functional role.
+ Residue cluster calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:HasHydrogenBonds
+ WHATIF:ShowHydrogenBonds
+ WHATIF:ShowHydrogenBondsM
+ Identify potential hydrogen bonds between amino acids and other groups.
+
+
+ The output might include the atoms involved in the bond, bond geometric parameters and bond enthalpy.
+ Hydrogen bond calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+ Calculate non-canonical atomic interactions in protein structures.
+
+ Residue non-canonical interaction detection
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a Ramachandran plot of a protein structure.
+
+
+ Ramachandran plot calculation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Validate a Ramachandran plot of a protein structure.
+
+
+ Ramachandran plot validation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the molecular weight of a protein sequence or fragments.
+ Peptide mass calculation
+
+
+ Protein molecular weight calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict extinction coefficients or optical density of a protein sequence.
+
+
+ Protein extinction coefficient calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate pH-dependent properties from pKa calculations of a protein sequence.
+ Protein pH-dependent property calculation
+
+
+ Protein pKa calculation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Hydropathy calculation on a protein sequence.
+
+
+ Protein hydropathy calculation (from sequence)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Plot a protein titration curve.
+
+
+ Protein titration curve plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate isoelectric point of a protein sequence.
+
+
+ Protein isoelectric point calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Estimate hydrogen exchange rate of a protein sequence.
+
+
+ Protein hydrogen exchange rate calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate hydrophobic or hydrophilic / charged regions of a protein sequence.
+
+
+ Protein hydrophobic region calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate aliphatic index (relative volume occupied by aliphatic side chains) of a protein.
+
+
+ Protein aliphatic index calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the hydrophobic moment of a peptide sequence and recognize amphiphilicity.
+
+
+ Hydrophobic moment is a peptide's hydrophobicity measured for different angles of rotation.
+ Protein hydrophobic moment plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the stability or globularity of a protein sequence, whether it is intrinsically unfolded etc.
+
+
+ Protein globularity prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the solubility or atomic solvation energy of a protein sequence.
+
+
+ Protein solubility prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict crystallizability of a protein sequence.
+
+
+ Protein crystallizability prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Too fine-grained.
+ 1.17
+
+ Detect or predict signal peptides (and typically predict subcellular localisation) of eukaryotic proteins.
+
+
+ Protein signal peptide detection (eukaryotes)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Too fine-grained.
+ 1.17
+
+ Detect or predict signal peptides (and typically predict subcellular localisation) of bacterial proteins.
+
+
+ Protein signal peptide detection (bacteria)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Predict MHC class I or class II binding peptides, promiscuous binding peptides, immunogenicity etc.
+
+
+ MHC peptide immunogenicity prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Predict, recognise and identify positional features in protein sequences such as functional sites or regions and secondary structure.
+
+ Methods typically involve scanning for known motifs, patterns and regular expressions.
+ Protein feature prediction (from sequence)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict, recognise and identify features in nucleotide sequences such as functional sites or regions, typically by scanning for known motifs, patterns and regular expressions.
+ Sequence feature detection (nucleic acid)
+ Nucleic acid feature prediction
+ Nucleic acid feature recognition
+ Nucleic acid site detection
+ Nucleic acid site prediction
+ Nucleic acid site recognition
+
+
+ Methods typically involve scanning for known motifs, patterns and regular expressions.
+ This is placeholder but does not comprehensively include all child concepts - please inspect other concepts under "Nucleic acid sequence analysis" for example "Gene prediction", for other feature detection operations.
+ Nucleic acid feature detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict antigenic determinant sites (epitopes) in protein sequences.
+ Antibody epitope prediction
+ Epitope prediction
+ B cell epitope mapping
+ B cell epitope prediction
+ Epitope mapping (MHC Class I)
+ Epitope mapping (MHC Class II)
+ T cell epitope mapping
+ T cell epitope prediction
+
+
+ Epitope mapping is commonly done during vaccine design.
+ Epitope mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict post-translation modification sites in protein sequences.
+ PTM analysis
+ PTM prediction
+ PTM site analysis
+ Post-translation modification site prediction
+ Post-translational modification analysis
+ PTM site prediction
+ Protein post-translation modification site prediction
+ Acetylation prediction
+ Acetylation site prediction
+ Dephosphorylation prediction
+ Dephosphorylation site prediction
+ GPI anchor prediction
+ GPI anchor site prediction
+ GPI modification prediction
+ GPI modification site prediction
+ Glycosylation prediction
+ Glycosylation site prediction
+ Hydroxylation prediction
+ Hydroxylation site prediction
+ Methylation prediction
+ Methylation site prediction
+ N-myristoylation prediction
+ N-myristoylation site prediction
+ N-terminal acetylation prediction
+ N-terminal acetylation site prediction
+ N-terminal myristoylation prediction
+ N-terminal myristoylation site prediction
+ Palmitoylation prediction
+ Palmitoylation site prediction
+ Phosphoglycerylation prediction
+ Phosphoglycerylation site prediction
+ Phosphorylation prediction
+ Phosphorylation site prediction
+ Phosphosite localization
+ Prenylation prediction
+ Prenylation site prediction
+ Pupylation prediction
+ Pupylation site prediction
+ S-nitrosylation prediction
+ S-nitrosylation site prediction
+ S-sulfenylation prediction
+ S-sulfenylation site prediction
+ Succinylation prediction
+ Succinylation site prediction
+ Sulfation prediction
+ Sulfation site prediction
+ Sumoylation prediction
+ Sumoylation site prediction
+ Tyrosine nitration prediction
+ Tyrosine nitration site prediction
+ Ubiquitination prediction
+ Ubiquitination site prediction
+
+
+ Methods might predict sites of methylation, N-terminal myristoylation, N-terminal acetylation, sumoylation, palmitoylation, phosphorylation, sulfation, glycosylation, glycosylphosphatidylinositol (GPI) modification sites (GPI lipid anchor signals) etc.
+ Post-translational modification site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect or predict signal peptides and signal peptide cleavage sites in protein sequences.
+
+
+ Methods might use sequence motifs and features, amino acid composition, profiles, machine-learned classifiers, etc.
+ Protein signal peptide detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Predict catalytic residues, active sites or other ligand-binding sites in protein sequences.
+
+
+ Protein binding site prediction (from sequence)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict or detect RNA- and DNA-binding sites in protein sequences.
+ Protein-nucleic acid binding detection
+ Protein-nucleic acid binding prediction
+ Protein-nucleic acid binding site detection
+ Protein-nucleic acid binding site prediction
+ Zinc finger prediction
+
+
+ This includes methods that predict and optimise zinc finger protein domains for DNA/RNA binding (for example for transcription factors and nucleases).
+ Nucleic acids-binding site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+ Predict protein sites that are key to protein folding, such as possible sites of nucleation or stabilisation.
+
+
+ Protein folding site prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect or predict cleavage sites (enzymatic or chemical) in protein sequences.
+
+
+ Protein cleavage site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Predict epitopes that bind to MHC class I molecules.
+
+
+ Epitope mapping (MHC Class I)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Predict epitopes that bind to MHC class II molecules.
+
+
+ Epitope mapping (MHC Class II)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Detect, predict and identify whole gene structure in DNA sequences. This includes protein coding regions, exon-intron structure, regulatory regions etc.
+
+
+ Whole gene prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Detect, predict and identify genetic elements such as promoters, coding regions, splice sites, etc in DNA sequences.
+
+
+ Methods for gene prediction might be ab initio, based on phylogenetic comparisons, use motifs, sequence features, support vector machine, alignment etc.
+ Gene component prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect or predict transposons, retrotransposons / retrotransposition signatures etc.
+
+
+ Transposon prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect polyA signals in nucleotide sequences.
+ PolyA detection
+ PolyA prediction
+ PolyA signal prediction
+ Polyadenylation signal detection
+ Polyadenylation signal prediction
+
+
+ PolyA signal detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect quadruplex-forming motifs in nucleotide sequences.
+ Quadruplex structure prediction
+
+
+ Quadruplex (4-stranded) structures are formed by guanine-rich regions and are implicated in various important biological processes and as therapeutic targets.
+ Quadruplex formation site detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find CpG rich regions in a nucleotide sequence or isochores in genome sequences.
+ CpG island and isochores detection
+ CpG island and isochores rendering
+
+
+ An isochore is a long region (> 3 KB) of DNA with very uniform GC content, in contrast to the rest of the genome. Isochores tend to have more genes, higher local melting or denaturation temperatures, and different flexibility. Methods might calculate fractional GC content or variation of GC content, predict methylation status of CpG islands etc. This includes methods that visualise CpG rich regions in a nucleotide sequence, for example plot isochores in a genome sequence.
+ CpG island and isochore detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find and identify restriction enzyme cleavage sites (restriction sites) in (typically) DNA sequences, for example to generate a restriction map.
+
+
+ Restriction site recognition
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict nucleosome exclusion sequences (nucleosome free regions) in DNA.
+ Nucleosome exclusion sequence prediction
+ Nucleosome formation sequence prediction
+
+
+ Nucleosome position prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify, predict or analyse splice sites in nucleotide sequences.
+ Splice prediction
+
+
+ Methods might require a pre-mRNA or genomic DNA sequence.
+ Splice site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict whole gene structure using a combination of multiple methods to achieve better predictions.
+
+
+ Integrated gene prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find operons (operators, promoters and genes) in bacteria genes.
+
+
+ Operon prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict protein-coding regions (CDS or exon) or open reading frames in nucleotide sequences.
+ ORF finding
+ ORF prediction
+
+
+ Coding region prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict selenocysteine insertion sequence (SECIS) in a DNA sequence.
+ Selenocysteine insertion sequence (SECIS) prediction
+
+
+ SECIS elements are around 60 nucleotides in length with a stem-loop structure that directs the cell to translate UGA codons as selenocysteines.
+ SECIS element prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ This includes comparative genomics approaches that identify common, conserved (homologous) or synonymous transcriptional regulatory elements. For example cross-species comparison of transcription factor binding sites (TFBS). Methods might analyse co-regulated or co-expressed genes, or sets of oppositely expressed genes.
+ Identify or predict transcriptional regulatory motifs, patterns, elements or regions in DNA sequences.
+ Regulatory element prediction
+ Transcription regulatory element prediction
+ Conserved transcription regulatory sequence identification
+ Translational regulatory element prediction
+
+
+ This includes promoters, enhancers, silencers and boundary elements / insulators, regulatory protein or transcription factor binding sites etc. Methods might be specific to a particular genome and use motifs, word-based / grammatical methods, position-specific frequency matrices, discriminative pattern analysis etc.
+ Transcriptional regulatory element prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict translation initiation sites, possibly by searching a database of sites.
+
+
+ Translation initiation site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict whole promoters or promoter elements (transcription start sites, RNA polymerase binding site, transcription factor binding sites, promoter enhancers etc) in DNA sequences.
+
+
+ Methods might recognize CG content, CpG islands, splice sites, polyA signals etc.
+ Promoter prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify, predict or analyse cis-regulatory elements in DNA sequences (TATA box, Pribnow box, SOS box, CAAT box, CCAAT box, operator etc.) or in RNA sequences (e.g. riboswitches).
+ Transcriptional regulatory element prediction (DNA-cis)
+ Transcriptional regulatory element prediction (RNA-cis)
+
+
+ Cis-regulatory elements (cis-elements) regulate the expression of genes located on the same strand from which the element was transcribed. Cis-elements are found in the 5' promoter region of the gene, in an intron, or in the 3' untranslated region. Cis-elements are often binding sites of one or more trans-acting factors. They also occur in RNA sequences, e.g. a riboswitch is a region of an mRNA molecule that binds a small target molecule that regulates the gene's activity.
+ cis-regulatory element prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Identify, predict or analyse cis-regulatory elements (for example riboswitches) in RNA sequences.
+
+
+ Transcriptional regulatory element prediction (RNA-cis)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict functional RNA sequences with a gene regulatory role (trans-regulatory elements) or targets.
+ Functional RNA identification
+ Transcriptional regulatory element prediction (trans)
+
+
+ Trans-regulatory elements regulate genes distant from the gene from which they were transcribed.
+ trans-regulatory element prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify matrix/scaffold attachment regions (MARs/SARs) in DNA sequences.
+ MAR/SAR prediction
+ Matrix/scaffold attachment site prediction
+
+
+ MAR/SAR sites often flank a gene or gene cluster and are found nearby cis-regulatory sequences. They might contribute to transcription regulation.
+ S/MAR prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict transcription factor binding sites in DNA sequences.
+
+
+ Transcription factor binding site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict exonic splicing enhancers (ESE) in exons.
+
+
+ An exonic splicing enhancer (ESE) is a 6-base DNA sequence motif in an exon that enhances or directs splicing of pre-mRNA or hetero-nuclear RNA (hnRNA) into mRNA.
+ Exonic splicing enhancer prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Evaluate molecular sequence alignment accuracy.
+ Sequence alignment quality evaluation
+
+
+ Evaluation might be purely sequence-based or use structural information.
+ Sequence alignment validation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse character conservation in a molecular sequence alignment, for example to derive a consensus sequence.
+ Residue conservation analysis
+
+
+ Use this concept for methods that calculate substitution rates, estimate relative site variability, identify sites with biased properties, derive a consensus sequence, or identify highly conserved or very poorly conserved sites, regions, blocks etc.
+ Sequence alignment analysis (conservation)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse correlations between sites in a molecular sequence alignment.
+
+
+ This is typically done to identify possible covarying positions and predict contacts or structural constraints in protein structures.
+ Sequence alignment analysis (site correlation)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect chimeric sequences (chimeras) from a sequence alignment.
+ Chimeric sequence detection
+
+
+ A chimera includes regions from two or more phylogenetically distinct sequences. They are usually artifacts of PCR and are thought to occur when a prematurely terminated amplicon reanneals to another DNA strand and is subsequently copied to completion in later PCR cycles.
+ Chimera detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect recombination (hotspots and coldspots) and identify recombination breakpoints in a sequence alignment.
+ Sequence alignment analysis (recombination detection)
+
+
+ Tools might use a genetic algorithm, quartet-mapping, bootscanning, graphical methods, random forest model and so on.
+ Recombination detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify insertion, deletion and duplication events from a sequence alignment.
+ Indel discovery
+ Sequence alignment analysis (indel detection)
+
+
+ Tools might use a genetic algorithm, quartet-mapping, bootscanning, graphical methods, random forest model and so on.
+ Indel detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Predict nucleosome formation potential of DNA sequences.
+
+ Nucleosome formation potential prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a thermodynamic property of DNA or DNA/RNA, such as melting temperature, enthalpy and entropy.
+
+
+ Nucleic acid thermodynamic property calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA melting profile.
+
+
+ A melting profile is used to visualise and analyse partly melted DNA conformations.
+ Nucleic acid melting profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA stitch profile.
+
+
+ A stitch profile represents the alternative conformations that partly melted DNA can adopt in a temperature range.
+ Nucleic acid stitch profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA melting curve.
+
+
+ Nucleic acid melting curve plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA probability profile.
+
+
+ Nucleic acid probability profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA temperature profile.
+
+
+ Nucleic acid temperature profile plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate curvature and flexibility / stiffness of a nucleotide sequence.
+
+
+ This includes properties such as.
+ Nucleic acid curvature calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict microRNA sequences (miRNA) and precursors or microRNA targets / binding sites in a DNA sequence.
+ miRNA prediction
+ microRNA detection
+ microRNA target detection
+
+
+ miRNA target prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict tRNA genes in genomic sequences (tRNA).
+
+
+ tRNA gene prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assess binding specificity of putative siRNA sequence(s), for example for a functional assay, typically with respect to designing specific siRNA sequences.
+
+
+ siRNA binding specificity prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Predict secondary structure of protein sequence(s) using multiple methods to achieve better predictions.
+
+
+ Protein secondary structure prediction (integrated)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict helical secondary structure of protein sequences.
+
+
+ Protein secondary structure prediction (helices)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict turn structure (for example beta hairpin turns) of protein sequences.
+
+
+ Protein secondary structure prediction (turns)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict open coils, non-regular secondary structure and intrinsically disordered / unstructured regions of protein sequences.
+
+
+ Protein secondary structure prediction (coils)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict cysteine bonding state and disulfide bond partners in protein sequences.
+
+
+ Disulfide bond prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Not sustainable to have protein type-specific concepts.
+ 1.19
+
+ Predict G protein-coupled receptors (GPCR).
+
+
+ GPCR prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Not sustainable to have protein type-specific concepts.
+ 1.19
+
+ Analyse G-protein coupled receptor proteins (GPCRs).
+
+
+ GPCR analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ This includes methods that predict the folding pathway(s) or non-native structural intermediates of a protein.
+ Predict tertiary structure (backbone and side-chain conformation) of protein sequences.
+ Protein folding pathway prediction
+
+
+ Protein structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict structure of DNA or RNA.
+
+
+ Methods might identify thermodynamically stable or evolutionarily conserved structures.
+ Nucleic acid structure prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict tertiary structure of protein sequence(s) without homologs of known structure.
+ de novo structure prediction
+
+
+ Ab initio structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Build a three-dimensional protein model based on known (for example homologs) structures.
+ Comparative modelling
+ Homology modelling
+ Homology structure modelling
+ Protein structure comparative modelling
+
+
+ The model might be of a whole, part or aspect of protein structure. Molecular modelling methods might use sequence-structure alignment, structural templates, molecular dynamics, energy minimisation etc.
+ Protein modelling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model the structure of a protein in complex with a small molecule or another macromolecule.
+ Docking simulation
+ Macromolecular docking
+
+
+ This includes protein-protein interactions, protein-nucleic acid, protein-ligand binding etc. Methods might predict whether the molecules are likely to bind in vivo, their conformation when bound, the strength of the interaction, possible mutations to achieve bonding and so on.
+ Molecular docking
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model protein backbone conformation.
+ Protein modelling (backbone)
+ Design optimization
+ Epitope grafting
+ Scaffold search
+ Scaffold selection
+
+
+ Methods might require a preliminary C(alpha) trace.
+ Scaffold selection, scaffold search, epitope grafting and design optimization are stages of backbone modelling done during rational vaccine design.
+ Backbone modelling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model, analyse or edit amino acid side chain conformation in protein structure, optimize side-chain packing, hydrogen bonding etc.
+ Protein modelling (side chains)
+ Antibody optimisation
+ Antigen optimisation
+ Antigen resurfacing
+ Rotamer likelihood prediction
+
+
+ Antibody optimisation is to optimize the antibody-interacting surface of the antigen (epitope). Antigen optimisation is to optimize the antigen-interacting surface of the antibody (paratope). Antigen resurfacing is to resurface the antigen by varying the sequence of non-epitope regions.
+ Methods might use a residue rotamer library.
+ This includes rotamer likelihood prediction: the prediction of rotamer likelihoods for all 20 amino acid types at each position in a protein structure, where output typically includes, for each residue position, the likelihoods for the 20 amino acid types with estimated reliability of the 20 likelihoods.
+ Side chain modelling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model loop conformation in protein structures.
+ Protein loop modelling
+ Protein modelling (loops)
+
+
+ Loop modelling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model protein-ligand (for example protein-peptide) binding using comparative modelling or other techniques.
+ Ligand-binding simulation
+ Protein-peptide docking
+
+
+ Methods aim to predict the position and orientation of a ligand bound to a protein receptor or enzyme.
+ Virtual screening is used in drug discovery to search libraries of small molecules in order to identify those molecules which are most likely to bind to a drug target (typically a protein receptor or enzyme).
+ Protein-ligand docking
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict or optimise RNA sequences (sequence pools) with likely secondary and tertiary structure for in vitro selection.
+ Nucleic acid folding family identification
+ Structured RNA prediction and optimisation
+
+
+ RNA inverse folding
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find single nucleotide polymorphisms (SNPs) - single nucleotide change in base positions - between sequences. Typically done for sequences from a high-throughput sequencing experiment that differ from a reference genome and which might, especially by reference to population frequency or functional data, indicate a polymorphism.
+ SNP calling
+ SNP discovery
+ Single nucleotide polymorphism detection
+
+
+ This includes functional SNPs for large-scale genotyping purposes, disease-associated non-synonymous SNPs etc.
+ SNP detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a physical (radiation hybrid) map of genetic markers in a DNA sequence using provided radiation hybrid (RH) scores for one or more markers.
+
+
+ Radiation Hybrid Mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Map the genetic architecture of dynamic complex traits.
+
+ This can involve characterisation of the underlying quantitative trait loci (QTLs) or nucleotides (QTNs).
+ Functional mapping
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Infer haplotypes, either alleles at multiple loci that are transmitted together on the same chromosome, or a set of single nucleotide polymorphisms (SNPs) on a single chromatid that are statistically associated.
+ Haplotype inference
+ Haplotype map generation
+ Haplotype reconstruction
+
+
+ Haplotype inference can help in population genetic studies and the identification of complex disease genes, and is typically based on aligned single nucleotide polymorphism (SNP) fragments. Haplotype comparison is a useful way to characterize the genetic variation between individuals. An individual's haplotype describes which nucleotide base occurs at each position for a set of common SNPs. Tools might use combinatorial functions (for example parsimony) or a likelihood function or model with optimisation such as minimum error correction (MEC) model, expectation-maximisation algorithm (EM), genetic algorithm or Markov chain Monte Carlo (MCMC).
+ Haplotype mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate linkage disequilibrium; the non-random association of alleles or polymorphisms at two or more loci (not necessarily on the same chromosome).
+
+
+ Linkage disequilibrium is identified where a combination of alleles (or genetic markers) occurs more or less frequently in a population than expected by chance formation of haplotypes.
+ Linkage disequilibrium calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict genetic code from analysis of codon usage data.
+
+
+ Genetic code prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a representation of a distribution that consists of a group of data points plotted on a simple scale.
+ Categorical plot plotting
+ Dotplot plotting
+
+
+ Dot plots are useful when having not too many (e.g. 20) data points for each category. Example: draw a dotplot of sequence similarities identified from word-matching or character comparison.
+ Dot plot plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align exactly two molecular sequences.
+ Pairwise alignment
+
+
+ Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Pairwise sequence alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align more than two molecular sequences.
+ Multiple alignment
+
+
+ This includes methods that use an existing alignment, for example to incorporate sequences into an alignment, or combine several multiple alignments into a single, improved alignment.
+ Multiple sequence alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Locally align exactly two molecular sequences.
+
+ Local alignment methods identify regions of local similarity.
+ Pairwise sequence alignment generation (local)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Globally align exactly two molecular sequences.
+
+ Global alignment methods identify similarity across the entire length of the sequences.
+ Pairwise sequence alignment generation (global)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Locally align two or more molecular sequences.
+ Local sequence alignment
+ Sequence alignment (local)
+ Smith-Waterman
+
+
+ Local alignment methods identify regions of local similarity.
+ Local alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Globally align two or more molecular sequences.
+ Global sequence alignment
+ Sequence alignment (global)
+
+
+ Global alignment methods identify similarity across the entire length of the sequences.
+ Global alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align two or more molecular sequences with user-defined constraints.
+
+
+ Constrained sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+ Align two or more molecular sequences using multiple methods to achieve higher quality.
+
+
+ Consensus-based sequence alignment
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align multiple sequences using relative gap costs calculated from neighbors in a supplied phylogenetic tree.
+ Multiple sequence alignment (phylogenetic tree-based)
+ Multiple sequence alignment construction (phylogenetic tree-based)
+ Phylogenetic tree-based multiple sequence alignment construction
+ Sequence alignment (phylogenetic tree-based)
+ Sequence alignment generation (phylogenetic tree-based)
+
+
+ This is supposed to give a more biologically meaningful alignment than standard alignments.
+ Tree-based sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Align molecular secondary structure (represented as a 1D string).
+
+ Secondary structure alignment generation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Align protein secondary structures.
+
+
+ Protein secondary structure alignment generation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align RNA secondary structures.
+ RNA secondary structure alignment construction
+ RNA secondary structure alignment generation
+ Secondary structure alignment construction (RNA)
+
+
+ RNA secondary structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (superimpose) exactly two molecular tertiary structures.
+ Structure alignment (pairwise)
+ Pairwise protein structure alignment
+
+
+ Pairwise structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (superimpose) more than two molecular tertiary structures.
+ Structure alignment (multiple)
+ Multiple protein structure alignment
+
+
+ This includes methods that use an existing alignment.
+ Multiple structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Align protein tertiary structures.
+
+ Structure alignment (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Align RNA tertiary structures.
+
+ Structure alignment (RNA)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Locally align (superimpose) exactly two molecular tertiary structures.
+
+ Local alignment methods identify regions of local similarity, common substructures etc.
+ Pairwise structure alignment generation (local)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Globally align (superimpose) exactly two molecular tertiary structures.
+
+ Global alignment methods identify similarity across the entire structures.
+ Pairwise structure alignment generation (global)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Locally align (superimpose) two or more molecular tertiary structures.
+ Structure alignment (local)
+ Local protein structure alignment
+
+
+ Local alignment methods identify regions of local similarity, common substructures etc.
+ Local structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Globally align (superimpose) two or more molecular tertiary structures.
+ Structure alignment (global)
+ Global protein structure alignment
+
+
+ Global alignment methods identify similarity across the entire structures.
+ Global structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+
+ Align exactly two molecular profiles.
+
+ Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Profile-profile alignment (pairwise)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Align two or more molecular profiles.
+
+ Sequence alignment generation (multiple profile)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+
+
+ Align exactly two molecular Structural (3D) profiles.
+
+ 3D profile-to-3D profile alignment (pairwise)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+
+ Align two or more molecular 3D profiles.
+
+ Structural profile alignment generation (multiple)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search and retrieve names of or documentation on bioinformatics tools, for example by keyword or which perform a particular function.
+
+ Data retrieval (tool metadata)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search and retrieve names of or documentation on bioinformatics databases or query terms, for example by keyword.
+
+ Data retrieval (database metadata)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for large scale sequencing.
+
+
+ PCR primer design (for large scale sequencing)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for genotyping polymorphisms, for example single nucleotide polymorphisms (SNPs).
+
+
+ PCR primer design (for genotyping polymorphisms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for gene transcription profiling.
+
+
+ PCR primer design (for gene transcription profiling)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers that are conserved across multiple genomes or species.
+
+
+ PCR primer design (for conserved primers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers based on gene structure.
+
+
+ PCR primer design (based on gene structure)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for methylation PCRs.
+
+
+ PCR primer design (for methylation PCRs)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence assembly by combining fragments using an existing backbone sequence, typically a reference genome.
+ Sequence assembly (mapping assembly)
+
+
+ The final sequence will resemble the backbone sequence. Mapping assemblers are usually much faster and less memory intensive than de-novo assemblers.
+ Mapping assembly
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence assembly by combining fragments without the aid of a reference sequence or genome.
+ De Bruijn graph
+ Sequence assembly (de-novo assembly)
+
+
+ De-novo assemblers are much slower and more memory intensive than mapping assemblers.
+ De-novo assembly
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The process of assembling many short DNA sequences together such that they represent the original chromosomes from which the DNA originated.
+ Genomic assembly
+ Sequence assembly (genome assembly)
+ Breakend assembly
+
+
+ Genome assembly
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence assembly for EST sequences (transcribed mRNA).
+ Sequence assembly (EST assembly)
+
+
+ Assemblers must handle (or be complicated by) alternative splicing, trans-splicing, single-nucleotide polymorphism (SNP), recoding, and post-transcriptional modification.
+ EST assembly
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Make sequence tag to gene assignments (tag mapping) of SAGE, MPSS and SBS data.
+ Tag to gene assignment
+
+
+ Sequence tag mapping assigns experimentally obtained sequence tags to known transcripts or annotates potential virtual sequence tags in a genome.
+ Sequence tag mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) serial analysis of gene expression (SAGE) data.
+
+ SAGE data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) massively parallel signature sequencing (MPSS) data.
+
+ MPSS data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) sequencing by synthesis (SBS) data.
+
+ SBS data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a heat map of expression data from e.g. microarray data.
+ Heat map construction
+ Heatmap generation
+
+
+ The heat map usually uses a coloring scheme to represent expression values. They can show how quantitative measurements were influenced by experimental conditions.
+ Heat map generation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Analyse one or more gene expression profiles, typically to interpret them in functional terms.
+
+ Gene expression profile analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Map an expression profile to known biological pathways, for example, to identify or reconstruct a pathway.
+ Pathway mapping
+ Gene expression profile pathway mapping
+ Gene to pathway mapping
+ Gene-to-pathway mapping
+
+
+ Expression profile pathway mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Assign secondary structure from protein coordinate data.
+
+
+ Protein secondary structure assignment (from coordinate data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Assign secondary structure from circular dichroism (CD) spectroscopic data.
+
+
+ Protein secondary structure assignment (from CD data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ Assign a protein tertiary structure (3D coordinates) from raw X-ray crystallography data.
+
+
+ Protein structure assignment (from X-ray crystallographic data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ Assign a protein tertiary structure (3D coordinates) from raw NMR spectroscopy data.
+
+
+ Protein structure assignment (from NMR data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Construct a phylogenetic tree from a specific type of data.
+ Phylogenetic tree construction (data centric)
+ Phylogenetic tree generation (data centric)
+
+
+ Subconcepts of this concept reflect different types of data used to generate a tree, and provide an alternate axis for curation.
+ Phylogenetic inference (data centric)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Construct a phylogenetic tree using a specific method.
+ Phylogenetic tree construction (method centric)
+ Phylogenetic tree generation (method centric)
+
+
+ Subconcepts of this concept reflect different computational methods used to generate a tree, and provide an alternate axis for curation.
+ Phylogenetic inference (method centric)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from molecular sequences.
+ Phylogenetic tree construction (from molecular sequences)
+ Phylogenetic tree generation (from molecular sequences)
+
+
+ Methods typically compare multiple molecular sequences and estimate evolutionary distances and relationships to infer gene families or make functional predictions.
+ Phylogenetic inference (from molecular sequences)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from continuous quantitative character data.
+ Phylogenetic tree construction (from continuous quantitative characters)
+ Phylogenetic tree generation (from continuous quantitative characters)
+
+
+ Phylogenetic inference (from continuous quantitative characters)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from gene frequency data.
+ Phylogenetic tree construction (from gene frequencies)
+ Phylogenetic tree generation (from gene frequencies)
+
+
+ Phylogenetic inference (from gene frequencies)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from polymorphism data including microsatellites, RFLP (restriction fragment length polymorphisms), RAPD (random-amplified polymorphic DNA) and AFLP (amplified fragment length polymorphisms) data.
+ Phylogenetic tree construction (from polymorphism data)
+ Phylogenetic tree generation (from polymorphism data)
+
+
+ Phylogenetic inference (from polymorphism data)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic species tree, for example, from a genome-wide sequence comparison.
+ Phylogenetic species tree construction
+ Phylogenetic species tree generation
+
+
+ Species tree construction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by computing a sequence alignment and searching for the tree with the fewest number of character-state changes from the alignment.
+ Phylogenetic tree construction (parsimony methods)
+ Phylogenetic tree generation (parsimony methods)
+
+
+ This includes evolutionary parsimony (invariants) methods.
+ Phylogenetic inference (parsimony methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by computing (or using precomputed) distances between sequences and searching for the tree with minimal discrepancies between pairwise distances.
+ Phylogenetic tree construction (minimum distance methods)
+ Phylogenetic tree generation (minimum distance methods)
+
+
+ This includes neighbor joining (NJ) clustering method.
+ Phylogenetic inference (minimum distance methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by relating sequence data to a hypothetical tree topology using a model of sequence evolution.
+ Phylogenetic tree construction (maximum likelihood and Bayesian methods)
+ Phylogenetic tree generation (maximum likelihood and Bayesian methods)
+
+
+ Maximum likelihood methods search for a tree that maximizes a likelihood function, i.e. that is most likely given the data and model. Bayesian analysis estimates the probability of a tree for branch lengths and topology, typically using a Monte Carlo algorithm.
+ Phylogenetic inference (maximum likelihood and Bayesian methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by computing four-taxon trees (4-trees) and searching for the phylogeny that matches most closely.
+ Phylogenetic tree construction (quartet methods)
+ Phylogenetic tree generation (quartet methods)
+
+
+ Phylogenetic inference (quartet methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by using artificial-intelligence methods, for example genetic algorithms.
+ Phylogenetic tree construction (AI methods)
+ Phylogenetic tree generation (AI methods)
+
+
+ Phylogenetic inference (AI methods)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify a plausible model of DNA substitution that explains a molecular (DNA or protein) sequence alignment.
+ Nucleotide substitution modelling
+
+
+ DNA substitution modelling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the shape (topology) of a phylogenetic tree.
+ Phylogenetic tree analysis (shape)
+
+
+ Phylogenetic tree topology analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Apply bootstrapping or other measures to estimate confidence of a phylogenetic tree.
+
+
+ Phylogenetic tree bootstrapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a "gene tree" which represents the evolutionary history of the genes included in the study. This can be used to predict families of genes and gene function based on their position in a phylogenetic tree.
+ Phylogenetic tree analysis (gene family prediction)
+
+
+ Gene trees can provide evidence for gene duplication events, as well as speciation events. Where sequences from different homologs are included in a gene tree, subsequent clustering of the orthologs can demonstrate evolutionary history of the orthologs.
+ Gene tree construction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a phylogenetic tree to identify allele frequency distribution and change that is subject to evolutionary pressures (natural selection, genetic drift, mutation and gene flow). Identify type of natural selection (such as stabilizing, balancing or disruptive).
+ Phylogenetic tree analysis (natural selection)
+
+
+ Stabilizing/purifying (directional) selection favors a single phenotype and tends to decrease genetic diversity as a population stabilizes on a particular trait, selecting out trait extremes or deleterious mutations. In contrast, balancing selection maintains genetic polymorphisms (or multiple alleles), whereas disruptive (or diversifying) selection favors individuals at both extremes of a trait.
+ Allele frequency distribution analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees to produce a consensus tree.
+ Phylogenetic tree construction (consensus)
+ Phylogenetic tree generation (consensus)
+
+
+ Methods typically test for topological similarity between trees using for example a congruence index.
+ Consensus tree construction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees to detect subtrees or supertrees.
+ Phylogenetic sub/super tree detection
+ Subtree construction
+ Supertree construction
+
+
+ Phylogenetic sub/super tree construction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees to calculate distances between trees.
+
+
+ Phylogenetic tree distances calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotate a phylogenetic tree with terms from a controlled vocabulary.
+
+
+ Phylogenetic tree annotation
+ http://www.evolutionaryontology.org/cdao.owl#CDAOAnnotation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Predict and optimise peptide ligands that elicit an immunological response.
+
+
+ Immunogenicity prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict or optimise DNA to elicit (via DNA vaccination) an immunological response.
+
+
+ DNA vaccine design
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Reformat (a file or other report of) molecular sequence(s).
+
+
+ Sequence formatting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Reformat (a file or other report of) molecular sequence alignment(s).
+
+
+ Sequence alignment formatting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Reformat a codon usage table.
+
+
+ Codon usage table formatting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise, format or render a molecular sequence or sequences such as a sequence alignment, possibly with sequence features or properties shown.
+ Sequence rendering
+ Sequence alignment visualisation
+
+
+ Sequence visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.15
+
+ Visualise, format or print a molecular sequence alignment.
+
+
+ Sequence alignment visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise, format or render sequence clusters.
+ Sequence cluster rendering
+
+
+ Sequence cluster visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render or visualise a phylogenetic tree.
+ Phylogenetic tree rendering
+
+
+ Phylogenetic tree visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.15
+
+ Visualise RNA secondary structure, knots, pseudoknots etc.
+
+
+ RNA secondary structure visualisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.15
+
+ Render and visualise protein secondary structure.
+
+
+ Protein secondary structure visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise or render molecular 3D structure, for example a high-quality static picture or animation.
+ Structure rendering
+ Protein secondary structure visualisation
+ RNA secondary structure visualisation
+
+
+ This includes visualisation of protein secondary structure such as knots, pseudoknots etc. as well as tertiary and quaternary structure.
+ Structure visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise microarray or other expression data.
+ Expression data rendering
+ Gene expression data visualisation
+ Microarray data rendering
+
+
+ Expression data visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Identify and analyse networks of protein interactions.
+
+
+ Protein interaction network visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Draw or visualise a DNA map.
+ DNA map drawing
+ Map rendering
+
+
+ Map drawing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Render a sequence with motifs.
+
+ Sequence motif rendering
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Draw or visualise restriction maps in DNA sequences.
+
+
+ Restriction map drawing
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Draw a linear map of DNA.
+
+ DNA linear map rendering
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA circular map rendering
+ Draw a circular map of DNA, for example a plasmid map.
+
+
+ Plasmid map drawing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise operon structure etc.
+ Operon rendering
+
+
+ Operon drawing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identify folding families of related RNAs.
+
+ Nucleic acid folding family identification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+ Compute energies of nucleic acid folding, e.g. minimum folding energies for DNA or RNA sequences or energy landscape of RNA mutants.
+
+
+ Nucleic acid folding energy calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Retrieve existing annotation (or documentation), typically annotation on a database entity.
+
+ Use this concept for tools which retrieve pre-existing annotations, not for example prediction methods that might make annotations.
+ Annotation retrieval
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the biological or biochemical role of a protein, or other aspects of a protein function.
+ Protein function analysis
+ Protein functional analysis
+
+
+ For functional properties that can be mapped to a sequence, use 'Sequence feature detection (protein)' instead.
+ Protein function prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the functional properties of two or more proteins.
+
+
+ Protein function comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Submit a molecular sequence to a database.
+
+ Sequence submission
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a known network of gene regulation.
+ Gene regulatory network comparison
+ Gene regulatory network modelling
+ Regulatory network comparison
+ Regulatory network modelling
+
+
+ Gene regulatory network analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:UploadPDB
+ Parse, prepare or load a user-specified data file so that it is available for use.
+ Data loading
+ Loading
+
+
+ Parsing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a sequence data resource (typically a database) and retrieve sequences and / or annotation.
+
+ This includes direct retrieval methods (e.g. the dbfetch program) but not those that perform calculations on the sequence.
+ Sequence retrieval
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ WHATIF:DownloadPDB
+ WHATIF:EchoPDB
+ Query a tertiary structure data resource (typically a database) and retrieve structures, structure-related data and annotation.
+
+ This includes direct retrieval methods but not those that perform calculations on the sequence or structure.
+ Structure retrieval
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:GetSurfaceDots
+ Calculate the positions of dots that are homogeneously distributed over the surface of a molecule.
+
+
+ A dot has three coordinates (x,y,z) and (typically) a color.
+ Surface rendering
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible surface') for each atom in a structure.
+
+
+ Waters are not considered.
+ Protein atom surface calculation (accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible molecular surface') for each atom in a structure.
+
+
+ Waters are not considered.
+ Protein atom surface calculation (accessible molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible surface') for each residue in a structure.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('vacuum accessible surface') for each residue in a structure. This is the accessibility of the residue when taken out of the protein together with the backbone atoms of any residue it is covalently bound to.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (vacuum accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible molecular surface') for each residue in a structure.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (accessible molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('vacuum molecular surface') for each residue in a structure. This is the accessibility of the residue when taken out of the protein together with the backbone atoms of any residue it is covalently bound to.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (vacuum molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible molecular surface') for a structure as a whole.
+
+
+ Protein surface calculation (accessible molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible surface') for a structure as a whole.
+
+
+ Protein surface calculation (accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate for each residue in a protein structure all its backbone torsion angles.
+
+
+ Backbone torsion angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate for each residue in a protein structure all its torsion angles.
+
+
+ Full torsion angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate for each cysteine (bridge) all its torsion angles.
+
+
+ Cysteine torsion angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ For each amino acid in a protein structure calculate the backbone angle tau.
+
+
+ Tau is the backbone angle N-Calpha-C (angle over the C-alpha).
+ Tau angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowCysteineBridge
+ Detect cysteine bridges (from coordinate data) in a protein structure.
+
+
+ Cysteine bridge detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowCysteineFree
+ Detect free cysteines in a protein structure.
+
+
+ A free cysteine is neither involved in a cysteine bridge, nor functions as a ligand to a metal.
+ Free cysteine detection
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowCysteineMetal
+ Detect cysteines that are bound to metal in a protein structure.
+
+
+ Metal-bound cysteine detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate protein residue contacts with nucleic acids in a structure.
+
+
+ Residue contact calculation (residue-nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate protein residue contacts with metal in a structure.
+ Residue-metal contact calculation
+
+
+ Protein-metal contact calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate ion contacts in a structure (all ions for all side chain atoms).
+
+
+ Residue contact calculation (residue-negative ion)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowBumps
+ Detect 'bumps' between residues in a structure, i.e. those with pairs of atoms whose Van der Waals' radii interpenetrate more than a defined distance.
+
+
+ Residue bump detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF:SymmetryContact
+ Calculate the number of symmetry contacts made by residues in a protein structure.
+
+
+ A symmetry contact is a contact between two atoms in different asymmetric units.
+ Residue symmetry contact calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate contacts between residues and ligands in a protein structure.
+
+
+ Residue contact calculation (residue-ligand)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:HasSaltBridge
+ WHATIF:HasSaltBridgePlus
+ WHATIF:ShowSaltBridges
+ WHATIF:ShowSaltBridgesH
+ Calculate (and possibly score) salt bridges in a protein structure.
+
+
+ Salt bridges are interactions between oppositely charged atoms in different residues. The output might include the inter-atomic distance.
+ Salt bridge calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF:ShowLikelyRotamers
+ WHATIF:ShowLikelyRotamers100
+ WHATIF:ShowLikelyRotamers200
+ WHATIF:ShowLikelyRotamers300
+ WHATIF:ShowLikelyRotamers400
+ WHATIF:ShowLikelyRotamers500
+ WHATIF:ShowLikelyRotamers600
+ WHATIF:ShowLikelyRotamers700
+ WHATIF:ShowLikelyRotamers800
+ WHATIF:ShowLikelyRotamers900
+ Predict rotamer likelihoods for all 20 amino acid types at each position in a protein structure.
+
+
+ Output typically includes, for each residue position, the likelihoods for the 20 amino acid types with estimated reliability of the 20 likelihoods.
+ Rotamer likelihood prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF:ProlineMutationValue
+ Calculate for each position in a protein structure the chance that a proline, when introduced at this position, would increase the stability of the whole protein.
+
+
+ Proline mutation value calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: PackingQuality
+ Identify poorly packed residues in protein structures.
+
+
+ Residue packing validation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: ImproperQualityMax
+ WHATIF: ImproperQualitySum
+ Validate protein geometry, for example bond lengths, bond angles, torsion angles, chiralities, planarities etc. An example is validation of a Ramachandran plot of a protein structure.
+ Ramachandran plot validation
+
+
+ Protein geometry validation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ WHATIF: PDB_sequence
+ Extract a molecular sequence from a PDB file.
+
+
+ PDB file sequence retrieval
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify HET groups in PDB files.
+
+
+ A HET group usually corresponds to ligands, lipids, but might also (not consistently) include groups that are attached to amino acids. Each HET group is supposed to have a unique three letter code and a unique name which might be given in the output.
+ HET group detection
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Determine for each residue the DSSP-determined secondary structure in three-state (HSC).
+
+ DSSP secondary structure assignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF: PDBasXML
+ Reformat (a file or other report of) tertiary structure data.
+
+
+ Structure formatting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign cysteine bonding state and disulfide bond partners in protein structures.
+
+
+ Protein cysteine and disulfide bond assignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify poor quality amino acid positions in protein structures.
+
+
+ Residue validation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ WHATIF:MovedWaterPDB
+ Query a tertiary structure database and retrieve water molecules.
+
+ Structure retrieval (water)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict siRNA duplexes in RNA.
+
+
+ siRNA duplex prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Refine an existing sequence alignment.
+
+
+ Sequence alignment refinement
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process an EMBOSS listfile (list of EMBOSS Uniform Sequence Addresses).
+
+ Listfile processing
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform basic (non-analytical) operations on a report or file of sequences (which might include features), such as file concatenation, removal or ordering of sequences, creation of subset or a new file of sequences.
+
+
+ Sequence file editing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Perform basic (non-analytical) operations on a sequence alignment file, such as copying or removal and ordering of sequences.
+
+ Sequence alignment file processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) physicochemical property data for small molecules.
+
+ Small molecule data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Search and retrieve documentation on a bioinformatics ontology.
+
+ Data retrieval (ontology annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query an ontology and retrieve concepts or relations.
+
+ Data retrieval (ontology concept)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify a representative sequence from a set of sequences, typically using scores from pair-wise alignment or other comparison of the sequences.
+
+
+ Representative sequence identification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Perform basic (non-analytical) operations on a file of molecular tertiary structural data.
+
+ Structure file processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query a profile data resource and retrieve one or more profile(s) and / or associated annotation.
+
+ This includes direct retrieval methods that retrieve a profile by, e.g. the profile name.
+ Data retrieval (sequence profile)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform a statistical data operation of some type, e.g. calibration or validation.
+ Significance testing
+ Statistical analysis
+ Statistical test
+ Statistical testing
+ Expectation maximisation
+ Gibbs sampling
+ Hypothesis testing
+ Omnibus test
+
+
+ Statistical calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a 3D-1D scoring matrix from analysis of protein sequence and structural data.
+ 3D-1D scoring matrix construction
+
+
+ A 3D-1D scoring matrix scores the probability of amino acids occurring in different structural environments.
+ 3D-1D scoring matrix generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise transmembrane proteins, typically the transmembrane regions within a sequence.
+ Transmembrane protein rendering
+
+
+ Transmembrane protein visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ An operation performing purely illustrative (pedagogical) purposes.
+
+ Demonstration
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query a biological pathways database and retrieve annotation on one or more pathways.
+
+ Data retrieval (pathway or network)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query a database and retrieve one or more data identifiers.
+
+ Data retrieval (identifier)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a density plot (of base composition) for a nucleotide sequence.
+
+
+ Nucleic acid density plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse one or more known molecular sequences.
+ Sequence analysis (general)
+
+
+ Sequence analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse molecular sequence motifs.
+ Sequence motif processing
+
+
+ Sequence motif analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) protein interaction data.
+
+ Protein interaction data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse protein structural data.
+ Structure analysis (protein)
+
+
+ Protein structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) annotation of some type, typically annotation on an entry from a biological or biomedical database entity.
+
+ Annotation processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse features in molecular sequences.
+
+ Sequence feature analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Basic (non-analytical) operations of some data, either a file or equivalent entity in memory, such that the same basic type of data is consumed as input and generated as output.
+ File handling
+ File processing
+ Report handling
+ Utility operation
+ Processing
+
+
+ Data handling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse gene expression and regulation data.
+
+ Gene expression analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) one or more structural (3D) profile(s) or template(s) of some type.
+
+ Structural profile processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) an index of (typically a file of) biological data.
+
+ Data index processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) some type of sequence profile.
+
+ Sequence profile processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Analyse protein function, typically by processing protein sequence and/or structural data, and generate an informative report.
+
+
+ Protein function analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse, simulate or predict protein folding, typically by processing sequence and / or structural data. For example, predict sites of nucleation or stabilisation key to protein folding.
+ Protein folding modelling
+ Protein folding simulation
+ Protein folding site prediction
+
+
+ Protein folding analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse protein secondary structure data.
+ Secondary structure analysis (protein)
+
+
+ Protein secondary structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) data on the physicochemical property of a molecule.
+
+ Physicochemical property data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict oligonucleotide primers or probes.
+ Primer and probe prediction
+
+
+ Primer and probe design
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Process (read and / or write) data of a specific type, for example applying analytical methods.
+
+
+ Operation (typed)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a database (or other data resource) with a supplied query and retrieve entries (or parts of entries) that are similar to the query.
+ Search
+
+
+ Typically the query is compared to each entry and high scoring matches (hits) are returned. For example, a BLAST search of a sequence database.
+ Database search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Retrieve an entry (or part of an entry) from a data resource that matches a supplied query. This might include some primary data and annotation. The query is a data identifier or other indexed term. For example, retrieve a sequence record with the specified accession number, or matching supplied keywords.
+ Data extraction
+ Retrieval
+ Data retrieval (metadata)
+ Metadata retrieval
+
+
+ Data retrieval
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Predict, recognise, detect or identify some properties of a biomolecule.
+ Detection
+ Prediction
+ Recognition
+
+
+ Prediction and recognition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Compare two or more things to identify similarities.
+
+
+ Comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Refine or optimise some data model.
+
+
+ Optimisation and refinement
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Model or simulate some biological entity or system, typically using mathematical techniques including dynamical systems, statistical models, differential equations, and game theoretic models.
+ Mathematical modelling
+
+
+ Modelling and simulation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Perform basic operations on some data or a database.
+
+
+ Data handling
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Validate some data.
+ Quality control
+
+
+ Validation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Map properties to positions on a biological entity (typically a molecular sequence or structure), or assemble such an entity from constituent parts.
+ Cartography
+
+
+ Mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Design a biological entity (typically a molecular sequence or structure) with specific properties.
+
+
+ Design
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) microarray data.
+
+ Microarray data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Process (read and / or write) a codon usage table.
+
+ Codon usage table processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve a codon usage table and / or associated annotation.
+
+ Data retrieval (codon usage table)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a gene expression profile.
+
+ Gene expression profile processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse gene expression patterns to identify sets of genes that are associated with a specific trait, condition, clinical outcome etc.
+ Gene sets can be defined beforehand by biological function, chromosome locations and so on.
+ Gene set testing
+ Identify classes of genes or proteins that are over or under-represented in a large set of genes or proteins. For example analysis of a set of genes corresponding to a gene expression profile, annotated with Gene Ontology (GO) concepts, where eventual over-/under-representation of certain GO concepts within the studied set of genes is revealed.
+ Functional enrichment analysis
+ GSEA
+ Gene-set over-representation analysis
+ Gene set analysis
+ GO-term enrichment
+ Gene Ontology concept enrichment
+ Gene Ontology term enrichment
+
+
+ "Gene set analysis" (often used interchangeably or in an overlapping sense with "gene-set enrichment analysis") refers to the functional analysis (term enrichment) of a differentially expressed set of genes, rather than all genes analysed.
+ The Gene Ontology (GO) is typically used, the input is a set of Gene IDs, and the output of the analysis is typically a ranked list of GO concepts, each associated with a p-value.
+ Gene-set enrichment analysis
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict a network of gene regulation.
+
+
+ Gene regulatory network prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Generate, analyse or handle a biological pathway or network.
+
+ Pathway or network processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Process (read and / or write) RNA secondary structure data.
+
+
+ RNA secondary structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ Process (read and / or write) RNA tertiary structure data.
+
+
+ Structure processing (RNA)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict RNA tertiary structure.
+
+
+ RNA structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict DNA tertiary structure.
+
+
+ DNA structure prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Generate, process or analyse phylogenetic tree or trees.
+
+
+ Phylogenetic tree processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) protein secondary structure data.
+
+ Protein secondary structure processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a network of protein interactions.
+
+ Protein interaction network processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) one or more molecular sequences and associated annotation.
+
+ Sequence processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Process (read and / or write) a protein sequence and associated annotation.
+
+
+ Sequence processing (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a nucleotide sequence and associated annotation.
+
+ Sequence processing (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more molecular sequences.
+
+
+ Sequence comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a sequence cluster.
+
+ Sequence cluster processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a sequence feature table.
+
+ Feature table processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Includes methods that predict whole gene structure using a combination of multiple methods to achieve better predictions.
+ Detect, predict and identify genes or components of genes in DNA sequences, including promoters, coding regions, splice sites, etc.
+ Gene calling
+ Gene finding
+ Whole gene prediction
+
+
+ Methods for gene prediction might be ab initio, based on phylogenetic comparisons, use motifs, sequence features, support vector machine, alignment etc.
+ Gene prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+ Classify G-protein coupled receptors (GPCRs) into families and subfamilies.
+
+
+ GPCR classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Not sustainable to have protein type-specific concepts.
+ 1.19
+
+
+ Predict G-protein coupled receptor (GPCR) coupling selectivity.
+
+ GPCR coupling selectivity prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Process (read and / or write) a protein tertiary structure.
+
+
+ Structure processing (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility for each atom in a structure.
+
+
+ Waters are not considered.
+ Protein atom surface calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility for each residue in a structure.
+
+
+ Protein residue surface calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility of a structure as a whole.
+
+
+ Protein surface calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular sequence alignment.
+
+ Sequence alignment processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict protein-protein binding sites.
+ Protein-protein binding site detection
+
+
+ Protein-protein binding site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular tertiary structure.
+
+ Structure processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Annotate a DNA map of some type with terms from a controlled vocabulary.
+
+ Map annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a protein.
+
+ Data retrieval (protein annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve a phylogenetic tree from a data resource.
+
+ Data retrieval (phylogenetic tree)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a protein interaction.
+
+ Data retrieval (protein interaction annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a protein family.
+
+ Data retrieval (protein family annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on an RNA family.
+
+ Data retrieval (RNA family annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a specific gene.
+
+ Data retrieval (gene annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a specific genotype or phenotype.
+
+ Data retrieval (genotype and phenotype annotation)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the architecture of two or more protein structures.
+
+
+ Protein architecture comparison
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify the architecture of a protein structure.
+
+
+ Includes methods that try to suggest the most likely biological unit for a given protein X-ray crystal structure based on crystal symmetry and scoring of putative protein-protein interfaces.
+ Protein architecture recognition
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The simulation of molecular (typically protein) conformation using a computational model of physical forces and computer simulation.
+ Molecular dynamics simulation
+ Protein dynamics
+
+
+ Molecular dynamics
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a nucleic acid sequence (using methods that are only applicable to nucleic acid sequences).
+ Sequence analysis (nucleic acid)
+ Nucleic acid sequence alignment analysis
+ Sequence alignment analysis (nucleic acid)
+
+
+ Nucleic acid sequence analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a protein sequence (using methods that are only applicable to protein sequences).
+ Sequence analysis (protein)
+ Protein sequence alignment analysis
+ Sequence alignment analysis (protein)
+
+
+ Protein sequence analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse known molecular tertiary structures.
+
+
+ Structure analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse nucleic acid tertiary structural data.
+
+
+ Nucleic acid structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular secondary structure.
+
+ Secondary structure processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more molecular tertiary structures.
+
+
+ Structure comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a helical wheel representation of protein secondary structure.
+ Helical wheel rendering
+
+
+ Helical wheel drawing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a topology diagram of protein secondary structure.
+ Topology diagram rendering
+
+
+ Topology diagram drawing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare protein tertiary structures.
+ Structure comparison (protein)
+
+
+ Methods might identify structural neighbors, find structural similarities or define a structural core.
+ Protein structure comparison
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare protein secondary structures.
+ Protein secondary structure
+ Secondary structure comparison (protein)
+ Protein secondary structure alignment
+
+
+ Protein secondary structure comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the subcellular localisation of a protein sequence.
+ Protein cellular localization prediction
+ Protein subcellular localisation prediction
+ Protein targeting prediction
+
+
+ The prediction might include subcellular localisation (nuclear, cytoplasmic, mitochondrial, chloroplast, plastid, membrane etc) or export (extracellular proteins) of a protein.
+ Subcellular localisation prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate contacts between residues in a protein structure.
+
+
+ Residue contact calculation (residue-residue)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify potential hydrogen bonds between amino acid residues.
+
+
+ Hydrogen bond calculation (inter-residue)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the interactions of proteins with other proteins.
+ Protein-protein interaction detection
+ Protein-protein binding prediction
+ Protein-protein interaction prediction
+
+
+ Protein interaction prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) codon usage data.
+
+ Codon usage data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Metagenomic inference is the profiling of phylogenetic marker genes in order to predict metagenome function.
+ Process (read and/or write) expression data from experiments measuring molecules (e.g. omics data), including analysis of one or more expression profiles, typically to interpret them in functional terms.
+ Expression data analysis
+ Gene expression analysis
+ Gene expression data analysis
+ Gene expression regulation analysis
+ Metagenomic inference
+ Microarray data analysis
+ Protein expression analysis
+
+
+ Expression analysis
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Process (read and / or write) a network of gene regulation.
+
+
+ Gene regulatory network processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+ Generate, process or analyse a biological pathway or network.
+
+ Pathway or network analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse SAGE, MPSS or SBS experimental data, typically to identify or quantify mRNA transcripts.
+
+ Sequencing-based expression profile data analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict, analyse, characterize or model splice sites, splicing events and so on, typically by comparing multiple nucleic acid sequences.
+ Splicing model analysis
+
+
+ Splicing analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse raw microarray data.
+
+ Microarray raw data analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+
+
+ Process (read and / or write) nucleic acid sequence or structural data.
+
+ Nucleic acid analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+
+
+ Process (read and / or write) protein sequence or structural data.
+
+ Protein analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ Process (read and / or write) molecular sequence data.
+
+
+ Sequence data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) molecular structural data.
+
+ Structural data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Process (read and / or write) text.
+
+ Text processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Analyse a protein sequence alignment, typically to detect features or make predictions.
+
+
+ Protein sequence alignment analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Analyse a nucleic acid sequence alignment, typically to detect features or make predictions.
+
+
+ Nucleic acid sequence alignment analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Compare two or more nucleic acid sequences.
+
+
+ Nucleic acid sequence comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Compare two or more protein sequences.
+
+
+ Protein sequence comparison
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Back-translate a protein sequence into DNA.
+
+
+ DNA back-translation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Edit or change a nucleic acid sequence, either randomly or specifically.
+
+
+ Sequence editing (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Edit or change a protein sequence, either randomly or specifically.
+
+
+ Sequence editing (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Generate a nucleic acid sequence by some means.
+
+
+ Sequence generation (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Generate a protein sequence by some means.
+
+
+ Sequence generation (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Visualise, format or render a nucleic acid sequence.
+
+
+ Various nucleic acid sequence analysis methods might generate a sequence rendering but are not (for brevity) listed under here.
+ Nucleic acid sequence visualisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Visualise, format or render a protein sequence.
+
+
+ Various protein sequence analysis methods might generate a sequence rendering but are not (for brevity) listed under here.
+ Protein sequence visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare nucleic acid tertiary structures.
+ Structure comparison (nucleic acid)
+
+
+ Nucleic acid structure comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Process (read and / or write) nucleic acid tertiary structure data.
+
+
+ Structure processing (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a map of a DNA sequence annotated with positional or non-positional features of some type.
+
+
+ DNA mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a DNA map of some type.
+
+ Map data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the hydrophobic, hydrophilic or charge properties of a protein (from analysis of sequence or structural information).
+
+
+ Protein hydropathy calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict catalytic residues, active sites or other ligand-binding sites in protein sequences or structures.
+ Protein binding site detection
+ Protein binding site prediction
+
+
+ Binding site prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Build clusters of similar structures, typically using scores from structural alignment methods.
+ Structural clustering
+
+
+ Structure clustering
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a physical DNA map (sequence map) from analysis of sequence tagged sites (STS).
+ Sequence mapping
+
+
+ An STS is a short subsequence of known sequence and location that occurs only once in the chromosome or genome that is being mapped. Sources of STSs include expressed sequence tags (ESTs), simple sequence length polymorphisms (SSLPs), and random genomic sequences from cloned genomic DNA or database sequences.
+ Sequence tagged site (STS) mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Compare two or more entities, typically the sequence or structure (or derivatives) of macromolecules, to identify equivalent subunits.
+ Alignment construction
+ Alignment generation
+
+
+ Alignment
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the molecular weight of a protein (or fragments) and compare it to another protein or reference data. Generally used for protein identification.
+ PMF
+ Peptide mass fingerprinting
+ Protein fingerprinting
+
+
+ Protein fragment weight comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the physicochemical properties of two or more proteins (or reference data).
+
+
+ Protein property comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Compare two or more molecular secondary structures.
+
+ Secondary structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Generate a Hopp and Woods plot of antigenicity of a protein.
+
+
+ Hopp and Woods plotting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Generate a view of clustered quantitative data, annotated with textual information.
+
+
+ Cluster textual view generation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ In the case of microarray data, visualise clustered gene expression data as a set of profiles, where each profile shows the gene expression values of a cluster across samples on the X-axis.
+ Visualise clustered quantitative data as a set of different profiles, where each profile is plotted versus different entities or samples on the X-axis.
+ Clustered quantitative data plotting
+ Clustered quantitative data rendering
+ Wave graph plotting
+ Microarray cluster temporal graph rendering
+ Microarray wave graph plotting
+ Microarray wave graph rendering
+
+
+ Clustering profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Generate a dendrograph of raw, preprocessed or clustered expression (e.g. microarray) data.
+
+
+ Dendrograph plotting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a plot of distances (distance or correlation matrix) between expression values.
+ Distance map rendering
+ Distance matrix plotting
+ Distance matrix rendering
+ Proximity map rendering
+ Correlation matrix plotting
+ Correlation matrix rendering
+ Microarray distance map rendering
+ Microarray proximity map plotting
+ Microarray proximity map rendering
+
+
+ Proximity map plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise clustered expression data using a tree diagram.
+ Dendrogram plotting
+ Dendrograph plotting
+ Dendrograph visualisation
+ Expression data tree or dendrogram rendering
+ Expression data tree visualisation
+ Microarray 2-way dendrogram rendering
+ Microarray checks view rendering
+ Microarray matrix tree plot rendering
+ Microarray tree or dendrogram rendering
+
+
+ Dendrogram visualisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Examples for visualization are the distribution of variance over the components, loading and score plots.
+ Visualize the results of a principal component analysis (orthogonal data transformation). For example, visualization of the principal components (essential subspace) coming from a Principal Component Analysis (PCA) on the trajectory atomistic coordinates of a molecular structure.
+ PCA plotting
+ Principal component plotting
+ ED visualization
+ Essential Dynamics visualization
+ Microarray principal component plotting
+ Microarray principal component rendering
+ PCA visualization
+ Principal modes visualization
+
+
+ The use of Principal Component Analysis (PCA), a multivariate statistical analysis to obtain collective variables on the atomic positional fluctuations, helps to separate the configurational space in two subspaces: an essential subspace containing relevant motions, and another one containing irrelevant local fluctuations.
+ Principal component visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Comparison of two sets of quantitative data such as two samples of gene expression values.
+ Render a graph in which the values of two variables are plotted along two axes; the pattern of the points reveals any correlation.
+ Scatter chart plotting
+ Microarray scatter plot plotting
+ Microarray scatter plot rendering
+
+
+ Scatter plot plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Visualise gene expression data where each band (or line graph) corresponds to a sample.
+
+
+ Whole microarray graph plotting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise gene expression data after hierarchical clustering for representing hierarchical relationships.
+ Expression data tree-map rendering
+ Treemapping
+ Microarray tree-map rendering
+
+
+ Treemap visualisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ In the case of microarray data, visualise raw and pre-processed gene expression data, via a plot showing over- and under-expression along with mean, upper and lower quartiles.
+ Generate a box plot, i.e. a depiction of groups of numerical data through their quartiles.
+ Box plot plotting
+ Microarray Box-Whisker plot plotting
+
+
+ Box-Whisker plot plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a physical (sequence) map of a DNA sequence showing the physical distance (base pairs) between features or landmarks such as restriction sites, cloned DNA fragments, genes and other genetic markers.
+ Physical cartography
+
+
+ Physical mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Apply analytical methods to existing data of a specific type.
+
+
+ This excludes non-analytical methods that read and write the same basic type of data (for that, see 'Data handling').
+ Analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Process or analyse an alignment of molecular sequences or structures.
+
+ Alignment analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+
+ Analyse a body of scientific text (typically a full text article from a scientific journal).
+
+ Article analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Analyse the interactions of two or more molecules (or parts of molecules) that are known to interact.
+
+ Molecular interaction analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Includes analysis of raw experimental protein-protein interaction data from for example yeast two-hybrid analysis, protein microarrays, immunoaffinity chromatography followed by mass spectrometry, phage display etc.
+ Analyse the interactions of proteins with other proteins.
+ Protein interaction analysis
+ Protein interaction raw data analysis
+ Protein interaction simulation
+
+
+ Protein-protein interaction analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: HETGroupNames
+ WHATIF:HasMetalContacts
+ WHATIF:HasMetalContactsPlus
+ WHATIF:HasNegativeIonContacts
+ WHATIF:HasNegativeIonContactsPlus
+ WHATIF:HasNucleicContacts
+ WHATIF:ShowDrugContacts
+ WHATIF:ShowDrugContactsShort
+ WHATIF:ShowLigandContacts
+ WHATIF:ShowProteiNucleicContacts
+ Calculate contacts between residues, or between residues and other groups, in a protein structure, on the basis of distance calculations.
+ HET group detection
+ Residue contact calculation (residue-ligand)
+ Residue contact calculation (residue-metal)
+ Residue contact calculation (residue-negative ion)
+ Residue contact calculation (residue-nucleic acid)
+ WHATIF:SymmetryContact
+
+
+ This includes identifying HET groups, which usually correspond to ligands, lipids, but might also (not consistently) include groups that are attached to amino acids. Each HET group is supposed to have a unique three letter code and a unique name which might be given in the output. It can also include calculation of symmetry contacts, i.e. a contact between two atoms in different asymmetric units.
+ Residue distance calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Process (read and / or write) an alignment of two or more molecular sequences, structures or derived data.
+
+ Alignment processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular tertiary (3D) structure alignment.
+
+ Structure alignment processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate codon usage bias, e.g. generate a codon usage bias plot.
+ Codon usage bias plotting
+
+
+ Codon usage bias calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Generate a codon usage bias plot.
+
+
+ Codon usage bias plotting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the differences in codon usage fractions between two sequences, sets of sequences, codon usage tables etc.
+
+
+ Codon usage fraction calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Assign molecular sequences, structures or other biological data to a specific group or category according to qualities it shares with that group or category.
+
+
+ Classification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) molecular interaction data.
+
+ Molecular interaction data processing
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign molecular sequence(s) to a group or category.
+
+
+ Sequence classification
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign molecular structure(s) to a group or category.
+
+
+ Structure classification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more proteins (or some aspect) to identify similarities.
+
+
+ Protein comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more nucleic acids to identify similarities.
+
+
+ Nucleic acid comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict, recognise, detect or identify some properties of proteins.
+
+
+ Prediction and recognition (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict, recognise, detect or identify some properties of nucleic acids.
+
+
+ Prediction and recognition (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ Edit, convert or otherwise change a molecular tertiary structure, either randomly or specifically.
+
+
+ Structure editing
+
+
+
+
+
+
+
+
+ beta13
+ Edit, convert or otherwise change a molecular sequence alignment, either randomly or specifically.
+
+
+ Sequence alignment editing
+
+
+
+
+
+
+
+
+ beta13
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+ Render (visualise) a biological pathway or network.
+
+ Pathway or network visualisation
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.6
+
+
+ Predict general (non-positional) functional properties of a protein from analysing its sequence.
+
+ For functional properties that are positional, use 'Protein site detection' instead.
+ Protein function prediction (from sequence)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ (jison)This is a distinction made on basis of input; all features that exist can be mapped to a sequence so this isn't needed (consolidate with "Protein feature detection").
+ 1.17
+
+
+
+ Predict, recognise and identify functional or other key sites within protein sequences, typically by scanning for known motifs, patterns and regular expressions.
+
+
+ Protein sequence feature detection
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.18
+
+
+ Calculate (or predict) physical or chemical properties of a protein, including any non-positional properties of the molecular sequence, from processing a protein sequence.
+
+
+ Protein property calculation (from sequence)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.6
+
+
+ Predict, recognise and identify positional features in proteins from analysing protein structure.
+
+ Protein feature prediction (from structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ Predict, recognise and identify positional features in proteins from analysing protein sequences or structures.
+ Protein feature prediction
+ Protein feature recognition
+ Protein secondary database search
+ Protein site detection
+ Protein site prediction
+ Protein site recognition
+ Sequence feature detection (protein)
+ Sequence profile database search
+
+
+ Features include functional sites or regions, secondary structure, structural domains and so on. Methods might use fingerprints, motifs, profiles, hidden Markov models, sequence alignment etc to provide a mapping of a query protein sequence to a discriminatory element. This includes methods that search a secondary protein database (Prosite, Blocks, ProDom, Prints, Pfam etc.) to assign a protein sequence(s) to a known protein family or group.
+ Protein feature detection
+
+
+
+
+
+
+
+
+ beta13
+ 1.6
+
+
+ Screen a molecular sequence(s) against a database (of some type) to identify similarities between the sequence and database entries.
+
+ Database search (by sequence)
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+ Predict a network of protein interactions.
+
+
+ Protein interaction network prediction
+
+
+
+
+
+
+
+
+
+ beta13
+ Design (or predict) nucleic acid sequences with specific chemical or physical properties.
+ Gene design
+
+
+ Nucleic acid design
+
+
+
+
+
+
+
+
+
+ beta13
+ Edit a data entity, either randomly or specifically.
+
+
+ Editing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Evaluate a DNA sequence assembly, typically for purposes of quality control.
+ Assembly QC
+ Assembly quality evaluation
+ Sequence assembly QC
+ Sequence assembly quality evaluation
+
+
+ Sequence assembly validation
+
+
+
+
+
+
+
+
+
+ 1.1
+ Align two or more (typically huge) molecular sequences that represent genomes.
+ Genome alignment construction
+ Whole genome alignment
+
+
+ Genome alignment
+
+
+
+
+
+
+
+
+ 1.1
+ Reconstruction of a sequence assembly in a localised area.
+
+
+ Localised reassembly
+
+
+
+
+
+
+
+
+ 1.1
+ Render and visualise a DNA sequence assembly.
+ Assembly rendering
+ Assembly visualisation
+ Sequence assembly rendering
+
+
+ Sequence assembly visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Identify base (nucleobase) sequence from a fluorescence 'trace' data generated by an automated DNA sequencer.
+ Base calling
+ Phred base calling
+ Phred base-calling
+
+
+ Base-calling
+
+
+
+
+
+
+
+
+
+ 1.1
+ The mapping of methylation sites in a DNA (genome) sequence. Typically, the mapping of high-throughput bisulfite reads to the reference genome.
+ Bisulfite read mapping
+ Bisulfite sequence alignment
+ Bisulfite sequence mapping
+
+
+ Bisulfite mapping follows high-throughput sequencing of DNA which has undergone bisulfite treatment followed by PCR amplification; unmethylated cytosines are specifically converted to thymine, allowing the methylation status of cytosine in the DNA to be detected.
+ Bisulfite mapping
+
+
+
+
+
+
+
+
+ 1.1
+ Identify and filter a (typically large) sequence data set to remove sequences from contaminants in the sample that was sequenced.
+
+
+ Sequence contamination filtering
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Trim sequences (typically from an automated DNA sequencer) to remove misleading ends.
+
+
+ For example trim polyA tails, introns and primer sequence flanking the sequence of amplified exons, or other unwanted sequence.
+ Trim ends
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Trim sequences (typically from an automated DNA sequencer) to remove sequence-specific end regions, typically contamination from vector sequences.
+
+
+ Trim vector
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Trim sequences (typically from an automated DNA sequencer) to remove the sequence ends that extend beyond an assembled reference sequence.
+
+
+ Trim to reference
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ Cut (remove) the end from a molecular sequence.
+ Trimming
+ Barcode sequence removal
+ Trim ends
+ Trim to reference
+ Trim vector
+
+
+ This includes end trimming, -- Trim sequences (typically from an automated DNA sequencer) to remove misleading ends. For example trim polyA tails, introns and primer sequence flanking the sequence of amplified exons, or other unwanted sequence.-- trimming to a reference sequence, --Trim sequences (typically from an automated DNA sequencer) to remove the sequence ends that extend beyond an assembled reference sequence. -- vector trimming -- Trim sequences (typically from an automated DNA sequencer) to remove sequence-specific end regions, typically contamination from vector sequences.
+ Sequence trimming
+
+
+
+
+
+
+
+
+
+ 1.1
+ Compare the features of two genome sequences.
+
+
+ Genomic elements that might be compared include genes, indels, single nucleotide polymorphisms (SNPs), retrotransposons, tandem repeats and so on.
+ Genome feature comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Detect errors in DNA sequences generated from sequencing projects.
+ Short read error correction
+ Short-read error correction
+
+
+ Sequencing error detection
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse DNA sequence data to identify differences between the genetic composition (genotype) of an individual compared to other individuals or a reference sequence.
+
+
+ Methods might consider cytogenetic analyses, copy number polymorphism (and calculate copy number calls for copy-number variation(CNV) regions), single nucleotide polymorphism (SNP), rare copy number variation (CNV) identification, loss of heterozygosity data and so on.
+ Genotyping
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse a genetic variation, for example to annotate its location, alleles, classification, and effects on individual transcripts predicted for a gene model.
+ Genetic variation annotation
+ Sequence variation analysis
+ Variant analysis
+ Transcript variant analysis
+
+
+ Genetic variation annotation provides contextual interpretation of coding SNP consequences in transcripts. It allows comparisons to be made between variation data in different populations or strains for the same transcript.
+ Genetic variation analysis
+
+
+
+
+
+
+
+
+
+ 1.1
+ Align short oligonucleotide sequences (reads) to a larger (genomic) sequence.
+ Oligonucleotide alignment
+ Oligonucleotide alignment construction
+ Oligonucleotide alignment generation
+ Oligonucleotide mapping
+ Read alignment
+ Short oligonucleotide alignment
+ Short read alignment
+ Short read mapping
+ Short sequence read mapping
+
+
+ The purpose of read mapping is to identify the location of sequenced fragments within a reference genome and assumes that there is, in fact, at least local similarity between the fragment and reference sequences.
+ Read mapping
+
+
+
+
+
+
+
+
+ 1.1
+ A variant of oligonucleotide mapping where a read is mapped to two separate locations because of possible structural variation.
+ Split-read mapping
+
+
+ Split read mapping
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse DNA sequences in order to identify a DNA 'barcode'; marker genes or any short fragment(s) of DNA that are useful to diagnose the taxa of biological organisms.
+ Community profiling
+ Sample barcoding
+
+
+ DNA barcoding
+
+
+
+
+
+
+
+
+
+ 1.1
+ 1.19
+
+ Identify single nucleotide change in base positions in sequencing data that differ from a reference genome and which might, especially by reference to population frequency or functional data, indicate a polymorphism.
+
+
+ SNP calling
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ "Polymorphism detection" and "Variant calling" are essentially the same thing - keeping the latter as a more prevalent term nowadays.
+ 1.24
+
+
+ Detect mutations in multiple DNA sequences, for example, from the alignment and comparison of the fluorescent traces produced by DNA sequencing hardware.
+
+
+ Polymorphism detection
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ Visualise, format or render an image of a Chromatogram.
+ Chromatogram viewing
+
+
+ Chromatogram visualisation
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse cytosine methylation states in nucleic acid sequences.
+ Methylation profile analysis
+
+
+ Methylation analysis
+
+
+
+
+
+
+
+
+ 1.1
+ 1.19
+
+ Determine cytosine methylation status of specific positions in a nucleic acid sequence.
+
+
+ Methylation calling
+ true
+
+
+
+
+
+
+
+
+
+ 1.1
+ Measure the overall level of methyl cytosines in a genome from analysis of experimental data, typically from chromatographic methods and methyl accepting capacity assay.
+ Genome methylation analysis
+ Global methylation analysis
+ Methylation level analysis (global)
+
+
+ Whole genome methylation analysis
+
+
+
+
+
+
+
+
+ 1.1
+ Analysing the DNA methylation of specific genes or regions of interest.
+ Gene-specific methylation analysis
+ Methylation level analysis (gene-specific)
+
+
+ Gene methylation analysis
+
+
+
+
+
+
+
+
+
+ 1.1
+ Visualise, format or render a nucleic acid sequence that is part of (and in context of) a complete genome sequence.
+ Genome browser
+ Genome browsing
+ Genome rendering
+ Genome viewing
+
+
+ Genome visualisation
+
+
+
+
+
+
+
+
+
+ 1.1
+ Compare the sequence or features of two or more genomes, for example, to find matching regions.
+ Genomic region matching
+
+
+ Genome comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Generate an index of a genome sequence.
+ Burrows-Wheeler
+ Genome indexing (Burrows-Wheeler)
+ Genome indexing (suffix arrays)
+ Suffix arrays
+
+
+ Many sequence alignment tasks involving many or very large sequences rely on a precomputed index of the sequence to accelerate the alignment. The Burrows-Wheeler Transform (BWT) is a permutation of the genome based on a suffix array algorithm. A suffix array consists of the lexicographically sorted list of suffixes of a genome.
+ Genome indexing
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Generate an index of a genome sequence using the Burrows-Wheeler algorithm.
+
+
+ The Burrows-Wheeler Transform (BWT) is a permutation of the genome based on a suffix array algorithm.
+ Genome indexing (Burrows-Wheeler)
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Generate an index of a genome sequence using a suffix arrays algorithm.
+
+
+ A suffix array consists of the lexicographically sorted list of suffixes of a genome.
+ Genome indexing (suffix arrays)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse one or more spectra from mass spectrometry (or other) experiments.
+ Mass spectrum analysis
+ Spectrum analysis
+
+
+ Spectral analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Identify peaks in a spectrum from a mass spectrometry, NMR, or some other spectrum-generating experiment.
+ Peak assignment
+ Peak finding
+
+
+ Peak detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Link together a non-contiguous series of genomic sequences into a scaffold, consisting of sequences separated by gaps of known length. The sequences that are linked are typically contigs; contiguous sequences corresponding to read overlaps.
+ Scaffold construction
+ Scaffold generation
+
+
+ Scaffold may be positioned along a chromosome physical map to create a "golden path".
+ Scaffolding
+
+
+
+
+
+
+
+
+ 1.1
+ Fill the gaps in a sequence assembly (scaffold) by merging in additional sequences.
+
+
+ Different techniques are used to generate gap sequences to connect contigs, depending on the size of the gap. For small (5-20kb) gaps, PCR amplification and sequencing is used. For large (>20kb) gaps, fragments are cloned (e.g. in BAC (Bacterial artificial chromosomes) vectors) and then sequenced.
+ Scaffold gap completion
+
+
+
+
+
+
+
+
+
+ 1.1
+ Raw sequence data quality control.
+ Sequencing QC
+ Sequencing quality assessment
+
+
+ Analyse raw sequence data from a sequencing pipeline and identify (and possibly fix) problems.
+ Sequencing quality control
+
+
+
+
+
+
+
+
+
+ 1.1
+ Pre-process sequence reads to ensure (or improve) quality and reliability.
+ Sequence read pre-processing
+
+
+ For example process paired end reads to trim low quality ends, remove short sequences, identify sequence inserts, detect chimeric reads, or remove low quality sequences including vector, adaptor, low complexity and contaminant sequences. Sequences might come from genomic DNA library, EST libraries, SSH library and so on.
+ Read pre-processing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Estimate the frequencies of different species from analysis of the molecular sequences, typically of DNA recovered from environmental samples.
+
+
+ Species frequency estimation
+
+
+
+
+
+
+
+
+ 1.1
+ Identify putative protein-binding regions in a genome sequence from analysis of Chip-sequencing data or ChIP-on-chip data.
+ Protein binding peak detection
+ Peak-pair calling
+
+
+ Chip-sequencing combines chromatin immunoprecipitation (ChIP) with massively parallel DNA sequencing to generate a set of reads, which are aligned to a genome sequence. The enriched areas contain the binding sites of DNA-associated proteins. For example, a transcription factor binding site. ChIP-on-chip in contrast combines chromatin immunoprecipitation ('ChIP') with microarray ('chip'). "Peak-pair calling" is similar to "Peak calling" in the context of ChIP-exo.
+ Peak calling
+
+
+
+
+
+
+
+
+ 1.1
+ Identify from molecular sequence analysis (typically from analysis of microarray or RNA-seq data) genes whose expression levels are significantly different between two sample groups.
+ Differential expression analysis
+ Differential gene analysis
+ Differential gene expression analysis
+ Differentially expressed gene identification
+
+
+ Differential gene expression analysis is used, for example, to identify which genes are up-regulated (increased expression) or down-regulated (decreased expression) between a group treated with a drug and a control group.
+ Differential gene expression profiling
+
+
+
+
+
+
+
+
+ 1.1
+ 1.21
+
+ Analyse gene expression patterns (typically from DNA microarray datasets) to identify sets of genes that are associated with a specific trait, condition, clinical outcome etc.
+
+
+ Gene set testing
+ true
+
+
+
+
+
+
+
+
+
+ 1.1
+ Classify variants based on their potential effect on genes, especially functional effects on the expressed proteins.
+
+
+ Variants are typically classified by their position (intronic, exonic, etc.) in a gene transcript and (for variants in coding exons) by their effect on the protein sequence (synonymous, non-synonymous, frameshifting, etc.)
+ Variant classification
+
+
+
+
+
+
+
+
+ 1.1
+ Identify biologically interesting variants by prioritizing individual variants, for example, homozygous variants absent in control genomes.
+
+
+ Variant prioritisation can be used for example to produce a list of variants responsible for 'knocking out' genes in specific genomes. Methods include amino acid substitution, aggregative approaches, probabilistic approach, inheritance and unified likelihood-frameworks.
+ Variant prioritisation
+
+
+
+
+
+
+
+
+
+ 1.1
+ Detect, identify and map mutations, such as single nucleotide polymorphisms, short indels and structural variants, in multiple DNA sequences. Typically the alignment and comparison of the fluorescent traces produced by DNA sequencing hardware, to study genomic alterations.
+ Variant mapping
+ Allele calling
+ Exome variant detection
+ Genome variant detection
+ Germ line variant calling
+ Mutation detection
+ Somatic variant calling
+ de novo mutation detection
+
+
+ Methods often utilise a database of aligned reads.
+ Somatic variant calling is the detection of variations established in somatic cells and hence not inherited as a germ line variant.
+ Variant detection
+ Variant calling
+
+
+
+
+
+
+
+
+ 1.1
+ Detect large regions in a genome subject to copy-number variation, or other structural variations in genome(s).
+ Structural variation discovery
+
+
+ Methods might involve analysis of whole-genome array comparative genome hybridisation or single-nucleotide polymorphism arrays, paired-end mapping of sequencing data, or from analysis of short reads from new sequencing technologies.
+ Structural variation detection
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse sequencing data from experiments aiming to selectively sequence the coding regions of the genome.
+ Exome sequence analysis
+
+
+ Exome assembly
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse mapping density (read depth) of (typically) short reads from sequencing platforms, for example, to detect deletions and duplications.
+
+
+ Read depth analysis
+
+
+
+
+
+
+
+
+ 1.1
+ Combine classical quantitative trait loci (QTL) analysis with gene expression profiling, for example, to describe cis- and trans-controlling elements for the expression of phenotype associated genes.
+ Gene expression QTL profiling
+ Gene expression quantitative trait loci profiling
+ eQTL profiling
+
+
+ Gene expression QTL analysis
+
+
+
+
+
+
+
+
+ 1.1
+ Estimate the number of copies of loci of particular gene(s) in DNA sequences typically from gene-expression profiling technology based on microarray hybridisation-based experiments. For example, estimate copy number (or marker dosage) of a dominant marker in samples from polyploid plant cells or tissues, or chromosomal gains and losses in tumors.
+ Transcript copy number estimation
+
+
+ Methods typically implement some statistical model for hypothesis testing, and methods estimate total copy number, i.e. do not distinguish the two inherited chromosomes quantities (specific copy number).
+ Copy number estimation
+
+
+
+
+
+
+
+
+ 1.2
+ Adapter removal
+ Remove forward and/or reverse primers from nucleic acid sequences (typically PCR products).
+
+
+ Primer removal
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ Infer a transcriptome sequence by analysis of short sequence reads.
+
+
+ Transcriptome assembly
+
+
+
+
+
+
+
+
+ 1.2
+ 1.6
+
+
+ Infer a transcriptome sequence without the aid of a reference genome, i.e. by comparing short sequences (reads) to each other.
+
+ Transcriptome assembly (de novo)
+ true
+
+
+
+
+
+
+
+
+ 1.2
+ 1.6
+
+
+ Infer a transcriptome sequence by mapping short reads to a reference genome.
+
+ Transcriptome assembly (mapping)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ Convert one set of sequence coordinates to another, e.g. convert coordinates of one assembly to another, cDNA to genomic, CDS to genomic, protein translation to genomic etc.
+
+
+ Sequence coordinate conversion
+
+
+
+
+
+
+
+
+ 1.3
+ Calculate similarity between 2 or more documents.
+
+
+ Document similarity calculation
+
+
+
+
+
+
+
+
+
+ 1.3
+ Cluster (group) documents on the basis of their calculated similarity.
+
+
+ Document clustering
+
+
+
+
+
+
+
+
+
+ 1.3
+ Recognise named entities, ontology concepts, tags, events, and dictionary terms within documents.
+ Concept mining
+ Entity chunking
+ Entity extraction
+ Entity identification
+ Event extraction
+ NER
+ Named-entity recognition
+
+
+ Named-entity and concept recognition
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ Map data identifiers to one another for example to establish a link between two biological databases for the purposes of data integration.
+ Accession mapping
+ Identifier mapping
+
+
+ The mapping can be achieved by comparing identifier values or some other means, e.g. exact matches to a provided sequence.
+ ID mapping
+
+
+
+
+
+
+
+
+ 1.3
+ Process data in such a way that makes it hard to trace to the person which the data concerns.
+ Data anonymisation
+
+
+ Anonymisation
+
+
+
+
+
+
+
+
+ 1.3
+ (jison)Too fine-grained, the operation (Data retrieval) hasn't changed, just what is retrieved.
+ 1.17
+
+ Search for and retrieve a data identifier of some kind, e.g. a database entry accession.
+
+
+ ID retrieval
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Generate a checksum of a molecular sequence.
+
+
+ Sequence checksum generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Construct a bibliography from the scientific literature.
+ Bibliography construction
+
+
+ Bibliography generation
+
+
+
+
+
+
+
+
+ 1.4
+ Predict the structure of a multi-subunit protein and particularly how the subunits fit together.
+
+
+ Protein quaternary structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Analyse the surface properties of proteins or other macromolecules, including surface accessible pockets, interior inaccessible cavities etc.
+
+
+ Molecular surface analysis
+
+
+
+
+
+
+
+
+ 1.4
+ Compare two or more ontologies, e.g. identify differences.
+
+
+ Ontology comparison
+
+
+
+
+
+
+
+
+ 1.4
+ 1.9
+
+ Compare two or more ontologies, e.g. identify differences.
+
+
+ Ontology comparison
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Recognition of which format the given data is in.
+ Format identification
+ Format inference
+ Format recognition
+
+
+ 'Format recognition' is not a bioinformatics-specific operation, but of great relevance in bioinformatics. Should be removed from EDAM if/when captured satisfactorily in a suitable domain-generic ontology.
+ Format detection
+
+
+
+
+
+ The has_input "Data" (data_0006) may cause visualisation or other problems although ontologically correct. But on the other hand it may be useful to distinguish from nullary operations without inputs.
+
+
+
+
+
+
+
+
+ 1.4
+ Split a file containing multiple data items into many files, each containing one item.
+ File splitting
+
+
+ Splitting
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Construct some data entity.
+ Construction
+
+
+ For non-analytical operations, see the 'Processing' branch.
+ Generation
+
+
+
+
+
+
+
+
+ 1.6
+ (jison)This is a distinction made on basis of input; all features that exist can be mapped to a sequence so this isn't needed.
+ 1.17
+
+
+ Predict, recognise and identify functional or other key sites within nucleic acid sequences, typically by scanning for known motifs, patterns and regular expressions.
+
+
+ Nucleic acid sequence feature detection
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ Deposit some data in a database or some other type of repository or software system.
+ Data deposition
+ Data submission
+ Database deposition
+ Database submission
+ Submission
+
+
+ For non-analytical operations, see the 'Processing' branch.
+ Deposition
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Group together some data entities on the basis of similarities such that entities in the same group (cluster) are more similar to each other than to those in other groups (clusters).
+
+
+ Clustering
+
+
+
+
+
+
+
+
+ 1.6
+ 1.19
+
+ Construct some entity (typically a molecule sequence) from component pieces.
+
+
+ Assembly
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Convert a data set from one form to another.
+
+
+ Conversion
+
+
+
+
+
+
+
+
+ 1.6
+ Standardize or normalize data by some statistical method.
+ Normalisation
+ Standardisation
+
+
+ In the simplest case, normalisation means adjusting values measured on different scales to a common scale (often between 0.0 and 1.0), but can refer to more sophisticated adjustment whereby entire probability distributions of adjusted values are brought into alignment. Standardisation typically refers to an operation whereby a range of values are standardised to measure how many standard deviations a value is from its mean.
+ Standardisation and normalisation
+
+
+
+
+
+
+
+
+ 1.6
+ Combine multiple files or data items into a single file or object.
+
+
+ Aggregation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.6
+ Compare two or more scientific articles.
+
+
+ Article comparison
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Mathematical determination of the value of something, typically a property of a molecule.
+
+
+ Calculation
+
+
+
+
+
+
+
+
+ 1.6
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+
+ Predict a molecular pathway or network.
+
+ Pathway or network prediction
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ 1.12
+
+ The process of assembling many short DNA sequences together such that they represent the original chromosomes from which the DNA originated.
+
+
+ Genome assembly
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ 1.19
+
+ Generate a graph, or other visual representation, of data, showing the relationship between two or more variables.
+
+
+ Plotting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Image processing
+ The analysis of an image (typically a digital image) of some type in order to extract information from it.
+
+
+ Image analysis
+
+
+
+
+
+
+
+
+
+ 1.7
+ Analysis of data from a diffraction experiment.
+
+
+ Diffraction data analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Analysis of cell migration images in order to study cell migration, typically in order to study the processes that play a role in the disease progression.
+
+
+ Cell migration analysis
+
+
+
+
+
+
+
+
+
+ 1.7
+ Processing of diffraction data into a corrected, ordered, and simplified form.
+
+
+ Diffraction data reduction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Measurement of neurites; projections (axons or dendrites) from the cell body of a neuron, from analysis of neuron images.
+
+
+ Neurite measurement
+
+
+
+
+
+
+
+
+ 1.7
+ The evaluation of diffraction intensities and integration of diffraction maxima from a diffraction experiment.
+ Diffraction profile fitting
+ Diffraction summation integration
+
+
+ Diffraction data integration
+
+
+
+
+
+
+
+
+ 1.7
+ Phase a macromolecular crystal structure, for example by using molecular replacement or experimental phasing methods.
+
+
+ Phasing
+
+
+
+
+
+
+
+
+ 1.7
+ A technique used to construct an atomic model of an unknown structure from diffraction data, based upon an atomic model of a known structure, either a related protein or the same protein from a different crystal form.
+
+
+ The technique solves the phase problem, i.e. retrieves information concerning the phases of the structure.
+ Molecular replacement
+
+
+
+
+
+
+
+
+ 1.7
+ A method used to refine a structure by moving the whole molecule or parts of it as a rigid unit, rather than moving individual atoms.
+
+
+ Rigid body refinement usually follows molecular replacement in the assignment of a structure from diffraction data.
+ Rigid body refinement
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ An image processing technique that combines and analyzes multiple images of a particulate sample, in order to produce an image with clearer features that are more easily interpreted.
+
+
+ Single particle analysis is used to improve the information that can be obtained by relatively low resolution techniques, e.g. an image of a protein or virus from transmission electron microscopy (TEM).
+ Single particle analysis
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ true
+ This is two related concepts.
+ Compare (align and classify) multiple particle images from a micrograph in order to produce a representative image of the particle.
+
+
+ A micrograph can include particles in multiple different orientations and/or conformations. Particles are compared and organised into sets based on their similarity. Typically iterations of classification and alignment are performed to optimise the final 3D EM map.
+ Single particle alignment and classification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Clustering of molecular sequences on the basis of their function, typically using information from an ontology of gene function, or some other measure of functional phenotype.
+ Functional sequence clustering
+
+
+ Functional clustering
+
+
+
+
+
+
+
+
+ 1.7
+ Classification (typically of molecular sequences) by assignment to some taxonomic hierarchy.
+ Taxonomy assignment
+ Taxonomic profiling
+
+
+ Taxonomic classification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ The prediction of the degree of pathogenicity of a microorganism from analysis of molecular sequences.
+ Pathogenicity prediction
+
+
+ Virulence prediction
+
+
+
+
+
+
+
+
+
+ 1.7
+ Analyse the correlation patterns among features/molecules across a variety of experiments, samples etc.
+ Co-expression analysis
+ Gene co-expression network analysis
+ Gene expression correlation
+ Gene expression correlation analysis
+
+
+ Expression correlation analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ true
+ Identify a correlation, i.e. a statistical relationship between two random variables or two sets of data.
+
+
+ Correlation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Compute the covariance model for (a family of) RNA secondary structures.
+
+
+ RNA structure covariance model generation
+
+
+
+
+
+
+
+
+ 1.7
+ 1.18
+
+ Predict RNA secondary structure by analysis, e.g. probabilistic analysis, of the shape of RNA folds.
+
+
+ RNA secondary structure prediction (shape-based)
+ true
+
+
+
+
+
+
+
+
+ 1.7
+ 1.18
+
+ Prediction of nucleic-acid folding using sequence alignments as a source of data.
+
+
+ Nucleic acid folding prediction (alignment-based)
+ true
+
+
+
+
+
+
+
+
+ 1.7
+ Count k-mers (substrings of length k) in DNA sequence data.
+
+
+ k-mer counting is used in genome and transcriptome assembly, metagenomic sequencing, and for error correction of sequence reads.
+ k-mer counting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Reconstructing the inner node labels of a phylogenetic tree from its leaves.
+ Phylogenetic tree reconstruction
+ Gene tree reconstruction
+ Species tree reconstruction
+
+
+ Note that this is somewhat different from simply analysing an existing tree or constructing a completely new one.
+ Phylogenetic reconstruction
+
+
+
+
+
+
+
+
+ 1.7
+ Generate some data from a chosen probabilistic model, possibly to evaluate algorithms.
+
+
+ Probabilistic data generation
+
+
+
+
+
+
+
+
+
+ 1.7
+ Generate sequences from some probabilistic model, e.g. a model that simulates evolution.
+
+
+ Probabilistic sequence generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Identify or predict causes for antibiotic resistance from molecular sequence analysis.
+
+
+ Antimicrobial resistance prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ Analysis of a set of objects, such as genes, annotated with given categories, where eventual over-/under-representation of certain categories within the studied set of objects is revealed.
+ Enrichment
+ Over-representation analysis
+ Functional enrichment
+
+
+ Categories from a relevant ontology can be used. The input is typically a set of genes or other biological objects, possibly represented by their identifiers, and the output of the analysis is typically a ranked list of categories, each associated with a statistical metric of over-/under-representation within the studied data.
+ Enrichment analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ Analyse a dataset with respect to concepts from an ontology of chemical structure, leveraging chemical similarity information.
+ Chemical class enrichment
+
+
+ Chemical similarity enrichment
+
+
+
+
+
+
+
+
+ 1.8
+ Plot an incident curve such as a survival curve, death curve, mortality curve.
+
+
+ Incident curve plotting
+
+
+
+
+
+
+
+
+ 1.8
+ Identify and map patterns of genomic variations.
+
+
+ Methods often utilise a database of aligned reads.
+ Variant pattern analysis
+
+
+
+
+
+
+
+
+ 1.8
+ 1.12
+
+ Model some biological system using mathematical techniques including dynamical systems, statistical models, differential equations, and game theoretic models.
+
+
+ Mathematical modelling
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.9
+ Visualise images resulting from various types of microscopy.
+
+
+ Microscope image visualisation
+
+
+
+
+
+
+
+
+ 1.9
+ Annotate an image of some sort, typically with terms from a controlled vocabulary.
+
+
+ Image annotation
+
+
+
+
+
+
+
+
+ 1.9
+ Replace missing data with substituted values, usually by using some statistical or other mathematical approach.
+ Data imputation
+
+
+ Imputation
+
+
+
+
+
+
+
+
+
+ 1.9
+ Visualise, format or render data from an ontology, typically a tree of terms.
+ Ontology browsing
+
+
+ Ontology visualisation
+
+
+
+
+
+
+
+
+ 1.9
+ A method for making numerical assessments about the maximum percent of time that a conformer of a flexible macromolecule can exist and still be compatible with the experimental data.
+
+
+ Maximum occurrence analysis
+
+
+
+
+
+
+
+
+
+ 1.9
+ Compare the models or schemas used by two or more databases, or any other general comparison of databases rather than a detailed comparison of the entries themselves.
+ Data model comparison
+ Schema comparison
+
+
+ Database comparison
+
+
+
+
+
+
+
+
+ 1.9
+ 1.24
+
+
+
+ Simulate the behaviour of a biological pathway or network.
+
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ Network simulation
+ true
+
+
+
+
+
+
+
+
+ 1.9
+ Analyze read counts from RNA-seq experiments.
+
+
+ RNA-seq read count analysis
+
+
+
+
+
+
+
+
+ 1.9
+ Identify and remove redundancy from a set of small molecule structures.
+
+
+ Chemical redundancy removal
+
+
+
+
+
+
+
+
+ 1.9
+ Analyze time series data from an RNA-seq experiment.
+
+
+ RNA-seq time series data analysis
+
+
+
+
+
+
+
+
+ 1.9
+ Simulate gene expression data, e.g. for purposes of benchmarking.
+
+
+ Simulated gene expression data generation
+
+
+
+
+
+
+
+
+ 1.12
+ Identify semantic relations among entities and concepts within a text, using text mining techniques.
+ Relation discovery
+ Relation inference
+ Relationship discovery
+ Relationship extraction
+ Relationship inference
+
+
+ Relation extraction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Re-adjust the output of mass spectrometry experiments with shifted ppm values.
+
+
+ Mass spectra calibration
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Align multiple data sets using information from chromatography and/or peptide identification, from mass spectrometry experiments.
+
+
+ Chromatographic alignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ The removal of isotope peaks in a spectrum, to represent the fragment ion as one data point.
+ Deconvolution
+
+
+ Deisotoping is commonly done to reduce complexity, and done in conjunction with the charge state deconvolution.
+ Deisotoping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Technique for determining the amount of proteins in a sample.
+ Protein quantitation
+
+
+ Protein quantification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Determination of peptide sequence from mass spectrum.
+ Peptide-spectrum-matching
+
+
+ Peptide identification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Calculate the isotope distribution of a given chemical species.
+
+
+ Isotopic distributions calculation
+
+
+
+
+
+
+
+
+ 1.12
+ Prediction of retention time in a mass spectrometry experiment based on compositional and structural properties of the separated species.
+ Retention time calculation
+
+
+ Retention time prediction
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification without the use of chemical tags.
+
+
+ Label-free quantification
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification based on the use of chemical tags.
+
+
+ Labeled quantification
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification by Selected/multiple Reaction Monitoring workflow (XIC quantitation of precursor / fragment mass pair).
+
+
+ MRM/SRM
+
+
+
+
+
+
+
+
+ 1.12
+ Calculate number of identified MS2 spectra as approximation of peptide / protein quantity.
+
+
+ Spectral counting
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using stable isotope labeling by amino acids in cell culture.
+
+
+ SILAC
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using the AB SCIEX iTRAQ isobaric labelling workflow, wherein 2-8 reporter ions are measured in MS2 spectra near 114 m/z.
+
+
+ iTRAQ
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using labeling based on 18O-enriched H2O.
+
+
+ 18O labeling
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using the Thermo Fisher tandem mass tag labelling workflow.
+
+
+ TMT-tag
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using chemical labeling by stable isotope dimethylation.
+
+
+ Stable isotope dimethyl labelling
+
+
+
+
+
+
+
+
+ 1.12
+ Peptide sequence tags are used as a piece of information about a peptide obtained by tandem mass spectrometry.
+
+
+ Tag-based peptide identification
+
+
+
+
+
+
+
+
+
+ 1.12
+ Analytical process that derives a peptide's amino acid sequence from its tandem mass spectrum (MS/MS) without the assistance of a sequence database.
+
+
+ de Novo sequencing
+
+
+
+
+
+
+
+
+ 1.12
+ Identification of post-translational modifications (PTMs) of peptides/proteins in mass spectrum.
+
+
+ PTM identification
+
+
+
+
+
+
+
+
+
+ 1.12
+ Determination of best matches between MS/MS spectrum and a database of protein or nucleic acid sequences.
+
+
+ Peptide database search
+
+
+
+
+
+
+
+
+ 1.12
+ Peptide database search for identification of known and unknown PTMs looking for mass difference mismatches.
+ Modification-tolerant peptide database search
+ Unrestricted peptide database search
+
+
+ Blind peptide database search
+
+
+
+
+
+
+
+
+ 1.12
+ 1.19
+
+
+ Statistical estimation of false discovery rate from score distribution for peptide-spectrum-matches, following a peptide database search.
+
+
+ Validation of peptide-spectrum matches
+ true
+
+
+
+
+
+
+
+
+
+ 1.12
+ Validation of peptide-spectrum matches
+ Statistical estimation of false discovery rate from score distribution for peptide-spectrum-matches, following a peptide database search, and by comparison to search results with a database containing incorrect information.
+
+
+ Target-Decoy
+
+
+
+
+
+
+
+
+ 1.12
+ Analyse data in order to deduce properties of an underlying distribution or population.
+ Empirical Bayes
+
+
+ Statistical inference
+
+
+
+
+
+
+
+
+
+ 1.12
+ A statistical calculation to estimate the relationships among variables.
+ Regression
+
+
+ Regression analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Model a metabolic network. This can include 1) reconstruction to break down metabolic pathways into reactions, enzymes, and other relevant information, and compilation of this into a mathematical model and 2) simulations of metabolism based on the model.
+
+
+ Metabolic network reconstruction
+ Metabolic network simulation
+ Metabolic pathway simulation
+ Metabolic reconstruction
+
+
+ The terms and synonyms here reflect that for practical intents and purposes, "pathway" and "network" can be treated the same.
+ Metabolic network modelling
+
+
+
+
+
+
+
+
+
+ 1.12
+ Predict the effect or function of an individual single nucleotide polymorphism (SNP).
+
+
+ SNP annotation
+
+
+
+
+
+
+
+
+ 1.12
+ Prediction of genes or gene components from first principles, i.e. without reference to existing genes.
+ Gene prediction (ab-initio)
+
+
+ Ab-initio gene prediction
+
+
+
+
+
+
+
+
+
+ 1.12
+ Prediction of genes or gene components by reference to homologous genes.
+ Empirical gene finding
+ Empirical gene prediction
+ Evidence-based gene prediction
+ Gene prediction (homology-based)
+ Similarity-based gene prediction
+ Homology prediction
+ Orthology prediction
+
+
+ Homology-based gene prediction
+
+
+
+
+
+
+
+
+
+ 1.12
+ Construction of a statistical model, or a set of assumptions around some observed data, usually by describing a set of probability distributions which approximate the distribution of data.
+
+
+ Statistical modelling
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Compare two or more molecular surfaces.
+
+
+ Molecular surface comparison
+
+
+
+
+
+
+
+
+ 1.12
+ Annotate one or more sequences with functional information, such as cellular processes or metabolic pathways, by reference to a controlled vocabulary - invariably the Gene Ontology (GO).
+ Sequence functional annotation
+
+
+ Gene functional annotation
+
+
+
+
+
+
+
+
+ 1.12
+ Variant filtering is used to eliminate false positive variants based for example on base calling quality, strand and position information, and mapping info.
+
+
+ Variant filtering
+
+
+
+
+
+
+
+
+ 1.12
+ Identify binding sites in nucleic acid sequences that are statistically significantly differentially bound between sample groups.
+
+
+ Differential binding analysis
+
+
+
+
+
+
+
+
+
+ 1.13
+ Analyze data from RNA-seq experiments.
+
+
+ RNA-Seq analysis
+
+
+
+
+
+
+
+
+ 1.13
+ Visualise, format or render a mass spectrum.
+
+
+ Mass spectrum visualisation
+
+
+
+
+
+
+
+
+ 1.13
+ Filter a set of files or data items according to some property.
+ Sequence filtering
+ rRNA filtering
+
+
+ Filtering
+
+
+
+
+
+
+
+
+ 1.14
+ Identification of the best reference for mapping for a specific dataset from a list of potential references, when performing genetic variation analysis.
+
+
+ Reference identification
+
+
+
+
+
+
+
+
+ 1.14
+ Label-free quantification by integration of ion current (ion counting).
+ Ion current integration
+
+
+ Ion counting
+
+
+
+
+
+
+
+
+ 1.14
+ Chemical tagging free amino groups of intact proteins with stable isotopes.
+ ICPL
+
+
+ Isotope-coded protein label
+
+
+
+
+
+
+
+
+ 1.14
+ Labeling all proteins and (possibly) all amino acids using C-13 or N-15 enriched grown medium or feed.
+ C-13 metabolic labeling
+ N-15 metabolic labeling
+
+
+ This includes N-15 metabolic labeling (labeling all proteins and (possibly) all amino acids using N-15 enriched grown medium or feed) and C-13 metabolic labeling (labeling all proteins and (possibly) all amino acids using C-13 enriched grown medium or feed).
+ Metabolic labeling
+
+
+
+
+
+
+
+
+ 1.15
+ Construction of a single sequence assembly of all reads from different samples, typically as part of a comparative metagenomic analysis.
+ Sequence assembly (cross-assembly)
+
+
+ Cross-assembly
+
+
+
+
+
+
+
+
+ 1.15
+ The comparison of samples from a metagenomics study, for example, by comparison of metagenome shotgun reads or assembled contig sequences, by comparison of functional profiles, or some other method.
+
+
+ Sample comparison
+
+
+
+
+
+
+
+
+
+ 1.15
+ Differential protein analysis
+ The analysis, using proteomics techniques, to identify proteins whose encoding genes are differentially expressed under a given experimental setup.
+ Differential protein expression analysis
+
+
+ Differential protein expression profiling
+
+
+
+
+
+
+
+
+ 1.15
+ 1.17
+
+ The analysis, using any of diverse techniques, to identify genes that are differentially expressed under a given experimental setup.
+
+
+ Differential gene expression analysis
+ true
+
+
+
+
+
+
+
+
+ 1.15
+ Visualise, format or render data arising from an analysis of multiple samples from a metagenomics/community experiment.
+
+
+ Multiple sample visualisation
+
+
+
+
+
+
+
+
+ 1.15
+ The extrapolation of empirical characteristics of individuals or populations, backwards in time, to their common ancestors.
+ Ancestral sequence reconstruction
+ Character mapping
+ Character optimisation
+
+
+ Ancestral reconstruction is often used to recover possible ancestral character states of ancient, extinct organisms.
+ Ancestral reconstruction
+
+
+
+
+
+
+
+
+ 1.16
+ Site localisation of post-translational modifications in peptide or protein mass spectra.
+ PTM scoring
+ Site localisation
+
+
+ PTM localisation
+
+
+
+
+
+
+
+
+ 1.16
+ Operations concerning the handling and use of other tools.
+ Endpoint management
+
+
+ Service management
+
+
+
+
+
+
+
+
+ 1.16
+ An operation supporting the browsing or discovery of other tools and services.
+
+
+ Service discovery
+
+
+
+
+
+
+
+
+ 1.16
+ An operation supporting the aggregation of other services (at least two) into a functional unit, for the automation of some task.
+
+
+ Service composition
+
+
+
+
+
+
+
+
+ 1.16
+ An operation supporting the calling (invocation) of other tools and services.
+
+
+ Service invocation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ A data mining method typically used for studying biological networks based on pairwise correlations between variables.
+ WGCNA
+ Weighted gene co-expression network analysis
+
+
+ Weighted correlation network analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ Identification of protein, for example from one or more peptide identifications by tandem mass spectrometry.
+ Protein inference
+
+
+ Protein identification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ Text annotation is the operation of adding notes, data and metadata, recognised entities and concepts, and their relations to a text (such as a scientific article).
+ Article annotation
+ Literature annotation
+
+
+ Text annotation
+
+
+
+
+
+
+
+
+
+ 1.17
+ A method whereby data on several variants are "collapsed" into a single covariate based on regions such as genes.
+
+
+ Genome-wide association studies (GWAS) analyse a genome-wide set of genetic variants in different individuals to see if any variant is associated with a trait. Traditional association techniques can lack the power to detect the significance of rare variants individually, or measure their compound effect (rare variant burden). "Collapsing methods" were developed to overcome these problems.
+ Collapsing methods
+
+
+
+
+
+
+
+
+ 1.17
+ miRNA analysis
+ The analysis of microRNAs (miRNAs): short, highly conserved small noncoding RNA molecules that occur naturally in plant and animal genomes.
+ miRNA expression profiling
+
+
+ miRNA expression analysis
+
+
+
+
+
+
+
+
+ 1.17
+ Counting and summarising the number of short sequence reads that map to genomic features.
+
+
+ Read summarisation
+
+
+
+
+
+
+
+
+ 1.17
+ A technique whereby molecules with desired properties and function are isolated from libraries of random molecules, through iterative cycles of selection, amplification, and mutagenesis.
+
+
+ In vitro selection
+
+
+
+
+
+
+
+
+ 1.17
+ The calculation of species richness for a number of individual samples, based on plots of the number of species as a function of the number of samples (rarefaction curves).
+ Species richness assessment
+
+
+ Rarefaction
+
+
+
+
+
+
+
+
+
+ 1.17
+ An operation which groups reads or contigs and assigns them to operational taxonomic units.
+ Binning
+ Binning shotgun reads
+
+
+ Binning methods use one or a combination of compositional features or sequence similarity.
+ Read binning
+
+
+
+
+
+
+
+
+
+ 1.17
+ true
+ Counting and measuring experimentally determined observations into quantities.
+ Quantitation
+
+
+ Quantification
+
+
+
+
+
+
+
+
+ 1.17
+ Quantification of data arising from RNA-Seq high-throughput sequencing, typically the quantification of transcript abundances during transcriptome analysis in a gene expression study.
+ RNA-Seq quantitation
+
+
+ RNA-Seq quantification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.17
+ Match experimentally measured mass spectrum to a spectrum in a spectral library or database.
+
+
+ Spectral library search
+
+
+
+
+
+
+
+
+ 1.17
+ Sort a set of files or data items according to some property.
+
+
+ Sorting
+
+
+
+
+
+
+
+
+ 1.17
+ Mass spectra identification of compounds that are produced by living systems. Including polyketides, terpenoids, phenylpropanoids, alkaloids and antibiotics.
+ De novo metabolite identification
+ Fragmentation tree generation
+ Metabolite identification
+
+
+ Natural product identification
+
+
+
+
+
+
+
+
+ 1.19
+ Identify and assess specific genes or regulatory regions of interest that are differentially methylated.
+ Differentially-methylated region identification
+
+
+ DMR identification
+
+
+
+
+
+
+
+
+ 1.21
+
+
+ Genotyping of multiple loci, typically characterizing microbial species isolates using internal fragments of multiple housekeeping genes.
+ MLST
+
+
+ Multilocus sequence typing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.21
+ Calculate theoretical mass spectrometry spectra for given sequences.
+ Spectrum prediction
+
+
+ Spectrum calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ 3D visualization of a molecular trajectory.
+
+
+ Trajectory visualization
+
+
+
+
+
+
+
+
+
+ 1.22
+ Compute Essential Dynamics (ED) on a simulation trajectory: an analysis of molecule dynamics using PCA (Principal Component Analysis) applied to the atomic positional fluctuations.
+ ED
+ PCA
+ Principal modes
+
+
+ Principal Component Analysis (PCA) is a multivariate statistical analysis to obtain collective variables and reduce the dimensionality of the system.
+ Essential dynamics
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Obtain force field parameters (charge, bonds, dihedrals, etc.) from a molecule, to be used in molecular simulations.
+ Ligand parameterization
+ Molecule parameterization
+
+
+ Forcefield parameterisation
+
+
+
+
+
+
+
+
+ 1.22
+ Analyse DNA sequences in order to determine an individual's DNA characteristics, for example in criminal forensics, parentage testing and so on.
+ DNA fingerprinting
+ DNA profiling
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect active sites in proteins; the region of an enzyme which binds a substrate and catalyses a reaction.
+ Active site detection
+
+
+ Active site prediction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect ligand-binding sites in proteins; a region of a protein which reversibly binds a ligand for some biochemical purpose, such as transport or regulation of protein function.
+ Ligand-binding site detection
+ Peptide-protein binding prediction
+
+
+ Ligand-binding site prediction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect metal ion-binding sites in proteins.
+ Metal-binding site detection
+ Protein metal-binding site prediction
+
+
+ Metal-binding site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Model or simulate protein-protein binding using comparative modelling or other techniques.
+ Protein docking
+
+
+ Protein-protein docking
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict DNA-binding proteins.
+ DNA-binding protein detection
+ DNA-protein interaction prediction
+ Protein-DNA interaction prediction
+
+
+ DNA-binding protein prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict RNA-binding proteins.
+ Protein-RNA interaction prediction
+ RNA-binding protein detection
+ RNA-protein interaction prediction
+
+
+ RNA-binding protein prediction
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect RNA-binding sites in protein sequences.
+ Protein-RNA binding site detection
+ Protein-RNA binding site prediction
+ RNA binding site detection
+
+
+ RNA binding site prediction
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect DNA-binding sites in protein sequences.
+ Protein-DNA binding site detection
+ Protein-DNA binding site prediction
+ DNA binding site detection
+
+
+ DNA binding site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Identify or predict intrinsically disordered regions in proteins.
+
+
+ Protein disorder prediction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Extract structured information from unstructured ("free") or semi-structured textual documents.
+ IE
+
+
+ Information extraction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Retrieve resources from information systems matching a specific information need.
+
+
+ Information retrieval
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Genomic analysis
+ Study of genomic feature structure, variation, function and evolution at a genomic scale.
+ Genome analysis
+
+
+
+
+
+
+
+
+ 1.24
+ The determination of cytosine methylation status of specific positions in nucleic acid sequences (usually reads from a bisulfite sequencing experiment).
+
+
+ Methylation calling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The identification of changes in DNA sequence or chromosome structure, usually in the context of diagnostic tests for disease, or to study ancestry or phylogeny.
+ Genetic testing
+
+
+ This can include indirect methods which reveal the results of genetic changes, such as RNA analysis to indicate gene expression, or biochemical analysis to identify expressed proteins.
+ DNA testing
+
+
+
+
+
+
+
+
+
+ 1.24
+ The processing of reads from high-throughput sequencing machines.
+
+
+ Sequence read processing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Render (visualise) a network - typically a biological network of some sort.
+ Network rendering
+ Protein interaction network rendering
+ Protein interaction network visualisation
+ Network visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Render (visualise) a biological pathway.
+ Pathway rendering
+
+
+ Pathway visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Generate, process or analyse a biological network.
+ Biological network analysis
+ Biological network modelling
+ Biological network prediction
+ Network comparison
+ Network modelling
+ Network prediction
+ Network simulation
+ Network topology simulation
+
+
+ Network analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Generate, process or analyse a biological pathway.
+ Biological pathway analysis
+ Biological pathway modelling
+ Biological pathway prediction
+ Functional pathway analysis
+ Pathway comparison
+ Pathway modelling
+ Pathway prediction
+ Pathway simulation
+
+
+ Pathway analysis
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Predict a metabolic pathway.
+
+
+ Metabolic pathway prediction
+
+
+
+
+
+
+
+
+ 1.24
+ Assigning sequence reads to separate groups / files based on their index tag (sample origin).
+ Sequence demultiplexing
+
+
+ NGS sequence runs are often performed with multiple samples pooled together. In such cases, an index tag (or "barcode") - a unique sequence of between 6 and 12bp - is ligated to each sample's genetic material so that the sequence reads from different samples can be identified. The process of demultiplexing (dividing sequence reads into separate files for each index tag/sample) may be performed automatically by the sequencing hardware. Alternatively the reads may be lumped together in one file with barcodes still attached, requiring you to do the splitting using software. In such cases, a "mapping" file is used which indicates which barcodes correspond to which samples.
+ Demultiplexing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A process used in statistics, machine learning, and information theory that reduces the number of random variables by obtaining a set of principal variables.
+ Dimension reduction
+
+
+ Dimensionality reduction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A dimensionality reduction process that selects a subset of relevant features (variables, predictors) for use in model construction.
+ Attribute selection
+ Variable selection
+ Variable subset selection
+
+
+ Feature selection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A dimensionality reduction process which builds (ideally) informative and non-redundant values (features) from an initial set of measured data, to aid subsequent generalization, learning or interpretation.
+ Feature projection
+
+
+ Feature extraction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Virtual screening is used in drug discovery to identify potential drug compounds. It involves searching libraries of small molecules in order to identify those molecules which are most likely to bind to a drug target (typically a protein receptor or enzyme).
+ Ligand-based screening
+ Ligand-based virtual screening
+ Structure-based screening
+ Structured-based virtual screening
+ Virtual ligand screening
+
+
+ Virtual screening is widely used for lead identification, lead optimization, and scaffold hopping during drug design and discovery.
+ Virtual screening
+
+
+
+
+
+
+
+
+ 1.24
+ The application of phylogenetic and other methods to estimate paleogeographical events such as speciation.
+ Biogeographic dating
+ Speciation dating
+ Species tree dating
+ Tree-dating
+
+
+ Tree dating
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The development and use of mathematical models and systems analysis for the description of ecological processes, and applications such as the sustainable management of resources.
+
+
+ Ecological modelling
+
+
+
+
+
+
+
+
+ 1.24
+ Mapping between gene tree nodes and species tree nodes or branches, to analyse and account for possible differences between gene histories and species histories, explaining this in terms of gene-scale events such as duplication, loss, transfer etc.
+ Gene tree / species tree reconciliation
+
+
+ Methods typically test for topological similarity between trees using for example a congruence index.
+ Phylogenetic tree reconciliation
+
+
+
+
+
+
+
+
+ 1.24
+ The detection of genetic selection, or (the end result of) the process by which certain traits become more prevalent in a species than other traits.
+
+
+ Selection detection
+
+
+
+
+
+
+
+
+ 1.25
+ A statistical procedure that uses an orthogonal transformation to convert a set of observations of possibly correlated variables into a set of values of linearly uncorrelated variables called principal components.
+
+
+ Principal component analysis
+
+
+
+
+
+
+
+
+
+ 1.25
+ Identify where sections of the genome are repeated and the number of repeats in the genome varies between individuals.
+ CNV detection
+
+
+ Copy number variation detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify deletion events causing the number of repeats in the genome to vary between individuals.
+
+
+ Deletion detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify duplication events causing the number of repeats in the genome to vary between individuals.
+
+
+ Duplication detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify copy number variations which are complex, e.g. multi-allelic variations that have many structural alleles and have rearranged multiple times in the ancestral genomes.
+
+
+ Complex CNV detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify amplification events causing the number of repeats in the genome to vary between individuals.
+
+
+ Amplification detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ Predict adhesins in protein sequences.
+
+
+ An adhesin is a cell-surface component that facilitates the adherence of a microorganism to a cell or surface. They are important virulence factors during establishment of infection and thus are targeted during vaccine development approaches that seek to block adhesin function and prevent adherence to host cells.
+ Adhesin prediction
+
+
+
+
+
+
+
+
+ 1.25
+ Design new protein molecules with specific structural or functional properties.
+ Protein redesign
+ Rational protein design
+ de novo protein design
+
+
+ Protein design
+
+
+
+
+
+
+
+
+
+ 1.25
+ The design of small molecules with specific biological activity, such as inhibitors or modulators for proteins that are of therapeutic interest. This can involve the modification of individual atoms, the addition or removal of molecular fragments, and the use of reaction-based design to explore tractable synthesis options for the small molecule.
+ Drug design
+ Ligand-based drug design
+ Structure-based drug design
+ Structure-based small molecule design
+ Small molecule design can involve assessment of target druggability and flexibility, molecular docking, in silico fragment screening, molecular dynamics, and homology modeling.
+ There are two broad categories of small molecule design techniques when applied to the design of drugs: ligand-based drug design (e.g. ligand similarity) and structure-based drug design (ligand docking) methods. Ligand similarity methods exploit structural similarities to known active ligands, whereas ligand docking methods use the 3D structure of a target protein to predict the binding modes and affinities of ligands to it.
+ Small molecule design
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The estimation of the power of a test; that is the probability of correctly rejecting the null hypothesis when it is false.
+ Estimation of statistical power
+ Power analysis
+
+
+ Power test
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The prediction of DNA modifications (e.g. N4-methylcytosine and N6-Methyladenine) using, for example, statistical models.
+
+
+ DNA modification prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The analysis and simulation of disease transmission using, for example, statistical methods such as the SIR-model.
+
+
+ Disease transmission analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The correction of p-values from multiple statistical tests to correct for false positives.
+ False discovery rate estimation
+ FDR estimation
+
+
+ Multiple testing correction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A category denoting a rather broad domain or field of interest, of study, application, work, data, or technology. Topics have no clearly defined borders between each other.
+ sumo:FieldOfStudy
+
+
+ Topic
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The processing and analysis of nucleic acid sequence, structural and other data.
+ Nucleic acid bioinformatics
+ Nucleic acid informatics
+ Nucleic_acids
+ Nucleic acid physicochemistry
+ Nucleic acid properties
+
+
+ Nucleic acids
+
+ http://purl.bioontology.org/ontology/MSH/D017422
+ http://purl.bioontology.org/ontology/MSH/D017423
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Archival, processing and analysis of protein data, typically molecular sequence and structural data.
+ Protein bioinformatics
+ Protein informatics
+ Proteins
+ Protein databases
+
+
+ Proteins
+
+ http://purl.bioontology.org/ontology/MSH/D020539
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The structures of reactants or products of metabolism, for example small molecules including vitamins, polyols, nucleotides and amino acids.
+
+
+ Metabolites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The archival, processing and analysis of molecular sequences (monomer composition of polymers) including molecular sequence data resources, sequence sites, alignments, motifs and profiles.
+ Sequence_analysis
+ Biological sequences
+ Sequence databases
+
+
+
+ Sequence analysis
+
+ http://purl.bioontology.org/ontology/MSH/D017421
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The curation, processing, analysis and prediction of data about the structure of biological molecules, typically proteins and nucleic acids and other macromolecules.
+ Biomolecular structure
+ Structural bioinformatics
+ Structure_analysis
+ Computational structural biology
+ Molecular structure
+ Structure data resources
+ Structure databases
+ Structures
+
+
+
+ This includes related concepts such as structural properties, alignments and structural motifs.
+ Structure analysis
+
+ http://purl.bioontology.org/ontology/MSH/D015394
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The prediction of molecular structure, including the prediction, modelling, recognition or design of protein secondary or tertiary structure or other structural features, and the folding of nucleic acid molecules and the prediction or design of nucleic acid (typically RNA) sequences with specific conformations.
+ Structure_prediction
+ DNA structure prediction
+ Nucleic acid design
+ Nucleic acid folding
+ Nucleic acid structure prediction
+ Protein fold recognition
+ Protein structure prediction
+ RNA structure prediction
+
+
+ This includes the recognition (prediction and assignment) of known protein structural domains or folds in protein sequence(s), for example by threading, or the alignment of molecular sequences to structures, structural (3D) profiles or templates (representing a structure or structure alignment).
+ Structure prediction
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ The alignment (equivalence between sites) of molecular sequences, structures or profiles (representing a sequence or structure alignment).
+
+ Alignment
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of evolutionary relationships amongst organisms.
+ Phylogeny
+ Phylogenetic clocks
+ Phylogenetic dating
+ Phylogenetic simulation
+ Phylogenetic stratigraphy
+ Phylogeny reconstruction
+
+
+
+ This includes diverse phylogenetic methods, including phylogenetic tree construction, typically from molecular sequence or morphological data, methods that simulate DNA sequence evolution, a phylogenetic tree or the underlying data, or which estimate or use molecular clock and stratigraphic (age) data, methods for studying gene evolution etc.
+ Phylogeny
+
+ http://purl.bioontology.org/ontology/MSH/D010802
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of gene or protein functions and their interactions in totality in a given organism, tissue, cell etc.
+ Functional_genomics
+
+
+
+ Functional genomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The conceptualisation, categorisation and nomenclature (naming) of entities or phenomena within biology or bioinformatics. This includes formal ontologies, controlled vocabularies, structured glossary, symbols and terminology or other related resource.
+ Ontology_and_terminology
+ Applied ontology
+ Ontologies
+ Ontology
+ Ontology relations
+ Terminology
+ Upper ontology
+
+
+
+ Ontology and terminology
+
+ http://purl.bioontology.org/ontology/MSH/D002965
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The search and query of data sources (typically databases or ontologies) in order to retrieve entries or other information.
+
+
+
+ Information retrieval
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.6 Bioinformatics
+ The archival, curation, processing and analysis of complex biological data.
+ Bioinformatics
+
+
+
+ This includes data processing in general, including basic handling of files and databases, datatypes, workflows and annotation.
+ Bioinformatics
+
+ http://purl.bioontology.org/ontology/MSH/D016247
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Computer graphics
+ VT 1.2.5 Computer graphics
+ Rendering (drawing on a computer screen) or visualisation of molecular sequences, structures or other biomolecular data.
+ Data rendering
+ Data_visualisation
+
+
+ Data visualisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The study of the thermodynamic properties of a nucleic acid.
+
+ Nucleic acid thermodynamics
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The archival, curation, processing and analysis of nucleic acid structural information, such as whole structures, structural features and alignments, and associated annotation.
+ Nucleic acid structure
+ Nucleic_acid_structure_analysis
+ DNA melting
+ DNA structure
+ Nucleic acid denaturation
+ Nucleic acid thermodynamics
+ RNA alignment
+ RNA structure
+ RNA structure alignment
+
+
+ Includes secondary and tertiary nucleic acid structural data, nucleic acid thermodynamic, thermal and conformational properties including DNA or DNA/RNA denaturation (melting) etc.
+ Nucleic acid structure analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ RNA sequences and structures.
+ RNA
+ Small RNA
+
+
+ RNA
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Topic for the study of restriction enzymes, their cleavage sites and the restriction of nucleic acids.
+
+ Nucleic acid restriction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The mapping of complete (typically nucleotide) sequences. Mapping (in the sense of short read alignment, or more generally, just alignment) has application in RNA-Seq analysis (mapping of transcriptomics reads), variant discovery (e.g. mapping of exome capture), and re-sequencing (mapping of WGS reads).
+ Mapping
+ Genetic linkage
+ Linkage
+ Linkage mapping
+ Synteny
+
+
+ This includes resources that aim to identify, map or analyse genetic markers in DNA sequences, for example to produce a genetic (linkage) map of a chromosome or genome or to analyse genetic linkage and synteny. It also includes resources for physical (sequence) maps of a DNA sequence showing the physical distance (base pairs) between features or landmarks such as restriction sites, cloned DNA fragments, genes and other genetic markers. It also covers for example the alignment of sequences of (typically millions of) short reads to a reference genome.
+ Mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The study of codon usage in nucleotide sequence(s), genetic codes and so on.
+
+ Genetic codes and codon usage
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The translation of mRNA into protein and subsequent protein processing in the cell.
+ Protein_expression
+ Translation
+
+
+
+ Protein expression
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Methods that aim to identify, predict, model or analyse genes or gene structure in DNA sequences.
+
+ This includes the study of promoters, coding regions, splice sites, etc. Methods for gene prediction might be ab initio, based on phylogenetic comparisons, use motifs, sequence features, support vector machine, alignment etc.
+ Gene finding
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The transcription of DNA into mRNA.
+
+ Transcription
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Promoters in DNA sequences (region of DNA that facilitates the transcription of a particular gene by binding RNA polymerase and transcription factor proteins).
+
+ Promoters
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ The folding (in 3D space) of nucleic acid molecules.
+
+
+ Nucleic acid folding
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Gene structure, regions which make an RNA product and features such as promoters, coding regions, gene fusion, splice sites etc.
+ Gene features
+ Gene_structure
+ Fusion genes
+
+
+ This includes the study of promoters, coding regions etc.
+ This includes operons (operators, promoters and genes) from a bacterial genome. For example the operon leader and trailer gene, gene composition of the operon and associated information.
+ Gene structure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein and peptide identification, especially in the study of whole proteomes of organisms.
+ Proteomics
+ Bottom-up proteomics
+ Discovery proteomics
+ MS-based targeted proteomics
+ MS-based untargeted proteomics
+ Metaproteomics
+ Peptide identification
+ Protein and peptide identification
+ Quantitative proteomics
+ Targeted proteomics
+ Top-down proteomics
+
+
+
+ Includes metaproteomics: proteomics analysis of an environmental sample.
+ Proteomics includes any methods (especially high-throughput) that separate, characterize and identify expressed proteins such as mass spectrometry, two-dimensional gel electrophoresis and protein microarrays, as well as in-silico methods that perform proteolytic or mass calculations on a protein sequence and other analyses of protein production data, for example in different cells or tissues.
+ Proteomics
+
+ http://purl.bioontology.org/ontology/MSH/D040901
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The elucidation of the three dimensional structure for all (available) proteins in a given organism.
+ Structural_genomics
+
+
+
+ Structural genomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of the physical and biochemical properties of peptides and proteins, for example the hydrophobic, hydrophilic and charge properties of a protein.
+ Protein physicochemistry
+ Protein_properties
+ Protein hydropathy
+
+
+ Protein properties
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein-protein, protein-DNA/RNA and protein-ligand interactions, including analysis of known interactions and prediction of putative interactions.
+ Protein_interactions
+ Protein interaction map
+ Protein interaction networks
+ Protein interactome
+ Protein-DNA interaction
+ Protein-DNA interactions
+ Protein-RNA interaction
+ Protein-RNA interactions
+ Protein-ligand interactions
+ Protein-nucleic acid interactions
+ Protein-protein interactions
+
+
+ This includes experimental (e.g. yeast two-hybrid) and computational analysis techniques.
+ Protein interactions
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein stability, folding (in 3D space) and protein sequence-structure-function relationships. This includes for example study of inter-atomic or inter-residue interactions in protein (3D) structures, the effect of mutation, and the design of proteins with specific properties, typically by designing changes (via site-directed mutagenesis) to an existing protein.
+ Protein_folding_stability_and_design
+ Protein design
+ Protein folding
+ Protein residue interactions
+ Protein stability
+ Rational protein design
+
+
+ Protein folding, stability and design
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Two-dimensional gel electrophoresis image and related data.
+
+ Two-dimensional gel electrophoresis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ An analytical chemistry technique that measures the mass-to-charge ratio and abundance of ions in the gas phase.
+
+
+ Mass spectrometry
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Protein microarray data.
+
+ Protein microarrays
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The study of the hydrophobic, hydrophilic and charge properties of a protein.
+
+ Protein hydropathy
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The study of how proteins are transported within and without the cell, including signal peptides, protein subcellular localisation and export.
+ Protein_targeting_and_localisation
+ Protein localisation
+ Protein sorting
+ Protein targeting
+
+
+ Protein targeting and localisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Enzyme or chemical cleavage sites and proteolytic or mass calculations on a protein sequence.
+
+ Protein cleavage sites and proteolysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ The comparison of two or more protein structures.
+
+
+ Use this concept for methods that are exclusively for protein structure.
+ Protein structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The processing and analysis of inter-atomic or inter-residue interactions in protein (3D) structures.
+
+ Protein residue interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein-protein interactions, individual interactions and networks, protein complexes, protein functional coupling etc.
+
+ Protein-protein interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein-ligand (small molecule) interactions.
+
+ Protein-ligand interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein-DNA/RNA interactions.
+
+ Protein-nucleic acid interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The design of proteins with specific properties, typically by designing changes (via site-directed mutagenesis) to an existing protein.
+
+ Protein design
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ G-protein coupled receptors (GPCRs).
+
+ G protein-coupled receptors (GPCR)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Carbohydrates, typically including structural information.
+ Carbohydrates
+
+
+ Carbohydrates
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Lipids and their structures.
+ Lipidomics
+ Lipids
+
+
+ Lipids
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Small molecules of biological significance, typically archival, curation, processing and analysis of structural information.
+ Small_molecules
+ Amino acids
+ Chemical structures
+ Drug structures
+ Drug targets
+ Drugs and target structures
+ Metabolite structures
+ Peptides
+ Peptides and amino acids
+ Target structures
+ Targets
+ Toxins
+ Toxins and targets
+ CHEBI:23367
+
+
+ Small molecules include organic molecules, metal-organic compounds, small polypeptides, small polysaccharides and oligonucleotides. Structural data is usually included.
+ This concept excludes macromolecules such as proteins and nucleic acids.
+ This includes the structures of drugs, drug target, their interactions and binding affinities. Also the structures of reactants or products of metabolism, for example small molecules including vitamins, polyols, nucleotides and amino acids. Also the physicochemical, biochemical or structural properties of amino acids or peptides. Also structural and associated data for toxic chemical substances.
+ Small molecules
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Edit, convert or otherwise change a molecular sequence, either randomly or specifically.
+
+ Sequence editing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The archival, processing and analysis of the basic character composition of molecular sequences, for example character or word frequency, ambiguity, complexity, particularly regions of low complexity, and repeats or the repetitive nature of molecular sequences.
+ Sequence_composition_complexity_and_repeats
+ Low complexity sequences
+ Nucleic acid repeats
+ Protein repeats
+ Protein sequence repeats
+ Repeat sequences
+ Sequence complexity
+ Sequence composition
+ Sequence repeats
+
+
+ This includes repetitive elements within a nucleic acid sequence, e.g. long terminal repeats (LTRs); sequences (typically retroviral) directly repeated at both ends of a sequence and other types of repeating unit.
+ This includes short repetitive subsequences (repeat sequences) in a protein sequence.
+ Sequence composition, complexity and repeats
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Conserved patterns (motifs) in molecular sequences, that (typically) describe functional or other key sites.
+
+ Sequence motifs
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The comparison of two or more molecular sequences, for example sequence alignment and clustering.
+
+
+ The comparison might be on the basis of sequence, physico-chemical or some other properties of the sequences.
+ Sequence comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The archival, detection, prediction and analysis of positional features such as functional and other key sites, in molecular sequences and the conserved patterns (motifs, profiles etc.) that may be used to describe them.
+ Sequence_sites_features_and_motifs
+ Functional sites
+ HMMs
+ Sequence features
+ Sequence motifs
+ Sequence profiles
+ Sequence sites
+
+
+ Sequence sites, features and motifs
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Search and retrieve molecular sequences that are similar to a sequence-based query (typically a simple sequence).
+
+ The query is a sequence-based entity such as another sequence, a motif or profile.
+ Sequence database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The comparison and grouping together of molecular sequences on the basis of their similarities.
+
+
+ This includes systems that generate, process and analyse sequence clusters.
+ Sequence clustering
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Structural features or common 3D motifs within protein structures, including the surface of a protein structure, such as biological interfaces with other molecules.
+ Protein 3D motifs
+ Protein_structural_motifs_and_surfaces
+ Protein structural features
+ Protein structural motifs
+ Protein surfaces
+ Structural motifs
+
+
+ This includes conformation of conserved substructures, conserved geometry (spatial arrangement) of secondary structure or protein backbone, solvent-exposed surfaces, internal cavities, the analysis of shape, hydropathy, electrostatic patches, role and functions etc.
+ Protein structural motifs and surfaces
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The processing, analysis or use of some type of structural (3D) profile or template; a computational entity (typically a numerical matrix) that is derived from and represents a structure or structure alignment.
+
+ Structural (3D) profiles
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The prediction, modelling, recognition or design of protein secondary or tertiary structure or other structural features.
+
+
+ Protein structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The folding of nucleic acid molecules and the prediction or design of nucleic acid (typically RNA) sequences with specific conformations.
+
+
+ Nucleic acid structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The prediction of three-dimensional structure of a (typically protein) sequence from first principles, using a physics-based or empirical scoring function and without using explicit structural templates.
+
+
+ Ab initio structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ The modelling of the three-dimensional structure of a protein using known sequence and structural data.
+
+ Homology modelling
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Molecular flexibility
+ Molecular motions
+ The study and simulation of molecular (typically protein) conformation using a computational model of physical forces and computer simulation.
+ Molecular_dynamics
+ Protein dynamics
+
+
+ This includes methods such as Molecular Dynamics, Coarse-grained dynamics, metadynamics, Quantum Mechanics, QM/MM, Markov State Models, etc. This includes resources concerning flexibility and motion in protein and other molecular structures.
+ Molecular dynamics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ 1.12
+
+ The modelling of the structure of proteins in complex with small molecules or other macromolecules.
+
+
+ Molecular docking
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The prediction of secondary or supersecondary structure of protein sequences.
+
+
+ Protein secondary structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The prediction of tertiary structure of protein sequences.
+
+
+ Protein tertiary structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The recognition (prediction and assignment) of known protein structural domains or folds in protein sequence(s).
+
+
+ Protein fold recognition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The alignment of molecular sequences or sequence profiles (representing sequence alignments).
+
+
+ This includes the generation of alignments (the identification of equivalent sites), the analysis of alignments, editing, visualisation, alignment databases, the alignment (equivalence between sites) of sequence profiles (representing sequence alignments) and so on.
+ Sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The superimposition of molecular tertiary structures or structural (3D) profiles (representing a structure or structure alignment).
+
+
+ This includes the generation, storage, analysis, rendering etc. of structure alignments.
+ Structure alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The alignment of molecular sequences to structures, structural (3D) profiles or templates (representing a structure or structure alignment).
+
+
+ Threading
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Sequence profiles; typically a positional, numerical matrix representing a sequence alignment.
+
+ Sequence profiles include position-specific scoring matrix (position weight matrix), hidden Markov models etc.
+ Sequence profiles and HMMs
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The reconstruction of a phylogeny (evolutionary relatedness amongst organisms), for example, by building a phylogenetic tree.
+
+ Currently too specific for the topic sub-ontology (but might be unobsoleted).
+ Phylogeny reconstruction
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The integrated study of evolutionary relationships and whole genome data, for example, in the analysis of species trees, horizontal gene transfer and evolutionary reconstruction.
+ Phylogenomics
+
+
+
+ Phylogenomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Simulated polymerase chain reaction (PCR).
+
+ Virtual PCR
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The assembly of fragments of a DNA sequence to reconstruct the original sequence.
+ Sequence_assembly
+ Assembly
+
+
+ Assembly has two broad types, de-novo and re-sequencing. Re-sequencing is a specialised case of assembly, where an assembled (typically de-novo assembled) reference genome is available and is about 95% identical to the re-sequenced genome. All other cases of assembly are 'de-novo'.
+ Sequence assembly
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Stable, naturally occurring mutations in a nucleotide sequence including alleles, naturally occurring mutations such as single base nucleotide substitutions, deletions and insertions, RFLPs and other polymorphisms.
+ DNA variation
+ Genetic_variation
+ Genomic variation
+ Mutation
+ Polymorphism
+ Somatic mutations
+
+
+ Genetic variation
+
+ http://purl.bioontology.org/ontology/MSH/D014644
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Microarrays, for example, to process microarray data or design probes and experiments.
+
+ Microarrays
+ http://purl.bioontology.org/ontology/MSH/D046228
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.7 Pharmacology and pharmacy
+ The study of drugs and their effects or responses in living systems.
+ Pharmacology
+ Computational pharmacology
+ Pharmacoinformatics
+
+
+
+ Pharmacology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ http://edamontology.org/topic_0197
+ The analysis of levels and patterns of synthesis of gene products (proteins and functional RNA) including interpretation in functional terms of gene expression data.
+ Expression
+ Gene_expression
+ Codon usage
+ DNA chips
+ DNA microarrays
+ Gene expression profiling
+ Gene transcription
+ Gene translation
+ Transcription
+
+
+
+ Gene expression levels are analysed by identifying, quantifying or comparing mRNA transcripts, for example using microarrays, RNA-seq, northern blots, gene-indexed expression profiles etc.
+ This includes the study of codon usage in nucleotide sequence(s), genetic codes and so on.
+ Gene expression
+
+ http://purl.bioontology.org/ontology/MSH/D015870
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The regulation of gene expression.
+ Regulatory genomics
+
+
+ Gene regulation
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The influence of genotype on drug response, for example by correlating gene expression or single-nucleotide polymorphisms with drug efficacy or toxicity.
+ Pharmacogenomics
+ Pharmacogenetics
+
+
+
+ Pharmacogenomics
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.4 Medicinal chemistry
+ The design and chemical synthesis of bioactive molecules, for example drugs or potential drug compounds, for medicinal purposes.
+ Drug design
+ Medicinal_chemistry
+
+
+
+ This includes methods that search compound collections, generate or analyse drug 3D conformations, identify drug targets with structural docking etc.
+ Medicinal chemistry
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Information on a specific fish genome including molecular sequences, genes and annotation.
+
+ Fish
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Information on a specific fly genome including molecular sequences, genes and annotation.
+
+ Flies
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Information on a specific mouse or rat genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a group of mice / rats or all mice / rats.
+ Mice or rats
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Information on a specific worm genome including molecular sequences, genes and annotation.
+
+ Worms
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The processing and analysis of the bioinformatics literature and bibliographic data, such as literature search and query.
+
+
+ Literature analysis
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The processing and analysis of natural language, such as scientific literature in English, in order to extract data and information, or to enable human-computer interaction.
+ NLP
+ Natural_language_processing
+ BioNLP
+ Literature mining
+ Text analytics
+ Text data mining
+ Text mining
+
+
+
+ Natural language processing
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deposition and curation of database accessions, including annotation, typically with terms from a controlled vocabulary.
+ Data_submission_annotation_and_curation
+ Data curation
+ Data provenance
+ Database curation
+
+
+
+ Data submission, annotation, and curation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The management and manipulation of digital documents, including database records, files and reports.
+
+
+ Document, record and content management
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation of a molecular sequence.
+
+ Sequence annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+
+ Annotation of a genome.
+
+ Genome annotation
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Spectroscopy
+ An analytical technique that exploits the magnetic properties of certain atomic nuclei to provide information on the structure, dynamics, reaction state and chemical environment of molecules.
+ NMR spectroscopy
+ Nuclear magnetic resonance spectroscopy
+ NMR
+ HOESY
+ Heteronuclear Overhauser Effect Spectroscopy
+ NOESY
+ Nuclear Overhauser Effect Spectroscopy
+ ROESY
+ Rotational Frame Nuclear Overhauser Effect Spectroscopy
+
+
+
+ NMR
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The classification of molecular sequences based on some measure of their similarity.
+
+
+ Methods including sequence motifs, profile and other diagnostic elements which (typically) represent conserved patterns (of residues or properties) in molecular sequences.
+ Sequence classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ primarily the classification of proteins (from sequence or structural data) into clusters, groups, families etc.
+
+ Protein classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Sequence motifs, or sequence profiles derived from an alignment of molecular sequences of a particular type.
+
+ This includes comparison, discovery, recognition etc. of sequence motifs.
+ Sequence motif or profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein chemical modifications, e.g. post-translational modifications.
+ PTMs
+ Post-translational modifications
+ Protein post-translational modification
+ Protein_modifications
+ Post-translation modifications
+ Protein chemical modifications
+ Protein post-translational modifications
+ GO:0006464
+ MOD:00000
+
+
+ EDAM does not describe all possible protein modifications. For fine-grained annotation of protein modification use the Gene Ontology (children of concept GO:0006464) and/or the Protein Modifications ontology (children of concept MOD:00000)
+ Protein modifications
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ http://edamontology.org/topic_3076
+ Molecular interactions, biological pathways, networks and other models.
+ Molecular_interactions_pathways_and_networks
+ Biological models
+ Biological networks
+ Biological pathways
+ Cellular process pathways
+ Disease pathways
+ Environmental information processing pathways
+ Gene regulatory networks
+ Genetic information processing pathways
+ Interactions
+ Interactome
+ Metabolic pathways
+ Molecular interactions
+ Networks
+ Pathways
+ Signal transduction pathways
+ Signaling pathways
+
+
+
+ Molecular interactions, pathways and networks
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.3 Information sciences
+ VT 1.3.3 Information retrieval
+ VT 1.3.4 Information management
+ VT 1.3.5 Knowledge management
+ VT 1.3.99 Other
+ The study and practice of information processing and use of computer information systems.
+ Information management
+ Information science
+ Knowledge management
+ Informatics
+
+
+ Informatics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ Data resources for the biological or biomedical literature, either a primary source of literature or some derivative.
+
+
+ Literature data resources
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Laboratory management and resources, for example, catalogues of biological resources for use in the lab including cell lines, viruses, plasmids, phages, DNA probes and primers and so on.
+ Laboratory_Information_management
+ Laboratory resources
+
+
+
+ Laboratory information management
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ General cell culture or data on specific cell lines.
+
+ Cell and tissue culture
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.15 Ecology
+ The ecological and environmental sciences and especially the application of information technology (ecoinformatics).
+ Ecology
+ Computational ecology
+ Ecoinformatics
+ Ecological informatics
+ Ecosystem science
+
+
+
+ Ecology
+
+ http://purl.bioontology.org/ontology/MSH/D004777
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Electron diffraction experiment
+ The study of matter by studying the interference pattern from firing electrons at a sample, to analyse structures at resolutions higher than can be achieved using light.
+ Electron_microscopy
+ Electron crystallography
+ SEM
+ Scanning electron microscopy
+ Single particle electron microscopy
+ TEM
+ Transmission electron microscopy
+
+
+
+ Electron microscopy
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The cell cycle including key genes and proteins.
+
+ Cell cycle
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The physicochemical, biochemical or structural properties of amino acids or peptides.
+
+
+ Peptides and amino acids
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ A specific organelle, or organelles in general, typically the genes and proteins (or genome and proteome).
+
+ Organelles
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Ribosomes, typically of ribosome-related genes and proteins.
+
+ Ribosomes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A database about scents.
+
+ Scents
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The structures of drugs, drug target, their interactions and binding affinities.
+
+
+ Drugs and target structures
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A specific organism, or group of organisms, used to study a particular aspect of biology.
+ Organisms
+ Model_organisms
+
+
+
+ This may include information on the genome (including molecular sequences and map, genes and annotation), proteome, as well as more general information about an organism.
+ Model organisms
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Whole genomes of one or more organisms, or genomes in general, such as meta-information on genomes, genome projects, gene names etc.
+ Genomics
+ Exomes
+ Genome annotation
+ Genomes
+ Personal genomics
+ Synthetic genomics
+ Viral genomics
+ Whole genomes
+
+
+
+ Genomics
+
+ http://purl.bioontology.org/ontology/MSH/D023281
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Particular gene(s), gene family or other gene group or system and their encoded proteins. Primarily the classification of proteins (from sequence or structural data) into clusters, groups, families etc., curation of a particular protein or protein family, or any other proteins that have been classified as members of a common group.
+ Genes, gene family or system
+ Gene_and protein_families
+ Gene families
+ Gene family
+ Gene system
+ Protein families
+ Protein sequence classification
+
+
+
+ A protein families database might include the classifier (e.g. a sequence profile) used to build the classification.
+ Gene and protein families
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Study of chromosomes.
+
+
+ Chromosomes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of genetic constitution of a living entity, such as an individual, an organism, a cell and so on, typically with respect to particular observable phenotypic traits, or resources concerning such traits, which might be an aspect of biochemistry, physiology, morphology, anatomy, development and so on.
+ Genotype and phenotype resources
+ Genotype-phenotype
+ Genotype-phenotype analysis
+ Genotype_and_phenotype
+ Genotype
+ Genotyping
+ Phenotype
+ Phenotyping
+
+
+
+ Genotype and phenotype
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Gene expression e.g. microarray data, northern blots, gene-indexed expression profiles etc.
+
+ Gene expression and microarray
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Molecular probes (e.g. a peptide probe or DNA microarray probe) or PCR primers and hybridisation oligos in a nucleic acid sequence.
+ Probes_and_primers
+ Primer quality
+ Primers
+ Probes
+
+
+ This includes the design of primers for PCR and DNA amplification or the design of molecular probes.
+ Probes and primers
+ http://purl.bioontology.org/ontology/MSH/D015335
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.6 Pathology
+ Diseases, including diseases in general and the genes, gene variations and proteins involved in one or more specific diseases.
+ Disease
+ Pathology
+
+
+
+ Pathology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ A particular protein, protein family or other group of proteins.
+
+ Specific protein resources
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.25 Taxonomy
+ Organism classification, identification and naming.
+ Taxonomy
+
+
+ Taxonomy
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Archival, processing and analysis of protein sequences and sequence-based entities such as alignments, motifs and profiles.
+
+
+ Protein sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ The archival, processing and analysis of nucleotide sequences and sequence-based entities such as alignments, motifs and profiles.
+
+
+ Nucleic acid sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The repetitive nature of molecular sequences.
+
+ Repeat sequences
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The (character) complexity of molecular sequences, particularly regions of low complexity.
+
+ Low complexity sequences
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A specific proteome including protein sequences and annotation.
+
+ Proteome
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA sequences and structure, including processes such as methylation and replication.
+ DNA analysis
+ DNA
+ Ancient DNA
+ Chromosomes
+
+
+ The DNA sequences might be coding or non-coding sequences.
+ DNA
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Protein-coding regions including coding sequences (CDS), exons, translation initiation sites and open reading frames.
+
+
+ Coding RNA
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Non-coding or functional RNA sequences, including regulatory RNA sequences, ribosomal RNA (rRNA) and transfer RNA (tRNA).
+ Functional_regulatory_and_non-coding_RNA
+ Functional RNA
+ Long ncRNA
+ Long non-coding RNA
+ Non-coding RNA
+ Regulatory RNA
+ Small and long non-coding RNAs
+ Small interfering RNA
+ Small ncRNA
+ Small non-coding RNA
+ Small nuclear RNA
+ Small nucleolar RNA
+ lncRNA
+ miRNA
+ microRNA
+ ncRNA
+ piRNA
+ piwi-interacting RNA
+ siRNA
+ snRNA
+ snoRNA
+
+
+ Non-coding RNA includes piwi-interacting RNA (piRNA), small nuclear RNA (snRNA) and small nucleolar RNA (snoRNA). Regulatory RNA includes microRNA (miRNA) - short single stranded RNA molecules that regulate gene expression, and small interfering RNA (siRNA).
+ Functional, regulatory and non-coding RNA
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ One or more ribosomal RNA (rRNA) sequences.
+
+ rRNA
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ One or more transfer RNA (tRNA) sequences.
+
+ tRNA
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Protein secondary structure or secondary structure alignments.
+
+
+ This includes assignment, analysis, comparison, prediction, rendering etc. of secondary structure data.
+ Protein secondary structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ RNA secondary or tertiary structure and alignments.
+
+ RNA structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Protein tertiary structures.
+
+
+ Protein tertiary structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Classification of nucleic acid sequences and structures.
+
+ Nucleic acid classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.14
+
+ Primarily the classification of proteins (from sequence or structural data) into clusters, groups, families etc., curation of a particular protein or protein family, or any other proteins that have been classified as members of a common group.
+
+
+ Protein families
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein tertiary structural domains and folds in a protein or polypeptide chain.
+ Protein_folds_and_structural_domains
+ Intramembrane regions
+ Protein domains
+ Protein folds
+ Protein membrane regions
+ Protein structural domains
+ Protein topological domains
+ Protein transmembrane regions
+ Transmembrane regions
+
+
+ This includes topological domains such as cytoplasmic regions in a protein.
+ This includes trans- or intra-membrane regions of a protein, typically describing physicochemical properties of the secondary structure elements. For example, the location and size of the membrane spanning segments and intervening loop regions, transmembrane region IN/OUT orientation relative to the membrane, plus the following data for each amino acid: A Z-coordinate (the distance to the membrane center), the free energy of membrane insertion (calculated in a sliding window over the sequence) and a reliability score. The z-coordinate implies information about re-entrant helices, interfacial helices, the tilt of a transmembrane helix and loop lengths.
+ Protein folds and structural domains
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ Nucleotide sequence alignments.
+
+
+ Nucleic acid sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein sequence alignments.
+
+ A sequence profile typically represents a sequence alignment.
+ Protein sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+
+ The archival, detection, prediction and analysis of positional features such as functional sites in nucleotide sequences.
+
+ Nucleic acid sites and features
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+
+ The detection, identification and analysis of positional features in proteins, such as functional sites.
+
+ Protein sites and features
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Proteins that bind to DNA and control transcription of DNA to mRNA (transcription factors) and also transcriptional regulatory sites, elements and regions (such as promoters, enhancers, silencers and boundary elements / insulators) in nucleotide sequences.
+ Transcription_factors_and_regulatory_sites
+ -10 signals
+ -35 signals
+ Attenuators
+ CAAT signals
+ CAT box
+ CCAAT box
+ CpG islands
+ Enhancers
+ GC signals
+ Isochores
+ Promoters
+ TATA signals
+ TFBS
+ Terminators
+ Transcription factor binding sites
+ Transcription factors
+ Transcriptional regulatory sites
+
+
+ This includes CpG rich regions (isochores) in a nucleotide sequence.
+ This includes promoters, CAAT signals, TATA signals, -35 signals, -10 signals, GC signals, primer binding sites for initiation of transcription or reverse transcription, enhancer, attenuator, terminators and ribosome binding sites.
+ Transcription factor proteins either promote (as an activator) or block (as a repressor) the binding to DNA of RNA polymerase. Regulatory sites including transcription factor binding site as well as promoters, enhancers, silencers and boundary elements / insulators.
+ Transcription factors and regulatory sites
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+
+ Protein phosphorylation and phosphorylation sites in protein sequences.
+
+ Phosphorylation sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Metabolic pathways.
+
+
+ Metabolic pathways
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Signaling pathways.
+
+
+ Signaling pathways
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein and peptide identification.
+
+ Protein and peptide identification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Biological or biomedical analytical workflows or pipelines.
+ Pipelines
+ Workflows
+ Software integration
+ Tool integration
+ Tool interoperability
+
+
+ Workflows
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Structuring data into basic types and (computational) objects.
+
+ Data types and objects
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Theoretical biology.
+
+ Theoretical biology
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Mitochondria, typically of mitochondrial genes and proteins.
+
+ Mitochondria
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ VT 1.5.10 Botany
+ VT 1.5.22 Plant science
+ Plants, e.g. information on a specific plant genome including molecular sequences, genes and annotation.
+ Botany
+ Plant
+ Plant science
+ Plants
+ Plant_biology
+ Plant anatomy
+ Plant cell biology
+ Plant ecology
+ Plant genetics
+ Plant physiology
+
+
+ The resource may be specific to a plant, a group of plants or all plants.
+ Plant biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ VT 1.5.28
+ Study of viruses, e.g. sequence and structural data, interactions of viral proteins, or a viral genome including molecular sequences, genes and annotation.
+ Virology
+
+
+ Virology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Fungi and molds, e.g. information on a specific fungal genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a fungus, a group of fungi or all fungi.
+ Fungi
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset). Definition is wrong anyway.
+ 1.17
+
+
+ Pathogens, e.g. information on a specific vertebrate genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a pathogen, a group of pathogens or all pathogens.
+ Pathogens
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Arabidopsis-specific data.
+
+ Arabidopsis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Rice-specific data.
+
+ Rice
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Informatics resources that aim to identify, map or analyse genetic markers in DNA sequences, for example to produce a genetic (linkage) map of a chromosome or genome or to analyse genetic linkage and synteny.
+
+ Genetic mapping and linkage
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study (typically comparison) of the sequence, structure or function of multiple genomes.
+ Comparative_genomics
+
+
+
+ Comparative genomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mobile genetic elements, such as transposons, Plasmids, Bacteriophage elements and Group II introns.
+ Mobile_genetic_elements
+ Transposons
+
+
+ Mobile genetic elements
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Human diseases, typically describing the genes, mutations and proteins implicated in disease.
+
+ Human disease
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.3 Immunology
+ The application of information technology to immunology such as immunological processes, immunological genes, proteins and peptide ligands, antigens and so on.
+ Immunology
+
+
+
+ Immunology
+
+ http://purl.bioontology.org/ontology/MSH/D007120
+ http://purl.bioontology.org/ontology/MSH/D007125
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Lipoproteins (protein-lipid assemblies), and proteins or region of a protein that spans or are associated with a membrane.
+ Membrane_and_lipoproteins
+ Lipoproteins
+ Membrane proteins
+ Transmembrane proteins
+
+
+ Membrane and lipoproteins
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Proteins that catalyze chemical reactions, the kinetics of enzyme-catalysed reactions, enzyme nomenclature etc.
+ Enzymology
+ Enzymes
+
+
+ Enzymes
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ PCR primers and hybridisation oligos in a nucleic acid sequence.
+
+
+ Primers
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Regions or sites in a eukaryotic and eukaryotic viral RNA sequence which directs endonuclease cleavage or polyadenylation of an RNA transcript.
+
+
+ PolyA signal or sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ CpG rich regions (isochores) in a nucleotide sequence.
+
+
+ CpG island and isochores
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Restriction enzyme recognition sites (restriction sites) in a nucleic acid sequence.
+
+
+ Restriction sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+
+ Splice sites in a nucleotide sequence or alternative RNA splicing events.
+
+ Splice sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Matrix/scaffold attachment regions (MARs/SARs) in a DNA sequence.
+
+
+ Matrix/scaffold attachment sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Operons (operators, promoters and genes) from a bacterial genome.
+
+
+ Operon
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Whole promoters or promoter elements (transcription start sites, RNA polymerase binding site, transcription factor binding sites, promoter enhancers etc) in a DNA sequence.
+
+
+ Promoters
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.24 Structural biology
+ The molecular structure of biological molecules, particularly macromolecules such as proteins and nucleic acids.
+ Structural_biology
+ Structural assignment
+ Structural determination
+ Structure determination
+
+
+
+ This includes experimental methods for biomolecular structure determination, such as X-ray crystallography, nuclear magnetic resonance (NMR), circular dichroism (CD) spectroscopy, microscopy etc., including the assignment or modelling of molecular structure from such data.
+ Structural biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Trans- or intra-membrane regions of a protein, typically describing physicochemical properties of the secondary structure elements.
+
+
+ Protein membrane regions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The comparison of two or more molecular structures, for example structure alignment and clustering.
+
+
+ This might involve comparison of secondary or tertiary (3D) structural information.
+ Structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of gene and protein function including the prediction of functional properties of a protein.
+ Functional analysis
+ Function_analysis
+ Protein function analysis
+ Protein function prediction
+
+
+
+ Function analysis
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Specific bacteria or archaea, e.g. information on a specific prokaryote genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a prokaryote, a group of prokaryotes or all prokaryotes.
+ Prokaryotes and Archaea
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein data resources.
+
+ Protein databases
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Experimental methods for biomolecular structure determination, such as X-ray crystallography, nuclear magnetic resonance (NMR), circular dichroism (CD) spectroscopy, microscopy etc., including the assignment or modelling of molecular structure from such data.
+
+ Structure determination
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.11 Cell biology
+ Cells, such as key genes and proteins involved in the cell cycle.
+ Cell_biology
+ Cells
+ Cellular processes
+ Protein subcellular localization
+
+
+ Cell biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Topic focused on identifying, grouping, or naming things in a structured way according to some schema based on observable relationships.
+
+ Classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Lipoproteins (protein-lipid assemblies).
+
+ Lipoproteins
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Visualise a phylogeny, for example, render a phylogenetic tree.
+
+ Phylogeny visualisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The application of information technology to chemistry in biological research environment.
+ Chemical informatics
+ Chemoinformatics
+ Cheminformatics
+
+
+
+ Cheminformatics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The holistic modelling and analysis of complex biological systems and the interactions therein.
+ Systems_biology
+ Biological modelling
+ Biological system modelling
+ Systems modelling
+
+
+
+ This includes databases of models and methods to construct or analyse a model.
+ Systems biology
+
+ http://purl.bioontology.org/ontology/MSH/D049490
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The application of statistical methods to biological problems.
+ Statistics_and_probability
+ Bayesian methods
+ Biostatistics
+ Descriptive statistics
+ Gaussian processes
+ Inferential statistics
+ Markov processes
+ Multivariate statistics
+ Probabilistic graphical model
+ Probability
+ Statistics
+
+
+
+ Statistics and probability
+
+
+
+ http://purl.bioontology.org/ontology/MSH/D056808
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Search for and retrieve molecular structures that are similar to a structure-based query (typically another structure or part of a structure).
+
+ The query is a structure-based entity such as another structure, a 3D (structural) motif, 3D profile or template.
+ Structure database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The construction, analysis, evaluation, refinement etc. of models of a molecule's properties or behaviour, including the modelling of the structure of proteins in complex with small molecules or other macromolecules (docking).
+ Molecular_modelling
+ Comparative modelling
+ Docking
+ Homology modeling
+ Homology modelling
+ Molecular docking
+
+
+ Molecular modelling
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.2
+
+
+ The prediction of functional properties of a protein.
+
+ Protein function prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Single nucleotide polymorphisms (SNP) and associated data, for example, the discovery and annotation of SNPs.
+
+
+ SNP
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Predict transmembrane domains and topology in protein sequences.
+
+ Transmembrane protein prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ The comparison of two or more nucleic acid (typically RNA) secondary or tertiary structures.
+
+ Use this concept for methods that are exclusively for nucleic acid structures.
+ Nucleic acid structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Exons in a nucleotide sequence.
+
+
+ Exons
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Transcription of DNA into RNA including the regulation of transcription.
+
+
+ Gene transcription
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA mutation.
+ DNA_mutation
+
+
+ DNA mutation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.2.16 Oncology
+ The study of cancer, for example, genes and proteins implicated in cancer.
+ Cancer biology
+ Oncology
+ Cancer
+ Neoplasm
+ Neoplasms
+
+
+
+ Oncology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Structural and associated data for toxic chemical substances.
+
+
+ Toxins and targets
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Introns in a nucleotide sequence.
+
+
+ Introns
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A topic concerning primarily bioinformatics software tools, typically the broad function or purpose of a tool.
+
+
+ Tool topic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A general area of bioinformatics study, typically the broad scope or category of content of a bioinformatics journal or conference proceeding.
+
+
+ Study topic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Biological nomenclature (naming), symbols and terminology.
+
+ Nomenclature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The genes, gene variations and proteins involved in one or more specific diseases.
+
+ Disease genes and proteins
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ http://edamontology.org/topic_3040
+ Protein secondary or tertiary structural data and/or associated annotation.
+ Protein structure
+ Protein_structure_analysis
+ Protein tertiary structure
+
+
+
+ Protein structure analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The study of human beings in general, including the human genome and proteome.
+ Humans
+ Human_biology
+
+
+ Human biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Informatics resource (typically a database) primarily focused on genes.
+
+ Gene resources
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Yeast, e.g. information on a specific yeast genome including molecular sequences, genes and annotation.
+
+ Yeast
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison) Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Eukaryotes or data concerning eukaryotes, e.g. information on a specific eukaryote genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a eukaryote, a group of eukaryotes or all eukaryotes.
+ Eukaryotes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Invertebrates, e.g. information on a specific invertebrate genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to an invertebrate, a group of invertebrates or all invertebrates.
+ Invertebrates
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Vertebrates, e.g. information on a specific vertebrate genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a vertebrate, a group of vertebrates or all vertebrates.
+ Vertebrates
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Unicellular eukaryotes, e.g. information on a unicellular eukaryote genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a unicellular eukaryote, a group of unicellular eukaryotes or all unicellular eukaryotes.
+ Unicellular eukaryotes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein secondary or tertiary structure alignments.
+
+ Protein structure alignment
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The study of matter and their structure by means of the diffraction of X-rays, typically the diffraction pattern caused by the regularly spaced atoms of a crystalline sample.
+ Crystallography
+ X-ray_diffraction
+ X-ray crystallography
+ X-ray microscopy
+
+
+
+ X-ray diffraction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Conceptualisation, categorisation and naming of entities or phenomena within biology or bioinformatics.
+
+ Ontologies, nomenclature and classification
+ http://purl.bioontology.org/ontology/MSH/D002965
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Immunity-related proteins and their ligands.
+ Immunoproteins_and_antigens
+ Antigens
+ Immunopeptides
+ Immunoproteins
+ Therapeutic antibodies
+
+
+
+ This includes T cell receptors (TR), major histocompatibility complex (MHC), immunoglobulin superfamily (IgSF) / antibodies, major histocompatibility complex superfamily (MhcSF), etc.
+ Immunoproteins and antigens
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Specific molecules, including large molecules built from repeating subunits (macromolecules) and small molecules of biological significance.
+ CHEBI:23367
+
+ Molecules
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.9 Toxicology
+ Toxins and the adverse effects of these chemical substances on living organisms.
+ Toxicology
+ Computational toxicology
+ Toxicoinformatics
+
+
+
+ Toxicology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Parallelised sequencing processes that are capable of sequencing many thousands of sequences simultaneously.
+
+ High-throughput sequencing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Gene regulatory networks.
+
+
+ Gene regulatory networks
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Informatics resources dedicated to one or more specific diseases (not diseases in general).
+
+ Disease (specific)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Variable number of tandem repeat (VNTR) polymorphism in a DNA sequence.
+
+
+ VNTR
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+ Microsatellite polymorphism in a DNA sequence.
+
+
+ Microsatellites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+ Restriction fragment length polymorphisms (RFLP) in a DNA sequence.
+
+
+ RFLP
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ DNA polymorphism.
+ DNA_polymorphism
+ Microsatellites
+ RFLP
+ SNP
+ Single nucleotide polymorphism
+ VNTR
+ Variable number of tandem repeat polymorphism
+ snps
+
+
+ Includes microsatellite polymorphism in a DNA sequence. A microsatellite polymorphism is a very short subsequence that is repeated a variable number of times between individuals. These repeats consist of the nucleotides cytosine and adenosine.
+ Includes restriction fragment length polymorphisms (RFLP) in a DNA sequence. An RFLP is defined by the presence or absence of a specific restriction site of a bacterial restriction enzyme.
+ Includes single nucleotide polymorphisms (SNP) and associated data, for example, the discovery and annotation of SNPs. A SNP is a DNA sequence variation where a single nucleotide differs between members of a species or paired chromosomes in an individual.
+ Includes variable number of tandem repeat (VNTR) polymorphism in a DNA sequence. VNTRs occur in non-coding regions of DNA and consist of a sub-sequence that is repeated a multiple (and varied) number of times.
+ DNA polymorphism
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Topic for the design of nucleic acid sequences with specific conformations.
+
+ Nucleic acid design
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The design of primers for PCR and DNA amplification or the design of molecular probes.
+
+ Primer or probe design
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.2
+
+
+ Molecular secondary or tertiary (3D) structural data resources, typically of proteins and nucleic acids.
+
+ Structure databases
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.2
+
+
+ Nucleic acid (secondary or tertiary) structure, such as whole structures, structural features and associated annotation.
+
+ Nucleic acid structure
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Molecular sequence data resources, including sequence sites, alignments, motifs and profiles.
+
+ Sequence databases
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Nucleotide sequences and associated concepts such as sequence sites, alignments, motifs and profiles.
+
+ Nucleic acid sequences
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+ Protein sequences and associated concepts such as sequence sites, alignments, motifs and profiles.
+
+
+ Protein sequences
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Protein interaction networks.
+
+ Protein interaction networks
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5.4 Biochemistry and molecular biology
+ The molecular basis of biological activity, particularly the macromolecules (e.g. proteins and nucleic acids) that are essential to life.
+ Molecular_biology
+ Biological processes
+
+
+
+ Molecular biology
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Mammals, e.g. information on a specific mammal genome including molecular sequences, genes and annotation.
+
+ Mammals
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5.5 Biodiversity conservation
+ The degree of variation of life forms within a given ecosystem, biome or an entire planet.
+ Biodiversity
+
+
+
+ Biodiversity
+
+ http://purl.bioontology.org/ontology/MSH/D044822
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The comparison, grouping together and classification of macromolecules on the basis of sequence similarity.
+
+ This includes the results of sequence clustering, ortholog identification, assignment to families, annotation etc.
+ Sequence clusters and classification
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The study of genes, genetic variation and heredity in living organisms.
+ Genetics
+ Genes
+ Heredity
+
+
+
+ Genetics
+
+ http://purl.bioontology.org/ontology/MSH/D005823
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The genes and genetic mechanisms such as Mendelian inheritance that underlie continuous phenotypic traits (such as height or weight).
+ Quantitative_genetics
+
+
+ Quantitative genetics
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The distribution of allele frequencies in a population of organisms and its change subject to evolutionary processes including natural selection, genetic drift, mutation and gene flow.
+ Population_genetics
+
+
+
+ Population genetics
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+ Regulatory RNA sequences including microRNA (miRNA) and small interfering RNA (siRNA).
+
+
+ Regulatory RNA
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ The documentation of resources such as tools, services and databases and how to get help.
+
+
+ Documentation and help
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The structural and functional organisation of genes and other genetic elements.
+
+ Genetic organisation
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The application of information technology to health, disease and biomedicine.
+ Biomedical informatics
+ Clinical informatics
+ Health and disease
+ Health informatics
+ Healthcare informatics
+ Medical_informatics
+
+
+
+ Medical informatics
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5.14 Developmental biology
+ How organisms grow and develop.
+ Developmental_biology
+ Development
+
+
+
+ Developmental biology
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The development of organisms between the one-cell stage (typically the zygote) and the end of the embryonic stage.
+ Embryology
+
+
+
+ Embryology
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 3.1.1 Anatomy and morphology
+ The form and function of the structures of living organisms.
+ Anatomy
+
+
+
+ Anatomy
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The scientific literature, language processing, reference information, and documentation.
+ Language
+ Literature
+ Literature_and_language
+ Bibliography
+ Citations
+ Documentation
+ References
+ Scientific literature
+
+
+
+ This includes the documentation of resources such as tools, services and databases, user support, how to get help etc.
+ Literature and language
+ http://purl.bioontology.org/ontology/MSH/D011642
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5 Biological sciences
+ VT 1.5.1 Aerobiology
+ VT 1.5.13 Cryobiology
+ VT 1.5.23 Reproductive biology
+ VT 1.5.3 Behavioural biology
+ VT 1.5.7 Biological rhythm
+ VT 1.5.8 Biology
+ VT 1.5.99 Other
+ The study of life and living organisms, including their morphology, biochemistry, physiology, development, evolution, and so on.
+ Biological science
+ Biology
+ Aerobiology
+ Behavioural biology
+ Biological rhythms
+ Chronobiology
+ Cryobiology
+ Reproductive biology
+
+
+
+ Biology
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.3.1 Data management
+ Data management comprises the practices and principles of taking care of data, other than analysing them. This includes for example taking care of the associated metadata, formatting, storage, archiving, or access.
+
+
+
+ Data management
+ Metadata management
+ Data stewardship
+
+
+ http://purl.bioontology.org/ontology/MSH/D000079803
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The detection of the positional features, such as functional and other key sites, in molecular sequences.
+
+ Sequence feature detection
+ http://purl.bioontology.org/ontology/MSH/D058977
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The detection of positional features such as functional sites in nucleotide sequences.
+
+ Nucleic acid feature detection
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The detection, identification and analysis of positional protein sequence features, such as functional sites.
+
+ Protein feature detection
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.2
+
+
+ Topic for modelling biological systems in mathematical terms.
+
+ Biological system modelling
+ true
+
+
+
+
+
+
+
+
+ beta13
+ The acquisition of data, typically measurements of physical systems using any type of sampling system, or by any other means.
+ Data collection
+
+
+ Data acquisition
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Specific genes and/or their encoded proteins or a family or other grouping of related genes and proteins.
+
+ Genes and proteins resources
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Topological domains such as cytoplasmic regions in a protein.
+
+
+ Protein topological domains
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+
+ Protein sequence variants produced e.g. from alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting.
+ Protein_variants
+
+
+ Protein variants
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.12
+
+
+ Regions within a nucleic acid sequence containing a signal that alters a biological function.
+
+ Expression signals
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+
+ Nucleic acids binding to some other molecule.
+ DNA_binding_sites
+ Matrix-attachment region
+ Matrix/scaffold attachment region
+ Nucleosome exclusion sequences
+ Restriction sites
+ Ribosome binding sites
+ Scaffold-attachment region
+
+
+ This includes ribosome binding sites (Shine-Dalgarno sequence in prokaryotes), restriction enzyme recognition sites (restriction sites) etc.
+ This includes sites involved with DNA replication and recombination. This includes binding sites for initiation of replication (origin of replication), regions where transfer is initiated during the conjugation or mobilisation (origin of transfer), starting sites for DNA duplication (origin of replication) and regions which are eliminated through any kind of recombination. Also nucleosome exclusion regions, i.e. specific patterns or regions which exclude nucleosomes (the basic structural units of eukaryotic chromatin which play a significant role in regulating gene expression).
+ DNA binding sites
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Repetitive elements within a nucleic acid sequence.
+
+
+ This includes long terminal repeats (LTRs); sequences (typically retroviral) directly repeated at both ends of a defined sequence and other types of repeating unit.
+ Nucleic acid repeats
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ DNA replication or recombination.
+ DNA_replication_and_recombination
+
+
+ DNA replication and recombination
+
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Coding sequences for a signal or transit peptide.
+
+
+ Signal or transit peptide
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Sequence tagged sites (STS) in nucleic acid sequences.
+
+
+ Sequence tagged sites
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ The determination of complete (typically nucleotide) sequences, including those of genomes (full genome sequencing, de novo sequencing and resequencing), amplicons and transcriptomes.
+ DNA-Seq
+ Sequencing
+ Chromosome walking
+ Clone verification
+ DNase-Seq
+ High throughput sequencing
+ High-throughput sequencing
+ NGS
+ NGS data analysis
+ Next gen sequencing
+ Next generation sequencing
+ Panels
+ Primer walking
+ Sanger sequencing
+ Targeted next-generation sequencing panels
+
+
+
+ Sequencing
+
+ http://purl.bioontology.org/ontology/MSH/D059014
+
+
+
+
+
+
+
+
+
+ 1.1
+ The analysis of protein-DNA interactions where chromatin immunoprecipitation (ChIP) is used in combination with massively parallel DNA sequencing to identify the binding sites of DNA-associated proteins.
+ ChIP-sequencing
+ Chip Seq
+ Chip sequencing
+ Chip-sequencing
+ ChIP-seq
+ ChIP-exo
+
+
+ ChIP-seq
+
+
+
+
+
+
+
+
+
+ 1.1
+ A topic concerning high-throughput sequencing of cDNA to measure the RNA content (transcriptome) of a sample, for example, to investigate how different alleles of a gene are expressed, detect post-transcriptional mutations or identify gene fusions.
+ RNA sequencing
+ RNA-Seq analysis
+ Small RNA sequencing
+ Small RNA-Seq
+ Small-Seq
+ Transcriptome profiling
+ WTSS
+ Whole transcriptome shotgun sequencing
+ RNA-Seq
+ MicroRNA sequencing
+ miRNA-seq
+
+
+ This includes small RNA profiling (small RNA-Seq), for example to find novel small RNAs, characterize mutations and analyze expression of small RNAs.
+ RNA-Seq
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ 1.3
+
+ DNA methylation including bisulfite sequencing, methylation sites and analysis, for example of patterns and profiles of DNA methylation in a population, tissue etc.
+
+
+ DNA methylation
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ The systematic study of metabolites, the chemical processes they are involved in, and the chemical fingerprints of specific cellular processes in a whole cell, tissue, organ or organism.
+ Metabolomics
+ Exometabolomics
+ LC-MS-based metabolomics
+ MS-based metabolomics
+ MS-based targeted metabolomics
+ MS-based untargeted metabolomics
+ Mass spectrometry-based metabolomics
+ Metabolites
+ Metabolome
+ Metabonomics
+ NMR-based metabolomics
+
+
+
+ Metabolomics
+
+ http://purl.bioontology.org/ontology/MSH/D055432
+
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ The study of the epigenetic modifications of a whole cell, tissue, organism etc.
+ Epigenomics
+
+
+
+ Epigenetics concerns the heritable changes in gene expression owing to mechanisms other than DNA sequence variation.
+ Epigenomics
+
+ http://purl.bioontology.org/ontology/MSH/D057890
+
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ Biome sequencing
+ Community genomics
+ Ecogenomics
+ Environmental genomics
+ Environmental omics
+ Environmental sequencing
+ Environmental DNA (eDNA)
+ The study of genetic material recovered from environmental samples, and associated environmental data.
+ Metagenomics
+ Shotgun metagenomics
+
+
+
+ Metagenomics
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Variation in chromosome structure including microscopic and submicroscopic types of variation such as deletions, duplications, copy-number variants, insertions, inversions and translocations.
+ DNA structural variation
+ Genomic structural variation
+ DNA_structural_variation
+ Deletion
+ Duplication
+ Insertion
+ Inversion
+ Translocation
+
+
+ Structural variation
+
+
+
+
+
+
+
+
+
+ 1.1
+ DNA-histone complexes (chromatin), organisation of chromatin into nucleosomes and packaging into higher-order structures.
+ DNA_packaging
+ Nucleosome positioning
+
+
+ DNA packaging
+
+ http://purl.bioontology.org/ontology/MSH/D042003
+
+
+
+
+
+
+
+
+ 1.1
+ 1.3
+
+
+ A topic concerning high-throughput sequencing of randomly fragmented genomic DNA, for example, to investigate whole-genome sequencing and resequencing, SNP discovery, identification of copy number variations and chromosomal rearrangements.
+
+ DNA-Seq
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.3
+
+
+ The alignment of sequences of (typically millions) of short reads to a reference genome. This is a specialised topic within sequence alignment, especially because of complications arising from RNA splicing.
+
+ RNA-Seq alignment
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ Experimental techniques that combine chromatin immunoprecipitation ('ChIP') with microarray ('chip'). ChIP-on-chip is used for high-throughput study of protein-DNA interactions.
+ ChIP-chip
+ ChIP-on-chip
+ ChiP
+
+
+ ChIP-on-chip
+
+
+
+
+
+
+
+
+
+ 1.3
+ The protection of data, such as patient health data, from damage or unwanted access from unauthorised users.
+ Data privacy
+ Data_security
+
+
+ Data security
+
+
+
+
+
+
+
+
+
+ 1.3
+ Biological samples and specimens.
+ Specimen collections
+ Sample_collections
+ biosamples
+ samples
+
+
+
+ Sample collections
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.4 Biochemistry and molecular biology
+ Chemical substances and physico-chemical processes that occur within living organisms.
+ Biological chemistry
+ Biochemistry
+ Glycomics
+ Pathobiochemistry
+ Phytochemistry
+
+
+
+ Biochemistry
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The study of evolutionary relationships amongst organisms from analysis of genetic information (typically gene or protein sequences).
+ Phylogenetics
+
+
+ Phylogenetics
+
+ http://purl.bioontology.org/ontology/MSH/D010802
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Topic concerning the study of heritable changes, for example in gene expression or phenotype, caused by mechanisms other than changes in the DNA sequence.
+ Epigenetics
+ DNA methylation
+ Histone modification
+ Methylation profiles
+
+
+
+ This includes sub-topics such as histone modification and DNA methylation (methylation sites and analysis, for example of patterns and profiles of DNA methylation in a population, tissue etc.)
+ Epigenetics
+
+ http://purl.bioontology.org/ontology/MSH/D019175
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The exploitation of biological process, structure and function for industrial purposes, for example the genetic manipulation of microorganisms for the antibody production.
+ Biotechnology
+ Applied microbiology
+
+
+
+ Biotechnology
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Phenomes, or the study of the change in phenotype (the physical and biochemical traits of organisms) in response to genetic and environmental factors.
+ Phenomics
+
+
+
+ Phenomics
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.16 Evolutionary biology
+ The evolutionary processes, from the genetic to environmental scale, that produced life in all its diversity.
+ Evolution
+ Evolutionary_biology
+
+
+
+ Evolutionary biology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.1.8 Physiology
+ The functions of living organisms and their constituent parts.
+ Physiology
+ Electrophysiology
+
+
+
+ Physiology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.20 Microbiology
+ The biology of microorganisms.
+ Microbiology
+ Antimicrobial stewardship
+ Medical microbiology
+ Microbial genetics
+ Microbial physiology
+ Microbial surveillance
+ Microbiological surveillance
+ Molecular infection biology
+ Molecular microbiology
+
+
+
+ Microbiology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The biology of parasites.
+ Parasitology
+
+
+
+ Parasitology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.1 Basic medicine
+ VT 3.2 Clinical medicine
+ VT 3.2.9 General and internal medicine
+ Research in support of healing by diagnosis, treatment, and prevention of disease.
+ Biomedical research
+ Clinical medicine
+ Experimental medicine
+ Medicine
+ General medicine
+ Internal medicine
+
+
+
+ Medicine
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Neuroscience
+ VT 3.1.5 Neuroscience
+ The study of the nervous system and brain; its anatomy, physiology and function.
+ Neurobiology
+ Molecular neuroscience
+ Neurophysiology
+ Systemetic neuroscience
+
+
+
+ Neurobiology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.3.1 Epidemiology
+ Topic concerning the patterns, cause, and effect of disease within populations.
+ Public_health_and_epidemiology
+ Epidemiology
+ Public health
+
+
+
+ Public health and epidemiology
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.9 Biophysics
+ The use of physics to study biological systems.
+ Biophysics
+ Medical physics
+
+
+
+ Biophysics
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.12 Computational biology
+ VT 1.5.19 Mathematical biology
+ VT 1.5.26 Theoretical biology
+ The development and application of theory, analytical methods, mathematical models and computational simulation of biological systems.
+ Computational_biology
+ Biomathematics
+ Mathematical biology
+ Theoretical biology
+
+
+
+ This includes the modeling and treatment of biological processes and systems in mathematical terms (theoretical biology).
+ Computational biology
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The analysis of transcriptomes, or a set of all the RNA molecules in a specific cell, tissue etc.
+ Transcriptomics
+ Comparative transcriptomics
+ Transcriptome
+
+
+
+ Transcriptomics
+
+
+
+
+
+
+
+
+
+ 1.3
+ Chemical science
+ Polymer science
+ VT 1.7.10 Polymer science
+ VT 1.7 Chemical sciences
+ VT 1.7.2 Chemistry
+ VT 1.7.3 Colloid chemistry
+ VT 1.7.5 Electrochemistry
+ VT 1.7.6 Inorganic and nuclear chemistry
+ VT 1.7.7 Mathematical chemistry
+ VT 1.7.8 Organic chemistry
+ VT 1.7.9 Physical chemistry
+ The composition and properties of matter, reactions, and the use of reactions to create new substances.
+ Chemistry
+ Inorganic chemistry
+ Mathematical chemistry
+ Nuclear chemistry
+ Organic chemistry
+ Physical chemistry
+
+
+
+ Chemistry
+
+
+
+
+
+
+
+
+
+ 1.3
+ VT 1.1.99 Other
+ VT:1.1 Mathematics
+ The study of numbers (quantity) and other topics including structure, space, and change.
+ Maths
+ Mathematics
+ Dynamic systems
+ Dynamical systems
+ Dynamical systems theory
+ Graph analytics
+ Monte Carlo methods
+ Multivariate analysis
+
+
+
+ Mathematics
+
+
+
+
+
+
+
+
+
+ 1.3
+ VT 1.2 Computer sciences
+ VT 1.2.99 Other
+ The theory and practical use of computer systems.
+ Computer_science
+ Cloud computing
+ HPC
+ High performance computing
+ High-performance computing
+
+
+
+ Computer science
+
+
+
+
+
+
+
+
+
+ 1.3
+ The study of matter, space and time, and related concepts such as energy and force.
+ Physics
+
+
+
+ Physics
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ RNA splicing; post-transcription RNA modification involving the removal of introns and joining of exons.
+ Alternative splicing
+ RNA_splicing
+ Splice sites
+
+
+ This includes the study of splice sites, splicing patterns, alternative splicing events and variants, isoforms, etc.
+ RNA splicing
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The structure and function of genes at a molecular level.
+ Molecular_genetics
+
+
+
+ Molecular genetics
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.2.25 Respiratory systems
+ The study of the respiratory system.
+ Pulmonary medicine
+ Pulmonology
+ Respiratory_medicine
+ Pulmonary disorders
+ Respiratory disease
+
+
+
+ Respiratory medicine
+
+
+
+
+
+
+
+
+
+ 1.3
+ 1.4
+
+
+ The study of metabolic diseases.
+
+ Metabolic disease
+ true
+
+
+
+
+
+
+
+
+ 1.3
+ VT 3.3.4 Infectious diseases
+ The branch of medicine that deals with the prevention, diagnosis and management of transmissible disease with clinically evident illness resulting from infection with pathogenic biological agents (viruses, bacteria, fungi, protozoa, parasites and prions).
+ Communicable disease
+ Transmissible disease
+ Infectious_disease
+
+
+
+ Infectious disease
+
+
+
+
+
+
+
+
+
+ 1.3
+ The study of rare diseases.
+ Rare_diseases
+
+
+
+ Rare diseases
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.7.4 Computational chemistry
+ Topic concerning the development and application of theory, analytical methods, mathematical models and computational simulation of chemical systems.
+ Computational_chemistry
+
+
+
+ Computational chemistry
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The branch of medicine that deals with the anatomy, functions and disorders of the nervous system.
+ Neurology
+ Neurological disorders
+
+
+
+ Neurology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.2.22 Peripheral vascular disease
+ VT 3.2.4 Cardiac and Cardiovascular systems
+ The diseases and abnormalities of the heart and circulatory system.
+ Cardiovascular medicine
+ Cardiology
+ Cardiovascular disease
+ Heart disease
+
+
+
+ Cardiology
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The discovery and design of drugs or potential drug compounds.
+ Drug_discovery
+
+
+
+ This includes methods that search compound collections, generate or analyse drug 3D conformations, identify drug targets with structural docking etc.
+ Drug discovery
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Repositories of biological samples, typically human, for basic biological and clinical research.
+ Tissue collection
+ biobanking
+ Biobank
+
+
+
+ Biobank
+
+
+
+
+
+
+
+
+
+ 1.3
+ Laboratory study of mice, for example, phenotyping, and mutagenesis of mouse cell lines.
+ Laboratory mouse
+ Mouse_clinic
+
+
+
+ Mouse clinic
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of microbial cells including bacteria, yeasts and moulds.
+ Microbial_collection
+
+
+
+ Microbial collection
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of cells grown under laboratory conditions, specifically, cells from multi-cellular eukaryotes and especially animal cells.
+ Cell_culture_collection
+
+
+
+ Cell culture collection
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of DNA, including both collections of cloned molecules, and populations of micro-organisms that store and propagate cloned DNA.
+ Clone_library
+
+
+
+ Clone library
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ 'translating' the output of basic and biomedical research into better diagnostic tools, medicines, medical procedures, policies and advice.
+ Translational_medicine
+
+
+
+ Translational medicine
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of chemicals, typically for use in high-throughput screening experiments.
+ Compound_libraries_and_screening
+ Chemical library
+ Chemical screening
+ Compound library
+ Small chemical compounds libraries
+ Small compounds libraries
+ Target identification and validation
+
+
+
+ Compound libraries and screening
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.3 Health sciences
+ Topic concerning biological science that is (typically) performed in the context of medicine.
+ Biomedical sciences
+ Health science
+ Biomedical_science
+
+
+
+ Biomedical science
+
+
+
+
+
+
+
+
+
+ 1.3
+ Topic concerning the identity of biological entities, or reports on such entities, and the mapping of entities and records in different databases.
+ Data_identity_and_mapping
+
+
+
+ Data identity and mapping
+
+
+
+
+
+
+
+
+ 1.3
+ 1.12
+
+ The search and retrieval from a database on the basis of molecular sequence similarity.
+
+
+ Sequence search
+ true
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ Objective indicators of biological state often used to assess health, and determine treatment.
+ Diagnostic markers
+ Biomarkers
+
+
+ Biomarkers
+
+
+
+
+
+
+
+
+
+ 1.4
+ The procedures used to conduct an experiment.
+ Experimental techniques
+ Lab method
+ Lab techniques
+ Laboratory method
+ Laboratory_techniques
+ Experiments
+ Laboratory experiments
+
+
+
+ Laboratory techniques
+
+
+
+
+
+
+
+
+
+ 1.4
+ The development of policies, models and standards that cover data acquisition, storage and integration, such that it can be put to use, typically through a process of systematically applying statistical and / or logical techniques to describe, illustrate, summarise or evaluate data.
+ Data_architecture_analysis_and_design
+ Data analysis
+ Data architecture
+ Data design
+
+
+
+ Data architecture, analysis and design
+
+
+
+
+
+
+
+
+
+ 1.4
+ The combination and integration of data from different sources, for example into a central repository or warehouse, to provide users with a unified view of these data.
+ Data_integration_and_warehousing
+ Data integration
+ Data warehousing
+
+
+
+ Data integration and warehousing
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Any matter, surface or construct that interacts with a biological system.
+ Biomaterials
+
+
+
+ Biomaterials
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The use of synthetic chemistry to study and manipulate biological systems.
+ Chemical_biology
+
+
+
+ Chemical biology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 1.7.1 Analytical chemistry
+ The study of the separation, identification, and quantification of the chemical components of natural and artificial materials.
+ Analytical_chemistry
+
+
+
+ Analytical chemistry
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of chemistry to create new compounds.
+ Synthetic_chemistry
+ Synthetic organic chemistry
+
+
+
+ Synthetic chemistry
+
+
+
+
+
+
+
+
+
+ 1.4
+ 1.2.12 Programming languages
+ Software engineering
+ VT 1.2.1 Algorithms
+ VT 1.2.14 Software engineering
+ VT 1.2.7 Data structures
+ The process that leads from an original formulation of a computing problem to executable programs.
+ Computer programming
+ Software development
+ Software_engineering
+ Algorithms
+ Data structures
+ Programming languages
+
+
+
+ Software engineering
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The process of bringing a new drug to market once a lead compound has been identified through drug discovery.
+ Drug development science
+ Medicine development
+ Medicines development
+ Drug_development
+
+
+
+ Drug development
+
+
+
+
+
+
+
+
+
+ 1.4
+ Drug delivery
+ Drug formulation
+ Drug formulation and delivery
+ The process of formulating and administering a pharmaceutical compound to achieve a therapeutic effect.
+ Biotherapeutics
+
+
+
+ Biotherapeutics
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The study of how a drug interacts with the body.
+ Drug_metabolism
+ ADME
+ Drug absorption
+ Drug distribution
+ Drug excretion
+ Pharmacodynamics
+ Pharmacokinetics
+ Pharmacokinetics and pharmacodynamics
+
+
+
+ Drug metabolism
+
+
+
+
+
+
+
+
+
+ 1.4
+ Health care research
+ Health care science
+ The discovery, development and approval of medicines.
+ Drug discovery and development
+ Medicines_research_and_development
+
+
+
+ Medicines research and development
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ The safety (or lack) of drugs and other medical interventions.
+ Patient safety
+ Safety_sciences
+ Drug safety
+
+
+
+ Safety sciences
+
+
+
+
+
+
+
+
+
+ 1.4
+ The detection, assessment, understanding and prevention of adverse effects of medicines.
+ Pharmacovigilence
+
+
+
+ Pharmacovigilance concerns safety once a drug has gone to market.
+ Pharmacovigilance
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ The testing of new medicines, vaccines or procedures on animals (preclinical) and humans (clinical) prior to their approval by regulatory authorities.
+ Preclinical_and_clinical_studies
+ Clinical studies
+ Clinical study
+ Clinical trial
+ Drug trials
+ Preclinical studies
+ Preclinical study
+
+
+
+ Preclinical and clinical studies
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The visual representation of an object.
+ Imaging
+ Diffraction experiment
+ Microscopy
+ Microscopy imaging
+ Optical super resolution microscopy
+ Photonic force microscopy
+ Photonic microscopy
+
+
+
+ This includes diffraction experiments that are based upon the interference of waves, typically electromagnetic waves such as X-rays or visible light, by some object being studied, typically in order to produce an image of the object or determine its structure.
+ Imaging
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of imaging techniques to understand biology.
+ Biological imaging
+ Biological_imaging
+
+
+
+ Bioimaging
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.13 Medical imaging
+ VT 3.2.14 Nuclear medicine
+ VT 3.2.24 Radiology
+ The use of imaging techniques for clinical purposes for medical research.
+ Medical_imaging
+ Neuroimaging
+ Nuclear medicine
+ Radiology
+
+
+
+ Medical imaging
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of optical instruments to magnify the image of an object.
+ Light_microscopy
+
+
+
+ Light microscopy
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of animals and alternatives in experimental research.
+ Animal experimentation
+ Animal research
+ Animal testing
+ In vivo testing
+ Laboratory_animal_science
+
+
+
+ Laboratory animal science
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 1.5.18 Marine and Freshwater biology
+ The study of organisms in the ocean or brackish waters.
+ Marine_biology
+
+
+
+ Marine biology
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The identification of molecular and genetic causes of disease and the development of interventions to correct them.
+ Molecular_medicine
+
+
+
+ Molecular medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.3.7 Nutrition and Dietetics
+ The study of the effects of food components on the metabolism, health, performance and disease resistance of humans and animals. It also includes the study of human behaviours related to food choices.
+ Nutrition
+ Nutrition science
+ Nutritional_science
+ Dietetics
+
+
+
+ Nutritional science
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The collective characterisation and quantification of pools of biological molecules that translate into the structure, function, and dynamics of an organism or organisms.
+ Omics
+
+
+
+ Omics
+
+
+
+
+
+
+
+
+
+ 1.4
+ The processes that need to be in place to ensure the quality of products for human or animal use.
+ Quality assurance
+ Quality_affairs
+ Good clinical practice
+ Good laboratory practice
+ Good manufacturing practice
+
+
+
+ Quality affairs
+
+
+
+
+
+
+
+
+ 1.4
+ The protection of public health by controlling the safety and efficacy of products in areas including pharmaceuticals, veterinary medicine, medical devices, pesticides, agrochemicals, cosmetics, and complementary medicines.
+ Healthcare RA
+ Regulatory_affairs
+
+
+
+ Regulatory affairs
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ Biomedical approaches to clinical interventions that involve the use of stem cells.
+ Stem cell research
+ Regenerative_medicine
+
+
+
+ Regenerative medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ An interdisciplinary field of study that looks at the dynamic systems of the human body as part of an integrated whole, incorporating biochemical, physiological, and environmental interactions that sustain life.
+ Systems_medicine
+
+
+
+ Systems medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ Topic concerning the branch of medicine that deals with the prevention, diagnosis, and treatment of disease, disorder and injury in animals.
+ Veterinary_medicine
+ Clinical veterinary medicine
+
+
+
+ Veterinary medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ The application of biological concepts and methods to the analytical and synthetic methodologies of engineering.
+ Biological engineering
+ Bioengineering
+
+
+
+ Bioengineering
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ Ageing
+ Aging
+ Gerontology
+ VT 3.2.10 Geriatrics and gerontology
+ The branch of medicine dealing with the diagnosis, treatment and prevention of disease in older people, and the problems specific to aging.
+ Geriatrics
+ Geriatric_medicine
+
+
+
+ Geriatric medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.1 Allergy
+ Health issues related to the immune system and their prevention, diagnosis and management.
+ Allergy_clinical_immunology_and_immunotherapeutics
+ Allergy
+ Clinical immunology
+ Immune disorders
+ Immunomodulators
+ Immunotherapeutics
+
+
+
+ Allergy, clinical immunology and immunotherapeutics
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The prevention of pain and the evaluation, treatment and rehabilitation of persons in pain.
+ Algiatry
+ Pain management
+ Pain_medicine
+
+
+
+ Pain medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.2 Anaesthesiology
+ Anaesthesia and anaesthetics.
+ Anaesthetics
+ Anaesthesiology
+
+
+
+ Anaesthesiology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.5 Critical care/Emergency medicine
+ The multidisciplinary field that cares for patients with acute, life-threatening illness or injury.
+ Acute medicine
+ Emergency medicine
+ Intensive care medicine
+ Critical_care_medicine
+
+
+
+ Critical care medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.7 Dermatology and venereal diseases
+ The branch of medicine that deals with prevention, diagnosis and treatment of disorders of the skin, scalp, hair and nails.
+ Dermatology
+ Dermatological disorders
+
+
+
+ Dermatology
+
+
+
+
+
+
+
+
+
+ 1.4
+ The study, diagnosis, prevention and treatments of disorders of the oral cavity, maxillofacial area and adjacent structures.
+ Dentistry
+
+
+
+ Dentistry
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.20 Otorhinolaryngology
+ The branch of medicine that deals with the prevention, diagnosis, and treatment of disorders of the ear, nose and throat.
+ Audiovestibular medicine
+ Otolaryngology
+ Otorhinolaryngology
+ Ear_nose_and_throat_medicine
+ Head and neck disorders
+
+
+
+ Ear, nose and throat medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The branch of medicine dealing with diseases of endocrine organs, hormone systems, their target organs, and disorders of the pathways of glucose and lipid metabolism.
+ Endocrinology_and_metabolism
+ Endocrine disorders
+ Endocrinology
+ Metabolic disorders
+ Metabolism
+
+
+
+ Endocrinology and metabolism
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.11 Hematology
+ The branch of medicine that deals with the blood, blood-forming organs and blood diseases.
+ Haematology
+ Blood disorders
+ Haematological disorders
+
+
+
+ Haematology
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.8 Gastroenterology and hepatology
+ The branch of medicine that deals with disorders of the oesophagus, stomach, duodenum, jejunum, ileum, large intestine, sigmoid colon and rectum.
+ Gastroenterology
+ Gastrointestinal disorders
+
+
+
+ Gastroenterology
+
+
+
+
+
+
+
+
+
+ 1.4
+ The study of the biological and physiological differences between males and females and how they effect differences in disease presentation and management.
+ Gender_medicine
+
+
+
+ Gender medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.15 Obstetrics and gynaecology
+ The branch of medicine that deals with the health of the female reproductive system, pregnancy and birth.
+ Gynaecology_and_obstetrics
+ Gynaecological disorders
+ Gynaecology
+ Obstetrics
+
+
+
+ Gynaecology and obstetrics
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The branch of medicine that deals with the liver, gallbladder, bile ducts and bile.
+ Hepatology
+ Hepatic_and_biliary_medicine
+ Liver disorders
+
+
+
+ Hepatic and biliary medicine
+
+ Hepatobiliary medicine
+
+
+
+
+
+
+
+
+ 1.4
+ 1.13
+
+ The branch of medicine that deals with the infectious diseases of the tropics.
+
+
+ Infectious tropical disease
+ true
+
+
+
+
+
+
+
+
+ 1.4
+ The branch of medicine that treats body wounds or shock produced by sudden physical injury, as from violence or accident.
+ Traumatology
+ Trauma_medicine
+
+
+
+ Trauma medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The branch of medicine that deals with the diagnosis, management and prevention of poisoning and other adverse health effects caused by medications, occupational and environmental toxins, and biological agents.
+ Medical_toxicology
+
+
+
+ Medical toxicology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.19 Orthopaedics
+ VT 3.2.26 Rheumatology
+ The branch of medicine that deals with the prevention, diagnosis, and treatment of disorders of the muscle, bone and connective tissue. It incorporates aspects of orthopaedics, rheumatology, rehabilitation medicine and pain medicine.
+ Musculoskeletal_medicine
+ Musculoskeletal disorders
+ Orthopaedics
+ Rheumatology
+
+
+
+ Musculoskeletal medicine
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Optometry
+ VT 3.2.17 Ophthalmology
+ VT 3.2.18 Optometry
+ The branch of medicine that deals with disorders of the eye, including eyelid, optic nerve/visual pathways and ocular muscles.
+ Ophthalmology
+ Eye disorders
+
+
+
+ Ophthalmology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.21 Paediatrics
+ The branch of medicine that deals with the medical care of infants, children and adolescents.
+ Child health
+ Paediatrics
+
+
+
+ Paediatrics
+
+
+
+
+
+
+
+
+
+ 1.4
+ Mental health
+ VT 3.2.23 Psychiatry
+ The branch of medicine that deals with the management of mental illness, emotional disturbance and abnormal behaviour.
+ Psychiatry
+ Psychiatric disorders
+
+
+
+ Psychiatry
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.3 Andrology
+ The health of the reproductive processes, functions and systems at all stages of life.
+ Reproductive_health
+ Andrology
+ Family planning
+ Fertility medicine
+ Reproductive disorders
+
+
+
+ Reproductive health
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.28 Transplantation
+ The use of operative, manual and instrumental techniques on a patient to investigate and/or treat a pathological condition or help improve bodily function or appearance.
+ Surgery
+ Transplantation
+
+
+
+ Surgery
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.29 Urology and nephrology
+ The branches of medicine and physiology focussing on the function and disorders of the urinary system in males and females, the reproductive system in males, and the kidney.
+ Urology_and_nephrology
+ Kidney disease
+ Nephrology
+ Urological disorders
+ Urology
+
+
+
+ Urology and nephrology
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Alternative medicine
+ Holistic medicine
+ Integrative medicine
+ VT 3.2.12 Integrative and Complementary medicine
+ Medical therapies that fall beyond the scope of conventional medicine but may be used alongside it in the treatment of disease and ill health.
+ Complementary_medicine
+
+
+
+ Complementary medicine
+
+
+
+
+
+
+
+
+
+ 1.7
+ Techniques that use magnetic fields and radiowaves to form images, typically to investigate the anatomy and physiology of the human body.
+ MRT
+ Magnetic resonance imaging
+ Magnetic resonance tomography
+ NMRI
+ Nuclear magnetic resonance imaging
+ MRI
+
+
+ MRI
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ The study of matter by studying the diffraction pattern from firing neutrons at a sample, typically to determine atomic and/or magnetic structure.
+ Neutron diffraction experiment
+ Neutron_diffraction
+ Elastic neutron scattering
+ Neutron microscopy
+
+
+ Neutron diffraction
+
+
+
+
+
+
+
+
+
+ 1.7
+ Imaging in sections (sectioning), through the use of a wave-generating device (tomograph) that generates an image (a tomogram).
+ CT
+ Computed tomography
+ TDM
+ Tomography
+ Electron tomography
+ PET
+ Positron emission tomography
+ X-ray tomography
+
+
+ Tomography
+
+
+
+
+
+
+
+
+
+ 1.7
+ true
+ KDD
+ Knowledge discovery in databases
+ VT 1.3.2 Data mining
+ The discovery of patterns in large data sets and the extraction and transformation of those patterns into a useful format.
+ Data_mining
+ Pattern recognition
+
+
+ Data mining
+
+
+
+
+
+
+
+
+
+ 1.7
+ Artificial Intelligence
+ VT 1.2.2 Artificial Intelligence (expert systems, machine learning, robotics)
+ A topic concerning the application of artificial intelligence methods to algorithms, in order to create methods that can learn from data in order to generate an output, rather than relying on explicitly encoded information only.
+ Machine_learning
+ Active learning
+ Ensemble learning
+ Kernel methods
+ Knowledge representation
+ Neural networks
+ Recommender system
+ Reinforcement learning
+ Supervised learning
+ Unsupervised learning
+
+
+ Machine learning
+
+
+
+
+
+
+
+
+
+ 1.8
+ The general handling of data stored in digital archives such as databases, databanks, web portals, and other data resources.
+ Database administration
+ Database_management
+ Databases
+ Information systems
+ Content management
+ Document management
+ File management
+ Record management
+
+
+ This includes databases for the results of scientific experiments, the application of high-throughput technology, computational analysis and the scientific literature. It covers the management and manipulation of digital documents, including database records, files, and reports.
+ Database management
+
+
+
+
+
+
+
+
+
+ 1.8
+ VT 1.5.29 Zoology
+ Animals, e.g. information on a specific animal genome including molecular sequences, genes and annotation.
+ Animal
+ Animal biology
+ Animals
+ Metazoa
+ Zoology
+ Animal genetics
+ Animal physiology
+ Entomology
+
+
+ The study of the animal kingdom.
+ Zoology
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ The biology, archival, detection, prediction and analysis of positional features such as functional and other key sites, in protein sequences and the conserved patterns (motifs, profiles etc.) that may be used to describe them.
+ Protein_sites_features_and_motifs
+ Protein sequence features
+ Signal peptide cleavage sites
+
+
+ A signal peptide coding sequence encodes an N-terminal domain of a secreted protein, which is involved in attaching the polypeptide to a membrane leader sequence. A transit peptide coding sequence encodes an N-terminal domain of a nuclear-encoded organellar protein; which is involved in import of the protein into the organelle.
+ Protein sites, features and motifs
+
+
+
+
+
+
+
+
+
+ 1.8
+ The biology, archival, detection, prediction and analysis of positional features such as functional and other key sites, in nucleic acid sequences and the conserved patterns (motifs, profiles etc.) that may be used to describe them.
+ Nucleic_acid_sites_features_and_motifs
+ Nucleic acid functional sites
+ Nucleic acid sequence features
+ Primer binding sites
+ Sequence tagged sites
+
+
+ Sequence tagged sites are short DNA sequences that are unique within a genome and serve as a mapping landmark, detectable by PCR they allow a genome to be mapped via an ordering of STSs.
+ Nucleic acid sites, features and motifs
+
+
+
+
+
+
+
+
+
+ 1.8
+ Transcription of DNA into RNA and features of messenger RNA (mRNA) molecules including precursor RNA, primary (unprocessed) transcript and fully processed molecules.
+ mRNA features
+ Gene_transcripts
+ Coding RNA
+ EST
+ Exons
+ Fusion transcripts
+ Gene transcript features
+ Introns
+ PolyA signal
+ PolyA site
+ Signal peptide coding sequence
+ Transit peptide coding sequence
+ cDNA
+ mRNA
+
+
+ This includes 5'untranslated region (5'UTR), coding sequences (CDS), exons, intervening sequences (intron) and 3'untranslated regions (3'UTR).
+ This includes Introns, and protein-coding regions including coding sequences (CDS), exons, translation initiation sites and open reading frames. Also expressed sequence tag (EST) or complementary DNA (cDNA) sequences.
+ This includes coding sequences for a signal or transit peptide. A signal peptide coding sequence encodes an N-terminal domain of a secreted protein, which is involved in attaching the polypeptide to a membrane leader sequence. A transit peptide coding sequence encodes an N-terminal domain of a nuclear-encoded organellar protein; which is involved in import of the protein into the organelle.
+ This includes regions or sites in a eukaryotic and eukaryotic viral RNA sequence which directs endonuclease cleavage or polyadenylation of an RNA transcript. A polyA signal is required for endonuclease cleavage of an RNA transcript that is followed by polyadenylation. A polyA site is a site on an RNA transcript to which adenine residues will be added during post-transcriptional polyadenylation.
+ Gene transcripts
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-ligand (small molecule) interaction(s).
+
+
+ Protein-ligand interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-drug interaction(s).
+
+
+ Protein-drug interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Genotype experiment including case control, population, and family studies. These might use array based methods and re-sequencing methods.
+ Genotyping_experiment
+
+
+ Genotyping experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ Genome-wide association study experiments.
+ GWAS
+ GWAS analysis
+ Genome-wide association study
+ GWAS_study
+
+
+ GWAS study
+
+
+
+
+
+
+
+
+
+ 1.8
+ Microarray experiments including conditions, protocol, sample:data relationships etc.
+ Microarrays
+ Microarray_experiment
+ Gene expression microarray
+ Genotyping array
+ Methylation array
+ MicroRNA array
+ Multichannel microarray
+ One channel microarray
+ Proprietary platform microarray
+ RNA chips
+ RNA microarrays
+ Reverse phase protein array
+ SNP array
+ Tiling arrays
+ Tissue microarray
+ Two channel microarray
+ aCGH microarray
+ mRNA microarray
+ miRNA array
+
+
+ This might specify which raw data file relates to which sample and information on hybridisations, e.g. which are technical and which are biological replicates.
+ Microarray experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ PCR experiments, e.g. quantitative real-time PCR.
+ Polymerase chain reaction
+ PCR_experiment
+ Quantitative PCR
+ RT-qPCR
+ Real Time Quantitative PCR
+
+
+ PCR experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ Proteomics experiments.
+ Proteomics_experiment
+ 2D PAGE experiment
+ DIA
+ Data-independent acquisition
+ MS
+ MS experiments
+ Mass spectrometry
+ Mass spectrometry experiments
+ Northern blot experiment
+ Spectrum demultiplexing
+
+
+ This includes two-dimensional gel electrophoresis (2D PAGE) experiments, gels or spots in a gel. Also mass spectrometry - an analytical chemistry technique that measures the mass-to-charge ratio and abundance of ions in the gas phase. Also Northern blot experiments.
+ Proteomics experiment
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Two-dimensional gel electrophoresis experiments, gels or spots in a gel.
+
+
+ 2D PAGE experiment
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Northern Blot experiments.
+
+
+ Northern blot experiment
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ RNAi experiments.
+ RNAi_experiment
+
+
+ RNAi experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ Biological computational model experiments (simulation), for example the minimum information required in order to permit its correct interpretation and reproduction.
+ Simulation_experiment
+
+
+ Simulation experiment
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-DNA/RNA interaction(s).
+
+
+ Protein-nucleic acid interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-protein interaction(s), including interactions between protein domains.
+
+
+ Protein-protein interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Cellular process pathways.
+
+
+ Cellular process pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Disease pathways, typically of human disease.
+
+
+ Disease pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Environmental information processing pathways.
+
+
+ Environmental information processing pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Genetic information processing pathways.
+
+
+ Genetic information processing pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Super-secondary structure of protein sequence(s).
+
+
+ Protein super-secondary structure
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Catalytic residues (active site) of an enzyme.
+
+
+ Protein active sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Binding sites in proteins, including cleavage sites (for a proteolytic enzyme or agent), key residues involved in protein folding, catalytic residues (active site) of an enzyme, ligand-binding (non-catalytic) residues of a protein, such as sites that bind metal, prosthetic groups or lipids, RNA and DNA-binding proteins and binding sites etc.
+ Protein_binding_sites
+ Enzyme active site
+ Protein cleavage sites
+ Protein functional sites
+ Protein key folding sites
+ Protein-nucleic acid binding sites
+
+
+ Protein binding sites
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ RNA and DNA-binding proteins and binding sites in protein sequences.
+
+
+ Protein-nucleic acid binding sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Cleavage sites (for a proteolytic enzyme or agent) in a protein sequence.
+
+
+ Protein cleavage sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Chemical modification of a protein.
+
+
+ Protein chemical modifications
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Disordered structure in a protein.
+ Protein features (disordered structure)
+ Protein_disordered_structure
+
+
+ Protein disordered structure
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Structural domains or 3D folds in a protein or polypeptide chain.
+
+
+ Protein domains
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Key residues involved in protein folding.
+
+
+ Protein key folding sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Post-translation modifications in a protein sequence, typically describing the specific sites involved.
+
+
+ Protein post-translational modifications
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Secondary structure (predicted or real) of a protein, including super-secondary structure.
+ Protein features (secondary structure)
+ Protein_secondary_structure
+ Protein super-secondary structure
+
+
+ Super-secondary structures include leucine zippers, coiled coils, Helix-Turn-Helix etc.
+ The location and size of the secondary structure elements and intervening loop regions is typically given. The report can include disulphide bonds and post-translationally formed peptide bonds (crosslinks).
+ Protein secondary structure
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Short repetitive subsequences (repeat sequences) in a protein sequence.
+
+
+ Protein sequence repeats
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Signal peptides or signal peptide cleavage sites in protein sequences.
+
+
+ Protein signal peptides
+ true
+
+
+
+
+
+
+
+
+ 1.10
+ VT 1.1.1 Applied mathematics
+ The application of mathematics to specific problems in science, typically by the formulation and analysis of mathematical models.
+ Applied_mathematics
+
+
+ Applied mathematics
+
+
+
+
+
+
+
+
+
+ 1.10
+ VT 1.1.1 Pure mathematics
+ The study of abstract mathematical concepts.
+ Pure_mathematics
+ Linear algebra
+
+
+ Pure mathematics
+
+
+
+
+
+
+
+
+
+ 1.10
+ The control of data entry and maintenance to ensure the data meets defined standards, qualities or constraints.
+ Data_governance
+ Data stewardship
+
+
+ Data governance
+
+ http://purl.bioontology.org/ontology/MSH/D030541
+
+
+
+
+
+
+
+
+ 1.10
+ The quality, integrity, and cleaning up of data.
+ Data_quality_management
+ Data clean-up
+ Data cleaning
+ Data integrity
+ Data quality
+
+
+ Data quality management
+
+
+
+
+
+
+
+
+
+ 1.10
+ Freshwater science
+ VT 1.5.18 Marine and Freshwater biology
+ The study of organisms in freshwater ecosystems.
+ Freshwater_biology
+
+
+
+ Freshwater biology
+
+
+
+
+
+
+
+
+
+ 1.10
+ true
+ VT 3.1.2 Human genetics
+ The study of inheritance in human beings.
+ Human_genetics
+
+
+
+ Human genetics
+
+
+
+
+
+
+
+
+
+ 1.10
+ VT 3.3.14 Tropical medicine
+ Health problems that are prevalent in tropical and subtropical regions.
+ Tropical_medicine
+
+
+
+ Tropical medicine
+
+
+
+
+
+
+
+
+
+ 1.10
+ true
+ VT 3.3.14 Tropical medicine
+ VT 3.4 Medical biotechnology
+ VT 3.4.1 Biomedical devices
+ VT 3.4.2 Health-related biotechnology
+ Biotechnology applied to the medical sciences and the development of medicines.
+ Medical_biotechnology
+ Pharmaceutical biotechnology
+
+
+
+ Medical biotechnology
+
+
+
+
+
+
+
+
+
+ 1.10
+ true
+ VT 3.4.5 Molecular diagnostics
+ An approach to medicine whereby decisions and practices are tailored to the individual patient based on their predicted response or risk of disease.
+ Precision medicine
+ Personalised_medicine
+ Molecular diagnostics
+
+
+
+ Personalised medicine
+
+
+
+
+
+
+
+
+
+ 1.12
+ Experimental techniques to purify a protein-DNA crosslinked complex. Usually sequencing follows e.g. in the techniques ChIP-chip, ChIP-seq and MeDIP-seq.
+ Chromatin immunoprecipitation
+ Immunoprecipitation_experiment
+
+
+ Immunoprecipitation experiment
+
+
+
+
+
+
+
+
+
+ 1.12
+ Laboratory technique to sequence the complete DNA sequence of an organism's genome at a single time.
+ Genome sequencing
+ WGS
+ Whole_genome_sequencing
+ De novo genome sequencing
+ Whole genome resequencing
+
+
+ Whole genome sequencing
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Laboratory technique to sequence the methylated regions in DNA.
+ MeDIP-chip
+ MeDIP-seq
+ mDIP
+ Methylated_DNA_immunoprecipitation
+ BS-Seq
+ Bisulfite sequencing
+ MeDIP
+ Methylated DNA immunoprecipitation (MeDIP)
+ Methylation sequencing
+ WGBS
+ Whole-genome bisulfite sequencing
+ methy-seq
+ methyl-seq
+
+
+ Methylated DNA immunoprecipitation
+
+
+
+
+
+
+
+
+
+ 1.12
+ Laboratory technique to sequence all the protein-coding regions in a genome, i.e., the exome.
+ Exome
+ Exome analysis
+ Exome capture
+ Targeted exome capture
+ WES
+ Whole exome sequencing
+ Exome_sequencing
+
+
+ Exome sequencing is considered a cheap alternative to whole genome sequencing.
+ Exome sequencing
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ true
+ The design of an experiment intended to test a hypothesis, and describe or explain empirical data obtained under various experimental conditions.
+ Design of experiments
+ Experimental design
+ Studies
+ Experimental_design_and_studies
+
+
+ Experimental design and studies
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ The design of an experiment involving non-human animals.
+ Animal_study
+ Challenge study
+
+
+ Animal study
+
+
+
+
+
+
+
+
+
+
+ 1.13
+ true
+ The ecology of microorganisms including their relationship with one another and their environment.
+ Environmental microbiology
+ Microbial_ecology
+ Community analysis
+ Microbiome
+ Molecular community analysis
+
+
+ Microbial ecology
+
+
+
+
+
+
+
+
+
+ 1.17
+ An antibody-based technique used to map in vivo RNA-protein interactions.
+ RIP
+ RNA_immunoprecipitation
+ CLIP
+ CLIP-seq
+ HITS-CLIP
+ PAR-CLIP
+ iCLIP
+
+
+ RNA immunoprecipitation
+
+
+
+
+
+
+
+
+
+ 1.17
+ Large-scale study (typically comparison) of DNA sequences of populations.
+ Population_genomics
+
+
+
+ Population genomics
+
+
+
+
+
+
+
+
+
+ 1.20
+ Agriculture
+ Agroecology
+ Agronomy
+ Multidisciplinary study, research and development within the field of agriculture.
+ Agricultural_science
+ Agricultural biotechnology
+ Agricultural economics
+ Animal breeding
+ Animal husbandry
+ Animal nutrition
+ Farming systems research
+ Food process engineering
+ Food security
+ Horticulture
+ Phytomedicine
+ Plant breeding
+ Plant cultivation
+ Plant nutrition
+ Plant pathology
+ Soil science
+
+
+ Agricultural science
+
+
+
+
+
+
+
+
+
+ 1.20
+ Approach which samples, in parallel, all genes in all organisms present in a given sample, e.g. to provide insight into biodiversity and function.
+ Shotgun metagenomic sequencing
+ Metagenomic_sequencing
+
+
+ Metagenomic sequencing
+
+
+
+
+
+
+
+
+
+ 1.21
+ Study of the environment, the interactions between its physical, chemical, and biological components and its effect on life. Also how humans impact upon the environment, and how we can manage and utilise natural resources.
+ Environment
+ Environmental_science
+
+
+ Environmental sciences
+
+
+
+
+
+
+
+
+
+ 1.22
+ The study and simulation of molecular conformations using a computational model and computer simulations.
+
+
+ This includes methods such as Molecular Dynamics, Coarse-grained dynamics, metadynamics, Quantum Mechanics, QM/MM, Markov State Models, etc.
+ Biomolecular simulation
+
+
+
+
+
+
+
+
+
+ 1.22
+ The application of multi-disciplinary science and technology for the construction of artificial biological systems for diverse applications.
+ Biomimetic chemistry
+
+
+ Synthetic biology
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ The application of biotechnology to directly manipulate an organism's genes.
+ Genetic manipulation
+ Genetic modification
+ Genetic_engineering
+ Genome editing
+ Genome engineering
+
+
+ Genetic engineering
+
+
+
+
+
+
+
+
+
+ 1.24
+ A field of biological research focused on the discovery and identification of peptides, typically by comparing mass spectra against a protein database.
+ Proteogenomics
+
+
+ Proteogenomics
+
+
+
+
+
+
+
+
+
+ 1.24
+ Laboratory experiment to identify the differences between a specific genome (of an individual) and a reference genome (developed typically from many thousands of individuals). WGS re-sequencing is used as the gold standard to detect variations compared to a given reference genome, including small variants (SNP and InDels) as well as larger genome re-organisations (CNVs, translocations, etc.).
+ Resequencing
+ Amplicon panels
+ Amplicon sequencing
+ Amplicon-based sequencing
+ Highly targeted resequencing
+ Whole genome resequencing (WGR)
+ Whole-genome re-sequencing (WGSR)
+ Amplicon sequencing is the ultra-deep sequencing of PCR products (amplicons), usually for the purpose of efficient genetic variant identification and characterisation in specific genomic regions.
+ Ultra-deep sequencing
+ Genome resequencing
+
+
+
+
+
+
+
+
+
+ 1.24
+ A biomedical field that bridges immunology and genetics, to study the genetic basis of the immune system.
+ Immune system genetics
+ Immungenetics
+ Immunology and genetics
+ Immunogenetics
+ Immunogenes
+
+
+ This involves the study of often complex genetic traits underlying diseases involving defects in the immune system. For example, identifying target genes for therapeutic approaches, or genetic variations involved in immunological pathology.
+ Immunogenetics
+
+
+
+
+
+
+
+
+
+ 1.24
+ Interdisciplinary science focused on extracting information from chemical systems by data analytical approaches, for example multivariate statistics, applied mathematics, and computer science.
+ Chemometrics
+
+
+ Chemometrics
+
+
+
+
+
+
+
+
+
+ 1.24
+ Cytometry is the measurement of the characteristics of cells.
+ Cytometry
+ Flow cytometry
+ Image cytometry
+ Mass cytometry
+
+
+ Cytometry
+
+
+
+
+
+
+
+
+
+ 1.24
+ Biotechnology approach that seeks to optimize cellular genetic and regulatory processes in order to increase the cells' production of a certain substance.
+
+
+ Metabolic engineering
+
+
+
+
+
+
+
+
+
+ 1.24
+ Molecular biology methods used to analyze the spatial organization of chromatin in a cell.
+ 3C technologies
+ 3C-based methods
+ Chromosome conformation analysis
+ Chromosome_conformation_capture
+ Chromatin accessibility
+ Chromatin accessibility assay
+ Chromosome conformation capture
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The study of microbe gene expression within natural environments (i.e. the metatranscriptome).
+ Metatranscriptomics
+
+
+ Metatranscriptomics methods can be used for whole gene expression profiling of complex microbial communities.
+ Metatranscriptomics
+
+
+
+
+
+
+
+
+
+ 1.24
+ The reconstruction and analysis of genomic information in extinct species.
+ Paleogenomics
+ Ancestral genomes
+ Paleogenetics
+ Paleogenomics
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The biological classification of organisms by categorizing them in groups ("clades") based on their most recent common ancestor.
+ Cladistics
+ Tree of life
+
+
+ Cladistics
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The study of the process and mechanism of change of biomolecules such as DNA, RNA, and proteins across generations.
+ Molecular_evolution
+
+
+ Molecular evolution
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Immunoinformatics is the field of computational biology that deals with the study of immunological questions. Immunoinformatics is at the interface between immunology and computer science. It takes advantage of computational, statistical, mathematical approaches and enhances the understanding of immunological knowledge.
+ Computational immunology
+ Immunoinformatics
+ This involves the study of often complex genetic traits underlying diseases involving defects in the immune system. For example, identifying target genes for therapeutic approaches, or genetic variations involved in immunological pathology.
+ Immunoinformatics
+
+
+
+
+
+
+
+
+
+ 1.24
+ A diagnostic imaging technique based on the application of ultrasound.
+ Standardized echography
+ Ultrasound imaging
+ Echography
+ Diagnostic sonography
+ Medical ultrasound
+ Standard echography
+ Ultrasonography
+
+
+ Echography
+
+
+
+
+
+
+
+
+
+ 1.24
+ Experimental approaches to determine the rates of metabolic reactions - the metabolic fluxes - within a biological entity.
+ Fluxomics
+ The "fluxome" is the complete set of metabolic fluxes in a cell, and is a dynamic aspect of phenotype.
+ Fluxomics
+
+
+
+
+
+
+
+
+
+ 1.12
+ An experiment for studying protein-protein interactions.
+ Protein_interaction_experiment
+ Co-immunoprecipitation
+ Phage display
+ Yeast one-hybrid
+ Yeast two-hybrid
+
+
+ This used to have the ID http://edamontology.org/topic_3557 but the numerical part (owing to an error) duplicated http://edamontology.org/operation_3557 ('Imputation'). ID of this concept set to http://edamontology.org/topic_3957 in EDAM 1.24.
+ Protein interaction experiment
+
+
+
+
+
+
+
+
+
+ 1.25
+ A DNA structural variation, specifically a duplication or deletion event, resulting in sections of the genome to be repeated, or the number of repeats in the genome to vary between individuals.
+ Copy_number_variation
+ CNV deletion
+ CNV duplication
+ CNV insertion / amplification
+ Complex CNV
+ Copy number variant
+ Copy number variation
+
+
+
+
+
+
+
+
+
+ 1.25
+ The branch of genetics concerned with the relationships between chromosomes and cellular behaviour, especially during mitosis and meiosis.
+
+
+ Cytogenetics
+
+
+
+
+
+
+
+
+
+ 1.25
+ The design of vaccines to protect against a particular pathogen, including antigens, delivery systems, and adjuvants to elicit a predictable immune response against specific epitopes.
+ Vaccinology
+ Rational vaccine design
+ Reverse vaccinology
+ Structural vaccinology
+ Structure-based immunogen design
+ Vaccine design
+
+
+ Vaccinology
+
+
+
+
+
+
+
+
+
+ 1.25
+ The study of the immune system as a whole, its regulation and response to pathogens using genome-wide approaches.
+
+
+ Immunomics
+
+
+
+
+
+
+
+
+
+ 1.25
+ Epistatic genetic interaction
+ Epistatic interactions
+
+
+ Epistasis can be defined as the ability of the genotype at one locus to supersede the phenotypic effect of a mutation at another locus. This interaction between genes can occur at different levels: gene expression, protein levels, etc.
+ Epistasis
+
+ http://purl.bioontology.org/ontology/MSH/D004843
+
+
+
+
+
+
+
+
+ 1.26
+
+ Open science encompasses the practices of making scientific research transparent and participatory, and its outputs publicly accessible.
+
+
+ Open science
+
+
+
+
+
+
+
+ 1.26
+ Data rescue
+ Data rescue denotes digitalisation, formatting, archival, and publication of data that were not available in accessible or usable form. Examples are data from private archives, data inside publications, or in paper records stored privately or publicly.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ FAIR data is data that meets the principles of being findable, accessible, interoperable, and reusable.
+ Findable, accessible, interoperable, reusable data
+ Open data
+ FAIR data principles
+ FAIRification
+ A substantially overlapping term is 'open data', i.e. publicly available data that is free to use, distribute, and create derivative work from, without restrictions. Open data does not automatically have to be FAIR (e.g. findable or interoperable), while FAIR data does in some cases not have to be publicly available without restrictions (especially sensitive personal data).
+
+
+ FAIR data
+
+
+
+
+
+
+
+ 1.26
+ Antimicrobial Resistance
+ AMR
+ Antifungal resistance
+ Antiviral resistance
+ Antiprotozoal resistance
+ Multiple drug resistance (MDR)
+ Multidrug resistance
+ Multiresistance
+ Extensive drug resistance (XDR)
+ Pandrug resistance (PDR)
+ Total drug resistance (TDR)
+
+
+ Microbial mechanisms for protecting microorganisms against antimicrobial agents.
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Electroencephalography
+ EEG
+
+
+ The monitoring method for measuring electrical activity in the brain.
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Electrocardiography
+ EKG
+ ECG
+
+
+ The monitoring method for measuring electrical activity in the heart.
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Cryogenic electron microscopy
+ cryo-EM
+
+
+ A method for studying biomolecules and other structures at very low (cryogenic) temperature using electron microscopy.
+
+
+
+
+
+
+
+
+
+ 1.26
+ Biosciences
+ Life sciences
+
+
+ Biosciences, or life sciences, include fields of study related to life, living beings, and biomolecules.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The carbon cycle is the biogeochemical pathway of carbon moving through the different parts of the Earth (such as ocean, atmosphere, soil), or eventually another planet.
+ Biogeochemical cycle
+
+
+ Carbon cycle
+
+
+ Note that the carbon-nitrogen-oxygen (CNO) cycle (https://en.wikipedia.org/wiki/CNO_cycle) is a completely different, thermonuclear reaction in stars.
+
+
+
+
+
+
+ 1.26
+ Multiomics
+ Panomics
+ Pan-omics
+ Integrative omics
+ Multi-omics
+
+
+ Multiomics concerns integration of data from multiple omics (e.g. transcriptomics, proteomics, epigenomics).
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+ Ribosome Profiling
+ RIBO-seq
+ ribo-seq
+ RiboSeq
+ Ribo-Seq
+ ribosomal footprinting
+ translation footprinting
+
+
+ With ribosome profiling, ribosome-protected mRNA fragments are analyzed with RNA-seq techniques leading to a genome-wide measurement of the translation landscape.
+
+
+
+
+
+
+
+
+ 1.26
+
+ Single-Cell Sequencing
+ Single Cell Genomics
+
+
+ Combined with NGS (Next Generation Sequencing) technologies, single-cell sequencing allows the study of genetic information (DNA, RNA, epigenome...) at a single cell level. It is often used for differential analysis and gene expression profiling.
+
+
+
+
+
+
+
+
+
+ 1.26
+
+ Acoustics
+
+
+ The study of mechanical waves in liquids, solids, and gases.
+
+
+
+
+
+
+ 1.26
+
+
+
+ Microfluidics
+ Fluidics
+
+
+ Interdisciplinary study of behavior, precise control, and manipulation of low (microlitre) volume fluids in constrained space.
+
+
+
+
+
+
+
+
+ 1.26
+ Genomic imprinting
+ Gene imprinting
+
+
+ Genomic imprinting is a gene regulation mechanism by which a subset of genes are expressed from one of the two parental chromosomes only. Imprinted genes are organized in clusters, their silencing/activation of the imprinted loci involves epigenetic marks (DNA methylation, etc) and so-called imprinting control regions (ICR). It has been described in mammals, but also plants and insects.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Environmental DNA (eDNA)
+ Environmental RNA (eRNA)
+ Environmental sequencing
+ Taxonomic profiling
+ Metabarcoding is the barcoding of (environmental) DNA or RNA to identify multiple taxa from the same sample.
+ DNA metabarcoding
+ Environmental metabarcoding
+ RNA metabarcoding
+ eDNA metabarcoding
+ eRNA metabarcoding
+
+
+ Typically, high-throughput sequencing is performed and the resulting sequence reads are matched to DNA barcodes in a reference database.
+ Metabarcoding
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+
+ An obsolete concept (redefined in EDAM).
+
+ Needed for conversion to the OBO format.
+ Obsolete concept (EDAM)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/edamfu/tests/EDAM_dev.robot.owl b/edamfu/tests/EDAM_dev.robot.owl
new file mode 100644
index 0000000..a7f48c6
--- /dev/null
+++ b/edamfu/tests/EDAM_dev.robot.owl
@@ -0,0 +1,61094 @@
+
+
+
+
+ 4040
+
+ 03.10.2023 11:14 UTC
+ EDAM http://edamontology.org/ "EDAM relations, concept properties, and subsets"
+ EDAM_data http://edamontology.org/data_ "EDAM types of data"
+ EDAM_format http://edamontology.org/format_ "EDAM data formats"
+ EDAM_operation http://edamontology.org/operation_ "EDAM operations"
+ EDAM_topic http://edamontology.org/topic_ "EDAM topics"
+ EDAM is a community project and its development can be followed and contributed to at https://github.com/edamontology/edamontology.
+ EDAM is particularly suitable for semantic annotations and categorisation of diverse resources related to data analysis and management: e.g. tools, workflows, learning materials, or standards. EDAM is also useful in data management itself, for recording provenance metadata of processed data.
+ https://github.com/edamontology/edamontology/graphs/contributors and many more!
+ Hervé Ménager
+ Jon Ison
+ Matúš Kalaš
+ EDAM is a domain ontology of data analysis and data management in bio- and other sciences, and science-based applications. It comprises concepts related to analysis, modelling, optimisation, and data life-cycle. Targeting usability by diverse users, the structure of EDAM is relatively simple, divided into 4 main sections: Topic, Operation, Data (incl. Identifier), and Format.
+ application/rdf+xml
+ EDAM - The ontology of data analysis and management
+
+
+ 1.26_dev
+
+
+
+
+
+
+
+
+
+ Matúš Kalaš
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+ true
+ Publication reference
+ 'Citation' concept property ('citation' metadata tag) contains a dereferenceable URI, preferably including a DOI, pointing to a citeable publication of the given data format.
+ Publication
+
+ Citation
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ Version in which a concept was created.
+
+ Created in
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ A comment explaining why the concept should be or was deprecated, including name of person commenting (jison, mkalas etc.).
+
+ deprecation_comment
+
+
+
+
+
+
+
+ true
+ 'Documentation' trailing modifier (qualifier, 'documentation') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to a page with explanation, description, documentation, or specification of the given data format.
+ Specification
+
+ Documentation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ 'Example' concept property ('example' metadata tag) lists examples of valid values of types of identifiers (accessions). Applicable to some other types of data, too.
+
+ Separated by bar ('|'). For more complex data and data formats, it can be a link to a website with examples, instead.
+ Example
+
+
+
+
+
+
+
+ true
+ 'File extension' concept property ('file_extension' metadata tag) lists examples of usual file extensions of formats.
+
+ N.B.: File extensions that are not correspondigly defined at http://filext.com are recorded in EDAM only if not in conflict with http://filext.com, and/or unique and usual within life-science computing.
+ Separated by bar ('|'), without a dot ('.') prefix, preferably not all capital characters.
+ File extension
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ 'Information standard' trailing modifier (qualifier, 'information_standard') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to an information standard supported by the given data format.
+ Minimum information checklist
+ Minimum information standard
+
+ "Supported by the given data format" here means, that the given format enables representation of data that satisfies the information standard.
+ Information standard
+
+
+
+
+
+
+
+ true
+ When 'true', the concept has been proposed to be deprecated.
+
+ deprecation_candidate
+
+
+
+
+
+
+
+ true
+ When 'true', the concept has been proposed to be refactored.
+
+ refactor_candidate
+
+
+
+
+
+
+
+ true
+ When 'true', the concept has been proposed or is supported within Debian as a tag.
+
+ isdebtag
+
+
+
+
+
+
+
+ true
+ 'Media type' trailing modifier (qualifier, 'media_type') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to a page specifying a media type of the given data format.
+ MIME type
+
+ Media type
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ Whether terms associated with this concept are recommended for use in annotation.
+
+ notRecommendedForAnnotation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ Version in which a concept was made obsolete.
+
+ Obsolete since
+
+
+
+
+
+
+
+ true
+ EDAM concept URI of the erstwhile "parent" of a now deprecated concept.
+
+ Old parent
+
+
+
+
+
+
+
+ true
+ EDAM concept URI of an erstwhile related concept (by has_input, has_output, has_topic, is_format_of, etc.) of a now deprecated concept.
+
+ Old related
+
+
+
+
+
+
+
+ true
+ 'Ontology used' concept property ('ontology_used' metadata tag) of format concepts links to a domain ontology that is used inside the given data format, or contains a note about ontology use within the format.
+
+ Ontology used
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ 'Organisation' trailing modifier (qualifier, 'organisation') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to an organisation that developed, standardised, and maintains the given data format.
+ Organization
+
+ Organisation
+
+
+
+
+
+
+
+ true
+ A comment explaining the proposed refactoring, including name of person commenting (jison, mkalas etc.).
+
+ refactor_comment
+
+
+
+
+
+
+
+ true
+ 'Regular expression' concept property ('regex' metadata tag) specifies the allowed values of types of identifiers (accessions). Applicable to some other types of data, too.
+
+ Regular expression
+
+
+
+
+
+
+
+ 'Related term' concept property ('related_term'; supposedly a synonym modifier in OBO format) states a related term - not necessarily closely semantically related - that users (also non-specialists) may use when searching.
+
+ Related term
+
+
+
+
+
+
+
+
+ true
+ 'Repository' trailing modifier (qualifier, 'repository') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to the public source-code repository where the given data format is developed or maintained.
+ Public repository
+ Source-code repository
+
+ Repository
+
+
+
+
+
+
+
+ true
+ Name of thematic editor (http://biotools.readthedocs.io/en/latest/governance.html#registry-editors) responsible for this concept and its children.
+
+ thematic_editor
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_format B' defines for the subject A, that it has the object B as its data format.
+
+ false
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is (or is in a role of) 'Data', or an input, output, input or output argument of an 'Operation'. Object B can either be a concept that is a 'Format', or in unexpected cases an entity outside of an ontology that is a 'Format' or is in the role of a 'Format'. In EDAM, 'has_format' is not explicitly defined between EDAM concepts, only the inverse 'is_format_of'.
+ has format
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_function B' defines for the subject A, that it has the object B as its function.
+ OBO_REL:bearer_of
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is (or is in a role of) a function, or an entity outside of an ontology that is (or is in a role of) a function specification. In the scope of EDAM, 'has_function' serves only for relating annotated entities outside of EDAM with 'Operation' concepts.
+ has function
+
+
+
+
+
+
+
+ OBO_REL:bearer_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:bearer_of' is narrower in the sense that it only relates ontological categories (concepts) that are an 'independent_continuant' (snap:IndependentContinuant) with ontological categories that are a 'specifically_dependent_continuant' (snap:SpecificallyDependentContinuant), and broader in the sense that it relates with any borne objects not just functions of the subject.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_identifier B' defines for the subject A, that it has the object B as its identifier.
+
+ false
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is an 'Identifier', or an entity outside of an ontology that is an 'Identifier' or is in the role of an 'Identifier'. In EDAM, 'has_identifier' is not explicitly defined between EDAM concepts, only the inverse 'is_identifier_of'.
+ has identifier
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_input B' defines for the subject A, that it has the object B as a necessary or actual input or input argument.
+ OBO_REL:has_participant
+
+ true
+ Subject A can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that has an 'Operation' function or is an 'Operation'. Object B can be any concept or entity. In EDAM, only 'has_input' is explicitly defined between EDAM concepts ('Operation' 'has_input' 'Data'). The inverse, 'is_input_of', is not explicitly defined.
+ has input
+
+
+
+
+
+
+ OBO_REL:has_participant
+ 'OBO_REL:has_participant' is narrower in the sense that it only relates ontological categories (concepts) that are a 'process' (span:Process) with ontological categories that are a 'continuant' (snap:Continuant), and broader in the sense that it relates with any participating objects not just inputs or input arguments of the subject.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_output B' defines for the subject A, that it has the object B as a necessary or actual output or output argument.
+ OBO_REL:has_participant
+
+ true
+ Subject A can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that has an 'Operation' function or is an 'Operation'. Object B can be any concept or entity. In EDAM, only 'has_output' is explicitly defined between EDAM concepts ('Operation' 'has_output' 'Data'). The inverse, 'is_output_of', is not explicitly defined.
+ has output
+
+
+
+
+
+
+ OBO_REL:has_participant
+ 'OBO_REL:has_participant' is narrower in the sense that it only relates ontological categories (concepts) that are a 'process' (span:Process) with ontological categories that are a 'continuant' (snap:Continuant), and broader in the sense that it relates with any participating objects not just outputs or output arguments of the subject. It is also not clear whether an output (result) actually participates in the process that generates it.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_topic B' defines for the subject A, that it has the object B as its topic (A is in the scope of a topic B).
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is a 'Topic', or in unexpected cases an entity outside of an ontology that is a 'Topic' or is in the role of a 'Topic'. In EDAM, only 'has_topic' is explicitly defined between EDAM concepts ('Operation' or 'Data' 'has_topic' 'Topic'). The inverse, 'is_topic_of', is not explicitly defined.
+ has topic
+
+
+
+
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_format_of B' defines for the subject A, that it is a data format of the object B.
+ OBO_REL:quality_of
+
+ false
+ Subject A can either be a concept that is a 'Format', or in unexpected cases an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is a 'Format' or is in the role of a 'Format'. Object B can be any concept or entity outside of an ontology that is (or is in a role of) 'Data', or an input, output, input or output argument of an 'Operation'. In EDAM, only 'is_format_of' is explicitly defined between EDAM concepts ('Format' 'is_format_of' 'Data'). The inverse, 'has_format', is not explicitly defined.
+ is format of
+
+
+
+
+
+ OBO_REL:quality_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:quality_of' might be seen narrower in the sense that it only relates subjects that are a 'quality' (snap:Quality) with objects that are an 'independent_continuant' (snap:IndependentContinuant), and is broader in the sense that it relates any qualities of the object.
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_function_of B' defines for the subject A, that it is a function of the object B.
+ OBO_REL:function_of
+ OBO_REL:inheres_in
+
+ true
+ Subject A can either be a concept that is (or is in a role of) a function, or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is (or is in a role of) a function specification. Object B can be any concept or entity. Within EDAM itself, 'is_function_of' is not used.
+ is function of
+
+
+
+
+
+
+ OBO_REL:function_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:function_of' only relates subjects that are a 'function' (snap:Function) with objects that are an 'independent_continuant' (snap:IndependentContinuant), so for example no processes. It does not define explicitly that the subject is a function of the object.
+
+
+
+
+ OBO_REL:inheres_in
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:inheres_in' is narrower in the sense that it only relates ontological categories (concepts) that are a 'specifically_dependent_continuant' (snap:SpecificallyDependentContinuant) with ontological categories that are an 'independent_continuant' (snap:IndependentContinuant), and broader in the sense that it relates any borne subjects not just functions.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_identifier_of B' defines for the subject A, that it is an identifier of the object B.
+
+ false
+ Subject A can either be a concept that is an 'Identifier', or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is an 'Identifier' or is in the role of an 'Identifier'. Object B can be any concept or entity outside of an ontology. In EDAM, only 'is_identifier_of' is explicitly defined between EDAM concepts (only 'Identifier' 'is_identifier_of' 'Data'). The inverse, 'has_identifier', is not explicitly defined.
+ is identifier of
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_input_of B' defines for the subject A, that it is a necessary or actual input or input argument of the object B.
+ OBO_REL:participates_in
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology that has an 'Operation' function or is an 'Operation'. In EDAM, 'is_input_of' is not explicitly defined between EDAM concepts, only the inverse 'has_input'.
+ is input of
+
+
+
+
+
+
+ OBO_REL:participates_in
+ 'OBO_REL:participates_in' is narrower in the sense that it only relates ontological categories (concepts) that are a 'continuant' (snap:Continuant) with ontological categories that are a 'process' (span:Process), and broader in the sense that it relates any participating subjects not just inputs or input arguments.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_output_of B' defines for the subject A, that it is a necessary or actual output or output argument of the object B.
+ OBO_REL:participates_in
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology that has an 'Operation' function or is an 'Operation'. In EDAM, 'is_output_of' is not explicitly defined between EDAM concepts, only the inverse 'has_output'.
+ is output of
+
+
+
+
+
+
+ OBO_REL:participates_in
+ 'OBO_REL:participates_in' is narrower in the sense that it only relates ontological categories (concepts) that are a 'continuant' (snap:Continuant) with ontological categories that are a 'process' (span:Process), and broader in the sense that it relates any participating subjects not just outputs or output arguments. It is also not clear whether an output (result) actually participates in the process that generates it.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_topic_of B' defines for the subject A, that it is a topic of the object B (a topic A is the scope of B).
+ OBO_REL:quality_of
+
+ true
+ Subject A can either be a concept that is a 'Topic', or in unexpected cases an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is a 'Topic' or is in the role of a 'Topic'. Object B can be any concept or entity outside of an ontology. In EDAM, 'is_topic_of' is not explicitly defined between EDAM concepts, only the inverse 'has_topic'.
+ is topic of
+
+
+
+
+
+ OBO_REL:quality_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:quality_of' might be seen narrower in the sense that it only relates subjects that are a 'quality' (snap:Quality) with objects that are an 'independent_continuant' (snap:IndependentContinuant), and is broader in the sense that it relates any qualities of the object.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A type of computational resource used in bioinformatics.
+
+ Resource type
+ true
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Information, represented in an information artefact (data record) that is 'understandable' by dedicated computational tools that can use the data as input or produce it as output.
+ Data record
+ Data set
+ Datum
+
+
+ Data
+
+
+
+
+
+
+
+
+
+
+
+
+ Data record
+ EDAM does not distinguish a data record (a tool-understandable information artefact) from data or datum (its content, the tool-understandable encoding of an information).
+
+
+
+
+ Data set
+ EDAM does not distinguish the multiplicity of data, such as one data item (datum) versus a collection of data (data set).
+
+
+
+
+ Datum
+ EDAM does not distinguish the multiplicity of data, such as one data item (datum) versus a collection of data (data set).
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A bioinformatics package or tool, e.g. a standalone application or web service.
+
+
+ Tool
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A digital data archive typically based around a relational model but sometimes using an object-oriented, tree or graph-based model.
+
+
+ Database
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An ontology of biological or bioinformatics concepts and relations, a controlled vocabulary, structured glossary etc.
+
+
+ Ontology
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A directory on disk from which files are read.
+
+ Directory metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controlled vocabulary from National Library of Medicine. The MeSH thesaurus is used to index articles in biomedical journals for the Medline/PubMED databases.
+
+ MeSH vocabulary
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controlled vocabulary for gene names (symbols) from HUGO Gene Nomenclature Committee.
+
+ HGNC vocabulary
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Compendium of controlled vocabularies for the biomedical domain (Unified Medical Language System).
+
+ UMLS vocabulary
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A text token, number or something else which identifies an entity, but which may not be persistent (stable) or unique (the same identifier may identify multiple things).
+ ID
+
+
+
+ Identifier
+
+
+
+
+
+
+
+
+ Almost exact but limited to identifying resources, and being unambiguous.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An entry (retrievable via URL) from a biological database.
+
+ Database entry
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mass of a molecule.
+
+
+ Molecular mass
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_formal_charge
+ Net charge of a molecule.
+
+
+ Molecular charge
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A specification of a chemical structure.
+ Chemical structure specification
+
+
+ Chemical formula
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR quantitative descriptor (name-value pair) of chemical structure.
+
+
+ QSAR descriptors have numeric values that quantify chemical information encoded in a symbolic representation of a molecule. They are used in quantitative structure activity relationship (QSAR) applications. Many subtypes of individual descriptors (not included in EDAM) cover various types of protein properties.
+ QSAR descriptor
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possible in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw molecular sequence (string of characters) which might include ambiguity, unknown positions and non-sequence characters.
+
+
+ Non-sequence characters may be used for example for gaps and translation stop.
+ Raw sequence
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SO:2000061
+ A molecular sequence and associated metadata.
+
+
+ Sequence record
+ http://purl.bioontology.org/ontology/MSH/D058977
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A collection of one or typically multiple molecular sequences (which can include derived data or metadata) that do not (typically) correspond to molecular sequence database records or entries and which (typically) are derived from some analytical method.
+ Alignment reference
+ SO:0001260
+
+
+ An example is an alignment reference; one or a set of reference molecular sequences, structures, or profiles used for alignment of genomic, transcriptomic, or proteomic experimental data.
+ This concept may be used for arbitrary sequence sets and associated data arising from processing.
+ Sequence set
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A character used to replace (mask) other characters in a molecular sequence.
+
+ Sequence mask character
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of sequence masking to perform.
+
+ Sequence masking is where specific characters or positions in a molecular sequence are masked (replaced) with another (mask character). The mask type indicates what is masked, for example regions that are not of interest or which are information-poor including acidic protein regions, basic protein regions, proline-rich regions, low compositional complexity regions, short-periodicity internal repeats, simple repeats and low complexity regions. Masked sequences are used in database search to eliminate statistically significant but biologically uninteresting hits.
+ Sequence mask type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+
+ The strand of a DNA sequence (forward or reverse).
+
+ The forward or 'top' strand might specify a sequence is to be used as given, the reverse or 'bottom' strand specifying the reverse complement of the sequence is to be used.
+ DNA sense specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A specification of sequence length(s).
+
+ Sequence length specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Basic or general information concerning molecular sequences.
+
+ This is used for such things as a report including the sequence identifier, type and length.
+ Sequence metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ How the annotation of a sequence feature (for example in EMBL or Swiss-Prot) was derived.
+
+
+ This might be the name and version of a software tool, the name of a database, or 'curated' to indicate a manual annotation (made by a human).
+ Sequence feature source
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of sequence hits and associated data from searching a database of sequences (for example a BLAST search). This will typically include a list of scores (often with statistical evaluation) and a set of alignments for the hits.
+ Database hits (sequence)
+ Sequence database hits
+ Sequence database search results
+ Sequence search hits
+
+
+ The score list includes the alignment score, percentage of the query sequence matched, length of the database sequence entry in this alignment, identifier of the database sequence entry, excerpt of the database sequence entry description etc.
+ Sequence search results
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on the location of matches ("hits") between sequences, sequence profiles, motifs (conserved or functional patterns) and other types of sequence signatures.
+ Profile-profile alignment
+ Protein secondary database search results
+ Search results (protein secondary database)
+ Sequence motif hits
+ Sequence motif matches
+ Sequence profile alignment
+ Sequence profile hits
+ Sequence profile matches
+ Sequence-profile alignment
+
+
+ A "profile-profile alignment" is an alignment of two sequence profiles, each profile typically representing a sequence alignment.
+ A "sequence-profile alignment" is an alignment of one or more molecular sequence(s) to one or more sequence profile(s) (each profile typically representing a sequence alignment).
+ This includes reports of hits from a search of a protein secondary or domain database. Data associated with the search or alignment might also be included, e.g. ranked list of best-scoring sequences, a graphical representation of scores etc.
+ Sequence signature matches
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data files used by motif or profile methods.
+
+ Sequence signature model
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning specific or conserved patterns in molecular sequences and the classifiers used for their identification, including sequence motifs, profiles or other diagnostic elements.
+
+
+ This can include metadata about a motif or sequence profile such as its name, length, technical details about the profile construction, and so on.
+ Sequence signature data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment of exact matches between subsequences (words) within two or more molecular sequences.
+
+ Sequence alignment (words)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A dotplot of sequence similarities identified from word-matching or character comparison.
+
+
+ Dotplot
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple molecular sequences.
+ Multiple sequence alignment
+ msa
+
+
+ Sequence alignment
+
+ http://purl.bioontology.org/ontology/MSH/D016415
+ http://semanticscience.org/resource/SIO_010066
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Some simple value controlling a sequence alignment (or similar 'match') operation.
+
+ Sequence alignment parameter
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A value representing molecular sequence similarity.
+
+
+ Sequence similarity score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Report of general information on a sequence alignment, typically including a description, sequence identifiers and alignment score.
+
+ Sequence alignment metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report of molecular sequence alignment-derived data or metadata.
+ Sequence alignment metadata
+
+
+ Use this for any computer-generated reports on sequence alignments, and for general information (metadata) on a sequence alignment, such as a description, sequence identifiers and alignment score.
+ Sequence alignment report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ "Sequence-profile alignment" and "Profile-profile alignment" are synonymous with "Sequence signature matches" which was already stated as including matches (alignment) and other data.
+ 1.25 or earlier
+
+ A profile-profile alignment (each profile typically representing a sequence alignment).
+
+
+ Profile-profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ "Sequence-profile alignment" and "Profile-profile alignment" are synonymous with "Sequence signature matches" which was already stated as including matches (alignment) and other data.
+ 1.24
+
+ Alignment of one or more molecular sequence(s) to one or more sequence profile(s) (each profile typically representing a sequence alignment).
+
+
+ Sequence-profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:phylogenetic_distance_matrix
+ A matrix of estimated evolutionary distance between molecular sequences, such as is suitable for phylogenetic tree calculation.
+ Phylogenetic distance matrix
+
+
+ Methods might perform character compatibility analysis or identify patterns of similarity in an alignment or data matrix.
+ Sequence distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic character data from which a phylogenetic tree may be generated.
+
+
+ As defined, this concept would also include molecular sequences, microsatellites, polymorphisms (RAPDs, RFLPs, or AFLPs), restriction sites and fragments
+ Phylogenetic character data
+ http://www.evolutionaryontology.org/cdao.owl#Character
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Tree
+ Moby:myTree
+ Moby:phylogenetic_tree
+ The raw data (not just an image) from which a phylogenetic tree is directly generated or plotted, such as topology, lengths (in time or in expected amounts of variance) and a confidence interval for each length.
+ Phylogeny
+
+
+ A phylogenetic tree is usually constructed from a set of sequences from which an alignment (or data matrix) is calculated. See also 'Phylogenetic tree image'.
+ Phylogenetic tree
+ http://purl.bioontology.org/ontology/MSH/D010802
+ http://www.evolutionaryontology.org/cdao.owl#Tree
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of integer or floating point numbers for amino acid or nucleotide sequence comparison.
+ Substitution matrix
+
+
+ The comparison matrix might include matrix name, optional comment, height and width (or size) of matrix, an index row/column (of characters) and data rows/columns (of integers or floats).
+ Comparison matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Predicted or actual protein topology represented as a string of protein secondary structure elements.
+
+
+ The location and size of the secondary structure elements and intervening loop regions is usually indicated.
+ Protein topology
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Secondary structure (predicted or real) of a protein.
+
+
+ Protein features report (secondary structure)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Super-secondary structure of protein sequence(s).
+
+
+ Super-secondary structures include leucine zippers, coiled coils, Helix-Turn-Helix etc.
+ Protein features report (super-secondary)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Alignment of the (1D representations of) secondary structure of two or more proteins.
+ Secondary structure alignment (protein)
+
+
+ Protein secondary structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on protein secondary structure alignment-derived data or metadata.
+
+ Secondary structure alignment metadata (protein)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:RNAStructML
+ An informative report of secondary structure (predicted or real) of an RNA molecule.
+ Secondary structure (RNA)
+
+
+ This includes thermodynamically stable or evolutionarily conserved structures such as knots, pseudoknots etc.
+ RNA secondary structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Moby:RNAStructAlignmentML
+ Alignment of the (1D representations of) secondary structure of two or more RNA molecules.
+ Secondary structure alignment (RNA)
+
+
+ RNA secondary structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report of RNA secondary structure alignment-derived data or metadata.
+
+ Secondary structure alignment metadata (RNA)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a macromolecular tertiary (3D) structure or part of a structure.
+ Coordinate model
+ Structure data
+
+
+ The coordinate data may be predicted or real.
+ Structure
+ http://purl.bioontology.org/ontology/MSH/D015394
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An entry from a molecular tertiary (3D) structure database.
+
+ Tertiary structure record
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Results (hits) from searching a database of tertiary structure.
+
+ Structure database search results
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of molecular tertiary (3D) structures.
+
+
+ A tertiary structure alignment will include the untransformed coordinates of one macromolecule, followed by the second (or subsequent) structure(s) with all the coordinates transformed (by rotation / translation) to give a superposition.
+ Structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report of molecular tertiary structure alignment-derived data.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Structure alignment report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A value representing molecular structure similarity, measured from structure alignment or some other type of structure comparison.
+
+
+ Structure similarity score
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some type of structural (3D) profile or template (representing a structure or structure alignment).
+ 3D profile
+ Structural (3D) profile
+
+
+ Structural profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A 3D profile-3D profile alignment (each profile representing structures or a structure alignment).
+ Structural profile alignment
+
+
+ Structural (3D) profile alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An alignment of a sequence to a 3D profile (representing structures or a structure alignment).
+
+ Sequence-3D profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of values used for scoring sequence-structure compatibility.
+
+
+ Protein sequence-structure scoring matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An alignment of molecular sequence to structure (from threading sequence(s) through 3D structure or representation of structure(s)).
+
+
+ Sequence-structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific amino acid.
+
+ Amino acid annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific peptide.
+
+ Peptide annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative human-readable report about one or more specific protein molecules or protein structural domains, derived from analysis of primary (sequence or structural) data.
+ Gene product annotation
+
+
+ Protein report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of primarily non-positional data describing intrinsic physical, chemical or other properties of a protein molecule or model.
+ Protein physicochemical property
+ Protein properties
+ Protein sequence statistics
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. Data may be based on analysis of nucleic acid sequence or structural data, for example reports on the surface properties (shape, hydropathy, electrostatic patches etc) of a protein structure, protein flexibility or motion, and protein architecture (spatial arrangement of secondary structure).
+ Protein property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ 3D structural motifs in a protein.
+
+ Protein structural motifs and surfaces
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Data concerning the classification of the sequences and/or structures of protein structural domain(s).
+
+ Protein domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ structural domains or 3D folds in a protein or polypeptide chain.
+
+
+ Protein features report (domains)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on architecture (spatial arrangement of secondary structure) of a protein structure.
+
+ Protein architecture report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report on an analysis or model of protein folding properties, folding pathways, residues or sites that are key to protein folding, nucleation or stabilisation centers etc.
+
+
+ Protein folding report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data on the effect of (typically point) mutation on protein folding, stability, structure and function.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein features (mutation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein-protein interaction data from for example yeast two-hybrid analysis, protein microarrays, immunoaffinity chromatography followed by mass spectrometry, phage display etc.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein interaction raw data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the interactions (predicted or known) within or between a protein, structural domain or part of a protein. This includes intra- and inter-residue contacts and distances, as well as interactions with other proteins and non-protein entities such as nucleic acid, metal atoms, water, ions etc.
+ Protein interaction record
+ Protein interaction report
+ Protein report (interaction)
+ Protein-protein interaction data
+ Atom interaction data
+ Protein non-covalent interactions report
+ Residue interaction data
+
+
+ Protein interaction data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein classification data
+ An informative report on a specific protein family or other classification or group of protein sequences or structures.
+ Protein family annotation
+
+
+ Protein family report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The maximum initial velocity or rate of a reaction. It is the limiting velocity as substrate concentrations get very large.
+
+
+ Vmax
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Km is the concentration (usually in Molar units) of substrate that leads to half-maximal velocity of an enzyme-catalysed reaction.
+
+
+ Km
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific nucleotide base.
+
+ Nucleotide base annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of primarily non-positional data describing intrinsic physical, chemical or other properties of a nucleic acid molecule.
+ Nucleic acid physicochemical property
+ GC-content
+ Nucleic acid property (structural)
+ Nucleic acid structural property
+
+
+ Nucleic acid structural properties stiffness, curvature, twist/roll data or other conformational parameters or properties.
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Nucleic acid property
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data derived from analysis of codon usage (typically a codon usage table) of DNA sequences.
+ Codon usage report
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Codon usage data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GeneInfo
+ Moby:gene
+ Moby_namespace:Human_Readable_Description
+ A report on predicted or actual gene structure, regions which make an RNA product and features such as promoters, coding regions, splice sites etc.
+ Gene and transcript structure (report)
+ Gene annotation
+ Gene features report
+ Gene function (report)
+ Gene structure (report)
+ Nucleic acid features (gene and transcript structure)
+
+
+ This includes any report on a particular locus or gene. This might include the gene name, description, summary and so on. It can include details about the function of a gene, such as its encoded protein or a functional classification of the gene sequence according to the encoded protein(s).
+ Gene report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on the classification of nucleic acid / gene sequences according to the functional classification of their gene products.
+
+ Gene classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ stable, naturally occurring mutations in a nucleotide sequence including alleles, naturally occurring mutations such as single base nucleotide substitutions, deletions and insertions, RFLPs and other polymorphisms.
+
+
+ DNA variation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific chromosome.
+
+
+ This includes basic information, e.g. chromosome number, length, karyotype features, chromosome sequence etc.
+ Chromosome report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about the set of genes (or allelic forms) present in an individual, organism or cell and associated with a specific physical characteristic, or a report concerning an organism's traits and phenotypes.
+ Genotype/phenotype annotation
+
+
+ Genotype/phenotype report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ PCR experiments, e.g. quantitative real-time PCR.
+
+
+ PCR experiment report
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fluorescence trace data generated by an automated DNA sequencer, which can be interpreted as a molecular sequence (reads), given associated sequencing metadata such as base-call quality scores.
+
+
+ This is the raw data produced by a DNA sequencing machine.
+ Sequence trace
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An assembly of fragments of a (typically genomic) DNA sequence.
+ Contigs
+ SO:0000353
+ SO:0001248
+
+
+ Typically, an assembly is a collection of contigs (for example ESTs and genomic DNA fragments) that are ordered, aligned and merged. Annotation of the assembled sequence might be included.
+ Sequence assembly
+
+
+
+
+
+ SO:0001248
+ Perhaps surprisingly, the definition of 'SO:assembly' is narrower than the 'SO:sequence_assembly'.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Radiation hybrid (RH) scores for one or more markers.
+ Radiation Hybrid (RH) scores
+
+
+ Radiation Hybrid (RH) scores are used in Radiation Hybrid mapping.
+ RH scores
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about the linkage of alleles.
+ Gene annotation (linkage)
+ Linkage disequilibrium (report)
+
+
+ This includes linkage disequilibrium; the non-random association of alleles or polymorphisms at two or more loci (not necessarily on the same chromosome).
+ Genetic linkage report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data quantifying the level of expression of (typically) multiple genes, derived for example from microarray experiments.
+ Gene expression pattern
+
+
+ Gene expression profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ microarray experiments including conditions, protocol, sample:data relationships etc.
+
+
+ Microarray experiment report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data on oligonucleotide probes (typically for use with DNA microarrays).
+
+ Oligonucleotide probe data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Output from a serial analysis of gene expression (SAGE) experiment.
+
+ SAGE experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Massively parallel signature sequencing (MPSS) data.
+
+ MPSS experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Sequencing by synthesis (SBS) data.
+
+ SBS experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.14
+
+ Tag to gene assignments (tag mapping) of SAGE, MPSS and SBS data. Typically this is the sequencing-based expression profile annotated with gene identifiers.
+
+
+ Sequence tag profile (with gene assignment)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein X-ray crystallographic data
+ X-ray crystallography data.
+
+
+ Electron density map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nuclear magnetic resonance (NMR) raw data, typically for a protein.
+ Protein NMR data
+
+
+ Raw NMR data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein secondary structure from protein coordinate or circular dichroism (CD) spectroscopic data.
+ CD spectrum
+ Protein circular dichroism (CD) spectroscopic data
+
+
+ CD spectra
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Volume map data from electron microscopy.
+ 3D volume map
+ EM volume map
+ Electron microscopy volume map
+
+
+ Volume map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Annotation on a structural 3D model (volume map) from electron microscopy.
+
+
+ Electron microscopy model
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Two-dimensional gel electrophoresis image.
+
+
+ 2D PAGE image
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Spectra from mass spectrometry.
+ Mass spectrometry spectra
+
+
+ Mass spectrum
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A set of peptide masses (peptide mass fingerprint) from mass spectrometry.
+ Peak list
+ Protein fingerprint
+ Molecular weights standard fingerprint
+
+
+ A molecular weight standard fingerprint is standard protonated molecular masses e.g. from trypsin (modified porcine trypsin, Promega) and keratin peptides.
+ Peptide mass fingerprint
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein or peptide identifications with evidence supporting the identifications, for example from comparing a peptide mass fingerprint (from mass spectrometry) to a sequence database, or the set of typical spectra one obtains when running a protein through a mass spectrometer.
+ 'Protein identification'
+ Peptide spectrum match
+
+
+ Peptide identification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report about a specific biological pathway or network, typically including a map (diagram) of the pathway.
+
+ Pathway or network annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A map (typically a diagram) of a biological pathway.
+
+ Biological pathway map
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A definition of a data resource serving one or more types of data, including metadata and links to the resource or data proper.
+
+ Data resource definition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information, annotation or documentation concerning a workflow (but not the workflow itself).
+
+
+ Workflow metadata
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A biological model represented in mathematical terms.
+ Biological model
+
+
+ Mathematical model
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A value representing estimated statistical significance of some observed data; typically sequence database hits.
+
+
+ Statistical estimate score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Resource definition for an EMBOSS database.
+
+ EMBOSS database resource definition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a version of software or data, for example name, version number and release date.
+
+ Development status / maturity may be part of the version information, for example in case of tools, standards, or some data records.
+ Version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A mapping of the accession numbers (or other database identifier) of entries between (typically) two biological or biomedical databases.
+
+
+ The cross-mapping is typically a table where each row is an accession number and each column is a database being cross-referenced. The cells give the accession number or identifier of the corresponding entry in a database. If a cell in the table is not filled then no mapping could be found for the database. Additional information might be given on version, date etc.
+ Database cross-mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An index of data of biological relevance.
+
+
+ Data index
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information concerning an analysis of an index of biological data.
+ Database index annotation
+
+
+ Data index report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information on bioinformatics database(s) or other data sources such as name, type, description, URL etc.
+
+
+ Database metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information about one or more bioinformatics applications or packages, such as name, type, description, or other documentation.
+
+
+ Tool metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Textual metadata on a submitted or completed job.
+
+ Job metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Textual metadata on a software author or end-user, for example a person or other software.
+
+
+ User metadata
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific chemical compound.
+ Chemical compound annotation
+ Chemical structure report
+ Small molecule annotation
+
+
+ Small molecule report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a particular strain of organism cell line including plants, virus, fungi and bacteria. The data typically includes strain number, organism type, growth conditions, source and so on.
+ Cell line annotation
+ Organism strain data
+
+
+ Cell line report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific scent.
+
+ Scent annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A term (name) from an ontology.
+ Ontology class name
+ Ontology terms
+
+
+ Ontology term
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning or derived from a concept from a biological ontology.
+ Ontology class metadata
+ Ontology term metadata
+
+
+ Ontology concept data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:BooleanQueryString
+ Moby:Global_Keyword
+ Moby:QueryString
+ Moby:Wildcard_Query
+ Keyword(s) or phrase(s) used (typically) for text-searching purposes.
+ Phrases
+ Term
+
+
+ Boolean operators (AND, OR and NOT) and wildcard characters may be allowed.
+ Keyword
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GCP_SimpleCitation
+ Moby:Publication
+ Bibliographic data that uniquely identifies a scientific article, book or other published material.
+ Bibliographic reference
+ Reference
+
+
+ A bibliographic reference might include information such as authors, title, journal name, date and (possibly) a link to the abstract or full-text of the article if available.
+ Citation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A scientific text, typically a full text article from a scientific journal.
+ Article text
+ Scientific article
+
+
+ Article
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information resulting from text mining.
+ Text mining output
+
+
+ A text mining abstract will typically include an annotated list of words or sentences extracted from one or more scientific articles.
+ Text mining report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of a biological entity or phenomenon.
+
+ Entity identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of a data resource.
+
+ Data resource identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier that identifies a particular type of data.
+ Identifier (typed)
+
+
+
+ This concept exists only to assist EDAM maintenance and navigation in graphical browsers. It does not add semantic information. This branch provides an alternative organisation of the concepts nested under 'Accession' and 'Name'. All concepts under here are already included under 'Accession' or 'Name'.
+ Identifier (by type of entity)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a bioinformatics tool, e.g. an application or web service.
+
+
+
+ Tool identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a discrete entity (any biological thing with a distinct, discrete physical existence).
+
+ Discrete entity identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of an entity feature (a physical part or region of a discrete biological entity, or a feature that can be mapped to such a thing).
+
+ Entity feature identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a collection of discrete biological entities.
+
+ Entity collection identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a physical, observable biological occurrence or event.
+
+ Phenomenon identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name or other identifier of a molecule.
+
+
+
+ Molecule identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier (e.g. character symbol) of a specific atom.
+ Atom identifier
+
+
+
+ Atom ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name of a specific molecule.
+
+
+
+ Molecule name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a molecule.
+
+ For example, 'Protein', 'DNA', 'RNA' etc.
+ Molecule type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Unique identifier of a chemical compound.
+
+ Chemical identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a chromosome.
+
+
+
+ Chromosome name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a peptide chain.
+
+
+
+ Peptide identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein.
+
+
+
+ Protein identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name of a chemical compound.
+ Chemical name
+
+
+
+ Compound name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique registry number of a chemical compound.
+
+
+
+ Chemical registry number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Code word for a ligand, for example from a PDB file.
+
+ Ligand identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a drug.
+
+
+
+ Drug identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an amino acid.
+ Residue identifier
+
+
+
+ Amino acid identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name or other identifier of a nucleotide.
+
+
+
+ Nucleotide identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a monosaccharide.
+
+
+
+ Monosaccharide identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name from Chemical Entities of Biological Interest (ChEBI) of a chemical compound.
+ ChEBI chemical name
+
+
+
+ This is the recommended chemical name for use for example in database annotation.
+ Chemical name (ChEBI)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ IUPAC recommended name of a chemical compound.
+ IUPAC chemical name
+
+
+
+ Chemical name (IUPAC)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ International Non-proprietary Name (INN or 'generic name') of a chemical compound, assigned by the World Health Organisation (WHO).
+ INN chemical name
+
+
+
+ Chemical name (INN)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Brand name of a chemical compound.
+ Brand chemical name
+
+
+
+ Chemical name (brand)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Synonymous name of a chemical compound.
+ Synonymous chemical name
+
+
+
+ Chemical name (synonymous)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CAS registry number of a chemical compound; a unique numerical identifier of chemicals in the scientific literature, as assigned by the Chemical Abstracts Service.
+ CAS chemical registry number
+ Chemical registry number (CAS)
+
+
+
+ CAS number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Beilstein registry number of a chemical compound.
+ Beilstein chemical registry number
+
+
+
+ Chemical registry number (Beilstein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gmelin registry number of a chemical compound.
+ Gmelin chemical registry number
+
+
+
+ Chemical registry number (Gmelin)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3-letter code word for a ligand (HET group) from a PDB file, for example ATP.
+ Component identifier code
+ Short ligand name
+
+
+
+ HET group name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ String of one or more ASCII characters representing an amino acid.
+
+
+
+ Amino acid name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ String of one or more ASCII characters representing a nucleotide.
+
+
+
+ Nucleotide code
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_strand_id
+ WHATIF: chain
+ Identifier of a polypeptide chain from a protein.
+ Chain identifier
+ PDB chain identifier
+ PDB strand id
+ Polypeptide chain identifier
+ Protein chain identifier
+
+
+
+ This is typically a character (for the chain) appended to a PDB identifier, e.g. 1cukA
+ Polypeptide chain ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a protein.
+
+
+
+ Protein name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name or other identifier of an enzyme or record from a database of enzymes.
+
+
+
+ Enzyme identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+\.-\.-\.-|[0-9]+\.[0-9]+\.-\.-|[0-9]+\.[0-9]+\.[0-9]+\.-|[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+
+ Moby:Annotated_EC_Number
+ Moby:EC_Number
+ An Enzyme Commission (EC) number of an enzyme.
+ EC
+ EC code
+ Enzyme Commission number
+
+
+
+ EC number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an enzyme.
+
+
+
+ Enzyme name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a restriction enzyme.
+
+
+
+ Restriction enzyme name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A specification (partial or complete) of one or more positions or regions of a molecular sequence or map.
+
+ Sequence position specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a molecular sequence feature, for example an ID of a feature that is unique within the scope of the GFF file.
+
+
+
+ Sequence feature ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:_atom_site.id
+ WHATIF: PDBx_atom_site
+ WHATIF: number
+ A position of one or more points (base or residue) in a sequence, or part of such a specification.
+ SO:0000735
+
+
+ Sequence position
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Specification of range(s) of sequence positions.
+
+
+ Sequence range
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a nucleic acid feature.
+
+ Nucleic acid feature identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a protein feature.
+
+ Protein feature identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The type of a sequence feature, typically a term or accession from the Sequence Ontology, for example an EMBL or Swiss-Prot sequence feature key.
+ Sequence feature method
+ Sequence feature type
+
+
+ A feature key indicates the biological nature of the feature or information about changes to or versions of the sequence.
+ Sequence feature key
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Typically one of the EMBL or Swiss-Prot feature qualifiers.
+
+
+ Feature qualifiers hold information about a feature beyond that provided by the feature key and location.
+ Sequence feature qualifier
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A name of a sequence feature, e.g. the name of a feature to be displayed to an end-user. Typically an EMBL or Swiss-Prot feature label.
+ Sequence feature name
+
+
+ A feature label identifies a feature of a sequence database entry. When used with the database name and the entry's primary accession number, it is a unique identifier of that feature.
+ Sequence feature label
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a sequence feature-containing entity adhering to the standard feature naming scheme used by all EMBOSS applications.
+ UFO
+
+
+ EMBOSS Uniform Feature Object
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ String of one or more ASCII characters representing a codon.
+
+ Codon name
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Moby:GeneAccessionList
+ An identifier of a gene, such as a name/symbol or a unique identifier of a gene in a database.
+
+
+
+ Gene identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:Global_GeneCommonName
+ Moby_namespace:Global_GeneSymbol
+ The short name of a gene; a single word that does not contain white space characters. It is typically derived from the gene name.
+
+
+
+ Gene symbol
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs:LocusID
+ http://www.geneontology.org/doc/GO.xrf_abbs:NCBI_Gene
+ An NCBI unique identifier of a gene.
+ Entrez gene ID
+ Gene identifier (Entrez)
+ Gene identifier (NCBI)
+ NCBI gene ID
+ NCBI geneid
+
+
+
+ Gene ID (NCBI)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An NCBI RefSeq unique identifier of a gene.
+
+ Gene identifier (NCBI RefSeq)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An NCBI UniGene unique identifier of a gene.
+
+ Gene identifier (NCBI UniGene)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An Entrez unique identifier of a gene.
+
+ Gene identifier (Entrez)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a gene or feature from the CGD database.
+ CGD ID
+
+
+
+ Gene ID (CGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a gene from DictyBase.
+
+
+
+ Gene ID (DictyBase)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a gene (or other feature) from the Ensembl database.
+ Gene ID (Ensembl)
+
+
+
+ Ensembl gene ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ S[0-9]+
+ Identifier of an entry from the SGD database.
+ SGD identifier
+
+
+
+ Gene ID (SGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9\.-]*
+ Moby_namespace:GeneDB
+ Identifier of a gene from the GeneDB database.
+ GeneDB identifier
+
+
+
+ Gene ID (GeneDB)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the TIGR database.
+
+
+
+ TIGR identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gene:[0-9]{7}
+ Identifier of a gene from the TAIR database.
+
+
+
+ TAIR accession (gene)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein structural domain.
+
+
+
+ This is typically a character or string concatenated with a PDB identifier and a chain identifier.
+ Protein domain ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a protein domain (or other node) from the SCOP database.
+
+
+
+ SCOP domain identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1nr3A00
+ Identifier of a protein domain from CATH.
+ CATH domain identifier
+
+
+
+ CATH domain ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A SCOP concise classification string (sccs) is a compact representation of a SCOP domain classification.
+
+
+
+ An sccs includes the class (alphabetical), fold, superfamily and family (all numerical) to which a given domain belongs.
+ SCOP concise classification string (sccs)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 33229
+ Unique identifier (number) of an entry in the SCOP hierarchy, for example 33229.
+ SCOP unique identifier
+ sunid
+
+
+
+ A sunid uniquely identifies an entry in the SCOP hierarchy, including leaves (the SCOP domains) and higher level nodes including entries corresponding to the protein level.
+ SCOP sunid
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3.30.1190.10.1.1.1.1.1
+ A code number identifying a node from the CATH database.
+ CATH code
+ CATH node identifier
+
+
+
+ CATH node ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a biological kingdom (Bacteria, Archaea, or Eukaryotes).
+
+
+
+ Kingdom name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a species (typically a taxonomic group) of organism.
+ Organism species
+
+
+
+ Species name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a strain of an organism variant, typically a plant, virus or bacterium.
+
+
+
+ Strain name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A string of characters that name or otherwise identify a resource on the Internet.
+ URIs
+
+
+ URI
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a biological or bioinformatics database.
+ Database identifier
+
+
+
+ Database ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a directory.
+
+
+
+ Directory name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name (or part of a name) of a file (of any type).
+
+
+
+ File name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an ontology of biological or bioinformatics concepts and relations.
+
+
+
+ Ontology name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Link
+ Moby:URL
+ A Uniform Resource Locator (URL).
+
+
+ URL
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A Uniform Resource Name (URN).
+
+
+ URN
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A Life Science Identifier (LSID) - a unique identifier of some data.
+ Life Science Identifier
+
+
+ LSIDs provide a standard way to locate and describe data. An LSID is represented as a Uniform Resource Name (URN) with the following format: URN:LSID:<Authority>:<Namespace>:<ObjectID>[:<Version>]
+ LSID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a biological or bioinformatics database.
+
+
+
+ Database name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a molecular sequence database.
+
+ Sequence database name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a file (of any type) with restricted possible values.
+
+
+
+ Enumerated file name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The extension of a file name.
+
+
+
+ A file extension is the characters appearing after the final '.' in the file name.
+ File name extension
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The base name of a file.
+
+
+
+ A file base name is the file name stripped of its directory specification and extension.
+ File base name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a QSAR descriptor.
+
+
+
+ QSAR descriptor name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of an entry from a database where the same type of identifier is used for objects (data) of different semantic type.
+
+ This concept is required for completeness. It should never have child concepts.
+ Database entry identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of molecular sequence(s) or entries from a molecular sequence database.
+
+
+
+ Sequence identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a set of molecular sequence(s).
+
+
+
+ Sequence set ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Identifier of a sequence signature (motif or profile) for example from a database of sequence patterns.
+
+ Sequence signature identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a molecular sequence alignment, for example a record from an alignment database.
+
+
+
+ Sequence alignment ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of a phylogenetic distance matrix.
+
+ Phylogenetic distance matrix identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a phylogenetic tree for example from a phylogenetic tree database.
+
+
+
+ Phylogenetic tree ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a comparison matrix.
+ Substitution matrix identifier
+
+
+
+ Comparison matrix identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique and persistent identifier of a molecular tertiary structure, typically an entry from a structure database.
+
+
+
+ Structure ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier or name of a structural (3D) profile or template (representing a structure or structure alignment).
+ Structural profile identifier
+
+
+
+ Structural (3D) profile ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of tertiary structure alignments.
+
+
+
+ Structure alignment ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an index of amino acid physicochemical and biochemical property data.
+
+
+
+ Amino acid index ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Molecular interaction ID
+ Identifier of a report of protein interactions from a protein interaction database (typically).
+
+
+
+ Protein interaction ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein family.
+ Protein secondary database record identifier
+
+
+
+ Protein family identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name of a codon usage table.
+
+
+
+ Codon usage table name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a transcription factor (or a TF binding site).
+
+
+
+ Transcription factor identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of microarray data.
+
+
+
+ Experiment annotation ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of electron microscopy data.
+
+
+
+ Electron microscopy model ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a report of gene expression (e.g. a gene expression profile) from a database.
+ Gene expression profile identifier
+
+
+
+ Gene expression report ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of genotypes and phenotypes.
+
+
+
+ Genotype and phenotype annotation ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of biological pathways or networks.
+
+
+
+ Pathway or network identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a biological or biomedical workflow, typically from a database of workflows.
+
+
+
+ Workflow ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a data type definition from some provider.
+ Data resource definition identifier
+
+
+
+ Data resource definition ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a mathematical model, typically an entry from a database.
+ Biological model identifier
+
+
+
+ Biological model ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of chemicals.
+ Chemical compound identifier
+ Compound ID
+ Small molecule identifier
+
+
+
+ Compound identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique (typically numerical) identifier of a concept in an ontology of biological or bioinformatics concepts and relations.
+
+
+
+ Ontology concept ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a scientific article.
+ Article identifier
+
+
+
+ Article ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FB[a-zA-Z_0-9]{2}[0-9]{7}
+ Identifier of an object from the FlyBase database.
+
+
+
+ FlyBase ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an object from the WormBase database, usually a human-readable name.
+
+
+
+ WormBase name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Class of an object from the WormBase database.
+
+
+
+ A WormBase class describes the type of object such as 'sequence' or 'protein'.
+ WormBase class
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A persistent, unique identifier of a molecular sequence database entry.
+ Sequence accession number
+
+
+
+ Sequence accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing a type of molecular sequence.
+
+ Sequence type might reflect the molecule (protein, nucleic acid etc) or the sequence itself (gapped, ambiguous etc).
+ Sequence type
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a sequence-based entity adhering to the standard sequence naming scheme used by all EMBOSS applications.
+ EMBOSS USA
+
+
+
+ EMBOSS Uniform Sequence Address
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a protein sequence database entry.
+ Protein sequence accession number
+
+
+
+ Sequence accession (protein)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a nucleotide sequence database entry.
+ Nucleotide sequence accession number
+
+
+
+ Sequence accession (nucleic acid)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (NC|AC|NG|NT|NW|NZ|NM|NR|XM|XR|NP|AP|XP|YP|ZP)_[0-9]+
+ Accession number of a RefSeq database entry.
+ RefSeq ID
+
+
+
+ RefSeq accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Accession number of a UniProt (protein sequence) database entry. May contain version or isoform number.
+
+ UniProt accession (extended)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a PIR sequence database entry.
+ PIR ID
+ PIR accession number
+
+
+
+ PIR identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.2
+
+ Identifier of a TREMBL sequence database entry.
+
+
+ TREMBL accession
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Primary identifier of a Gramene database entry.
+ Gramene primary ID
+
+
+
+ Gramene primary identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a (nucleic acid) entry from the EMBL/GenBank/DDBJ databases.
+
+
+
+ EMBL/GenBank/DDBJ ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of an entry (gene cluster) from the NCBI UniGene database.
+ UniGene ID
+ UniGene cluster ID
+ UniGene identifier
+
+
+
+ Sequence cluster ID (UniGene)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a dbEST database entry.
+ dbEST ID
+
+
+
+ dbEST accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a dbSNP database entry.
+ dbSNP identifier
+
+
+
+ dbSNP ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The EMBOSS type of a molecular sequence.
+
+ See the EMBOSS documentation (http://emboss.sourceforge.net/) for a definition of what this includes.
+ EMBOSS sequence type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ List of EMBOSS Uniform Sequence Addresses (EMBOSS listfile).
+
+ EMBOSS listfile
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a cluster of molecular sequence(s).
+
+
+
+ Sequence cluster ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the COG database.
+ COG ID
+
+
+
+ Sequence cluster ID (COG)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a sequence motif, for example an entry from a motif database.
+
+
+
+ Sequence motif identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a sequence profile.
+
+
+
+ A sequence profile typically represents a sequence alignment.
+ Sequence profile ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the ELMdb database of protein functional sites.
+
+
+
+ ELM ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PS[0-9]{5}
+ Accession number of an entry from the Prosite database.
+ Prosite ID
+
+
+
+ Prosite accession number
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier or name of a HMMER hidden Markov model.
+
+
+
+ HMMER hidden Markov model ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier or name of a profile from the JASPAR database.
+
+
+
+ JASPAR profile ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a sequence alignment.
+
+ Possible values include for example the EMBOSS alignment types, BLAST alignment types and so on.
+ Sequence alignment type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The type of a BLAST sequence alignment.
+
+ BLAST sequence alignment type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a phylogenetic tree.
+
+ For example 'nj', 'upgma' etc.
+ Phylogenetic tree type
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the TreeBASE database.
+
+
+
+ TreeBASE study accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the TreeFam database.
+
+
+
+ TreeFam accession number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a comparison matrix.
+
+ For example 'blosum', 'pam', 'gonnet', 'id' etc. Comparison matrix type may be required where a series of matrices of a certain type are used.
+ Comparison matrix type
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name or identifier of a comparison matrix.
+ Substitution matrix name
+
+
+
+ See for example http://www.ebi.ac.uk/Tools/webservices/help/matrix.
+ Comparison matrix name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9][a-zA-Z_0-9]{3}
+ An identifier of an entry from the PDB database.
+ PDB identifier
+ PDBID
+
+
+
+ A PDB identification code which consists of 4 characters, the first of which is a digit in the range 0 - 9; the remaining 3 are alphanumeric, and letters are upper case only. (source: https://cdn.rcsb.org/wwpdb/docs/documentation/file-format/PDB_format_1996.pdf)
+ PDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the AAindex database.
+
+
+
+ AAindex ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the BIND database.
+
+
+
+ BIND accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EBI\-[0-9]+
+ Accession number of an entry from the IntAct database.
+
+
+
+ IntAct accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a protein family.
+
+
+
+ Protein family name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an InterPro entry, usually indicating the type of protein matches for that entry.
+
+
+
+ InterPro entry name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ IPR015590
+ IPR[0-9]{6}
+ Primary accession number of an InterPro entry.
+ InterPro primary accession
+ InterPro primary accession number
+
+
+
+ Every InterPro entry has a unique accession number to provide a persistent citation of database records.
+ InterPro accession
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Secondary accession number of an InterPro entry.
+ InterPro secondary accession number
+
+
+
+ InterPro secondary accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the Gene3D database.
+
+
+
+ Gene3D ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PIRSF[0-9]{6}
+ Unique identifier of an entry from the PIRSF database.
+
+
+
+ PIRSF ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PR[0-9]{5}
+ The unique identifier of an entry in the PRINTS database.
+
+
+
+ PRINTS code
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PF[0-9]{5}
+ Accession number of a Pfam entry.
+
+
+
+ Pfam accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SM[0-9]{5}
+ Accession number of an entry from the SMART database.
+
+
+
+ SMART accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier (number) of a hidden Markov model from the Superfamily database.
+
+
+
+ Superfamily hidden Markov model number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry (family) from the TIGRFam database.
+ TIGRFam accession number
+
+
+
+ TIGRFam ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PD[0-9]+
+ A ProDom domain family accession number.
+
+
+
+ ProDom is a protein domain family database.
+ ProDom accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the TRANSFAC database.
+
+
+
+ TRANSFAC accession number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [AEP]-[a-zA-Z_0-9]{4}-[0-9]+
+ Accession number of an entry from the ArrayExpress database.
+ ArrayExpress experiment ID
+
+
+
+ ArrayExpress accession number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ PRIDE experiment accession number.
+
+
+
+ PRIDE experiment accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the EMDB electron microscopy database.
+
+
+
+ EMDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [GDS|GPL|GSE|GSM][0-9]+
+ Accession number of an entry from the GEO database.
+
+
+
+ GEO accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the GermOnline database.
+
+
+
+ GermOnline ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the EMAGE database.
+
+
+
+ EMAGE ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of an entry from a database of disease.
+
+
+
+ Disease ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the HGVbase database.
+
+
+
+ HGVbase ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry from the HIVDB database.
+
+ HIVDB identifier
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [*#+%^]?[0-9]{6}
+ Identifier of an entry from the OMIM database.
+
+
+
+ OMIM ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an object from one of the KEGG databases (excluding the GENES division).
+
+
+
+ KEGG object identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ REACT_[0-9]+(\.[0-9]+)?
+ Identifier of an entry from the Reactome database.
+ Reactome ID
+
+
+
+ Pathway ID (reactome)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry from the aMAZE database.
+
+ Pathway ID (aMAZE)
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a pathway from the BioCyc biological pathways database.
+ BioCyc pathway ID
+
+
+
+ Pathway ID (BioCyc)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the INOH database.
+ INOH identifier
+
+
+
+ Pathway ID (INOH)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the PATIKA database.
+ PATIKA ID
+
+
+
+ Pathway ID (PATIKA)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the CPDB (ConsensusPathDB) biological pathways database, which is an identifier from an external database integrated into CPDB.
+ CPDB ID
+
+
+
+ This concept refers to identifiers used by the databases collated in CPDB; CPDB identifiers are not independently defined.
+ Pathway ID (CPDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PTHR[0-9]{5}
+ Identifier of a biological pathway from the Panther Pathways database.
+ Panther Pathways ID
+
+
+
+ Pathway ID (Panther)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ MIR:00100005
+ MIR:[0-9]{8}
+ Unique identifier of a MIRIAM data resource.
+
+
+
+ This is the identifier used internally by MIRIAM for a data type.
+ MIRIAM identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a data type from the MIRIAM database.
+
+
+
+ MIRIAM data type name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ urn:miriam:pubmed:16333295|urn:miriam:obo.go:GO%3A0045202
+ The URI (URL or URN) of a data entity from the MIRIAM database.
+ identifiers.org synonym
+
+
+
+ A MIRIAM URI consists of the URI of the MIRIAM data type (PubMed, UniProt etc) followed by the identifier of an element of that data type, for example PMID for a publication or an accession number for a GO term.
+ MIRIAM URI
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UniProt|Enzyme Nomenclature
+ The primary name of a data type from the MIRIAM database.
+
+
+
+ The primary name of a MIRIAM data type is taken from a controlled vocabulary.
+ MIRIAM data type primary name
+
+
+
+
+ UniProt|Enzyme Nomenclature
+ A protein entity has the MIRIAM data type 'UniProt', and an enzyme has the MIRIAM data type 'Enzyme Nomenclature'.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A synonymous name of a data type from the MIRIAM database.
+
+
+
+ A synonymous name for a MIRIAM data type taken from a controlled vocabulary.
+ MIRIAM data type synonymous name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a Taverna workflow.
+
+
+
+ Taverna workflow ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a biological (mathematical) model.
+
+
+
+ Biological model name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (BIOMD|MODEL)[0-9]{10}
+ Unique identifier of an entry from the BioModel database.
+
+
+
+ BioModel ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Chemical structure specified in PubChem Compound Identification (CID), a non-zero integer identifier for a unique chemical structure.
+ PubChem compound accession identifier
+
+
+
+ PubChem CID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the ChemSpider database.
+
+
+
+ ChemSpider ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CHEBI:[0-9]+
+ Identifier of an entry from the ChEBI database.
+ ChEBI IDs
+ ChEBI identifier
+
+
+
+ ChEBI ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the BioPax ontology.
+
+
+
+ BioPax concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a concept from The Gene Ontology.
+ GO concept identifier
+
+
+
+ GO concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the MeSH vocabulary.
+
+
+
+ MeSH concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the HGNC controlled vocabulary.
+
+
+
+ HGNC concept ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 9662|3483|182682
+ [1-9][0-9]{0,8}
+ A stable unique identifier for each taxon (for a species, a family, an order, or any other group) in the NCBI taxonomy database.
+ NCBI tax ID
+ NCBI taxonomy identifier
+
+
+
+ NCBI taxonomy ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the Plant Ontology (PO).
+
+
+
+ Plant Ontology concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the UMLS vocabulary.
+
+
+
+ UMLS concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FMA:[0-9]+
+ An identifier of a concept from Foundational Model of Anatomy.
+
+
+
+ Classifies anatomical entities according to their shared characteristics (genus) and distinguishing characteristics (differentia). Specifies the part-whole and spatial relationships of the entities, morphological transformation of the entities during prenatal development and the postnatal life cycle and principles, rules and definitions according to which classes and relationships in the other three components of FMA are represented.
+ FMA concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the EMAP mouse ontology.
+
+
+
+ EMAP concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the ChEBI ontology.
+
+
+
+ ChEBI concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the MGED ontology.
+
+
+
+ MGED concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the myGrid ontology.
+
+
+
+ The ontology is provided as two components, the service ontology and the domain ontology. The domain ontology provides concepts for core bioinformatics data types and their relations. The service ontology describes the physical and operational features of web services.
+ myGrid concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 4963447
+ [1-9][0-9]{0,8}
+ PubMed unique identifier of an article.
+ PMID
+
+
+
+ PubMed ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (doi\:)?[0-9]{2}\.[0-9]{4}/.*
+ Digital Object Identifier (DOI) of a published article.
+ Digital Object Identifier
+
+
+
+ DOI
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Medline UI (unique identifier) of an article.
+ Medline unique identifier
+
+
+
+ The use of Medline UI has been replaced by the PubMed unique identifier.
+ Medline UI
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a computer package, application, method or function.
+
+
+
+ Tool name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The unique name of a signature (sequence classifier) method.
+
+
+
+ Signature methods from http://www.ebi.ac.uk/Tools/InterProScan/help.html#results include BlastProDom, FPrintScan, HMMPIR, HMMPfam, HMMSmart, HMMTigr, ProfileScan, ScanRegExp, SuperFamily and HAMAP.
+ Tool name (signature)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a BLAST tool.
+ BLAST name
+
+
+
+ This includes 'blastn', 'blastp', 'blastx', 'tblastn' and 'tblastx'.
+ Tool name (BLAST)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a FASTA tool.
+
+
+
+ This includes 'fasta3', 'fastx3', 'fasty3', 'fastf3', 'fasts3' and 'ssearch'.
+ Tool name (FASTA)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of an EMBOSS application.
+
+
+
+ Tool name (EMBOSS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of an EMBASSY package.
+
+
+
+ Tool name (EMBASSY package)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR constitutional descriptor.
+ QSAR constitutional descriptor
+
+
+ QSAR descriptor (constitutional)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR electronic descriptor.
+ QSAR electronic descriptor
+
+
+ QSAR descriptor (electronic)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR geometrical descriptor.
+ QSAR geometrical descriptor
+
+
+ QSAR descriptor (geometrical)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR topological descriptor.
+ QSAR topological descriptor
+
+
+ QSAR descriptor (topological)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR molecular descriptor.
+ QSAR molecular descriptor
+
+
+ QSAR descriptor (molecular)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any collection of multiple protein sequences and associated metadata that do not (typically) correspond to common sequence database records or database entries.
+
+
+ Sequence set (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any collection of multiple nucleotide sequences and associated metadata that do not (typically) correspond to common sequence database records or database entries.
+
+
+ Sequence set (nucleic acid)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A set of sequences that have been clustered or otherwise classified as belonging to a group including (typically) sequence cluster information.
+
+
+ The cluster might include sequence identifiers, short descriptions, alignment and summary information.
+ Sequence cluster
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A file of intermediate results from a PSIBLAST search that is used for priming the search in the next PSIBLAST iteration.
+
+ A Psiblast checkpoint file uses ASN.1 Binary Format and usually has the extension '.asn'.
+ Psiblast checkpoint file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Sequences generated by HMMER package in FASTA-style format.
+
+ HMMER synthetic sequences set
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A protein sequence cleaved into peptide fragments (by enzymatic or chemical cleavage) with fragment masses.
+
+
+ Proteolytic digest
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SO:0000412
+ Restriction digest fragments from digesting a nucleotide sequence with restriction sites using a restriction endonuclease.
+
+
+ Restriction digest
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Oligonucleotide primer(s) for PCR and DNA amplification, for example a minimal primer set.
+
+
+ PCR primers
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ File of sequence vectors used by EMBOSS vectorstrip application, or any file in same format.
+
+ vectorstrip cloning vector definition file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A library of nucleotide sequences to avoid during hybridisation events. Hybridisation of the internal oligo to sequences in this library is avoided, rather than priming from them. The file is in a restricted FASTA format.
+
+ Primer3 internal oligo mishybridizing library
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A nucleotide sequence library of sequences to avoid during amplification (for example repetitive sequences, or possibly the sequences of genes in a gene family that should not be amplified). The file must be in a restricted FASTA format.
+
+ Primer3 mispriming library file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ File of one or more pairs of primer sequences, as used by EMBOSS primersearch application.
+
+ primersearch primer pairs sequence record
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A cluster of protein sequences.
+ Protein sequence cluster
+
+
+ The sequences are typically related, for example a family of sequences.
+ Sequence cluster (protein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A cluster of nucleotide sequences.
+ Nucleotide sequence cluster
+
+
+ The sequences are typically related, for example a family of sequences.
+ Sequence cluster (nucleic acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The size (length) of a sequence, subsequence or region in a sequence, or range(s) of lengths.
+
+
+ Sequence length
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Size of a sequence word.
+
+ Word size is used for example in word-based sequence database search methods.
+ Word size
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Size of a sequence window.
+
+ A window is a region of fixed size but not fixed position over a molecular sequence. It is typically moved (computationally) over a sequence during scoring.
+ Window size
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Specification of range(s) of length of sequences.
+
+ Sequence length range
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Report on basic information about a molecular sequence such as name, accession number, type (nucleic or protein), length, description etc.
+
+
+ Sequence information report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report about non-positional sequence features, typically a report on general molecular sequence properties derived from sequence analysis.
+ Sequence properties report
+
+
+ Sequence property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotation of positional features of molecular sequence(s), i.e. that can be mapped to position(s) in the sequence.
+ Feature record
+ Features
+ General sequence features
+ Sequence features report
+ SO:0000110
+
+
+ This includes annotation of positional sequence features, organised into a standard feature table, or any other report of sequence features. General feature reports are a source of sequence feature table information although internal conversion would be required.
+ Sequence features
+ http://purl.bioontology.org/ontology/MSH/D058977
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Comparative data on sequence features such as statistics, intersections (and data on intersections), differences etc.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Sequence features (comparative)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report of general sequence properties derived from protein sequence data.
+
+ Sequence property (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report of general sequence properties derived from nucleotide sequence data.
+
+ Sequence property (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on sequence complexity, for example low-complexity or repeat regions in sequences.
+ Sequence property (complexity)
+
+
+ Sequence complexity report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on ambiguity in molecular sequence(s).
+ Sequence property (ambiguity)
+
+
+ Sequence ambiguity report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report (typically a table) on character or word composition / frequency of a molecular sequence(s).
+ Sequence composition
+ Sequence property (composition)
+
+
+ Sequence composition report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on peptide fragments of certain molecular weight(s) in one or more protein sequences.
+
+
+ Peptide molecular weight hits
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of third base position variability in a nucleotide sequence.
+
+
+ Base position variability plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A table of character or word composition / frequency of a molecular sequence.
+
+ Sequence composition table
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of base frequencies of a nucleotide sequence.
+
+
+ Base frequencies table
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of word composition of a nucleotide sequence.
+
+
+ Base word frequencies table
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of amino acid frequencies of a protein sequence.
+ Sequence composition (amino acid frequencies)
+
+
+ Amino acid frequencies table
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of amino acid word composition of a protein sequence.
+ Sequence composition (amino acid words)
+
+
+ Amino acid word frequencies table
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation of a molecular sequence in DAS format.
+
+ DAS sequence feature annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotation of positional sequence features, organised into a standard feature table.
+ Sequence feature table
+
+
+ Feature table
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map of (typically one) DNA sequence annotated with positional or non-positional features.
+ DNA map
+
+
+ Map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report on intrinsic positional features of a nucleotide sequence, formatted to be machine-readable.
+ Feature table (nucleic acid)
+ Nucleic acid feature table
+ Genome features
+ Genomic features
+
+
+ This includes nucleotide sequence feature annotation in any known sequence feature table format and any other report of nucleic acid features.
+ Nucleic acid features
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report on intrinsic positional features of a protein sequence.
+ Feature table (protein)
+ Protein feature table
+
+
+ This includes protein sequence feature annotation in any known sequence feature table format and any other report of protein features.
+ Protein features
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GeneticMap
+ A map showing the relative positions of genetic markers in a nucleic acid sequence, based on estimation of non-physical distance such as recombination frequencies.
+ Linkage map
+
+
+ A genetic (linkage) map indicates the proximity of two genes on a chromosome, whether two genes are linked and the frequency they are transmitted together to an offspring. They are limited to genetic markers of traits observable only in whole organisms.
+ Genetic map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map of genetic markers in a contiguous, assembled genomic sequence, with the sizes and separation of markers measured in base pairs.
+
+
+ A sequence map typically includes annotation on significant subsequences such as contigs, haplotypes and genes. The contigs shown will (typically) be a set of small overlapping clones representing a complete chromosomal segment.
+ Sequence map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map of DNA (linear or circular) annotated with physical features or landmarks such as restriction sites, cloned DNA fragments, genes or genetic markers, along with the physical distances between them.
+
+
+ Distance in a physical map is measured in base pairs. A physical map might be ordered relative to a reference map (typically a genetic map) in the process of genome sequencing.
+ Physical map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image of a sequence with matches to signatures, motifs or profiles.
+
+
+ Sequence signature map
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map showing banding patterns derived from direct observation of a stained chromosome.
+ Chromosome map
+ Cytogenic map
+ Cytologic map
+
+
+ This is the lowest-resolution physical map and can provide only rough estimates of physical (base pair) distances. Like a genetic map, they are limited to genetic markers of traits observable only in whole organisms.
+ Cytogenetic map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A gene map showing distances between loci based on relative cotransduction frequencies.
+
+
+ DNA transduction map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence map of a single gene annotated with genetic features such as introns, exons, untranslated regions, polyA signals, promoters, enhancers and (possibly) mutations defining alleles of a gene.
+
+
+ Gene map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence map of a plasmid (circular DNA).
+
+
+ Plasmid map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence map of a whole genome.
+
+
+ Genome map
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of the restriction enzyme cleavage sites (restriction sites) in a nucleic acid sequence.
+
+
+ Restriction map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image showing matches between protein sequence(s) and InterPro Entries.
+
+
+ The sequence(s) might be screened against InterPro, or be the sequences from the InterPro entry itself. Each protein is represented as a scaled horizontal line with colored bars indicating the position of the matches.
+ InterPro compact match image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image showing detailed information on matches between protein sequence(s) and InterPro Entries.
+
+
+ The sequence(s) might be screened against InterPro, or be the sequences from the InterPro entry itself.
+ InterPro detailed match image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image showing the architecture of InterPro domains in a protein sequence.
+
+
+ The sequence(s) might be screened against InterPro, or be the sequences from the InterPro entry itself. Domain architecture is shown as a series of non-overlapping domains in the protein.
+ InterPro architecture image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ SMART protein schematic in PNG format.
+
+ SMART protein schematic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Images based on GlobPlot prediction of intrinsic disordered regions and globular domains in protein sequences.
+
+
+ GlobPlot domain image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Report on the location of matches to profiles, motifs (conserved or functional patterns) or other signatures in one or more sequences.
+
+
+ Sequence motif matches
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Location of short repetitive subsequences (repeat sequences) in (typically nucleotide) sequences.
+
+ The report might include derived data map such as classification, annotation, organisation, periodicity etc.
+ Sequence features (repeats)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on predicted or actual gene structure, regions which make an RNA product and features such as promoters, coding regions, splice sites etc.
+
+ Gene and transcript structure (report)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ regions of a nucleic acid sequence containing mobile genetic elements.
+
+
+ Mobile genetic elements
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on quadruplex-forming motifs in a nucleotide sequence.
+
+ Nucleic acid features (quadruplexes)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Report on nucleosome formation potential or exclusion sequence(s).
+
+
+ Nucleosome exclusion sequences
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ A report on exonic splicing enhancers (ESE) in an exon.
+
+
+ Gene features (exonic splicing enhancer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on microRNA sequence (miRNA) or precursor, microRNA targets, miRNA binding sites in an RNA sequence etc.
+
+ Nucleic acid features (microRNA)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ protein-coding regions including coding sequences (CDS), exons, translation initiation sites and open reading frames.
+
+
+ Coding region
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A report on selenocysteine insertion sequence (SECIS) element in a DNA sequence.
+
+ Gene features (SECIS element)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ transcription factor binding sites (TFBS) in a DNA sequence.
+
+
+ Transcription factor binding sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on predicted or known key residue positions (sites) in a protein sequence, such as binding or functional sites.
+
+ Use this concept for collections of specific sites which are not necessarily contiguous, rather than contiguous stretches of amino acids.
+ Protein features (sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ signal peptides or signal peptide cleavage sites in protein sequences.
+
+
+ Protein features report (signal peptides)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ cleavage sites (for a proteolytic enzyme or agent) in a protein sequence.
+
+
+ Protein features report (cleavage sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ post-translation modifications in a protein sequence, typically describing the specific sites involved.
+
+
+ Protein features (post-translation modifications)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ catalytic residues (active site) of an enzyme.
+
+
+ Protein features report (active sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ ligand-binding (non-catalytic) residues of a protein, such as sites that bind metal, prosthetic groups or lipids.
+
+
+ Protein features report (binding sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ A report on antigenic determinant sites (epitopes) in proteins, from sequence and / or structural data.
+
+
+ Epitope mapping is commonly done during vaccine design.
+ Protein features (epitopes)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ RNA and DNA-binding proteins and binding sites in protein sequences.
+
+
+ Protein features report (nucleic acid binding sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on epitopes that bind to MHC class I molecules.
+
+ MHC Class I epitopes report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on predicted epitopes that bind to MHC class II molecules.
+
+ MHC Class II epitopes report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ A report or plot of PEST sites in a protein sequence.
+
+
+ 'PEST' motifs target proteins for proteolytic degradation and reduce the half-lives of proteins dramatically.
+ Protein features (PEST sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Scores from a sequence database search (for example a BLAST search).
+
+ Sequence database hits scores list
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignments from a sequence database search (for example a BLAST search).
+
+ Sequence database hits alignments list
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on the evaluation of the significance of sequence similarity scores from a sequence database search (for example a BLAST search).
+
+ Sequence database hits evaluation data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alphabet for the motifs (patterns) that MEME will search for.
+
+ MEME motif alphabet
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ MEME background frequencies file.
+
+ MEME background frequencies file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ File of directives for ordering and spacing of MEME motifs.
+
+ MEME motifs directive file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dirichlet distribution used by hidden Markov model analysis programs.
+
+
+ Dirichlet distribution
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+
+ Emission and transition counts of a hidden Markov model, generated once HMM has been determined, for example after residues/gaps have been assigned to match, delete and insert states.
+
+ HMM emission and transition counts
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Regular expression pattern.
+
+
+ Regular expression
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any specific or conserved pattern (typically expressed as a regular expression) in a molecular sequence.
+
+
+ Sequence motif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some type of statistical model representing a (typically multiple) sequence alignment.
+
+
+ Sequence profile
+ http://semanticscience.org/resource/SIO_010531
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report about a specific or conserved protein sequence pattern.
+ InterPro entry
+ Protein domain signature
+ Protein family signature
+ Protein region signature
+ Protein repeat signature
+ Protein site signature
+
+
+ Protein signature
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A nucleotide regular expression pattern from the Prosite database.
+
+ Prosite nucleotide pattern
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A protein regular expression pattern from the Prosite database.
+
+ Prosite protein pattern
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (typically representing a sequence alignment) that is a simple matrix of nucleotide (or amino acid) counts per position.
+ PFM
+
+
+ Position frequency matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (typically representing a sequence alignment) that is weighted matrix of nucleotide (or amino acid) counts per position.
+ PWM
+
+
+ Contributions of individual sequences to the matrix might be uneven (weighted).
+ Position weight matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (typically representing a sequence alignment) derived from a matrix of nucleotide (or amino acid) counts per position that reflects information content at each position.
+ ICM
+
+
+ Information content matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A statistical Markov model of a system which is assumed to be a Markov process with unobserved (hidden) states. For example, a hidden Markov model representation of a set or alignment of sequences.
+ HMM
+
+
+ Hidden Markov model
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more fingerprints (sequence classifiers) as used in the PRINTS database.
+
+
+ Fingerprint
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A protein signature of the type used in the EMBASSY Signature package.
+
+ Domainatrix signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ NULL hidden Markov model representation used by the HMMER package.
+
+ HMMER NULL hidden Markov model
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein family signature (sequence classifier) from the InterPro database.
+
+ Protein family signatures cover all domains in the matching proteins and span >80% of the protein length and with no adjacent protein domain signatures or protein region signatures.
+ Protein family signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein domain signature (sequence classifier) from the InterPro database.
+
+ Protein domain signatures identify structural or functional domains or other units with defined boundaries.
+ Protein domain signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein region signature (sequence classifier) from the InterPro database.
+
+ A protein region signature defines a region which cannot be described as a protein family or domain signature.
+ Protein region signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein repeat signature (sequence classifier) from the InterPro database.
+
+ A protein repeat signature is a repeated protein motif that is not expected, in single copy, to fold independently into a globular domain.
+ Protein repeat signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein site signature (sequence classifier) from the InterPro database.
+
+ A protein site signature is a classifier for a specific site in a protein.
+ Protein site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein conserved site signature (sequence classifier) from the InterPro database.
+
+ A protein conserved site signature is any short sequence pattern that may contain one or more unique residues and cannot be described as an active site, binding site or post-translational modification.
+ Protein conserved site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein active site signature (sequence classifier) from the InterPro database.
+
+ A protein active site signature corresponds to an enzyme catalytic pocket. An active site typically includes non-contiguous residues, therefore multiple signatures may be required to describe an active site. This includes residues involved in enzymatic reactions for which mutational data is typically available.
+ Protein active site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein binding site signature (sequence classifier) from the InterPro database.
+
+ A protein binding site signature corresponds to a site that reversibly binds chemical compounds, which are not themselves substrates of the enzymatic reaction. This includes enzyme cofactors and residues involved in electron transport or protein structure modification.
+ Protein binding site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein post-translational modification signature (sequence classifier) from the InterPro database.
+
+ A protein post-translational modification signature corresponds to sites that undergo modification of the primary structure, typically to activate or de-activate a function. For example, methylation, sumoylation, glycosylation etc. The modification might be permanent or reversible.
+ Protein post-translational modification signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Alignment of exactly two molecular sequences.
+ Sequence alignment (pair)
+
+
+ Pair sequence alignment
+ http://semanticscience.org/resource/SIO_010068
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of more than two molecular sequences.
+
+ Sequence alignment (multiple)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple nucleotide sequences.
+ Sequence alignment (nucleic acid)
+ DNA sequence alignment
+ RNA sequence alignment
+
+
+ Nucleic acid sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple protein sequences.
+ Sequence alignment (protein)
+
+
+ Protein sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple molecular sequences of different types.
+ Sequence alignment (hybrid)
+
+
+ Hybrid sequence alignments include for example genomic DNA to EST, cDNA or mRNA.
+ Hybrid sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment of exactly two nucleotide sequences.
+
+ Sequence alignment (nucleic acid pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment of exactly two protein sequences.
+
+ Sequence alignment (protein pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of exactly two molecular sequences of different types.
+
+ Hybrid sequence alignment (pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of more than two nucleotide sequences.
+
+ Multiple nucleotide sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of more than two protein sequences.
+
+ Multiple protein sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple floating point number defining the penalty for opening or extending a gap in an alignment.
+
+
+ Alignment score or penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Whether end gaps are scored or not.
+
+ Score end gaps control
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controls the order of sequences in an output sequence alignment.
+
+ Aligned sequence order
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for opening a gap in an alignment.
+
+
+ Gap opening penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for extending a gap in an alignment.
+
+
+ Gap extension penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for gaps that are close together in an alignment.
+
+
+ Gap separation penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ A penalty for gaps at the termini of an alignment, either from the N/C terminal of protein or 5'/3' terminal of nucleotide sequences.
+
+ Terminal gap penalty
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The score for a 'match' used in various sequence database search applications with simple scoring schemes.
+
+
+ Match reward score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The score (penalty) for a 'mismatch' used in various alignment and sequence database search applications with simple scoring schemes.
+
+
+ Mismatch penalty score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ This is the threshold drop in score at which extension of word alignment is halted.
+
+
+ Drop off score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple integer number defining the penalty for opening a gap in an alignment.
+
+ Gap opening penalty (integer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for opening a gap in an alignment.
+
+ Gap opening penalty (float)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple integer number defining the penalty for extending a gap in an alignment.
+
+ Gap extension penalty (integer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for extending a gap in an alignment.
+
+ Gap extension penalty (float)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple integer number defining the penalty for gaps that are close together in an alignment.
+
+ Gap separation penalty (integer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for gaps that are close together in an alignment.
+
+ Gap separation penalty (float)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A number defining the penalty for opening gaps at the termini of an alignment, either from the N/C terminal of protein or 5'/3' terminal of nucleotide sequences.
+
+
+ Terminal gap opening penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A number defining the penalty for extending gaps at the termini of an alignment, either from the N/C terminal of protein or 5'/3' terminal of nucleotide sequences.
+
+
+ Terminal gap extension penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence identity is the number (%) of matches (identical characters) in positions from an alignment of two molecular sequences.
+
+
+ Sequence identity
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence similarity is the similarity (expressed as a percentage) of two molecular sequences calculated from their alignment, a scoring matrix for scoring characters substitutions and penalties for gap insertion and extension.
+
+
+ Data Type is float probably.
+ Sequence similarity
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data on molecular sequence alignment quality (estimated accuracy).
+
+ Sequence alignment metadata (quality report)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on character conservation in a molecular sequence alignment.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation. Use this concept for calculated substitution rates, relative site variability, data on sites with biased properties, highly conserved or very poorly conserved sites, regions, blocks etc.
+ Sequence alignment report (site conservation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on correlations between sites in a molecular sequence alignment, typically to identify possible covarying positions and predict contacts or structural constraints in protein structures.
+
+ Sequence alignment report (site correlation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of molecular sequences to a Domainatrix signature (representing a sequence alignment).
+
+ Sequence-profile alignment (Domainatrix signature)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment of molecular sequence(s) to a hidden Markov model(s).
+
+ Sequence-profile alignment (HMM)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment of molecular sequences to a protein fingerprint from the PRINTS database.
+
+ Sequence-profile alignment (fingerprint)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Continuous quantitative data that may be read during phylogenetic tree calculation.
+ Phylogenetic continuous quantitative characters
+ Quantitative traits
+
+
+ Phylogenetic continuous quantitative data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Character data with discrete states that may be read during phylogenetic tree calculation.
+ Discrete characters
+ Discretely coded characters
+ Phylogenetic discrete states
+
+
+ Phylogenetic discrete data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more cliques of mutually compatible characters that are generated, for example from analysis of discrete character data, and are used to generate a phylogeny.
+ Phylogenetic report (cliques)
+
+
+ Phylogenetic character cliques
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic invariants data for testing alternative tree topologies.
+ Phylogenetic report (invariants)
+
+
+ Phylogenetic invariants
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report of data concerning or derived from a phylogenetic tree, or from comparing two or more phylogenetic trees.
+
+ This is a broad data type and is used for example for reports on confidence, shape or stratigraphic (age) data derived from phylogenetic tree analysis.
+ Phylogenetic report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A model of DNA substitution that explains a DNA sequence alignment, derived from phylogenetic tree analysis.
+ Phylogenetic tree report (DNA substitution model)
+ Sequence alignment report (DNA substitution model)
+ Substitution model
+
+
+ DNA substitution model
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data about the shape of a phylogenetic tree.
+
+ Phylogenetic tree report (tree shape)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on the confidence of a phylogenetic tree.
+
+ Phylogenetic tree report (tree evaluation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Distances, such as Branch Score distance, between two or more phylogenetic trees.
+ Phylogenetic tree report (tree distances)
+
+
+ Phylogenetic tree distances
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Molecular clock and stratigraphic (age) data derived from phylogenetic tree analysis.
+
+ Phylogenetic tree report (tree stratigraphic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Independent contrasts for characters used in a phylogenetic tree, or covariances, regressions and correlations between characters for those contrasts.
+ Phylogenetic report (character contrasts)
+
+
+ Phylogenetic character contrasts
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of integer numbers for sequence comparison.
+
+ Comparison matrix (integers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of floating point numbers for sequence comparison.
+
+ Comparison matrix (floats)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of integer or floating point numbers for nucleotide comparison.
+ Nucleotide comparison matrix
+ Nucleotide substitution matrix
+
+
+ Comparison matrix (nucleotide)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of integer or floating point numbers for amino acid comparison.
+ Amino acid comparison matrix
+ Amino acid substitution matrix
+
+
+ Comparison matrix (amino acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of integer numbers for nucleotide comparison.
+
+ Nucleotide comparison matrix (integers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of floating point numbers for nucleotide comparison.
+
+ Nucleotide comparison matrix (floats)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of integer numbers for amino acid comparison.
+
+ Amino acid comparison matrix (integers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of floating point numbers for amino acid comparison.
+
+ Amino acid comparison matrix (floats)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a nucleic acid tertiary (3D) structure.
+
+
+ Nucleic acid structure
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a protein tertiary (3D) structure, or part of a structure, possibly in complex with other molecules.
+ Protein structures
+
+
+ Protein structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The structure of a protein in complex with a ligand, typically a small molecule such as an enzyme substrate or cofactor, but possibly another macromolecule.
+
+
+ This includes interactions of proteins with atoms, ions and small molecules or macromolecules such as nucleic acids or other polypeptides. For stable inter-polypeptide interactions use 'Protein complex' instead.
+ Protein-ligand complex
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a carbohydrate (3D) structure.
+
+
+ Carbohydrate structure
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the (3D) structure of a small molecule, such as any common chemical compound.
+ CHEBI:23367
+
+
+ Small molecule structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a DNA tertiary (3D) structure.
+
+
+ DNA structure
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for an RNA tertiary (3D) structure.
+
+
+ RNA structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a tRNA tertiary (3D) structure, including tmRNA, snoRNAs etc.
+
+
+ tRNA structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the tertiary (3D) structure of a polypeptide chain.
+
+
+ Protein chain
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the tertiary (3D) structure of a protein domain.
+
+
+ Protein domain
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ 3D coordinate and associated data for a protein tertiary (3D) structure (all atoms).
+
+ Protein structure (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a protein tertiary (3D) structure (typically C-alpha atoms only).
+ Protein structure (C-alpha atoms)
+
+
+ C-beta atoms from amino acid side-chains may be included.
+ C-alpha trace
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a polypeptide chain tertiary (3D) structure (all atoms).
+
+ Protein chain (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a polypeptide chain tertiary (3D) structure (typically C-alpha atoms only).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Protein chain (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a protein domain tertiary (3D) structure (all atoms).
+
+ Protein domain (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a protein domain tertiary (3D) structure (typically C-alpha atoms only).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Protein domain (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of exactly two molecular tertiary (3D) structures.
+ Pair structure alignment
+
+
+ Structure alignment (pair)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two molecular tertiary (3D) structures.
+
+ Structure alignment (multiple)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of protein tertiary (3D) structures.
+ Structure alignment (protein)
+
+
+ Protein structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of nucleic acid tertiary (3D) structures.
+ Structure alignment (nucleic acid)
+
+
+ Nucleic acid structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment (superimposition) of exactly two protein tertiary (3D) structures.
+
+ Structure alignment (protein pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two protein tertiary (3D) structures.
+
+ Multiple protein tertiary structure alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment (superimposition) of protein tertiary (3D) structures (all atoms considered).
+
+ Structure alignment (protein all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment (superimposition) of protein tertiary (3D) structures (typically C-alpha atoms only considered).
+
+ C-beta atoms from amino acid side-chains may be considered.
+ Structure alignment (protein C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of exactly two protein tertiary (3D) structures (all atoms considered).
+
+ Pairwise protein tertiary structure alignment (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of exactly two protein tertiary (3D) structures (typically C-alpha atoms only considered).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Pairwise protein tertiary structure alignment (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two protein tertiary (3D) structures (all atoms considered).
+
+ Multiple protein tertiary structure alignment (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two protein tertiary (3D) structures (typically C-alpha atoms only considered).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Multiple protein tertiary structure alignment (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment (superimposition) of exactly two nucleic acid tertiary (3D) structures.
+
+ Structure alignment (nucleic acid pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two nucleic acid tertiary (3D) structures.
+
+ Multiple nucleic acid tertiary structure alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of RNA tertiary (3D) structures.
+ Structure alignment (RNA)
+
+
+ RNA structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix to transform (rotate/translate) 3D coordinates, typically the transformation necessary to superimpose two molecular structures.
+
+
+ Structural transformation matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ DaliLite hit table of protein chain tertiary structure alignment data.
+
+ The significant and top-scoring hits for regions of the compared structures is shown. Data such as Z-Scores, number of aligned residues, root-mean-square deviation (RMSD) of atoms and sequence identity are given.
+ DaliLite hit table
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A score reflecting structural similarities of two molecules.
+
+ Molecular similarity score
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Root-mean-square deviation (RMSD) is calculated to measure the average distance between superimposed macromolecular coordinates.
+ RMSD
+
+
+ Root-mean-square deviation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A measure of the similarity between two ligand fingerprints.
+
+
+ A ligand fingerprint is derived from ligand structural data from a Protein DataBank file. It reflects the elements or groups present or absent, covalent bonds and bond orders and the bonded environment in terms of SATIS codes and BLEEP atom types.
+ Tanimoto similarity score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A matrix of 3D-1D scores reflecting the probability of amino acids to occur in different tertiary structural environments.
+
+
+ 3D-1D scoring matrix
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of 20 numerical values which quantify a property (e.g. physicochemical or biochemical) of the common amino acids.
+
+
+ Amino acid index
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Chemical classification (small, aliphatic, aromatic, polar, charged etc) of amino acids.
+ Chemical classes (amino acids)
+
+
+ Amino acid index (chemical classes)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Statistical protein contact potentials.
+ Contact potentials (amino acid pair-wise)
+
+
+ Amino acid pair-wise contact potentials
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Molecular weights of amino acids.
+ Molecular weight (amino acids)
+
+
+ Amino acid index (molecular weight)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Hydrophobic, hydrophilic or charge properties of amino acids.
+ Hydropathy (amino acids)
+
+
+ Amino acid index (hydropathy)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Experimental free energy values for the water-interface and water-octanol transitions for the amino acids.
+ White-Wimley data (amino acids)
+
+
+ Amino acid index (White-Wimley data)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Van der Waals radii of atoms for different amino acid residues.
+ van der Waals radii (amino acids)
+
+
+ Amino acid index (van der Waals radii)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on a specific enzyme.
+
+ Enzyme report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on a specific restriction enzyme such as enzyme reference data.
+
+ This might include name of enzyme, organism, isoschizomers, methylation, source, suppliers, literature references, or data on restriction enzyme patterns such as name of enzyme, recognition site, length of pattern, number of cuts made by enzyme, details of blunt or sticky end cut etc.
+ Restriction enzyme report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ List of molecular weight(s) of one or more proteins or peptides, for example cut by proteolytic enzymes or reagents.
+
+
+ The report might include associated data such as frequency of peptide fragment molecular weights.
+ Peptide molecular weights
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on the hydrophobic moment of a polypeptide sequence.
+
+
+ Hydrophobic moment is a peptide's hydrophobicity measured for different angles of rotation.
+ Peptide hydrophobic moment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The aliphatic index of a protein.
+
+
+ The aliphatic index is the relative protein volume occupied by aliphatic side chains.
+ Protein aliphatic index
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A protein sequence with annotation on hydrophobic or hydrophilic / charged regions, hydrophobicity plot etc.
+
+
+ Hydrophobic moment is a peptide's hydrophobicity measured for different angles of rotation.
+ Protein sequence hydropathy plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of the mean charge of the amino acids within a window of specified length as the window is moved along a protein sequence.
+
+
+ Protein charge plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The solubility or atomic solvation energy of a protein sequence or structure.
+ Protein solubility data
+
+
+ Protein solubility
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the crystallizability of a protein sequence.
+ Protein crystallizability data
+
+
+ Protein crystallizability
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the stability, intrinsic disorder or globularity of a protein sequence.
+ Protein globularity data
+
+
+ Protein globularity
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The titration curve of a protein.
+
+
+ Protein titration curve
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The isoelectric point of a protein.
+
+
+ Protein isoelectric point
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The pKa value of a protein.
+
+
+ Protein pKa value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The hydrogen exchange rate of a protein.
+
+
+ Protein hydrogen exchange rate
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The extinction coefficient of a protein.
+
+
+ Protein extinction coefficient
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The optical density of a protein.
+
+
+ Protein optical density
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ An informative report on protein subcellular localisation (nuclear, cytoplasmic, mitochondrial, chloroplast, plastid, membrane etc) or destination (exported / extracellular proteins).
+
+ Protein subcellular localisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on allergenicity / immunogenicity of peptides and proteins.
+ Peptide immunogenicity
+ Peptide immunogenicity report
+
+
+ This includes data on peptide ligands that elicit an immune response (immunogens), allergic cross-reactivity, predicted antigenicity (Hopp and Woods plot) etc. These data are useful in the development of peptide-specific antibodies or multi-epitope vaccines. Methods might use sequence data (for example motifs) and / or structural data.
+ Peptide immunogenicity data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A report on the immunogenicity of MHC class I or class II binding peptides.
+
+ MHC peptide immunogenicity report
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific protein 3D structure(s) or structural domains.
+ Protein property (structural)
+ Protein report (structure)
+ Protein structural property
+ Protein structure report (domain)
+ Protein structure-derived report
+
+
+ Protein structure report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on the quality of a protein three-dimensional model.
+ Protein property (structural quality)
+ Protein report (structural quality)
+ Protein structure report (quality evaluation)
+ Protein structure validation report
+
+
+ Model validation might involve checks for atomic packing, steric clashes, agreement with electron density maps etc.
+ Protein structural quality report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Data on inter-atomic or inter-residue contacts, distances and interactions in protein structure(s) or on the interactions of protein atoms or residues with non-protein groups.
+
+
+ Protein non-covalent interactions report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Informative report on flexibility or motion of a protein structure.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein flexibility or motion report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the solvent accessible or buried surface area of a protein structure.
+
+
+ This concept covers definitions of the protein surface, interior and interfaces, accessible and buried residues, surface accessible pockets, interior inaccessible cavities etc.
+ Protein solvent accessibility
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on the surface properties (shape, hydropathy, electrostatic patches etc) of a protein structure.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein surface report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phi/psi angle data or a Ramachandran plot of a protein structure.
+
+
+ Ramachandran plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the net charge distribution (dipole moment) of a protein structure.
+
+
+ Protein dipole moment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A matrix of distances between amino acid residues (for example the C-alpha atoms) in a protein structure.
+
+
+ Protein distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An amino acid residue contact map for a protein structure.
+
+
+ Protein contact map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on clusters of contacting residues in protein structures such as a key structural residue network.
+
+
+ Protein residue 3D cluster
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Patterns of hydrogen bonding in protein structures.
+
+
+ Protein hydrogen bonds
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Non-canonical atomic interactions in protein structures.
+
+ Protein non-canonical interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a node from the CATH database.
+
+ The report (for example http://www.cathdb.info/cathnode/1.10.10.10) includes CATH code (of the node and upper levels in the hierarchy), classification text (of appropriate levels in hierarchy), list of child nodes, representative domain and other relevant data and links.
+ CATH node
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a node from the SCOP database.
+
+ SCOP node
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ An EMBASSY domain classification file (DCF) of classification and other data for domains from SCOP or CATH, in EMBL-like format.
+
+
+ EMBASSY domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'class' node from the CATH database.
+
+ CATH class
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'architecture' node from the CATH database.
+
+ CATH architecture
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'topology' node from the CATH database.
+
+ CATH topology
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'homologous superfamily' node from the CATH database.
+
+ CATH homologous superfamily
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'structurally similar group' node from the CATH database.
+
+ CATH structurally similar group
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'functional category' node from the CATH database.
+
+ CATH functional category
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on known protein structural domains or folds that are recognised (identified) in protein sequence(s).
+
+ Methods use some type of mapping between sequence and fold, for example secondary structure prediction and alignment, profile comparison, sequence properties, homologous sequence search, kernel machines etc. Domains and folds might be taken from SCOP or CATH.
+ Protein fold recognition report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ protein-protein interaction(s), including interactions between protein domains.
+
+
+ Protein-protein interaction report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report on protein-ligand (small molecule) interaction(s).
+ Protein-drug interaction report
+
+
+ Protein-ligand interaction report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ protein-DNA/RNA interaction(s).
+
+
+ Protein-nucleic acid interactions report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the dissociation characteristics of a double-stranded nucleic acid molecule (DNA or a DNA/RNA hybrid) during heating.
+ Nucleic acid stability profile
+ Melting map
+ Nucleic acid melting curve
+
+
+ A melting (stability) profile calculated the free energy required to unwind and separate the nucleic acid strands, plotted for sliding windows over a sequence.
+ Nucleic acid melting curve: a melting curve of a double-stranded nucleic acid molecule (DNA or DNA/RNA). Shows the proportion of nucleic acid which are double-stranded versus temperature.
+ Nucleic acid probability profile: a probability profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA). Shows the probability of a base pair not being melted (i.e. remaining as double-stranded DNA) at a specified temperature
+ Nucleic acid stitch profile: stitch profile of hybridised or double stranded nucleic acid (DNA or RNA/DNA). A stitch profile diagram shows partly melted DNA conformations (with probabilities) at a range of temperatures. For example, a stitch profile might show possible loop openings with their location, size, probability and fluctuations at a given temperature.
+ Nucleic acid temperature profile: a temperature profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA). Plots melting temperature versus base position.
+ Nucleic acid melting profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Enthalpy of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+
+ Nucleic acid enthalpy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entropy of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+
+ Nucleic acid entropy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Melting temperature of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+ Nucleic acid melting temperature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Stitch profile of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+
+ Nucleic acid stitch profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA base pair stacking energies data.
+
+
+ DNA base pair stacking energies data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA base pair twist angle data.
+
+
+ DNA base pair twist angle data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA base trimer roll angles data.
+
+
+ DNA base trimer roll angles data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ RNA parameters used by the Vienna package.
+
+ Vienna RNA parameters
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Structure constraints used by the Vienna package.
+
+ Vienna RNA structure constraints
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ RNA concentration data used by the Vienna package.
+
+ Vienna RNA concentration data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ RNA calculated energy data generated by the Vienna package.
+
+ Vienna RNA calculated energy
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dotplot of RNA base pairing probability matrix.
+
+
+ Such as generated by the Vienna package.
+ Base pairing probability matrix dotplot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about RNA/DNA folding, minimum folding energies for DNA or RNA sequences, energy landscape of RNA mutants etc.
+ Nucleic acid report (folding model)
+ Nucleic acid report (folding)
+ RNA secondary structure folding classification
+ RNA secondary structure folding probabilities
+
+
+ Nucleic acid folding report
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Table of codon usage data calculated from one or more nucleic acid sequences.
+
+
+ A codon usage table might include the codon usage table name, optional comments and a table with columns for codons and corresponding codon usage data. A genetic code can be extracted from or represented by a codon usage table.
+ Codon usage table
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A genetic code for an organism.
+
+
+ A genetic code need not include detailed codon usage information.
+ Genetic code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple measure of synonymous codon usage bias often used to predict gene expression levels.
+
+ Codon adaptation index
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of the synonymous codon usage calculated for windows over a nucleotide sequence.
+ Synonymous codon usage statistic plot
+
+
+ Codon usage bias plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The effective number of codons used in a gene sequence. This reflects how far codon usage of a gene departs from equal usage of synonymous codons.
+
+ Nc statistic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The differences in codon usage fractions between two codon usage tables.
+
+
+ Codon usage fraction difference
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about the influence of genotype on drug response.
+
+
+ The report might correlate gene expression or single-nucleotide polymorphisms with drug efficacy or toxicity.
+ Pharmacogenomic test report
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific disease.
+
+
+ For example, an informative report on a specific tumor including nature and origin of the sample, anatomic site, organ or tissue, tumor type, including morphology and/or histologic type, and so on.
+ Disease report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report on linkage disequilibrium; the non-random association of alleles or polymorphisms at two or more loci (not necessarily on the same chromosome).
+
+
+ Linkage disequilibrium (report)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A graphical 2D tabular representation of expression data, typically derived from an omics experiment. A heat map is a table where rows and columns correspond to different features and contexts (for example, cells or samples) and the cell colour represents the level of expression of a gene in that context.
+
+
+ Heat map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Affymetrix library file of information about which probes belong to which probe set.
+
+ Affymetrix probe sets library file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Affymetrix library file of information about the probe sets such as the gene name with which the probe set is associated.
+ GIN file
+
+ Affymetrix probe sets information library file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Standard protonated molecular masses from trypsin (modified porcine trypsin, Promega) and keratin peptides, used in EMBOSS.
+
+
+ Molecular weights standard fingerprint
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report typically including a map (diagram) of a metabolic pathway.
+
+
+ This includes carbohydrate, energy, lipid, nucleotide, amino acid, glycan, PK/NRP, cofactor/vitamin, secondary metabolite, xenobiotics etc.
+ Metabolic pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ genetic information processing pathways.
+
+
+ Genetic information processing pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ environmental information processing pathways.
+
+
+ Environmental information processing pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report typically including a map (diagram) of a signal transduction pathway.
+
+
+ Signal transduction pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Topic concerning cellular process pathways.
+
+
+ Cellular process pathways report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ disease pathways, typically of human disease.
+
+
+ Disease pathway or network report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ A report typically including a map (diagram) of drug structure relationships.
+
+
+ Drug structure relationship map
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ networks of protein interactions.
+
+ Protein interaction networks
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An entry (data type) from the Minimal Information Requested in the Annotation of Biochemical Models (MIRIAM) database of data resources.
+
+ A MIRIAM entry describes a MIRIAM data type including the official name, synonyms, root URI, identifier pattern (regular expression applied to a unique identifier of the data type) and documentation. Each data type can be associated with several resources. Each resource is a physical location of a service (typically a database) providing information on the elements of a data type. Several resources may exist for each data type, provided the same (mirrors) or different information. MIRIAM provides a stable and persistent reference to its data types.
+ MIRIAM datatype
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple floating point number defining the lower or upper limit of an expectation value (E-value).
+ Expectation value
+
+
+ An expectation value (E-Value) is the expected number of observations which are at least as extreme as observations expected to occur by random chance. The E-value describes the number of hits with a given score or better that are expected to occur at random when searching a database of a particular size. It decreases exponentially with the score (S) of a hit. A low E value indicates a more significant score.
+ E-value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The z-value is the number of standard deviations a data value is above or below a mean value.
+
+
+ A z-value might be specified as a threshold for reporting hits from database searches.
+ Z-value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The P-value is the probability of obtaining by random chance a result that is at least as extreme as an observed result, assuming a NULL hypothesis is true.
+
+
+ A P-value might be specified as a threshold for reporting hits from database searches.
+ P-value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a database (or ontology) version, for example name, version number and release date.
+
+ Database version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on an application version, for example name, version number and release date.
+
+ Tool version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Information on a version of the CATH database.
+
+ CATH version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Cross-mapping of Swiss-Prot codes to PDB identifiers.
+
+ Swiss-Prot to PDB mapping
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Cross-references from a sequence record to other databases.
+
+ Sequence database cross-references
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Metadata on the status of a submitted job.
+
+ Values for EBI services are 'DONE' (job has finished and the results can then be retrieved), 'ERROR' (the job failed or no results were found), 'NOT_FOUND' (the job id is no longer available; job results might be deleted), 'PENDING' (the job is in a queue waiting processing), 'RUNNING' (the job is currently being processed).
+ Job status
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ The (typically numeric) unique identifier of a submitted job.
+
+ Job ID
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of job, for example interactive or non-interactive.
+
+ Job type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report of tool-specific metadata on some analysis or process performed, for example a log of diagnostic or error messages.
+
+ Tool log
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ DaliLite log file describing all the steps taken by a DaliLite alignment of two protein structures.
+
+ DaliLite log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ STRIDE log file.
+
+ STRIDE log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ NACCESS log file.
+
+ NACCESS log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS wordfinder log file.
+
+ EMBOSS wordfinder log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS (EMBASSY) domainatrix application log file.
+
+ EMBOSS domainatrix log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS (EMBASSY) sites application log file.
+
+ EMBOSS sites log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS (EMBASSY) supermatcher error file.
+
+ EMBOSS supermatcher error file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS megamerger log file.
+
+ EMBOSS megamerger log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS whichdb log file.
+
+ EMBOSS whichdb log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS vectorstrip log file.
+
+ EMBOSS vectorstrip log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A username on a computer system or a website.
+
+
+
+ Username
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A password on a computer system, or a website.
+
+
+
+ Password
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Email
+ Moby:EmailAddress
+ A valid email address of an end-user.
+
+
+
+ Email address
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a person.
+
+
+
+ Person name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Number of iterations of an algorithm.
+
+ Number of iterations
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Number of entities (for example database hits, sequences, alignments etc) to write to an output file.
+
+ Number of output entities
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controls the order of hits (reported matches) in an output file from a database search.
+
+ Hit sort order
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific drug.
+ Drug annotation
+ Drug structure relationship map
+
+
+ A drug structure relationship map is a report (typically a map diagram) of drug structure relationships.
+ Drug report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image (for viewing or printing) of a phylogenetic tree including (typically) a plot of rooted or unrooted phylogenies, cladograms, circular trees or phenograms and associated information.
+
+
+ See also 'Phylogenetic tree'
+ Phylogenetic tree image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of RNA secondary structure, knots, pseudoknots etc.
+
+
+ RNA secondary structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of protein secondary structure.
+
+
+ Protein secondary structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of one or more molecular tertiary (3D) structures.
+
+
+ Structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of two or more aligned molecular sequences possibly annotated with alignment features.
+
+
+ Sequence alignment image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of the structure of a small chemical compound.
+ Small molecule structure image
+ Chemical structure sketch
+ Small molecule sketch
+
+
+ The molecular identifier and formula are typically included.
+ Chemical structure image
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A fate map is a plan of an early stage of an embryo such as a blastula, showing areas that are significant to development.
+
+
+ Fate map
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of spots from a microarray experiment.
+
+
+ Microarray spots image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the BioPax ontology.
+
+ BioPax term
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition from The Gene Ontology (GO).
+
+ GO
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the MeSH vocabulary.
+
+ MeSH
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the HGNC controlled vocabulary.
+
+ HGNC
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the NCBI taxonomy vocabulary.
+
+ NCBI taxonomy vocabulary
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the Plant Ontology (PO).
+
+ Plant ontology term
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the UMLS vocabulary.
+
+ UMLS
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from Foundational Model of Anatomy.
+
+ Classifies anatomical entities according to their shared characteristics (genus) and distinguishing characteristics (differentia). Specifies the part-whole and spatial relationships of the entities, morphological transformation of the entities during prenatal development and the postnatal life cycle and principles, rules and definitions according to which classes and relationships in the other three components of FMA are represented.
+ FMA
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the EMAP mouse ontology.
+
+ EMAP
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the ChEBI ontology.
+
+ ChEBI
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the MGED ontology.
+
+ MGED
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the myGrid ontology.
+
+ The ontology is provided as two components, the service ontology and the domain ontology. The domain ontology provides concepts for core bioinformatics data types and their relations. The service ontology describes the physical and operational features of web services.
+ myGrid
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition for a biological process from the Gene Ontology (GO).
+
+ Data Type is an enumerated string.
+ GO (biological process)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition for a molecular function from the Gene Ontology (GO).
+
+ Data Type is an enumerated string.
+ GO (molecular function)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition for a cellular component from the Gene Ontology (GO).
+
+ Data Type is an enumerated string.
+ GO (cellular component)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A relation type defined in an ontology.
+
+ Ontology relation type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The definition of a concept from an ontology.
+ Ontology class definition
+
+
+ Ontology concept definition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A comment on a concept from an ontology.
+
+ Ontology concept comment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Reference for a concept from an ontology.
+
+ Ontology concept reference
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Information on a published article provided by the doc2loc program.
+
+ The doc2loc output includes the url, format, type and availability code of a document for every service provider.
+ doc2loc document information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:PDB_residue_no
+ WHATIF: pdb_number
+ A residue identifier (a string) from a PDB file.
+
+
+ PDB residue number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Cartesian coordinate of an atom (in a molecular structure).
+ Cartesian coordinate
+
+
+ Atomic coordinate
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Cartesian x coordinate of an atom (in a molecular structure).
+
+
+ Atomic x coordinate
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Cartesian y coordinate of an atom (in a molecular structure).
+
+
+ Atomic y coordinate
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Cartesian z coordinate of an atom (in a molecular structure).
+
+
+ Atomic z coordinate
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_atom_name
+ WHATIF: PDBx_auth_atom_id
+ WHATIF: PDBx_type_symbol
+ WHATIF: alternate_atom
+ WHATIF: atom_type
+ Identifier (a string) of a specific atom from a PDB file for a molecular structure.
+
+
+
+ PDB atom name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on a single atom from a protein structure.
+ Atom data
+ CHEBI:33250
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein atom
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on a single amino acid residue position in a protein structure.
+ Residue
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein residue
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an atom.
+
+
+
+ Atom name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: type
+ Three-letter amino acid residue names as used in PDB files.
+
+
+
+ PDB residue name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_model_num
+ WHATIF: model_number
+ Identifier of a model structure from a PDB file.
+ Model number
+
+
+
+ PDB model number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Summary of domain classification information for a CATH domain.
+
+ The report (for example http://www.cathdb.info/domain/1cukA01) includes CATH codes for levels in the hierarchy for the domain, level descriptions and relevant data and links.
+ CATH domain report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database (based on ATOM records in PDB) for CATH domains (clustered at different levels of sequence identity).
+
+ CATH representative domain sequences (ATOM)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database (based on COMBS sequence data) for CATH domains (clustered at different levels of sequence identity).
+
+ CATH representative domain sequences (COMBS)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database for all CATH domains (based on PDB ATOM records).
+
+ CATH domain sequences (ATOM)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database for all CATH domains (based on COMBS sequence data).
+
+ CATH domain sequences (COMBS)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Information on a molecular sequence version.
+ Sequence version information
+
+
+ Sequence version
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A numerical value, that is some type of scored value arising for example from a prediction method.
+
+
+ Score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Report on general functional properties of specific protein(s).
+
+ For properties that can be mapped to a sequence, use 'Sequence report' instead.
+ Protein report (function)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Aspergillus Genome Database.
+
+ Gene name (ASPGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Candida Genome Database.
+
+ Gene name (CGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from dictyBase database.
+
+ Gene name (dictyBase)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Primary name of a gene from EcoGene Database.
+
+ Gene name (EcoGene primary)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from MaizeGDB (maize genes) database.
+
+ Gene name (MaizeGDB)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Saccharomyces Genome Database.
+
+ Gene name (SGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Tetrahymena Genome Database.
+
+ Gene name (TGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene from E.coli Genetic Stock Center.
+
+ Gene name (CGSC)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene approved by the HUGO Gene Nomenclature Committee.
+
+ Gene name (HGNC)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene from the Mouse Genome Database.
+
+ Gene name (MGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene from Bacillus subtilis Genome Sequence Project.
+
+ Gene name (Bacillus subtilis)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: ApiDB_PlasmoDB
+ Identifier of a gene from PlasmoDB Plasmodium Genome Resource.
+
+
+
+ Gene ID (PlasmoDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a gene from EcoGene Database.
+ EcoGene Accession
+ EcoGene ID
+
+
+
+ Gene ID (EcoGene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: FB
+ http://www.geneontology.org/doc/GO.xrf_abbs: FlyBase
+ Gene identifier from FlyBase database.
+
+
+
+ Gene ID (FlyBase)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Gene identifier from Glossina morsitans GeneDB database.
+
+ Gene ID (GeneDB Glossina morsitans)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Gene identifier from Leishmania major GeneDB database.
+
+ Gene ID (GeneDB Leishmania major)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ http://www.geneontology.org/doc/GO.xrf_abbs: GeneDB_Pfalciparum
+ Gene identifier from Plasmodium falciparum GeneDB database.
+
+ Gene ID (GeneDB Plasmodium falciparum)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ http://www.geneontology.org/doc/GO.xrf_abbs: GeneDB_Spombe
+ Gene identifier from Schizosaccharomyces pombe GeneDB database.
+
+ Gene ID (GeneDB Schizosaccharomyces pombe)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ http://www.geneontology.org/doc/GO.xrf_abbs: GeneDB_Tbrucei
+ Gene identifier from Trypanosoma brucei GeneDB database.
+
+ Gene ID (GeneDB Trypanosoma brucei)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: GR_GENE
+ http://www.geneontology.org/doc/GO.xrf_abbs: GR_gene
+ Gene identifier from Gramene database.
+
+
+
+ Gene ID (Gramene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: PAMGO_VMD
+ http://www.geneontology.org/doc/GO.xrf_abbs: VMD
+ Gene identifier from Virginia Bioinformatics Institute microbial database.
+
+
+
+ Gene ID (Virginia microbial)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: SGN
+ Gene identifier from Sol Genomics Network.
+
+
+
+ Gene ID (SGN)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WBGene[0-9]{8}
+ http://www.geneontology.org/doc/GO.xrf_abbs: WB
+ http://www.geneontology.org/doc/GO.xrf_abbs: WormBase
+ Gene identifier used by WormBase database.
+
+
+
+ Gene ID (WormBase)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Any name (other than the recommended one) for a gene.
+
+ Gene synonym
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of an open reading frame attributed by a sequencing project.
+
+
+
+ ORF name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A component of a larger sequence assembly.
+
+ Sequence assembly component
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on a chromosome aberration such as abnormalities in chromosome structure.
+
+ Chromosome annotation (aberration)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a clone (cloned molecular sequence) from a database.
+
+
+
+ Clone ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_ins_code
+ WHATIF: insertion_code
+ An insertion code (part of the residue number) for an amino acid residue from a PDB file.
+
+
+ PDB insertion code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: PDBx_occupancy
+ The fraction of an atom type present at a site in a molecular structure.
+
+
+ The sum of the occupancies of all the atom types at a site should not normally significantly exceed 1.0.
+ Atomic occupancy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: PDBx_B_iso_or_equiv
+ Isotropic B factor (atomic displacement parameter) for an atom from a PDB file.
+
+
+ Isotropic B factor
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A cytogenetic map showing chromosome banding patterns in mutant cell lines relative to the wild type.
+ Deletion-based cytogenetic map
+
+
+ A cytogenetic map is built from a set of mutant cell lines with sub-chromosomal deletions and a reference wild-type line ('genome deletion panel'). The panel is used to map markers onto the genome by comparing mutant to wild-type banding patterns. Markers are linked (occur in the same deleted region) if they share the same banding pattern (presence or absence) as the deletion panel.
+ Deletion map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A genetic map which shows the approximate location of quantitative trait loci (QTL) between two or more markers.
+ Quantitative trait locus map
+
+
+ QTL map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Haplotyping_Study_obj
+ A map of haplotypes in a genome or other sequence, describing common patterns of genetic variation.
+
+
+ Haplotype map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Data describing a set of multiple genetic or physical maps, typically sharing a common set of features which are mapped.
+
+
+ Map set data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+
+ A feature which may be mapped (positioned) on a genetic or other type of map.
+
+ Mappable features may be based on Gramene's notion of map features; see http://www.gramene.org/db/cmap/feature_type_info.
+ Map feature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A designation of the type of map (genetic map, physical map, sequence map etc) or map set.
+
+ Map types may be based on Gramene's notion of a map type; see http://www.gramene.org/db/cmap/map_type_info.
+ Map type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a protein fold.
+
+
+
+ Protein fold name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:BriefTaxonConcept
+ Moby:PotentialTaxon
+ The name of a group of organisms belonging to the same taxonomic rank.
+ Taxonomic rank
+ Taxonomy rank
+
+
+
+ For a complete list of taxonomic ranks see https://www.phenoscape.org/wiki/Taxonomic_Rank_Vocabulary.
+ Taxon
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique identifier of a (group of) organisms.
+
+
+
+ Organism identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a genus of organism.
+
+
+
+ Genus name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GCP_Taxon
+ Moby:TaxonName
+ Moby:TaxonScientificName
+ Moby:TaxonTCS
+ Moby:iANT_organism-xml
+ The full name for a group of organisms, reflecting their biological classification and (usually) conforming to a standard nomenclature.
+ Taxonomic information
+ Taxonomic name
+
+
+
+ Name components correspond to levels in a taxonomic hierarchy (e.g. 'Genus', 'Species', etc.) Meta information such as a reference where the name was defined and a date might be included.
+ Taxonomic classification
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:iHOPorganism
+ A unique identifier for an organism used in the iHOP database.
+
+
+
+ iHOP organism ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Common name for an organism as used in the GenBank database.
+
+
+
+ Genbank common name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a taxon from the NCBI taxonomy database.
+
+
+
+ NCBI taxon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An alternative for a word.
+
+ Synonym
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A common misspelling of a word.
+
+ Misspelling
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An abbreviation of a phrase or word.
+
+ Acronym
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term which is likely to be misleading of its meaning.
+
+ Misnomer
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Author
+ Information on the authors of a published work.
+
+
+
+ Author ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier representing an author in the DragonDB database.
+
+
+
+ DragonDB author identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:DescribedLink
+ A URI along with annotation describing the data found at the address.
+
+
+ Annotated URI
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A controlled vocabulary for words and phrases that can appear in the keywords field (KW line) of entries from the UniProt database.
+
+ UniProt keywords
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:GENEFARM_GeneID
+ Identifier of a gene from the GeneFarm database.
+
+
+
+ Gene ID (GeneFarm)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:Blattner_number
+ The blattner identifier for a gene.
+
+
+
+ Blattner number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Moby_namespace:MIPS_GE_Maize
+ Identifier for genetic elements in MIPS Maize database.
+
+ Gene ID (MIPS Maize)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Moby_namespace:MIPS_GE_Medicago
+ Identifier for genetic elements in MIPS Medicago database.
+
+ Gene ID (MIPS Medicago)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The name of an Antirrhinum Gene from the DragonDB database.
+
+ Gene name (DragonDB)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ A unique identifier for an Arabidopsis gene, which is an acronym or abbreviation of the gene name.
+
+ Gene name (Arabidopsis)
+ true
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:iHOPsymbol
+ A unique identifier of a protein or gene used in the iHOP database.
+
+
+
+ iHOP symbol
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from the GeneFarm database.
+
+ Gene name (GeneFarm)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique name or other identifier of a genetic locus, typically conforming to a scheme that names loci (such as predicted genes) depending on their position in a molecular sequence, for example a completely sequenced genome or chromosome.
+ Locus identifier
+ Locus name
+
+
+
+ Locus ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AT[1-5]G[0-9]{5}
+ http://www.geneontology.org/doc/GO.xrf_abbs:AGI_LocusCode
+ Locus identifier for Arabidopsis Genome Initiative (TAIR, TIGR and MIPS databases).
+ AGI ID
+ AGI identifier
+ AGI locus code
+ Arabidopsis gene loci number
+
+
+
+ Locus ID (AGI)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: ASPGD
+ http://www.geneontology.org/doc/GO.xrf_abbs: ASPGDID
+ Identifier for loci from ASPGD (Aspergillus Genome Database).
+
+
+
+ Locus ID (ASPGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: Broad_MGG
+ Identifier for loci from Magnaporthe grisea Database at the Broad Institute.
+
+
+
+ Locus ID (MGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: CGD
+ http://www.geneontology.org/doc/GO.xrf_abbs: CGDID
+ Identifier for loci from CGD (Candida Genome Database).
+ CGD locus identifier
+ CGDID
+
+
+
+ Locus ID (CGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: JCVI_CMR
+ http://www.geneontology.org/doc/GO.xrf_abbs: TIGR_CMR
+ Locus identifier for Comprehensive Microbial Resource at the J. Craig Venter Institute.
+
+
+
+ Locus ID (CMR)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:LocusID
+ http://www.geneontology.org/doc/GO.xrf_abbs: NCBI_locus_tag
+ Identifier for loci from NCBI database.
+ Locus ID (NCBI)
+
+
+
+ NCBI locus tag
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: SGD
+ http://www.geneontology.org/doc/GO.xrf_abbs: SGDID
+ Identifier for loci from SGD (Saccharomyces Genome Database).
+ SGDID
+
+
+
+ Locus ID (SGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:MMP_Locus
+ Identifier of loci from Maize Mapping Project.
+
+
+
+ Locus ID (MMP)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:DDB_gene
+ Identifier of locus from DictyBase (Dictyostelium discoideum).
+
+
+
+ Locus ID (DictyBase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:EntrezGene_EntrezGeneID
+ Moby_namespace:EntrezGene_ID
+ Identifier of a locus from EntrezGene database.
+
+
+
+ Locus ID (EntrezGene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:MaizeGDB_Locus
+ Identifier of locus from MaizeGDB (Maize genome database).
+
+
+
+ Locus ID (MaizeGDB)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Moby:SO_QTL
+ A stretch of DNA that is closely linked to the genes underlying a quantitative trait (a phenotype that varies in degree and depends upon the interactions between multiple genes and their environment).
+
+ A QTL sometimes but does not necessarily correspond to a gene.
+ Quantitative trait locus
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:GeneId
+ Identifier of a gene from the KOME database.
+
+
+
+ Gene ID (KOME)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Tropgene_locus
+ Identifier of a locus from the Tropgene database.
+
+
+
+ Locus ID (Tropgene)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An alignment of molecular sequences, structures or profiles derived from them.
+
+
+ Alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data for an atom (in a molecular structure).
+ General atomic property
+
+
+ Atomic property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:SP_KW
+ http://www.geneontology.org/doc/GO.xrf_abbs: SP_KW
+ A word or phrase that can appear in the keywords field (KW line) of entries from the UniProt database.
+
+
+ UniProt keyword
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A name for a genetic locus conforming to a scheme that names loci (such as predicted genes) depending on their position in a molecular sequence, for example a completely sequenced genome or chromosome.
+
+ Ordered locus name
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GCP_MapInterval
+ Moby:GCP_MapPoint
+ Moby:GCP_MapPosition
+ Moby:GenePosition
+ Moby:HitPosition
+ Moby:Locus
+ Moby:MapPosition
+ Moby:Position
+ PDBML:_atom_site.id
+ A position in a map (for example a genetic map), either a single position (point) or a region / interval.
+ Locus
+ Map position
+
+
+ This includes positions in genomes based on a reference sequence. A position may be specified for any mappable object, i.e. anything that may have positional information such as a physical position in a chromosome. Data might include sequence region name, strand, coordinate system name, assembly name, start position and end position.
+ Sequence coordinates
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the intrinsic physical (e.g. structural) or chemical properties of one, more or all amino acids.
+ Amino acid data
+
+
+ Amino acid property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A human-readable collection of information which (typically) is generated or collated by hand and which describes a biological entity, phenomenon or associated primary (e.g. sequence or structural) data, as distinct from the primary data itself and computer-generated reports derived from it.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Annotation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data describing a molecular map (genetic or physical) or a set of such maps, including various attributes of, data extracted from or derived from the analysis of them, but excluding the map(s) themselves. This includes metadata for map sets that share a common set of features which are mapped.
+ Map attribute
+ Map set data
+
+
+ Map data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data used by the Vienna RNA analysis package.
+
+ Vienna RNA structural data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Data used to replace (mask) characters in a molecular sequence.
+
+ Sequence mask parameter
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning chemical reaction(s) catalysed by enzyme(s).
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Enzyme kinetics data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot giving an approximation of the kinetics of an enzyme-catalysed reaction, assuming simple kinetics (i.e. no intermediate or product inhibition, allostericity or cooperativity). It plots initial reaction rate to the substrate concentration (S) from which the maximum rate (vmax) is apparent.
+
+
+ Michaelis Menten plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot based on the Michaelis Menten equation of enzyme kinetics plotting the ratio of the initial substrate concentration (S) against the reaction velocity (v).
+
+
+ Hanes Woolf plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Raw data from or annotation on laboratory experiments.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a genome version.
+
+ Genome version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Typically a human-readable summary of a body of facts or information indicating why a statement is true or valid. This may include a computational prediction, laboratory experiment, literature reference etc.
+
+
+ Evidence
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A molecular sequence and minimal metadata, typically an identifier of the sequence and/or a comment.
+
+
+ Sequence record lite
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more molecular sequences, possibly with associated annotation.
+ Sequences
+
+
+ This concept is a placeholder of concepts for primary sequence data including raw sequences and sequence records. It should not normally be used for derivatives such as sequence alignments, motifs or profiles.
+ Sequence
+ http://purl.bioontology.org/ontology/MSH/D008969
+ http://purl.org/biotop/biotop.owl#BioMolecularSequenceInformation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A nucleic acid sequence and minimal metadata, typically an identifier of the sequence and/or a comment.
+
+
+ Nucleic acid sequence record (lite)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A protein sequence and minimal metadata, typically an identifier of the sequence and/or a comment.
+
+
+ Protein sequence record (lite)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information including annotation on a biological entity or phenomenon, computer-generated reports of analysis of primary data (e.g. sequence or structural), and metadata (data about primary data) or any other free (essentially unformatted) text, as distinct from the primary data itself.
+ Document
+ Record
+
+
+ You can use this term by default for any textual report, in case you can't find another, more specific term. Reports may be generated automatically or collated by hand and can include metadata on the origin, source, history, ownership or location of some thing.
+ Report
+ http://semanticscience.org/resource/SIO_000148
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ General data for a molecule.
+ General molecular property
+
+
+ Molecular property (general)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning molecular structural data.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Structural data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A nucleotide sequence motif.
+ Nucleic acid sequence motif
+ DNA sequence motif
+ RNA sequence motif
+
+
+ Sequence motif (nucleic acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An amino acid sequence motif.
+ Protein sequence motif
+
+
+ Sequence motif (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Some simple value controlling a search operation, typically a search of a database.
+
+ Search parameter
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of hits from searching a database of some type.
+ Database hits
+ Search results
+
+
+ Database search results
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ The secondary structure assignment (predicted or real) of a nucleic acid or protein.
+
+ Secondary structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An array of numerical values.
+ Array
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Data concerning, extracted from, or derived from the analysis of molecular alignment of some type.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Alignment data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific nucleic acid molecules.
+
+
+ Nucleic acid report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more molecular tertiary (3D) structures. It might include annotation on the structure, a computer-generated report of analysis of structural data, and metadata (data about primary data) or any other free (essentially unformatted) text, as distinct from the primary data itself.
+ Structure-derived report
+
+
+ Structure report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+
+
+ A report on nucleic acid structure-derived data, describing structural properties of a DNA molecule, or any other annotation or information about specific nucleic acid 3D structure(s).
+
+ Nucleic acid structure data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on the physical (e.g. structural) or chemical properties of molecules, or parts of a molecule.
+ Physicochemical property
+ SO:0000400
+
+
+ Molecular property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Structural data for DNA base pairs or runs of bases, such as energy or angle data.
+
+
+ DNA base structural data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a database (or ontology) entry version, such as name (or other identifier) or parent database, unique identifier of entry, data, author and so on.
+
+ Database entry version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A persistent (stable) and unique identifier, typically identifying an object (entry) from a database.
+
+
+
+ Accession
+ http://semanticscience.org/resource/SIO_000675
+ http://semanticscience.org/resource/SIO_000731
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ single nucleotide polymorphism (SNP) in a DNA sequence.
+
+
+ SNP
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Reference to a dataset (or a cross-reference between two datasets), typically one or more entries in a biological database or ontology.
+
+
+ A list of database accessions or identifiers is usually included.
+ Data reference
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a submitted job.
+
+
+
+ Job identifier
+ http://wsio.org/data_009
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+
+ A name of a thing, which need not necessarily uniquely identify it.
+ Symbolic name
+
+
+
+ Name
+ http://www.w3.org/2000/01/rdf-schema#label
+ http://semanticscience.org/resource/SIO_000116
+ http://usefulinc.com/ns/doap#name
+
+
+
+
+
+ Closely related, but focusing on labeling and human readability rather than on identification.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a thing, typically an enumerated string (a string with one of a limited set of values).
+
+ Type
+ http://purl.org/dc/elements/1.1/type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Authentication data usually used to log in to an account on an information system such as a web application or a database.
+
+
+
+ Account authentication
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A three-letter code used in the KEGG databases to uniquely identify organisms.
+
+
+
+ KEGG organism code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the KEGG GENES database.
+
+ Gene name (KEGG GENES)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an object from one of the BioCyc databases.
+
+
+
+ BioCyc ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a compound from the BioCyc chemical compounds database.
+ BioCyc compound ID
+ BioCyc compound identifier
+
+
+
+ Compound ID (BioCyc)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a biological reaction from the BioCyc reactions database.
+
+
+
+ Reaction ID (BioCyc)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an enzyme from the BioCyc enzymes database.
+ BioCyc enzyme ID
+
+
+
+ Enzyme ID (BioCyc)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a biological reaction from a database.
+
+
+
+ Reaction ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier that is re-used for data objects of fundamentally different types (typically served from a single database).
+
+
+
+ This branch provides an alternative organisation of the concepts nested under 'Accession' and 'Name'. All concepts under here are already included under 'Accession' or 'Name'.
+ Identifier (hybrid)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a molecular property.
+
+
+
+ Molecular property identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a codon usage table, for example a genetic code.
+ Codon usage table identifier
+
+
+
+ Codon usage table ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Primary identifier of an object from the FlyBase database.
+
+
+
+ FlyBase primary identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an object from the WormBase database.
+
+
+
+ WormBase identifier
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CE[0-9]{5}
+ Protein identifier used by WormBase database.
+
+
+
+ WormBase wormpep ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on a trinucleotide sequence that encodes an amino acid including the triplet sequence, the encoded amino acid or whether it is a start or stop codon.
+
+ Nucleic acid features (codon)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a map of a molecular sequence.
+
+
+
+ Map identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a software end-user on a website or a database (typically a person or an entity).
+
+
+
+ Person identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name or other identifier of a nucleic acid molecule.
+
+
+
+ Nucleic acid identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+
+ Frame for translation of DNA (3 forward and 3 reverse frames relative to a chromosome).
+
+ Translation frame specification
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a genetic code.
+
+
+
+ Genetic code identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Informal name for a genetic code, typically an organism name.
+
+
+
+ Genetic code name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a file format such as HTML, PNG, PDF, EMBL, GenBank and so on.
+
+
+
+ File format name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing a type of sequence profile such as frequency matrix, Gribskov profile, hidden Markov model etc.
+
+ Sequence profile type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a computer operating system such as Linux, PC or Mac.
+
+
+
+ Operating system name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A type of point or block mutation, including insertion, deletion, change, duplication and moves.
+
+ Mutation type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A logical operator such as OR, AND, XOR, and NOT.
+
+
+
+ Logical operator
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A control of the order of data that is output, for example the order of sequences in an alignment.
+
+ Possible options including sorting by score, rank, by increasing P-value (probability, i.e. most statistically significant hits given first) and so on.
+ Results sort order
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple parameter that is a toggle (boolean value), typically a control for a modal tool.
+
+ Toggle
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The width of an output sequence or alignment.
+
+ Sequence width
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for introducing or extending a gap in an alignment.
+
+
+ Gap penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A temperature concerning nucleic acid denaturation, typically the temperature at which the two strands of a hybridised or double stranded nucleic acid (DNA or RNA/DNA) molecule separate.
+ Melting temperature
+
+
+ Nucleic acid melting temperature
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The concentration of a chemical compound.
+
+
+ Concentration
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Size of the incremental 'step' a sequence window is moved over a sequence.
+
+ Window step size
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An image of a graph generated by the EMBOSS suite.
+
+ EMBOSS graph
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An application report generated by the EMBOSS suite.
+
+ EMBOSS report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An offset for a single-point sequence position.
+
+ Sequence offset
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A value that serves as a threshold for a tool (usually to control scoring or output).
+
+ Threshold
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ An informative report on a transcription factor protein.
+
+ This might include conformational or physicochemical properties, as well as sequence information for transcription factor(s) binding sites.
+ Protein report (transcription factor)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a category of biological or bioinformatics database.
+
+ Database category name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name of a sequence profile.
+
+ Sequence profile name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Specification of one or more colors.
+
+ Color
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A parameter that is used to control rendering (drawing) to a device or image.
+
+ Rendering parameter
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any arbitrary name of a molecular sequence.
+
+
+
+ Sequence name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A temporal date.
+
+ Date
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Word composition data for a molecular sequence.
+
+ Word composition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of Fickett testcode statistic (identifying protein coding regions) in a nucleotide sequence.
+
+
+ Fickett testcode plot
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of sequence similarities identified from word-matching or character comparison.
+ Sequence conservation report
+
+
+ Use this concept for calculated substitution rates, relative site variability, data on sites with biased properties, highly conserved or very poorly conserved sites, regions, blocks etc.
+ Sequence similarity plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of a peptide sequence looking down the axis of the helix for highlighting amphipathicity and other properties.
+
+
+ Helical wheel
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of a peptide sequence in a simple 3,4,3,4 repeating pattern that emulates at a simple level the arrangement of residues around an alpha helix.
+
+
+ Useful for highlighting amphipathicity and other properties.
+ Helical net
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A plot of general physicochemical properties of a protein sequence.
+
+ Protein sequence properties plot
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of pK versus pH for a protein.
+
+
+ Protein ionisation curve
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of character or word composition / frequency of a molecular sequence.
+
+
+ Sequence composition plot
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Density plot (of base composition) for a nucleotide sequence.
+
+
+ Nucleic acid density plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of a sequence trace (nucleotide sequence versus probabilities of each of the 4 bases).
+
+
+ Sequence trace image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on siRNA duplexes in mRNA.
+
+ Nucleic acid features (siRNA)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A collection of multiple molecular sequences and (typically) associated metadata that is intended for sequential processing.
+
+ This concept may be used for sequence sets that are expected to be read and processed a single sequence at a time.
+ Sequence set (stream)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Secondary identifier of an object from the FlyBase database.
+
+
+
+ Secondary identifiers are used to handle entries that were merged with or split from other entries in the database.
+ FlyBase secondary identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The number of a certain thing.
+
+ Cardinality
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A single thing.
+
+ Exactly 1
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ One or more things.
+
+ 1 or more
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Exactly two things.
+
+ Exactly 2
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Two or more things.
+
+ 2 or more
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A fixed-size datum calculated (by using a hash function) for a molecular sequence, typically for purposes of error detection or indexing.
+ Hash
+ Hash code
+ Hash sum
+ Hash value
+
+
+ Sequence checksum
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ chemical modification of a protein.
+
+
+ Protein features report (chemical modifications)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Data on an error generated by computer system or tool.
+
+ Error
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information on any arbitrary database entry.
+
+
+ Database entry metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A cluster of similar genes.
+
+ Gene cluster
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A molecular sequence and comprehensive metadata (such as a feature table), typically corresponding to a full entry from a molecular sequence database.
+
+
+ Sequence record full
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a plasmid in a database.
+
+
+
+ Plasmid identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique identifier of a specific mutation catalogued in a database.
+
+
+
+ Mutation ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Information describing the mutation itself, the organ site, tissue and type of lesion where the mutation has been identified, description of the patient origin and life-style.
+
+ Mutation annotation (basic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on the prevalence of mutation(s), including data on samples and mutation prevalence (e.g. by tumour type).
+
+ Mutation annotation (prevalence)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on mutation prognostic data, such as information on patient cohort, the study settings and the results of the study.
+
+ Mutation annotation (prognostic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on the functional properties of mutant proteins including transcriptional activities, promotion of cell growth and tumorigenicity, dominant negative effects, capacity to induce apoptosis, cell-cycle arrest or checkpoints in human cells and so on.
+
+ Mutation annotation (functional)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The number of a codon, for instance, at which a mutation is located.
+
+
+ Codon number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific tumor including nature and origin of the sample, anatomic site, organ or tissue, tumor type, including morphology and/or histologic type, and so on.
+
+ Tumor annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Basic information about a server on the web, such as an SRS server.
+
+ Server metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a field in a database.
+
+
+
+ Database field name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a sequence cluster from the SYSTERS database.
+ SYSTERS cluster ID
+
+
+
+ Sequence cluster ID (SYSTERS)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning a biological ontology.
+
+
+ Ontology metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Raw SCOP domain classification data files.
+
+ These are the parsable data files provided by SCOP.
+ Raw SCOP domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Raw CATH domain classification data files.
+
+ These are the parsable data files provided by CATH.
+ Raw CATH domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on the types of small molecules or 'heterogens' (non-protein groups) that are represented in PDB files.
+
+ Heterogen annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Phylogenetic property values data.
+
+ Phylogenetic property values
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A collection of sequences output from a bootstrapping (resampling) procedure.
+
+ Bootstrapping is often performed in phylogenetic analysis.
+ Sequence set (bootstrapped)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A consensus phylogenetic tree derived from comparison of multiple trees.
+
+ Phylogenetic consensus tree
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A data schema for organising or transforming data of some type.
+
+ Schema
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A DTD (document type definition).
+
+ DTD
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An XML Schema.
+
+ XML Schema
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A relax-NG schema.
+
+ Relax-NG schema
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An XSLT stylesheet.
+
+ XSLT stylesheet
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a data type.
+
+
+
+ Data resource definition name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an OBO file format such as OBO-XML, plain and so on.
+
+
+
+ OBO file format name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for genetic elements in MIPS database.
+ MIPS genetic element identifier
+
+
+
+ Gene ID (MIPS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of protein sequence(s) or protein sequence database entries.
+
+ Sequence identifier (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of nucleotide sequence(s) or nucleotide sequence database entries.
+
+ Sequence identifier (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An accession number of an entry from the EMBL sequence database.
+ EMBL ID
+ EMBL accession number
+ EMBL identifier
+
+
+
+ EMBL accession
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a polypeptide in the UniProt database.
+ UniProt entry name
+ UniProt identifier
+ UniProtKB entry name
+ UniProtKB identifier
+
+
+
+ UniProt ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the GenBank sequence database.
+ GenBank ID
+ GenBank accession number
+ GenBank identifier
+
+
+
+ GenBank accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Secondary (internal) identifier of a Gramene database entry.
+ Gramene internal ID
+ Gramene internal identifier
+ Gramene secondary ID
+
+
+
+ Gramene secondary identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of an entry from a database of molecular sequence variation.
+
+
+
+ Sequence variation ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique (and typically persistent) identifier of a gene in a database, that is (typically) different to the gene name/symbol.
+ Gene accession
+ Gene code
+
+
+
+ Gene ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the AceView genes database.
+
+ Gene name (AceView)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: ECK
+ Identifier of an E. coli K-12 gene from EcoGene Database.
+ E. coli K-12 gene identifier
+ ECK accession
+
+
+
+ Gene ID (ECK)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for a gene approved by the HUGO Gene Nomenclature Committee.
+ HGNC ID
+
+
+
+ Gene ID (HGNC)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a gene, (typically) assigned by a person and/or according to a naming scheme. It may contain white space characters and is typically more intuitive and readable than a gene symbol. It (typically) may be used to identify similar genes in different species and to derive a gene symbol.
+ Allele name
+
+
+
+ Gene name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the NCBI genes database.
+
+ Gene name (NCBI)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A specification of a chemical structure in SMILES format.
+
+
+ SMILES string
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the STRING database of protein-protein interactions.
+
+
+
+ STRING ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific virus.
+
+ Virus annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on the taxonomy of a specific virus.
+
+ Virus annotation (taxonomy)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a biological reaction from the SABIO-RK reactions database.
+
+
+
+ Reaction ID (SABIO-RK)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific carbohydrate 3D structure(s).
+
+
+ Carbohydrate report
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A series of digits that are assigned consecutively to each sequence record processed by NCBI. The GI number bears no resemblance to the Accession number of the sequence record.
+ NCBI GI number
+
+
+
+ Nucleotide sequence GI number is shown in the VERSION field of the database record. Protein sequence GI number is shown in the CDS/db_xref field of a nucleotide database record, and the VERSION field of a protein database record.
+ GI number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier assigned to sequence records processed by NCBI, made of the accession number of the database record followed by a dot and a version number.
+ NCBI accession.version
+ accession.version
+
+
+
+ Nucleotide sequence version contains two letters followed by six digits, a dot, and a version number (or for older nucleotide sequence records, the format is one letter followed by five digits, a dot, and a version number). Protein sequence version contains three letters followed by five digits, a dot, and a version number.
+ NCBI version
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a cell line.
+
+
+
+ Cell line name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The exact name of a cell line.
+
+
+
+ Cell line name (exact)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The truncated name of a cell line.
+
+
+
+ Cell line name (truncated)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a cell line without any punctuation.
+
+
+
+ Cell line name (no punctuation)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The assonant name of a cell line.
+
+
+
+ Cell line name (assonant)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique, persistent identifier of an enzyme.
+ Enzyme accession
+
+
+
+ Enzyme ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an enzyme from the REBASE enzymes database.
+
+
+
+ REBASE enzyme number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DB[0-9]{5}
+ Unique identifier of a drug from the DrugBank database.
+
+
+
+ DrugBank ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier assigned to NCBI protein sequence records.
+ protein gi
+ protein gi number
+
+
+
+ Nucleotide sequence GI number is shown in the VERSION field of the database record. Protein sequence GI number is shown in the CDS/db_xref field of a nucleotide database record, and the VERSION field of a protein database record.
+ GI number (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A score derived from the alignment of two sequences, which is then normalised with respect to the scoring system.
+
+
+ Bit scores are normalised with respect to the scoring system and therefore can be used to compare alignment scores from different searches.
+ Bit score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+
+ Phase for translation of DNA (0, 1 or 2) relative to a fragment of the coding sequence.
+
+ Translation phase specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning or describing some core computational resource, as distinct from primary data. This includes metadata on the origin, source, history, ownership or location of some thing.
+ Provenance metadata
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Resource metadata
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any arbitrary identifier of an ontology.
+
+
+
+ Ontology identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a concept in an ontology.
+
+
+
+ Ontology concept name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a build of a particular genome.
+
+
+
+ Genome build identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a biological pathway or network.
+
+
+
+ Pathway or network name
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]{2,3}[0-9]{5}
+ Identifier of a pathway from the KEGG pathway database.
+ KEGG pathway ID
+
+
+
+ Pathway ID (KEGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+
+ Identifier of a pathway from the NCI-Nature pathway database.
+
+
+
+ Pathway ID (NCI-Nature)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a pathway from the ConsensusPathDB pathway database.
+
+
+
+ Pathway ID (ConsensusPathDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef database.
+ UniRef cluster id
+ UniRef entry accession
+
+
+
+ Sequence cluster ID (UniRef)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef100 database.
+ UniRef100 cluster id
+ UniRef100 entry accession
+
+
+
+ Sequence cluster ID (UniRef100)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef90 database.
+ UniRef90 cluster id
+ UniRef90 entry accession
+
+
+
+ Sequence cluster ID (UniRef90)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef50 database.
+ UniRef50 cluster id
+ UniRef50 entry accession
+
+
+
+ Sequence cluster ID (UniRef50)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning or derived from an ontology.
+ Ontological data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Ontology data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific RNA family or other group of classified RNA sequences.
+ RNA family annotation
+
+
+ RNA family report
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an RNA family, typically an entry from a RNA sequence classification database.
+
+
+
+ RNA family identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Stable accession number of an entry (RNA family) from the RFAM database.
+
+
+
+ RFAM accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing a type of protein family signature (sequence classifier) from the InterPro database.
+
+ Protein signature type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on protein domain-DNA/RNA interaction(s).
+
+ Domain-nucleic acid interaction report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ An informative report on protein domain-protein domain interaction(s).
+
+ Domain-domain interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data on indirect protein domain-protein domain interaction(s).
+
+ Domain-domain interaction (indirect)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a nucleotide or protein sequence database entry.
+
+
+
+ Sequence accession (hybrid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ Data concerning two-dimensional polygel electrophoresis.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ 2D PAGE data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ two-dimensional gel electrophoresis experiments, gels or spots in a gel.
+
+
+ 2D PAGE report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A persistent, unique identifier of a biological pathway or network (typically a database entry).
+
+
+
+ Pathway or network accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of the (1D representations of) secondary structure of two or more molecules.
+
+
+ Secondary structure alignment
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an object from the ASTD database.
+
+
+
+ ASTD ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an exon from the ASTD database.
+
+
+
+ ASTD ID (exon)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an intron from the ASTD database.
+
+
+
+ ASTD ID (intron)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a polyA signal from the ASTD database.
+
+
+
+ ASTD ID (polya)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a transcription start site from the ASTD database.
+
+
+
+ ASTD ID (tss)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ An informative report on individual spot(s) from a two-dimensional (2D PAGE) gel.
+
+
+ 2D PAGE spot report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a spot from a two-dimensional (protein) gel.
+
+
+
+ Spot ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a spot from a two-dimensional (protein) gel in the SWISS-2DPAGE database.
+
+
+
+ Spot serial number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a spot from a two-dimensional (protein) gel from a HSC-2DPAGE database.
+
+
+
+ Spot ID (HSC-2DPAGE)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data on the interaction of a protein (or protein domain) with specific structural (3D) and/or sequence motifs.
+
+ Protein-motif interaction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a strain of an organism variant, typically a plant, virus or bacterium.
+
+
+
+ Strain identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of an item from the CABRI database.
+
+
+
+ CABRI accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Report of genotype experiment including case control, population, and family studies. These might use array based methods and re-sequencing methods.
+
+
+ Experiment report (genotyping)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of genotype experiment metadata.
+
+
+
+ Genotype experiment ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the EGA database.
+
+
+
+ EGA accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ IPI[0-9]{8}
+ Identifier of a protein entry catalogued in the International Protein Index (IPI) database.
+
+
+
+ IPI protein ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of a protein from the RefSeq database.
+ RefSeq protein ID
+
+
+
+ RefSeq accession (protein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry (promoter) from the EPD database.
+ EPD identifier
+
+
+
+ EPD ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the TAIR database.
+
+
+
+ TAIR accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an Arabidopsis thaliana gene from the TAIR database.
+
+
+
+ TAIR accession (At gene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UniSTS database.
+
+
+
+ UniSTS accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UNITE database.
+
+
+
+ UNITE accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UTR database.
+
+
+
+ UTR accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UPI[A-F0-9]{10}
+ Accession number of a UniParc (protein sequence) database entry.
+ UPI
+ UniParc ID
+
+
+
+ UniParc accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the Rouge or HUGE databases.
+
+
+
+ mFLJ/mKIAA number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific fungus.
+
+ Fungi annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific fungus anamorph.
+
+ Fungi annotation (anamorph)
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the Ensembl database.
+ Ensembl ID (protein)
+ Protein ID (Ensembl)
+
+
+
+ Ensembl protein ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific toxin.
+
+ Toxin annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on a membrane protein.
+
+ Protein report (membrane protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ An informative report on tentative or known protein-drug interaction(s).
+
+
+ Protein-drug interaction report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning a map of molecular sequence(s).
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Map data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning phylogeny, typically of molecular sequences, including reports of information concerning or derived from a phylogenetic tree, or from comparing two or more phylogenetic trees.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Phylogenetic data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning one or more protein molecules.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Protein data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning one or more nucleic acid molecules.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Nucleic acid data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning, extracted from, or derived from the analysis of a scientific text (or texts) such as a full text article from a scientific journal.
+ Article data
+ Scientific text data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation. It includes concepts that are best described as scientific text or closely concerned with or derived from text.
+ Text data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+ Typically a simple numerical or string value that controls the operation of a tool.
+
+ Parameter
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning a specific type of molecule.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Molecular data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+
+ An informative report on a specific molecule.
+
+ Molecule report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific organism.
+ Organism annotation
+
+
+ Organism report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about how a scientific experiment or analysis was carried out that results in a specific set of data or results used for further analysis or to test a specific hypothesis.
+ Experiment annotation
+ Experiment metadata
+ Experiment report
+
+
+ Protocol
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An attribute of a molecular sequence, possibly in reference to some other sequence.
+ Sequence parameter
+
+
+ Sequence attribute
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Output from a serial analysis of gene expression (SAGE), massively parallel signature sequencing (MPSS) or sequencing by synthesis (SBS) experiment. In all cases this is a list of short sequence tags and the number of times it is observed.
+ Sequencing-based expression profile
+ Sequence tag profile (with gene assignment)
+
+
+ SAGE, MPSS and SBS experiments are usually performed to study gene expression. The sequence tags are typically subsequently annotated (after a database search) with the mRNA (and therefore gene) the tag was extracted from.
+ This includes tag to gene assignments (tag mapping) of SAGE, MPSS and SBS data. Typically this is the sequencing-based expression profile annotated with gene identifiers.
+ Sequence tag profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning a mass spectrometry measurement.
+
+
+ Mass spectrometry data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw data from experimental methods for determining protein structure.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein structure raw data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a mutation.
+
+
+
+ Mutation identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning an alignment of two or more molecular sequences, structures or derived data.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. This includes entities derived from sequences and structures such as motifs and profiles.
+ Alignment data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning an index of data.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Data index data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Single letter amino acid identifier, e.g. G.
+
+
+
+ Amino acid name (single letter)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Three letter amino acid identifier, e.g. GLY.
+
+
+
+ Amino acid name (three letter)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Full name of an amino acid, e.g. Glycine.
+
+
+
+ Amino acid name (full name)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a toxin.
+
+
+
+ Toxin identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a toxin from the ArachnoServer database.
+
+
+
+ ArachnoServer ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A simple summary of expressed genes.
+
+ Expressed gene list
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a monomer from the BindingDB database.
+
+
+
+ BindingDB Monomer ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept from the GO ontology.
+
+ GO concept name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a 'biological process' concept from the Gene Ontology.
+
+
+
+ GO concept ID (biological process)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a 'molecular function' concept from the Gene Ontology.
+
+
+
+ GO concept ID (molecular function)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept for a cellular component from the GO ontology.
+
+ GO concept name (cellular component)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image arising from a Northern Blot experiment.
+
+
+ Northern blot image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a blot from a Northern Blot.
+
+
+
+ Blot ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a blot from a Northern Blot from the BlotBase database.
+
+
+
+ BlotBase blot ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw data on a biological hierarchy, describing the hierarchy proper, hierarchy components and possibly associated annotation.
+ Hierarchy annotation
+
+
+ Hierarchy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry from a database of biological hierarchies.
+
+ Hierarchy identifier
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the Brite database of biological hierarchies.
+
+
+
+ Brite hierarchy ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A type (represented as a string) of cancer.
+
+ Cancer type
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier for an organism used in the BRENDA database.
+
+
+
+ BRENDA organism ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a taxon using the controlled vocabulary of the UniGene database.
+ UniGene organism abbreviation
+
+
+
+ UniGene taxon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a taxon using the controlled vocabulary of the UTRdb database.
+
+
+
+ UTRdb taxon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a catalogue of biological resources.
+ Catalogue identifier
+
+
+
+ Catalogue ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a catalogue of biological resources from the CABRI database.
+
+
+
+ CABRI catalogue name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on protein secondary structure alignment-derived data or metadata.
+
+ Secondary structure alignment metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Was deprecated since 1.5, but not correctly (fully) obsoleted until 1.19.
+ 1.5
+
+
+ An informative report on the physical, chemical or other information concerning the interaction of two or more molecules (or parts of molecules).
+
+ Molecule interaction report
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Primary data about a specific biological pathway or network (the nodes and connections within the pathway or network).
+ Network
+ Pathway
+
+
+ Pathway or network
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning one or more small molecules.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Small molecule data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning a particular genotype, phenotype or a genotype / phenotype relation.
+
+ Genotype and phenotype data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image, hybridisation or some other data arising from a study of feature/molecule expression, typically profiling or quantification.
+ Gene expression data
+ Gene product profile
+ Gene product quantification data
+ Gene transcription profile
+ Gene transcription quantification data
+ Metabolite expression data
+ Microarray data
+ Non-coding RNA profile
+ Non-coding RNA quantification data
+ Protein expression data
+ RNA profile
+ RNA quantification data
+ RNA-seq data
+ Transcriptome profile
+ Transcriptome quantification data
+ mRNA profile
+ mRNA quantification data
+ Protein profile
+ Protein quantification data
+ Proteome profile
+ Proteome quantification data
+
+
+ Expression data
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ C[0-9]+
+ Unique identifier of a chemical compound from the KEGG database.
+ KEGG compound ID
+ KEGG compound identifier
+
+
+
+ Compound ID (KEGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name (not necessarily stable) of an entry (RNA family) from the RFAM database.
+
+
+
+ RFAM name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ R[0-9]+
+ Identifier of a biological reaction from the KEGG reactions database.
+
+
+
+ Reaction ID (KEGG)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ D[0-9]+
+ Unique identifier of a drug from the KEGG Drug database.
+
+
+
+ Drug ID (KEGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ENS[A-Z]*[FPTG][0-9]{11}
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl database.
+ Ensembl IDs
+
+
+
+ Ensembl ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [A-Z][0-9]+(\.[-[0-9]+])?
+ An identifier of a disease from the International Classification of Diseases (ICD) database.
+
+
+
+ ICD identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9A-Za-z]+:[0-9]+:[0-9]{1,5}(\.[0-9])?
+ Unique identifier of a sequence cluster from the CluSTr database.
+ CluSTr ID
+ CluSTr cluster ID
+
+
+
+ Sequence cluster ID (CluSTr)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ G[0-9]+
+ Unique identifier of a glycan ligand from the KEGG GLYCAN database (a subset of KEGG LIGAND).
+
+
+
+ KEGG Glycan ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+\.[A-Z]\.[0-9]+\.[0-9]+\.[0-9]+
+ A unique identifier of a family from the transport classification database (TCDB) of membrane transport proteins.
+ TC number
+
+
+
+ OBO file for regular expression.
+ TCDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ MINT\-[0-9]{1,5}
+ Unique identifier of an entry from the MINT database of protein-protein interactions.
+
+
+
+ MINT ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DIP[\:\-][0-9]{3}[EN]
+ Unique identifier of an entry from the DIP database of protein-protein interactions.
+
+
+
+ DIP ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A[0-9]{6}
+ Unique identifier of a protein listed in the UCSD-Nature Signaling Gateway Molecule Pages database.
+
+
+
+ Signaling Gateway protein ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein modification catalogued in a database.
+
+
+
+ Protein modification ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AA[0-9]{4}
+ Identifier of a protein modification catalogued in the RESID database.
+
+
+
+ RESID ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{4,7}
+ Identifier of an entry from the RGD database.
+
+
+
+ RGD ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AASequence:[0-9]{10}
+ Identifier of a protein sequence from the TAIR database.
+
+
+
+ TAIR accession (protein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ HMDB[0-9]{5}
+ Identifier of a small molecule metabolite from the Human Metabolome Database (HMDB).
+ HMDB ID
+
+
+
+ Compound ID (HMDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ LM(FA|GL|GP|SP|ST|PR|SL|PK)[0-9]{4}([0-9a-zA-Z]{4})?
+ Identifier of an entry from the LIPID MAPS database.
+ LM ID
+
+
+
+ LIPID MAPS ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PAp[0-9]{8}
+ PDBML:pdbx_PDB_strand_id
+ Identifier of a peptide from the PeptideAtlas peptide databases.
+
+
+
+ PeptideAtlas ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ Identifier of a report of molecular interactions from a database (typically).
+
+
+ Molecular interaction ID
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ A unique identifier of an interaction from the BioGRID database.
+
+
+
+ BioGRID interaction ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ S[0-9]{2}\.[0-9]{3}
+ Unique identifier of a peptidase enzyme from the MEROPS database.
+ MEROPS ID
+
+
+
+ Enzyme ID (MEROPS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a mobile genetic element.
+
+
+
+ Mobile genetic element ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ mge:[0-9]+
+ An identifier of a mobile genetic element from the Aclame database.
+
+
+
+ ACLAME ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PWY[a-zA-Z_0-9]{2}\-[0-9]{3}
+ Identifier of an entry from the Saccharomyces genome database (SGD).
+
+
+
+ SGD ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a book.
+
+
+
+ Book ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (ISBN)?(-13|-10)?[:]?[ ]?([0-9]{2,3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[ -]?([0-9]|X)
+ The International Standard Book Number (ISBN) is for identifying printed books.
+
+
+
+ ISBN
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ B[0-9]{5}
+ Identifier of a metabolite from the 3DMET database.
+ 3DMET ID
+
+
+
+ Compound ID (3DMET)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ([A-NR-Z][0-9][A-Z][A-Z0-9][A-Z0-9][0-9])_.*|([OPQ][0-9][A-Z0-9][A-Z0-9][A-Z0-9][0-9]_.*)|(GAG_.*)|(MULT_.*)|(PFRAG_.*)|(LIP_.*)|(CAT_.*)
+ A unique identifier of an interaction from the MatrixDB database.
+
+
+
+ MatrixDB interaction ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ A unique identifier for pathways, reactions, complexes and small molecules from the cPath (Pathway Commons) database.
+
+
+
+ These identifiers are unique within the cPath database, however, they are not stable between releases.
+ cPath ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ [0-9]+
+ Identifier of an assay from the PubChem database.
+
+
+
+ PubChem bioassay ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the PubChem database.
+ PubChem identifier
+
+
+
+ PubChem ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ M[0-9]{4}
+ Identifier of an enzyme reaction mechanism from the MACie database.
+ MACie entry number
+
+
+
+ Reaction ID (MACie)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ MI[0-9]{7}
+ Identifier for a gene from the miRBase database.
+ miRNA ID
+ miRNA identifier
+ miRNA name
+
+
+
+ Gene ID (miRBase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ZDB\-GENE\-[0-9]+\-[0-9]+
+ Identifier for a gene from the Zebrafish information network genome (ZFIN) database.
+
+
+
+ Gene ID (ZFIN)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{5}
+ Identifier of an enzyme-catalysed reaction from the Rhea database.
+
+
+
+ Reaction ID (Rhea)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UPA[0-9]{5}
+ Identifier of a biological pathway from the Unipathway database.
+ upaid
+
+
+
+ Pathway ID (Unipathway)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a small molecule from the ChEMBL database.
+ ChEMBL ID
+
+
+
+ Compound ID (ChEMBL)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+
+ Unique identifier of an entry from the Ligand-gated ion channel (LGICdb) database.
+
+
+
+ LGICdb identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a biological reaction (kinetics entry) from the SABIO-RK reactions database.
+
+
+
+ Reaction kinetics ID (SABIO-RK)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of an entry from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ PharmGKB ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of a pathway from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ Pathway ID (PharmGKB)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of a disease from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ Disease ID (PharmGKB)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of a drug from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ Drug ID (PharmGKB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAP[0-9]+
+ Identifier of a drug from the Therapeutic Target Database (TTD).
+
+
+
+ Drug ID (TTD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ TTDS[0-9]+
+ Identifier of a target protein from the Therapeutic Target Database (TTD).
+
+
+
+ Target ID (TTD)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique identifier of a type or group of cells.
+
+
+
+ Cell type identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ A unique identifier of a neuron from the NeuronDB database.
+
+
+
+ NeuronDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+
+ A unique identifier of a neuron from the NeuroMorpho database.
+
+
+
+ NeuroMorpho ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a chemical from the ChemIDplus database.
+ ChemIDplus ID
+
+
+
+ Compound ID (ChemIDplus)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SMP[0-9]{5}
+ Identifier of a pathway from the Small Molecule Pathway Database (SMPDB).
+
+
+
+ Pathway ID (SMPDB)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the BioNumbers database of key numbers and associated data in molecular biology.
+
+
+
+ BioNumbers ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ T3D[0-9]+
+ Unique identifier of a toxin from the Toxin and Toxin Target Database (T3DB) database.
+
+
+
+ T3DB ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a carbohydrate.
+
+
+
+ Carbohydrate identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the GlycomeDB database.
+
+
+
+ GlycomeDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+[0-9]+
+ Identifier of an entry from the LipidBank database.
+
+
+
+ LipidBank ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ cd[0-9]{5}
+ Identifier of a conserved domain from the Conserved Domain Database.
+
+
+
+ CDD ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{1,5}
+ An identifier of an entry from the MMDB database.
+ MMDB accession
+
+
+
+ MMDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Unique identifier of an entry from the iRefIndex database of protein-protein interactions.
+
+
+
+ iRefIndex ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Unique identifier of an entry from the ModelDB database.
+
+
+
+ ModelDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a signaling pathway from the Database of Quantitative Cellular Signaling (DQCS).
+
+
+
+ Pathway ID (DQCS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database (Homo sapiens division).
+
+ Ensembl ID (Homo sapiens)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Bos taurus' division).
+
+ Ensembl ID ('Bos taurus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Canis familiaris' division).
+
+ Ensembl ID ('Canis familiaris')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Cavia porcellus' division).
+
+ Ensembl ID ('Cavia porcellus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Ciona intestinalis' division).
+
+ Ensembl ID ('Ciona intestinalis')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Ciona savignyi' division).
+
+ Ensembl ID ('Ciona savignyi')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Danio rerio' division).
+
+ Ensembl ID ('Danio rerio')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Dasypus novemcinctus' division).
+
+ Ensembl ID ('Dasypus novemcinctus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Echinops telfairi' division).
+
+ Ensembl ID ('Echinops telfairi')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Erinaceus europaeus' division).
+
+ Ensembl ID ('Erinaceus europaeus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Felis catus' division).
+
+ Ensembl ID ('Felis catus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Gallus gallus' division).
+
+ Ensembl ID ('Gallus gallus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Gasterosteus aculeatus' division).
+
+ Ensembl ID ('Gasterosteus aculeatus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Homo sapiens' division).
+
+ Ensembl ID ('Homo sapiens')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Loxodonta africana' division).
+
+ Ensembl ID ('Loxodonta africana')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Macaca mulatta' division).
+
+ Ensembl ID ('Macaca mulatta')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Monodelphis domestica' division).
+
+ Ensembl ID ('Monodelphis domestica')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Mus musculus' division).
+
+ Ensembl ID ('Mus musculus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Myotis lucifugus' division).
+
+ Ensembl ID ('Myotis lucifugus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Ornithorhynchus anatinus' division).
+
+ Ensembl ID ("Ornithorhynchus anatinus")
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Oryctolagus cuniculus' division).
+
+ Ensembl ID ('Oryctolagus cuniculus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Oryzias latipes' division).
+
+ Ensembl ID ('Oryzias latipes')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Otolemur garnettii' division).
+
+ Ensembl ID ('Otolemur garnettii')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Pan troglodytes' division).
+
+ Ensembl ID ('Pan troglodytes')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Rattus norvegicus' division).
+
+ Ensembl ID ('Rattus norvegicus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Spermophilus tridecemlineatus' division).
+
+ Ensembl ID ('Spermophilus tridecemlineatus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Takifugu rubripes' division).
+
+ Ensembl ID ('Takifugu rubripes')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Tupaia belangeri' division).
+
+ Ensembl ID ('Tupaia belangeri')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Xenopus tropicalis' division).
+
+ Ensembl ID ('Xenopus tropicalis')
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a protein domain (or other node) from the CATH database.
+
+
+
+ CATH identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 2.10.10.10
+ A code number identifying a family from the CATH database.
+
+
+
+ CATH node ID (family)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an enzyme from the CAZy enzymes database.
+ CAZy ID
+
+
+
+ Enzyme ID (CAZy)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier assigned by the I.M.A.G.E. consortium to a clone (cloned molecular sequence).
+ I.M.A.G.E. cloneID
+ IMAGE cloneID
+
+
+
+ Clone ID (IMAGE)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a 'cellular component' concept from the Gene Ontology.
+ GO concept identifier (cellular compartment)
+
+
+
+ GO concept ID (cellular component)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a chromosome as used in the BioCyc database.
+
+
+
+ Chromosome name (BioCyc)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a gene expression profile from the CleanEx database.
+
+
+
+ CleanEx entry name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of (typically a list of) gene expression experiments catalogued in the CleanEx database.
+
+
+
+ CleanEx dataset code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information concerning a genome as a whole.
+
+
+ Genome report
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein complex from the CORUM database.
+ CORUM complex ID
+
+
+
+ Protein ID (CORUM)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a position-specific scoring matrix from the CDD database.
+
+
+
+ CDD PSSM-ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the CuticleDB database.
+ CuticleDB ID
+
+
+
+ Protein ID (CuticleDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a predicted transcription factor from the DBD database.
+
+
+
+ DBD ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ General annotation on an oligonucleotide probe, or a set of probes.
+ Oligonucleotide probe sets annotation
+
+
+ Oligonucleotide probe annotation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an oligonucleotide from a database.
+
+
+
+ Oligonucleotide ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an oligonucleotide probe from the dbProbe database.
+
+
+
+ dbProbe ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Physicochemical property data for one or more dinucleotides.
+
+
+ Dinucleotide property
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a dinucleotide property from the DiProDB database.
+
+
+
+ DiProDB ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ disordered structure in a protein.
+
+
+ Protein features report (disordered structure)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the DisProt database.
+ DisProt ID
+
+
+
+ Protein ID (DisProt)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Annotation on an embryo or concerning embryological development.
+
+ Embryo report
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a gene transcript from the Ensembl database.
+ Transcript ID (Ensembl)
+
+
+
+ Ensembl transcript ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on one or more small molecules that are enzyme inhibitors.
+
+ Inhibitor annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Moby:GeneAccessionList
+ An identifier of a promoter of a gene that is catalogued in a database.
+
+
+
+ Promoter ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an EST sequence.
+
+
+
+ EST accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an EST sequence from the COGEME database.
+
+
+
+ COGEME EST ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a unisequence from the COGEME database.
+
+
+
+ A unisequence is a single sequence assembled from ESTs.
+ COGEME unisequence ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry (protein family) from the GeneFarm database.
+ GeneFarm family ID
+
+
+
+ Protein family ID (GeneFarm)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a family of organism.
+
+
+
+ Family name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a genus of viruses.
+
+ Genus name (virus)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a family of viruses.
+
+ Family name (virus)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a SwissRegulon database.
+
+ Database name (SwissRegulon)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A feature identifier as used in the SwissRegulon database.
+
+
+
+ This can be name of a gene, the ID of a TFBS, or genomic coordinates in form "chr:start..end".
+ Sequence feature ID (SwissRegulon)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of gene in the NMPDR database.
+
+
+
+ A FIG ID consists of four parts: a prefix, genome id, locus type and id number.
+ FIG ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of gene in the Xenbase database.
+
+
+
+ Gene ID (Xenbase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of gene in the Genolist database.
+
+
+
+ Gene ID (Genolist)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the Genolist genes database.
+
+ Gene name (Genolist)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry (promoter) from the ABS database.
+ ABS identifier
+
+
+
+ ABS ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a transcription factor from the AraC-XylS database.
+
+
+
+ AraC-XylS ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name of an entry (gene) from the HUGO database.
+
+ Gene name (HUGO)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a locus from the PseudoCAP database.
+
+
+
+ Locus ID (PseudoCAP)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a locus from the UTR database.
+
+
+
+ Locus ID (UTR)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a monosaccharide from the MonosaccharideDB database.
+
+
+
+ MonosaccharideDB ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a subdivision of the Collagen Mutation Database (CMD) database.
+
+ Database name (CMD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a subdivision of the Osteogenesis database.
+
+ Database name (Osteogenesis)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a particular genome.
+
+
+
+ Genome identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.26
+
+
+ An identifier of a particular genome.
+
+
+ GenomeReviews ID
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the GlycoMapsDB (Glycosciences.de) database.
+
+
+
+ GlycoMap ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A conformational energy map of the glycosidic linkages in a carbohydrate molecule.
+
+
+ Carbohydrate conformational map
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a transcription factor.
+
+
+
+ Transcription factor name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a membrane transport protein from the transport classification database (TCDB).
+
+
+
+ TCID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PF[0-9]{5}
+ Name of a domain from the Pfam database.
+
+
+
+ Pfam domain name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CL[0-9]{4}
+ Accession number of a Pfam clan.
+
+
+
+ Pfam clan ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for a gene from the VectorBase database.
+ VectorBase ID
+
+
+
+ Gene ID (VectorBase)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UTRSite database of regulatory motifs in eukaryotic UTRs.
+
+
+
+ UTRSite ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report about a specific or conserved pattern in a molecular sequence, such as its context in genes or proteins, its role, origin or method of construction, etc.
+ Sequence motif report
+ Sequence profile report
+
+
+ Sequence signature report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on a particular locus.
+
+ Locus annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Official name of a protein as used in the UniProt database.
+
+
+
+ Protein name (UniProt)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ One or more terms from one or more controlled vocabularies which are annotations on an entity.
+
+ The concepts are typically provided as a persistent identifier or some other link to the source ontologies. Evidence of the validity of the annotation might be included.
+ Term ID list
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a protein family from the HAMAP database.
+
+
+
+ HAMAP ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+ Basic information concerning an identifier of data (typically including the identifier itself). For example, a gene symbol with information concerning its provenance.
+
+ Identifier with metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation about a gene symbol.
+
+ Gene symbol annotation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a RNA transcript.
+
+
+
+ Transcript ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an RNA transcript from the H-InvDB database.
+
+
+
+ HIT ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of gene cluster in the H-InvDB database.
+
+
+
+ HIX ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an antibody from the HPA database.
+
+
+
+ HPA antibody id
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a human major histocompatibility complex (HLA) or other protein from the IMGT/HLA database.
+
+
+
+ IMGT/HLA ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of gene assigned by the J. Craig Venter Institute (JCVI).
+
+
+
+ Gene ID (JCVI)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a kinase protein.
+
+
+
+ Kinase name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a physical entity from the ConsensusPathDB database.
+
+
+
+ ConsensusPathDB entity ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a physical entity from the ConsensusPathDB database.
+
+
+
+ ConsensusPathDB entity name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The number of a strain of algae and protozoa from the CCAP database.
+
+
+
+ CCAP strain number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of stock from a catalogue of biological resources.
+
+
+
+ Stock number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A stock number from The Arabidopsis information resource (TAIR).
+
+
+
+ Stock number (TAIR)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the RNA editing database (REDIdb).
+
+
+
+ REDIdb ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a domain from the SMART database.
+
+
+
+ SMART domain name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry (family) from the PANTHER database.
+ Panther family ID
+
+
+
+ Protein family ID (PANTHER)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier for a virus from the RNAVirusDB database.
+
+
+
+ Could list (or reference) other taxa here from https://www.phenoscape.org/wiki/Taxonomic_Rank_Vocabulary.
+ RNAVirusDB ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of annotation on a (group of) viruses (catalogued in a database).
+ Virus ID
+
+
+
+ Virus identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a genome project assigned by NCBI.
+
+
+
+ NCBI Genome Project ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a whole genome assigned by the NCBI.
+
+
+
+ NCBI genome accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data concerning, extracted from, or derived from the analysis of a sequence profile, such as its name, length, technical details about the profile or its construction, the biological role or annotation, and so on.
+
+
+ Sequence profile data
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a membrane protein from the TopDB database.
+ TopDB ID
+
+
+
+ Protein ID (TopDB)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a two-dimensional (protein) gel.
+ Gel identifier
+
+
+
+ Gel ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a reference map gel from the SWISS-2DPAGE database.
+
+
+
+ Reference map name (SWISS-2DPAGE)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a peroxidase protein from the PeroxiBase database.
+ PeroxiBase ID
+
+
+
+ Protein ID (PeroxiBase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the SISYPHUS database of tertiary structure alignments.
+
+
+
+ SISYPHUS ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an open reading frame (catalogued in a database).
+
+
+
+ ORF ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of an open reading frame.
+
+
+
+ ORF identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [1-9][0-9]*
+ Identifier of an entry from the GlycosciencesDB database.
+ LInear Notation for Unique description of Carbohydrate Sequences ID
+
+
+
+ LINUCS ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a ligand-gated ion channel protein from the LGICdb database.
+ LGICdb ID
+
+
+
+ Protein ID (LGICdb)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an EST sequence from the MaizeDB database.
+
+
+
+ MaizeDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of gene in the MfunGD database.
+
+
+
+ Gene ID (MfunGD)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a disease from the Orpha database.
+
+
+
+ Orpha number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the EcID database.
+
+
+
+ Protein ID (EcID)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a cDNA molecule catalogued in the RefSeq database.
+
+
+
+ Clone ID (RefSeq)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a cone snail toxin protein from the ConoServer database.
+
+
+
+ Protein ID (ConoServer)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a GeneSNP database entry.
+
+
+
+ GeneSNP ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a lipid.
+
+
+
+ Lipid identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A flat-file (textual) data archive.
+
+
+ Databank
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A web site providing data (web pages) on a common theme to a HTTP client.
+
+
+ Web portal
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for a gene from the VBASE2 database.
+ VBASE2 ID
+
+
+
+ Gene ID (VBASE2)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier for a virus from the DPVweb database.
+ DPVweb virus ID
+
+
+
+ DPVweb ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a pathway from the BioSystems pathway database.
+
+
+
+ Pathway ID (BioSystems)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data concerning a proteomics experiment.
+
+ Experimental data (proteomics)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An abstract of a scientific article.
+
+
+ Abstract
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a lipid structure.
+
+
+ Lipid structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the (3D) structure of a drug.
+
+
+ Drug structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the (3D) structure of a toxin.
+
+
+ Toxin structure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple matrix of numbers, where each value (or column of values) is derived from analysis of the corresponding position in a sequence alignment.
+ PSSM
+
+
+ Position-specific scoring matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A matrix of distances between molecular entities, where a value (distance) is (typically) derived from comparison of two entities and reflects their similarity.
+
+
+ Distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Distances (values representing similarity) between a group of molecular structures.
+
+
+ Structural distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Bibliographic data concerning scientific article(s).
+
+ Article metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A concept from a biological ontology.
+
+
+ This includes any fields from the concept definition such as concept name, definition, comments and so on.
+ Ontology concept
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A numerical measure of differences in the frequency of occurrence of synonymous codons in DNA sequences.
+
+
+ Codon usage bias
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Northern Blot experiments.
+
+
+ Northern blot report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map showing distance between genetic markers estimated by radiation-induced breaks in a chromosome.
+ RH map
+
+
+ The radiation method can break very closely linked markers providing a more detailed map. Most genetic markers and subsequences may be located to a defined map position and with more precise estimates of distance than a linkage map.
+ Radiation hybrid map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple list of data identifiers (such as database accessions), possibly with additional basic information on the addressed data.
+
+
+ ID list
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gene frequencies data that may be read during phylogenetic tree calculation.
+
+
+ Phylogenetic gene frequencies data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A set of sub-sequences displaying some type of polymorphism, typically indicating the sequence in which they occur, their position and other metadata.
+
+ Sequence set (polymorphic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An entry (resource) from the DRCAT bioinformatics resource catalogue.
+
+ DRCAT resource
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a multi-protein complex; two or more polypeptide chains in a stable, functional association with one another.
+
+
+ Protein complex
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a protein (3D) structural motif; any group of contiguous or non-contiguous amino acid residues but typically those forming a feature with a structural or functional role.
+
+
+ Protein structural motif
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific lipid 3D structure(s).
+
+
+ Lipid report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Image of one or more molecular secondary structures.
+
+ Secondary structure image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on general information, properties or features of one or more molecular secondary structures.
+
+ Secondary structure report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ DNA sequence-specific feature annotation (not in a feature table).
+
+ DNA features
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Features concerning RNA or regions of DNA that encode an RNA molecule.
+
+ RNA features report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Biological data that has been plotted as a graph of some type, or plotting instructions for rendering such a graph.
+ Graph data
+
+
+ Plot
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A protein sequence and associated metadata.
+ Sequence record (protein)
+
+
+ Protein sequence record
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A nucleic acid sequence and associated metadata.
+ Nucleotide sequence record
+ Sequence record (nucleic acid)
+ DNA sequence record
+ RNA sequence record
+
+
+ Nucleic acid sequence record
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A protein sequence and comprehensive metadata (such as a feature table), typically corresponding to a full entry from a molecular sequence database.
+
+
+ Protein sequence record (full)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A nucleic acid sequence and comprehensive metadata (such as a feature table), typically corresponding to a full entry from a molecular sequence database.
+
+
+ Nucleic acid sequence record (full)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a mathematical model, typically an entry from a database.
+
+
+
+ Biological model accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a type or group of cells.
+
+
+
+ Cell type name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a type or group of cells (catalogued in a database).
+ Cell type ID
+
+
+
+ Cell type accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of chemicals.
+ Chemical compound accession
+ Small molecule accession
+
+
+
+ Compound accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a drug.
+
+
+
+ Drug accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a toxin.
+
+
+
+ Toxin name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a toxin (catalogued in a database).
+
+
+
+ Toxin accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a monosaccharide (catalogued in a database).
+
+
+
+ Monosaccharide accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Common name of a drug.
+
+
+
+ Drug name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of carbohydrates.
+
+
+
+ Carbohydrate accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a specific molecule (catalogued in a database).
+
+
+
+ Molecule accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a data definition (catalogued in a database).
+
+
+
+ Data resource definition accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of a particular genome (in a database).
+
+
+
+ Genome accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of a map of a molecular sequence (deposited in a database).
+
+
+
+ Map accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of lipids.
+
+
+
+ Lipid accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a peptide deposited in a database.
+
+
+
+ Peptide ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a protein deposited in a database.
+ Protein accessions
+
+
+
+ Protein accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of annotation on a (group of) organisms (catalogued in a database).
+
+
+
+ Organism accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:BriefOccurrenceRecord
+ Moby:FirstEpithet
+ Moby:InfraspecificEpithet
+ Moby:OccurrenceRecord
+ Moby:Organism_Name
+ Moby:OrganismsLongName
+ Moby:OrganismsShortName
+ The name of an organism (or group of organisms).
+
+
+
+ Organism name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a protein family (that is deposited in a database).
+
+
+
+ Protein family accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of transcription factors or binding sites.
+
+
+
+ Transcription factor accession
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a strain of an organism variant, typically a plant, virus or bacterium.
+
+
+
+ Strain accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ 1.26
+
+ An accession of annotation on a (group of) viruses (catalogued in a database).
+
+
+ Virus identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Metadata on sequence features.
+
+
+ Sequence features metadata
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a Gramene database entry.
+
+
+
+ Gramene identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of an entry from the DDBJ sequence database.
+ DDBJ ID
+ DDBJ accession number
+ DDBJ identifier
+
+
+
+ DDBJ accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of an entity from the ConsensusPathDB database.
+
+
+
+ ConsensusPathDB identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Data concerning, extracted from, or derived from the analysis of molecular sequence(s).
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Sequence data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning codon usage.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Codon usage
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+
+ Data derived from the analysis of a scientific text such as a full text article from a scientific journal.
+
+ Article report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report of information about molecular sequence(s), including basic information (metadata), and reports generated from molecular sequence analysis, including positional features and non-positional properties.
+ Sequence-derived report
+
+
+ Sequence report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the properties or features of one or more protein secondary structures.
+
+
+ Protein secondary structure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A Hopp and Woods plot of predicted antigenicity of a peptide or protein.
+
+
+ Hopp and Woods plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+
+ A melting curve of a double-stranded nucleic acid molecule (DNA or DNA/RNA).
+
+
+ Nucleic acid melting curve
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ A probability profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA).
+
+
+ Nucleic acid probability profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ A temperature profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA).
+
+
+ Nucleic acid temperature profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report typically including a map (diagram) of a gene regulatory network.
+
+
+ Gene regulatory network report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ An informative report on a two-dimensional (2D PAGE) gel.
+
+
+ 2D PAGE gel report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.14
+
+ General annotation on a set of oligonucleotide probes, such as the gene name with which the probe set is associated and which probes belong to the set.
+
+
+ Oligonucleotide probe sets annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An image from a microarray experiment which (typically) allows a visualisation of probe hybridisation and gene-expression data.
+
+ Microarray image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data (typically biological or biomedical) that has been rendered into an image, typically for display on screen.
+ Image data
+
+
+ Image
+ http://semanticscience.org/resource/SIO_000079
+ http://semanticscience.org/resource/SIO_000081
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of a molecular sequence, possibly with sequence features or properties shown.
+
+
+ Sequence image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on protein properties concerning hydropathy.
+ Protein hydropathy report
+
+
+ Protein hydropathy data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning a computational workflow.
+
+ Workflow data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A computational workflow.
+
+ Workflow
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning molecular secondary structure data.
+
+ Secondary structure data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possibly in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw protein sequence (string of characters).
+
+
+ Protein sequence (raw)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possibly in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw nucleic acid sequence.
+
+
+ Nucleic acid sequence (raw)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ One or more protein sequences, possibly with associated annotation.
+ Amino acid sequence
+ Amino acid sequences
+ Protein sequences
+
+
+ Protein sequence
+ http://purl.org/biotop/biotop.owl#AminoAcidSequenceInformation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more nucleic acid sequences, possibly with associated annotation.
+ Nucleic acid sequences
+ Nucleotide sequence
+ Nucleotide sequences
+ DNA sequence
+
+
+ Nucleic acid sequence
+ http://purl.org/biotop/biotop.owl#NucleotideSequenceInformation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning a biochemical reaction, typically data and more general annotation on the kinetics of enzyme-catalysed reaction.
+ Enzyme kinetics annotation
+ Reaction annotation
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Reaction data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning small peptides.
+ Peptide data
+
+
+ Peptide property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Was deprecated since 1.5, but not correctly (fully) obsoleted until 1.19.
+ 1.5
+
+
+ An informative report concerning the classification of protein sequences or structures.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Protein classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data concerning specific or conserved pattern in molecular sequences.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Sequence motif data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning models representing a (typically multiple) sequence alignment.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Sequence profile data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning a specific biological pathway or network.
+
+ Pathway or network data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report concerning or derived from the analysis of a biological pathway or network, such as a map (diagram) or annotation.
+
+
+ Pathway or network report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A thermodynamic or kinetic property of a nucleic acid molecule.
+ Nucleic acid property (thermodynamic or kinetic)
+ Nucleic acid thermodynamic property
+
+
+ Nucleic acid thermodynamic data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Was deprecated since 1.5, but not correctly (fully) obsoleted until 1.19.
+ 1.5
+
+
+ Data concerning the classification of nucleic acid sequences or structures.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Nucleic acid classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on a classification of molecular sequences, structures or other entities.
+
+ This can include an entire classification, components such as classifiers, assignments of entities to a classification and so on.
+ Classification report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ key residues involved in protein folding.
+
+
+ Protein features report (key folding sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Geometry data for a protein structure, for example bond lengths, bond angles, torsion angles, chiralities, planarities etc.
+ Torsion angle data
+
+
+ Protein geometry data
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of protein structure.
+ Structure image (protein)
+
+
+ Protein structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Weights for sequence positions or characters in phylogenetic analysis where zero is defined as unweighted.
+
+
+ Phylogenetic character weights
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotation of one particular positional feature on a biomolecular (typically genome) sequence, suitable for import and display in a genome browser.
+ Genome annotation track
+ Genome track
+ Genome-browser track
+ Genomic track
+ Sequence annotation track
+
+
+ Annotation track
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ P43353|Q7M1G0|Q9C199|A5A6J6
+ [OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}
+ Accession number of a UniProt (protein sequence) database entry.
+ UniProt accession number
+ UniProt entry accession
+ UniProtKB accession
+ UniProtKB accession number
+ Swiss-Prot entry accession
+ TrEMBL entry accession
+
+
+
+ UniProt accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 16
+ [1-9][0-9]?
+ Identifier of a genetic code in the NCBI list of genetic codes.
+
+
+
+ NCBI genetic code ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a concept in an ontology of biological or bioinformatics concepts and relations.
+
+
+
+ Ontology concept identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept for a biological process from the GO ontology.
+
+ GO concept name (biological process)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept for a molecular function from the GO ontology.
+
+ GO concept name (molecular function)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the classification, identification and naming of organisms.
+ Taxonomic data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Taxonomy
+
+
+
+
+
+
+
+
+
+ beta13
+ EMBL/GENBANK/DDBJ coding feature protein identifier, issued by International collaborators.
+
+
+
+ This qualifier consists of a stable ID portion (3+5 format with 3 position letters and 5 numbers) plus a version number after the decimal point. When the protein sequence encoded by the CDS changes, only the version number of the /protein_id value is incremented; the stable part of the /protein_id remains unchanged and as a result will permanently be associated with a given protein; this qualifier is valid only on CDS features which translate into a valid protein.
+ Protein ID (EMBL/GenBank/DDBJ)
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+ A type of data that (typically) corresponds to entries from the primary biological databases and which is (typically) the primary input or output of a tool, i.e. the data the tool processes or generates, as distinct from metadata and identifiers which describe and identify such core data, parameters that control the behaviour of tools, reports of derivative data generated by tools and annotation.
+
+
+ Core data entities typically have a format and may be identified by an accession number.
+ Core data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Name or other identifier of molecular sequence feature(s).
+
+
+
+ Sequence feature identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ An identifier of a molecular tertiary structure, typically an entry from a structure database.
+
+
+
+ Structure identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ An identifier of an array of numerical values, such as a comparison matrix.
+
+
+
+ Matrix identifier
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ A report (typically a table) on character or word composition / frequency of protein sequence(s).
+
+
+ Protein sequence composition
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ A report (typically a table) on character or word composition / frequency of nucleic acid sequence(s).
+
+
+ Nucleic acid sequence composition (report)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A node from a classification of protein structural domain(s).
+
+ Protein domain classification node
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Duplicates http://edamontology.org/data_1002, hence deprecated.
+ 1.23
+
+ Unique numerical identifier of chemicals in the scientific literature, as assigned by the Chemical Abstracts Service.
+
+
+ CAS number
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+ Unique identifier of a drug conforming to the Anatomical Therapeutic Chemical (ATC) Classification System, a drug classification system controlled by the WHO Collaborating Centre for Drug Statistics Methodology (WHOCC).
+
+
+
+ ATC code
+
+
+
+
+
+
+
+
+ beta13
+ A unique, unambiguous, alphanumeric identifier of a chemical substance as catalogued by the Substance Registration System of the Food and Drug Administration (FDA).
+ Unique Ingredient Identifier
+
+
+
+ UNII
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Basic information concerning geographical location or time.
+
+ Geotemporal metadata
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Metadata concerning the software, hardware or other aspects of a computer system.
+
+
+ System metadata
+
+
+
+
+
+
+
+
+ beta13
+ 1.15
+
+ A name of a sequence feature, e.g. the name of a feature to be displayed to an end-user.
+
+
+ Sequence feature name
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Raw data such as measurements or other results from laboratory experiments, as generated from laboratory hardware.
+ Experimental measurement data
+ Experimentally measured data
+ Measured data
+ Measurement
+ Measurement data
+ Measurement metadata
+ Raw experimental data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Experimental measurement
+
+
+
+
+
+
+
+
+
+ beta13
+ Raw data (typically MIAME-compliant) for hybridisations from a microarray experiment.
+
+
+ Such data as found in Affymetrix CEL or GPR files.
+ Raw microarray data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ Data generated from processing and analysis of probe set data from a microarray experiment.
+ Gene annotation (expression)
+ Gene expression report
+ Microarray probe set data
+
+
+ Such data as found in Affymetrix .CHP files or data from other software such as RMA or dChip.
+ Processed microarray data
+
+
+
+
+
+
+
+
+
+ beta13
+ The final processed (normalised) data for a set of hybridisations in a microarray experiment.
+ Gene expression data matrix
+ Normalised microarray data
+
+
+ This combines data from all hybridisations.
+ Gene expression matrix
+
+
+
+
+
+
+
+
+ beta13
+ Annotation on a biological sample, for example experimental factors and their values.
+
+
+ This might include compound and dose in a dose response experiment.
+ Sample annotation
+
+
+
+
+
+
+
+
+ beta13
+ Annotation on the array itself used in a microarray experiment.
+
+
+ This might include gene identifiers, genomic coordinates, probe oligonucleotide sequences etc.
+ Microarray metadata
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ Annotation on laboratory and/or data processing protocols used in a microarray experiment.
+
+
+ This might describe e.g. the normalisation methods used to process the raw data.
+ Microarray protocol annotation
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Data concerning the hybridisations measured during a microarray experiment.
+
+
+ Microarray hybridisation data
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report of regions in a molecular sequence that are biased to certain characters.
+
+ Sequence features (compositionally-biased regions)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+ A report on features in a nucleic acid sequence that indicate changes to or differences between sequences.
+
+
+ Nucleic acid features (difference and change)
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+ A human-readable collection of information about regions within a nucleic acid sequence which form secondary or tertiary (3D) structures.
+ Nucleic acid features (structure)
+ Quadruplexes (report)
+ Stem loop (report)
+ d-loop (report)
+
+
+ The report may be based on analysis of nucleic acid sequence or structural data, or any annotation or information about specific nucleic acid 3D structure(s) or such structures in general.
+ Nucleic acid structure report
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ short repetitive subsequences (repeat sequences) in a protein sequence.
+
+
+ Protein features report (repeats)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ Report on the location of matches to profiles, motifs (conserved or functional patterns) or other signatures in one or more protein sequences.
+
+
+ Sequence motif matches (protein)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ Report on the location of matches to profiles, motifs (conserved or functional patterns) or other signatures in one or more nucleic acid sequences.
+
+
+ Sequence motif matches (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report on displacement loops in a mitochondrial DNA sequence.
+
+ A displacement loop is a region of mitochondrial DNA in which one of the strands is displaced by an RNA molecule.
+ Nucleic acid features (d-loop)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report on stem loops in a DNA sequence.
+
+ A stem loop is a hairpin structure; a double-helical structure formed when two complementary regions of a single strand of RNA or DNA molecule form base-pairs.
+ Nucleic acid features (stem loop)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ An informative report on features of a messenger RNA (mRNA) molecules including precursor RNA, primary (unprocessed) transcript and fully processed molecules. This includes reports on a specific gene transcript, clone or EST.
+ Clone or EST (report)
+ Gene transcript annotation
+ Nucleic acid features (mRNA features)
+ Transcript (report)
+ mRNA (report)
+ mRNA features
+
+
+ This includes 5'untranslated region (5'UTR), coding sequences (CDS), exons, intervening sequences (intron) and 3'untranslated regions (3'UTR).
+ Gene transcript report
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ features of non-coding or functional RNA molecules, including tRNA and rRNA.
+
+
+ Non-coding RNA
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Features concerning transcription of DNA into RNA including the regulation of transcription.
+
+ This includes promoters, CAAT signals, TATA signals, -35 signals, -10 signals, GC signals, primer binding sites for initiation of transcription or reverse transcription, enhancer, attenuator, terminators and ribosome binding sites.
+ Transcriptional features (report)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report on predicted or actual immunoglobulin gene structure including constant, switch and variable regions and diversity, joining and variable segments.
+
+ Nucleic acid features (immunoglobulin gene structure)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'class' node from the SCOP database.
+
+ SCOP class
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'fold' node from the SCOP database.
+
+ SCOP fold
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'superfamily' node from the SCOP database.
+
+ SCOP superfamily
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'family' node from the SCOP database.
+
+ SCOP family
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'protein' node from the SCOP database.
+
+ SCOP protein
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'species' node from the SCOP database.
+
+ SCOP species
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ mass spectrometry experiments.
+
+
+ Mass spectrometry experiment
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Nucleic acid classification
+ A human-readable collection of information about a particular family of genes, typically a set of genes with similar sequence that originate from duplication of a common ancestor gene, or any other classification of nucleic acid sequences or structures that reflects gene structure.
+ Gene annotation (homology information)
+ Gene annotation (homology)
+ Gene family annotation
+ Gene homology (report)
+ Homology information
+
+
+ This includes reports on gene homologues between species.
+ Gene family report
+
+
+
+
+
+
+
+
+ beta13
+ An image of a protein.
+
+
+ Protein image
+
+
+
+
+
+
+
+
+ beta13
+ 1.24
+
+
+
+
+ An alignment of protein sequences and/or structures.
+
+ Protein alignment
+ true
+
+
+
+
+
+
+
+
+ 1.0
+ 1.8
+
+ sequencing experiment, including samples, sampling, preparation, sequencing, and analysis.
+
+
+ NGS experiment
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ An informative report about a DNA sequence assembly.
+ Assembly report
+
+
+ This might include an overall quality assessment of the assembly and summary statistics including counts, average length and number of bases for reads, matches and non-matches, contigs, reads in pairs etc.
+ Sequence assembly report
+
+
+
+
+
+
+
+
+ 1.1
+ An index of a genome sequence.
+
+
+ Many sequence alignment tasks involving many or very large sequences rely on a precomputed index of the sequence to accelerate the alignment.
+ Genome index
+
+
+
+
+
+
+
+
+ 1.1
+ 1.8
+
+ Report concerning genome-wide association study experiments.
+
+
+ GWAS report
+ true
+
+
+
+
+
+
+
+
+ 1.2
+ The position of a cytogenetic band in a genome.
+
+
+ Information might include start and end position in a chromosome sequence, chromosome identifier, name of band and so on.
+ Cytoband position
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ CL_[0-9]{7}
+ Cell type ontology concept ID.
+ CL ID
+
+
+
+ Cell type ontology ID
+
+
+
+
+
+
+
+
+ 1.2
+ Mathematical model of a network, that contains biochemical kinetics.
+
+
+ Kinetic model
+
+
+
+
+
+
+
+
+
+ 1.3
+ Identifier of a COSMIC database entry.
+ COSMIC identifier
+
+
+
+ COSMIC ID
+
+
+
+
+
+
+
+
+
+ 1.3
+ Identifier of a HGMD database entry.
+ HGMD identifier
+
+
+
+ HGMD ID
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Unique identifier of sequence assembly.
+ Sequence assembly version
+
+
+
+ Sequence assembly ID
+
+
+
+
+
+
+
+
+ 1.3
+ 1.5
+
+
+ A label (text token) describing a type of sequence feature such as gene, transcript, cds, exon, repeat, simple, misc, variation, somatic variation, structural variation, somatic structural variation, constrained or regulatory.
+
+ Sequence feature type
+ true
+
+
+
+
+
+
+
+
+ 1.3
+ 1.5
+
+
+ An informative report on gene homologues between species.
+
+ Gene homology (report)
+ true
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ ENSGT00390000003602
+ Unique identifier for a gene tree from the Ensembl database.
+ Ensembl ID (gene tree)
+
+
+
+ Ensembl gene tree ID
+
+
+
+
+
+
+
+
+ 1.3
+ A phylogenetic tree that is an estimate of the character's phylogeny.
+
+
+ Gene tree
+
+
+
+
+
+
+
+
+ 1.3
+ A phylogenetic tree that reflects phylogeny of the taxa from which the characters (used in calculating the tree) were sampled.
+
+
+ Species tree
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Name or other identifier of an entry from a biosample database.
+ Sample accession
+
+
+
+ Sample ID
+
+
+
+
+
+
+
+
+
+ 1.3
+ Identifier of an object from the MGI database.
+
+
+
+ MGI accession
+
+
+
+
+
+
+
+
+ 1.3
+ Name of a phenotype.
+ Phenotype
+ Phenotypes
+
+
+
+ Phenotype name
+
+
+
+
+
+
+
+
+ 1.4
+ A HMM transition matrix contains the probabilities of switching from one HMM state to another.
+ HMM transition matrix
+
+
+ Consider for example an HMM with two states (AT-rich and GC-rich). The transition matrix will hold the probabilities of switching from the AT-rich to the GC-rich state, and vice versa.
+ Transition matrix
+
+
+
+
+
+
+
+
+ 1.4
+ A HMM emission matrix holds the probabilities of choosing the four nucleotides (A, C, G and T) in each of the states of a HMM.
+ HMM emission matrix
+
+
+ Consider for example an HMM with two states (AT-rich and GC-rich). The emission matrix holds the probabilities of choosing each of the four nucleotides (A, C, G and T) in the AT-rich state and in the GC-rich state.
+ Emission matrix
+
+
+
+
+
+
+
+
+ 1.4
+ 1.15
+
+ A statistical Markov model of a system which is assumed to be a Markov process with unobserved (hidden) states.
+
+
+ Hidden Markov model
+ true
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ An identifier of a data format.
+
+
+ Format identifier
+
+
+
+
+
+
+
+
+ 1.5
+ Raw biological or biomedical image generated by some experimental technique.
+
+
+ Raw image
+ http://semanticscience.org/resource/SIO_000081
+
+
+
+
+
+
+
+
+ 1.5
+ Data concerning the intrinsic physical (e.g. structural) or chemical properties of one, more or all carbohydrates.
+ Carbohydrate data
+
+
+ Carbohydrate property
+
+
+
+
+
+
+
+
+ 1.5
+ 1.8
+
+ Report concerning proteomics experiments.
+
+
+ Proteomics experiment report
+ true
+
+
+
+
+
+
+
+
+ 1.5
+ 1.8
+
+ RNAi experiments.
+
+
+ RNAi report
+ true
+
+
+
+
+
+
+
+
+ 1.5
+ 1.8
+
+ biological computational model experiments (simulation), for example the minimum information required in order to permit its correct interpretation and reproduction.
+
+
+ Simulation experiment report
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ An imaging technique that uses magnetic fields and radiowaves to form images, typically to investigate the anatomy and physiology of the human body.
+ MRT image
+ Magnetic resonance imaging image
+ Magnetic resonance tomography image
+ NMRI image
+ Nuclear magnetic resonance imaging image
+
+
+ MRI image
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ An image from a cell migration track assay.
+
+
+ Cell migration track image
+
+
+
+
+
+
+
+
+ 1.7
+ Rate of association of a protein with another protein or some other molecule.
+ kon
+
+
+ Rate of association
+
+
+
+
+
+
+
+
+ 1.7
+ Multiple gene identifiers in a specific order.
+
+
+ Such data are often used for genome rearrangement tools and phylogenetic tree labeling.
+ Gene order
+
+
+
+
+
+
+
+
+ 1.7
+ The spectrum of frequencies of electromagnetic radiation emitted from a molecule as a result of some spectroscopy experiment.
+ Spectra
+
+
+ Spectrum
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Spectral information for a molecule from a nuclear magnetic resonance experiment.
+ NMR spectra
+
+
+ NMR spectrum
+
+
+
+
+
+
+
+
+ 1.8
+ 1.21
+
+ A sketch of a small molecule made with some specialised drawing package.
+
+
+ Chemical structure sketches are used for presentational purposes but also as inputs to various analysis software.
+ Chemical structure sketch
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ An informative report about a specific or conserved nucleic acid sequence pattern.
+
+
+ Nucleic acid signature
+
+
+
+
+
+
+
+
+ 1.8
+ A DNA sequence.
+ DNA sequences
+
+
+ DNA sequence
+
+
+
+
+
+
+
+
+ 1.8
+ An RNA sequence.
+ RNA sequences
+
+
+ RNA sequence
+
+
+
+
+
+
+
+
+ 1.8
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possible in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw RNA sequence.
+
+
+ RNA sequence (raw)
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possible in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw DNA sequence.
+
+
+ DNA sequence (raw)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ Data on gene sequence variations resulting from large-scale genotyping and DNA sequencing projects.
+ Gene sequence variations
+
+
+ Variations are stored along with a reference genome.
+ Sequence variations
+
+
+
+
+
+
+
+
+ 1.8
+ A list of publications such as scientific papers or books.
+
+
+ Bibliography
+
+
+
+
+
+
+
+
+ 1.8
+ A mapping of supplied textual terms or phrases to ontology concepts (URIs).
+
+
+ Ontology mapping
+
+
+
+
+
+
+
+
+ 1.9
+ Any data concerning a specific biological or biomedical image.
+ Image-associated data
+ Image-related data
+
+
+ This can include basic provenance and technical information about the image, scientific annotation and so on.
+ Image metadata
+
+
+
+
+
+
+
+
+ 1.9
+ A human-readable collection of information concerning a clinical trial.
+ Clinical trial information
+
+
+ Clinical trial report
+
+
+
+
+
+
+
+
+ 1.10
+ A report about a biosample.
+ Biosample report
+
+
+ Reference sample report
+
+
+
+
+
+
+
+
+ 1.10
+ Accession number of an entry from the Gene Expression Atlas.
+
+
+
+ Gene Expression Atlas Experiment ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ true
+ Identifier of an entry from a database of disease.
+
+
+
+ Disease identifier
+
+
+
+
+
+
+
+
+
+ 1.12
+ The name of some disease.
+
+
+
+ Disease name
+
+
+
+
+
+
+
+
+ 1.12
+ Some material that is used for educational (training) purposes.
+ OER
+ Open educational resource
+
+
+ Training material
+
+
+
+
+
+
+
+
+ 1.12
+ A training course available for use on the Web.
+ On-line course
+ MOOC
+ Massive open online course
+
+
+ Online course
+
+
+
+
+
+
+
+
+ 1.12
+ Any free or plain text, typically for human consumption and in English. Can instantiate also as a textual search query.
+ Free text
+ Plain text
+ Textual search query
+
+
+ Text
+
+
+
+
+
+
+
+
+
+ 1.14
+ Machine-readable biodiversity data.
+ Biodiversity information
+ OTU table
+
+
+ Biodiversity data
+
+
+
+
+
+
+
+
+ 1.14
+ A human-readable collection of information concerning biosafety data.
+ Biosafety information
+
+
+ Biosafety report
+
+
+
+
+
+
+
+
+ 1.14
+ A report about any kind of isolation of biological material.
+ Geographic location
+ Isolation source
+
+
+ Isolation report
+
+
+
+
+
+
+
+
+ 1.14
+ Information about the ability of an organism to cause disease in a corresponding host.
+ Pathogenicity
+
+
+ Pathogenicity report
+
+
+
+
+
+
+
+
+ 1.14
+ Information about the biosafety classification of an organism according to corresponding law.
+ Biosafety level
+
+
+ Biosafety classification
+
+
+
+
+
+
+
+
+ 1.14
+ A report about localisation of the isolation of biological material, e.g. country or coordinates.
+
+
+ Geographic location
+
+
+
+
+
+
+
+
+ 1.14
+ A report about any kind of isolation source of biological material e.g. blood, water, soil.
+
+
+ Isolation source
+
+
+
+
+
+
+
+
+ 1.14
+ Experimentally determined parameter of the physiology of an organism, e.g. substrate spectrum.
+
+
+ Physiology parameter
+
+
+
+
+
+
+
+
+ 1.14
+ Experimentally determined parameter of the morphology of an organism, e.g. size & shape.
+
+
+ Morphology parameter
+
+
+
+
+
+
+
+
+ 1.14
+ Experimentally determined parameter for the cultivation of an organism.
+ Cultivation conditions
+ Carbon source
+ Culture media composition
+ Nitrogen source
+ Salinity
+ Temperature
+ pH value
+
+
+ Cultivation parameter
+
+
+
+
+
+
+
+
+ 1.15
+ Data concerning a sequencing experiment, that may be specified as an input to some tool.
+
+
+ Sequencing metadata name
+
+
+
+
+
+
+
+
+ 1.15
+ An identifier of a flow cell of a sequencing machine.
+
+
+ A flow cell is used to immobilise, amplify and sequence millions of molecules at once. In Illumina machines, a flowcell is composed of 8 "lanes" which allows 8 experiments in a single analysis.
+ Flow cell identifier
+
+
+
+
+
+
+
+
+ 1.15
+ An identifier of a lane within a flow cell of a sequencing machine, within which millions of sequences are immobilised, amplified and sequenced.
+
+
+ Lane identifier
+
+
+
+
+
+
+
+
+ 1.15
+ A number corresponding to the number of an analysis performed by a sequencing machine. For example, if it's the 13th analysis, the run is 13.
+
+
+ Run number
+
+
+
+
+
+
+
+
+ 1.15
+ Data concerning ecology; for example measurements and reports from the study of interactions among organisms and their environment.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Ecological data
+
+
+
+
+
+
+
+
+ 1.15
+ The mean species diversity in sites or habitats at a local scale.
+ α-diversity
+
+
+ Alpha diversity data
+
+
+
+
+
+
+
+
+ 1.15
+ The ratio between regional and local species diversity.
+ True beta diversity
+ β-diversity
+
+
+ Beta diversity data
+
+
+
+
+
+
+
+
+ 1.15
+ The total species diversity in a landscape.
+ ɣ-diversity
+
+
+ Gamma diversity data
+
+
+
+
+
+
+
+
+
+ 1.15
+ A plot in which community data (e.g. species abundance data) is summarised. Similar species and samples are plotted close together, and dissimilar species and samples are plotted placed far apart.
+
+
+ Ordination plot
+
+
+
+
+
+
+
+
+ 1.16
+ A ranked list of categories (usually ontology concepts), each associated with a statistical metric of over-/under-representation within the studied data.
+ Enrichment report
+ Over-representation report
+ Functional enrichment report
+
+
+ Over-representation data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ GO-term report
+ A ranked list of Gene Ontology concepts, each associated with a p-value, concerning or derived from the analysis of e.g. a set of genes or proteins.
+ GO-term enrichment report
+ Gene ontology concept over-representation report
+ Gene ontology enrichment report
+ Gene ontology term enrichment report
+
+
+ GO-term enrichment data
+
+
+
+
+
+
+
+
+ 1.16
+ Score for localization of one or more post-translational modifications in peptide sequence measured by mass spectrometry.
+ False localisation rate
+ PTM localisation
+ PTM score
+
+
+ Localisation score
+
+
+
+
+
+
+
+
+
+ 1.16
+ Identifier of a protein modification catalogued in the Unimod database.
+
+
+
+ Unimod ID
+
+
+
+
+
+
+
+
+ 1.16
+ Identifier for mass spectrometry proteomics data in the proteomexchange.org repository.
+
+
+
+ ProteomeXchange ID
+
+
+
+
+
+
+
+
+ 1.16
+ Groupings of expression profiles according to a clustering algorithm.
+ Clustered gene expression profiles
+
+
+ Clustered expression profiles
+
+
+
+
+
+
+
+
+
+ 1.16
+ An identifier of a concept from the BRENDA ontology.
+
+
+
+ BRENDA ontology concept ID
+
+
+
+
+
+
+
+
+
+ 1.16
+ A text (such as a scientific article), annotated with notes, data and metadata, such as recognised entities, concepts, and their relations.
+
+
+ Annotated text
+
+
+
+
+
+
+
+
+ 1.16
+ A structured query, in form of a script, that defines a database search task.
+
+
+ Query script
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Structural 3D model (volume map) from electron microscopy.
+
+
+ 3D EM Map
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Annotation on a structural 3D EM Map from electron microscopy. This might include one or several locations in the map of the known features of a particular macromolecule.
+
+
+ 3D EM Mask
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Raw DDD movie acquisition from electron microscopy.
+
+
+ EM Movie
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Raw acquisition from electron microscopy or average of an aligned DDD movie.
+
+
+ EM Micrograph
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.21
+ Data coming from molecular simulations, computer "experiments" on model molecules.
+
+
+ Typically formed by two separated but indivisible pieces of information: topology data (static) and trajectory data (dynamic).
+ Molecular simulation data
+
+
+
+
+
+
+
+
+
+ 1.21
+ Identifier of an entry from the RNA central database of annotated human miRNAs.
+
+
+
+ There are canonical and taxon-specific forms of RNAcentral ID. Canonical form e.g. urs_9or10digits identifies an RNA sequence (within the RNA central database) which may appear in multiple sequences. Taxon-specific form identifies a sequence in the specific taxon (e.g. urs_9or10digits_taxonID).
+ RNA central ID
+
+
+
+
+
+
+
+
+ 1.21
+ A human-readable systematic collection of patient (or population) health information in a digital format.
+ EHR
+ EMR
+ Electronic medical record
+
+
+ Electronic health record
+
+
+
+
+
+
+
+
+ 1.22
+ Data coming from molecular simulations, computer "experiments" on model molecules. Typically formed by two separated but indivisible pieces of information: topology data (static) and trajectory data (dynamic).
+
+
+ Simulation
+
+
+
+
+
+
+
+
+ 1.22
+ Dynamic information of a structure molecular system coming from a molecular simulation: XYZ 3D coordinates (sometimes with their associated velocities) for every atom along time.
+
+
+ Trajectory data
+
+
+
+
+
+
+
+
+ 1.22
+ Force field parameters: charges, masses, radii, bond lengths, bond dihedrals, etc. define the structural molecular system, and are essential for the proper description and simulation of a molecular system.
+
+
+ Forcefield parameters
+
+
+
+
+
+
+
+
+ 1.22
+ Static information of a structure molecular system that is needed for a molecular simulation: the list of atoms, their non-bonded parameters for Van der Waals and electrostatic interactions, and the complete connectivity in terms of bonds, angles and dihedrals.
+
+
+ Topology data
+
+
+
+
+
+
+
+
+ 1.22
+ Visualization of distribution of quantitative data, e.g. expression data, by histograms, violin plots and density plots.
+ Density plot
+
+
+ Histogram
+
+
+
+
+
+
+
+
+ 1.23
+ Report of the quality control review that was made of factors involved in a procedure.
+ QC metrics
+ QC report
+ Quality control metrics
+ Quality control report
+
+
+
+
+
+
+
+
+ 1.23
+ A table of unnormalized values representing summarised read counts per genomic region (e.g. gene, transcript, peak).
+ Read count matrix
+
+
+ Count matrix
+
+
+
+
+
+
+
+
+ 1.24
+ Alignment (superimposition) of DNA tertiary (3D) structures.
+ Structure alignment (DNA)
+
+
+ DNA structure alignment
+
+
+
+
+
+
+
+
+ 1.24
+ A score derived from the P-value to ensure correction for multiple tests. The Q-value provides an estimate of the positive False Discovery Rate (pFDR), i.e. the rate of false positives among all the cases reported positive: pFDR = FP / (FP + TP).
+ Adjusted P-value
+ FDR
+ Padj
+ pFDR
+
+
+ Q-values are widely used in high-throughput data analysis (e.g. detection of differentially expressed genes from transcriptome data).
+ Q-value
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A profile HMM is a variant of a Hidden Markov model that is derived specifically from a set of (aligned) biological sequences. Profile HMMs provide the basis for a position-specific scoring system, which can be used to align sequences and search databases for related sequences.
+
+
+ Profile HMM
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+
+ WP[0-9]+
+ Identifier of a pathway from the WikiPathways pathway database.
+ WikiPathways ID
+ WikiPathways pathway ID
+
+
+
+ Pathway ID (WikiPathways)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A ranked list of pathways, each associated with z-score, p-value or similar, concerning or derived from the analysis of e.g. a set of genes or proteins.
+ Pathway analysis results
+ Pathway enrichment report
+ Pathway over-representation report
+ Pathway report
+ Pathway term enrichment report
+
+
+ Pathway overrepresentation data
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ \d{4}-\d{4}-\d{4}-\d{3}(\d|X)
+ Identifier of a researcher registered with the ORCID database. Used to identify author IDs.
+
+
+
+ ORCID Identifier
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ Chemical structure specified in Simplified Molecular Input Line Entry System (SMILES) line notation.
+
+
+ SMILES
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Chemical structure specified in IUPAC International Chemical Identifier (InChI) line notation.
+
+
+ InChI
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Chemical structure specified by Molecular Formula (MF), including a count of each element in a compound.
+
+
+ The general MF query format consists of a series of valid atomic symbols, with an optional number or range.
+ mf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The InChIKey (hashed InChI) is a fixed length (25 character) condensed digital representation of an InChI chemical structure specification. It uniquely identifies a chemical compound.
+
+
+ An InChIKey identifier is not human- nor machine-readable but is more suitable for web searches than an InChI chemical structure specification.
+ InChIKey
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SMILES ARbitrary Target Specification (SMARTS) format for chemical structure specification, which is a subset of the SMILES line notation.
+
+
+ smarts
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ Non-sequence characters may be used for example for gaps.
+ nucleotide
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#Nucleotide_sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a protein sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ Non-sequence characters may be used for gaps and translation stop.
+ protein
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#Amino_acid_sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for the consensus of two or more molecular sequences.
+
+
+ consensus
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure nucleotide
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence (characters ACGTU only) with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure nucleotide
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ dna
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#DNA_sequence
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ rna
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#RNA_sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence (characters ACGT only) with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure dna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure dna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence (characters ACGU only) with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure rna sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure rna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for any protein sequence with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure protein
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for any protein sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure protein
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from UniGene.
+
+ A UniGene entry includes a set of transcript sequences assigned to the same transcription locus (gene or expressed pseudogene), with information on protein similarities, gene expression, cDNA clone reagents, and genomic location.
+ UniGene entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the COG database of clusters of (related) protein sequences.
+
+ COG sequence cluster format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for sequence positions (feature location) as used in DDBJ/EMBL/GenBank database.
+ Feature location
+
+
+ EMBL feature location
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for tandem repeats in a nucleotide sequence (format generated by the Sanger Centre quicktandem program).
+
+
+ quicktandem
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for inverted repeats in a nucleotide sequence (format generated by the Sanger Centre inverted program).
+
+
+ Sanger inverted repeats
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for tandem repeats in a sequence (an EMBOSS report format).
+
+
+ EMBOSS repeat
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a report on exon-intron structure generated by EMBOSS est2genome.
+
+
+ est2genome format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for restriction enzyme recognition sites used by EMBOSS restrict program.
+
+
+ restrict format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for restriction enzyme recognition sites used by EMBOSS restover program.
+
+
+ restover format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for restriction enzyme recognition sites used by REBASE database.
+
+
+ REBASE restriction sites
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using FASTA.
+
+
+ This includes (typically) score data, alignment data and a histogram (of observed and expected distribution of E values.)
+ FASTA search results format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using some variant of BLAST.
+
+
+ This includes score data, alignment data and summary table.
+ BLAST results
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using some variant of MSPCrunch.
+
+
+ mspcrunch
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using some variant of Smith Waterman.
+
+
+ Smith-Waterman format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of EMBASSY domain hits file (DHF) of hits (sequences) with domain classification information.
+
+
+ The hits are relatives of a SCOP or CATH family and are found from a search of a sequence database.
+ dhf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of EMBASSY ligand hits file (LHF) of database hits (sequences) with ligand classification information.
+
+
+ The hits are putative ligand-binding sequences and are found from a search of a sequence database.
+ lhf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Results format for searches of the InterPro database.
+
+
+ InterPro hits format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a search of the InterPro database showing matches of query protein sequence(s) to InterPro entries.
+
+
+ The report includes a classification of regions in a query protein sequence which are assigned to a known InterPro protein family or group.
+ InterPro protein view report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a search of the InterPro database showing matches between protein sequence(s) and signatures for an InterPro entry.
+
+
+ The table presents matches between query proteins (rows) and signature methods (columns) for this entry. Alternatively the sequence(s) might be from the InterPro entry itself. The match position in the protein sequence and match status (true positive, false positive etc) are indicated.
+ InterPro match table format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dirichlet distribution HMMER format.
+
+
+ HMMER Dirichlet prior
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dirichlet distribution MEME format.
+
+
+ MEME Dirichlet prior
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a report from the HMMER package on the emission and transition counts of a hidden Markov model.
+
+
+ HMMER emission and transition
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a regular expression pattern from the Prosite database.
+
+
+ prosite-pattern
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of an EMBOSS sequence pattern.
+
+
+ EMBOSS sequence pattern
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A motif in the format generated by the MEME program.
+
+
+ meme-motif
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence profile (sequence classifier) format used in the PROSITE database.
+
+
+ prosite-profile
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (sequence classifier) in the format used in the JASPAR database.
+
+
+ JASPAR format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of the model of random sequences used by MEME.
+
+
+ MEME background Markov model
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a hidden Markov model representation used by the HMMER package.
+
+
+ HMMER format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA-style format for multiple sequences aligned by HMMER package to an HMM.
+
+
+ HMMER-aln
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of multiple sequences aligned by DIALIGN package.
+
+
+ DIALIGN format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBASSY 'domain alignment file' (DAF) format, containing a sequence alignment of protein domains belonging to the same SCOP or CATH family.
+
+
+ The format is clustal-like and includes annotation of domain family classification information.
+ daf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for alignment of molecular sequences to MEME profiles (position-dependent scoring matrices) as generated by the MAST tool from the MEME package.
+
+
+ Sequence-MEME profile alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used by the HMMER package for an alignment of a sequence against a hidden Markov model database.
+
+
+ HMMER profile alignment (sequences versus HMMs)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used by the HMMER package for an alignment of a hidden Markov model against a sequence database.
+
+
+ HMMER profile alignment (HMM versus sequences)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of PHYLIP phylogenetic distance matrix data.
+
+
+ Data Type must include the distance matrix, probably as pairs of sequence identifiers with a distance (integer or float).
+ Phylip distance matrix
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dendrogram (tree file) format generated by ClustalW.
+
+
+ ClustalW dendrogram
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw data file format used by Phylip from which a phylogenetic tree is directly generated or plotted.
+
+
+ Phylip tree raw
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PHYLIP file format for continuous quantitative character data.
+
+
+ Phylip continuous quantitative characters
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of phylogenetic property data.
+
+ Phylogenetic property values format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PHYLIP file format for phylogenetics character frequency data.
+
+
+ Phylip character frequencies format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of PHYLIP discrete states data.
+
+
+ Phylip discrete states format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of PHYLIP cliques data.
+
+
+ Phylip cliques format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree data format used by the PHYLIP program.
+
+
+ Phylip tree format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of an entry from the TreeBASE database of phylogenetic data.
+
+
+ TreeBASE format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of an entry from the TreeFam database of phylogenetic data.
+
+
+ TreeFam format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for distances, such as Branch Score distance, between two or more phylogenetic trees as used by the Phylip package.
+
+
+ Phylip tree distance format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of an entry from the DSSP database (Dictionary of Secondary Structure in Proteins).
+
+
+ The DSSP database is built using the DSSP application which defines secondary structure, geometrical features and solvent exposure of proteins, given atomic coordinates in PDB format.
+ dssp
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of the HSSP database (Homology-derived Secondary Structure in Proteins).
+
+
+ hssp
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of RNA secondary structure in dot-bracket notation, originally generated by the Vienna RNA package/server.
+ Vienna RNA format
+ Vienna RNA secondary structure format
+
+
+ Dot-bracket format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of local RNA secondary structure components with free energy values, generated by the Vienna RNA package/server.
+
+
+ Vienna local RNA secondary structure format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of an entry (or part of an entry) from the PDB database.
+ PDB entry format
+
+
+ PDB database entry format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of PDB database in PDB format.
+ PDB format
+
+
+ PDB
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of PDB database in mmCIF format.
+
+
+ mmCIF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of PDB database in PDBML (XML) format.
+
+
+ PDBML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Format of a matrix of 3D-1D scores used by the EMBOSS Domainatrix applications.
+
+
+ Domainatrix 3D-1D scoring matrix format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Amino acid index format used by the AAindex database.
+
+
+ aaindex
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from IntEnz (The Integrated Relational Enzyme Database).
+
+ IntEnz is the master copy of the Enzyme Nomenclature, the recommendations of the NC-IUBMB on the Nomenclature and Classification of Enzyme-Catalysed Reactions.
+ IntEnz enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the BRENDA enzyme database.
+
+ BRENDA enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the KEGG REACTION database of biochemical reactions.
+
+ KEGG REACTION enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the KEGG ENZYME database.
+
+ KEGG ENZYME enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the proto section of the REBASE enzyme database.
+
+ REBASE proto enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the withrefm section of the REBASE enzyme database.
+
+ REBASE withrefm enzyme report format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of output of the Pcons Model Quality Assessment Program (MQAP).
+
+
+ Pcons ranks protein models by assessing their quality based on the occurrence of recurring common three-dimensional structural patterns. Pcons returns a score reflecting the overall global quality and a score for each individual residue in the protein reflecting the local residue quality.
+ Pcons report format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of output of the ProQ protein model quality predictor.
+
+
+ ProQ is a neural network-based predictor that predicts the quality of a protein model based on the number of structural features.
+ ProQ report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of SMART domain assignment data.
+
+ The SMART output file includes data on genetically mobile domains / analysis of domain architectures, including phyletic distributions, functional class, tertiary structures and functionally important residues.
+ SMART domain assignment report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the BIND database of protein interaction.
+
+ BIND entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the IntAct database of protein interaction.
+
+ IntAct entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the InterPro database of protein signatures (sequence classifiers) and classified sequences.
+
+ This includes signature metadata, sequence references and a reference to the signature itself. There is normally a header (entry accession numbers and name), abstract, taxonomy information, example proteins etc. Each entry also includes a match list which gives a number of different views of the signature matches for the sequences in each InterPro entry.
+ InterPro entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the textual abstract of signatures in an InterPro entry and its protein matches.
+
+ References are included and a functional inference is made where possible.
+ InterPro entry abstract format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Gene3D protein secondary database.
+
+ Gene3D entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the PIRSF protein secondary database.
+
+ PIRSF entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the PRINTS protein secondary database.
+
+ PRINTS entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Panther library of protein families and subfamilies.
+
+ Panther Families and HMMs entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Pfam protein secondary database.
+
+ Pfam entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the SMART protein secondary database.
+
+ SMART entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Superfamily protein secondary database.
+
+ Superfamily entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the TIGRFam protein secondary database.
+
+ TIGRFam entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the ProDom protein domain classification database.
+
+ ProDom entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the FSSP database.
+
+ FSSP entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report format for the kinetics of enzyme-catalysed reaction(s) in a format generated by EMBOSS findkm. This includes Michaelis Menten plot, Hanes Woolf plot, Michaelis Menten constant (Km) and maximum velocity (Vmax).
+
+
+ findkm
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of Ensembl genome database.
+
+ Ensembl gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of DictyBase genome database.
+
+ DictyBase gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of Candida Genome database.
+
+ CGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of DragonDB genome database.
+
+ DragonDB gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of EcoCyc genome database.
+
+ EcoCyc gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of FlyBase genome database.
+
+ FlyBase gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of Gramene genome database.
+
+ Gramene gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of KEGG GENES genome database.
+
+ KEGG GENES gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Maize genetics and genomics database (MaizeGDB).
+
+ MaizeGDB gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Mouse Genome Database (MGD).
+
+ MGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Rat Genome Database (RGD).
+
+ RGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Saccharomyces Genome Database (SGD).
+
+ SGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Sanger GeneDB genome database.
+
+ GeneDB gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of The Arabidopsis Information Resource (TAIR) genome database.
+
+ TAIR gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the WormBase genomes database.
+
+ WormBase gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Zebrafish Information Network (ZFIN) genome database.
+
+ ZFIN gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the TIGR genome database.
+
+ TIGR gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the dbSNP database.
+
+ dbSNP polymorphism report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the OMIM database of genotypes and phenotypes.
+
+ OMIM entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a record from the HGVbase database of genotypes and phenotypes.
+
+ HGVbase entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a record from the HIVDB database of genotypes and phenotypes.
+
+ HIVDB entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the KEGG DISEASE database.
+
+ KEGG DISEASE entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format on PCR primers and hybridisation oligos as generated by Whitehead primer3 program.
+
+
+ Primer3 primer
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format of raw sequence read data from an Applied Biosystems sequencing machine.
+
+
+ ABI
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of MIRA sequence trace information file.
+
+
+ mira
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ caf
+
+ Common Assembly Format (CAF). A sequence assembly format including contigs, base-call qualities, and other metadata.
+
+
+ CAF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ Sequence assembly project file EXP format.
+ Affymetrix EXP format
+
+
+ EXP
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Staden Chromatogram Files format (SCF) of base-called sequence reads, qualities, and other metadata.
+
+
+ SCF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ PHD sequence trace format to store serialised chromatogram data (reads).
+
+
+ PHD
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of Affymetrix data file of raw image data.
+ Affymetrix image data file format
+
+
+ dat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of Affymetrix data file of information about (raw) expression levels of the individual probes.
+ Affymetrix probe raw data format
+
+
+ cel
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of affymetrix gene cluster files (hc-genes.txt, hc-chips.txt) from hierarchical clustering.
+
+
+ affymetrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the ArrayExpress microarrays database.
+
+ ArrayExpress entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Affymetrix data file format for information about experimental conditions and protocols.
+ Affymetrix experimental conditions data file format
+
+
+ affymetrix-exp
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ chp
+ Format of Affymetrix data file of information about (normalised) expression levels of the individual probes.
+ Affymetrix probe normalised data format
+
+
+ CHP
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the Electron Microscopy DataBase (EMDB).
+
+ EMDB entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG PATHWAY database of pathway maps for molecular interactions and reaction networks.
+
+ KEGG PATHWAY entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the MetaCyc metabolic pathways database.
+
+ MetaCyc entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of a report from the HumanCyc metabolic pathways database.
+
+ HumanCyc entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the INOH signal transduction pathways database.
+
+ INOH entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the PATIKA biological pathways database.
+
+ PATIKA entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the reactome biological pathways database.
+
+ Reactome entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the aMAZE biological pathways and molecular interactions database.
+
+ aMAZE entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the CPDB database.
+
+ CPDB entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the Panther Pathways database.
+
+ Panther Pathways entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of Taverna workflows.
+
+
+ Taverna workflow format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of mathematical models from the BioModel database.
+
+ Models are annotated and linked to relevant data resources, such as publications, databases of compounds and pathways, controlled vocabularies, etc.
+ BioModel mathematical model format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG LIGAND chemical database.
+
+ KEGG LIGAND entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG COMPOUND database.
+
+ KEGG COMPOUND entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG PLANT database.
+
+ KEGG PLANT entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG GLYCAN database.
+
+ KEGG GLYCAN entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from PubChem.
+
+ PubChem entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from a database of chemical structures and property predictions.
+
+ ChemSpider entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from Chemical Entities of Biological Interest (ChEBI).
+
+ ChEBI includes an ontological classification defining relations between entities or classes of entities.
+ ChEBI entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the MSDchem ligand dictionary.
+
+ MSDchem ligand dictionary entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of an entry from the HET group dictionary (HET groups from PDB files).
+
+
+ HET group dictionary entry format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG DRUG database.
+
+ KEGG DRUG entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of bibliographic reference as used by the PubMed database.
+
+
+ PubMed citation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for abstracts of scientific articles from the Medline database.
+
+
+ Bibliographic reference information including citation information is included
+ Medline Display Format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CiteXplore 'core' citation format including title, journal, authors and abstract.
+
+
+ CiteXplore-core
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CiteXplore 'all' citation format includes all known details such as Mesh terms and cross-references.
+
+
+ CiteXplore-all
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Article format of the PubMed Central database.
+
+
+ pmc
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of iHOP (Information Hyperlinked over Proteins) text-mining result.
+
+
+ iHOP format
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ OSCAR format of annotated chemical text.
+
+
+ OSCAR (Open-Source Chemistry Analysis Routines) software performs chemistry-specific parsing of chemical documents. It attempts to identify chemical names, ontology concepts, and chemical data from a document.
+ OSCAR format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Format of an ATOM record (describing data for an individual atom) from a PDB file.
+
+ PDB atom record format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of CATH domain classification information for a polypeptide chain.
+
+ The report (for example http://www.cathdb.info/chain/1cukA) includes chain identifiers, domain identifiers and CATH codes for domains in a given protein chain.
+ CATH chain report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of CATH domain classification information for a protein PDB file.
+
+ The report (for example http://www.cathdb.info/pdb/1cuk) includes chain identifiers, domain identifiers and CATH codes for domains in a given PDB file.
+ CATH PDB report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry (gene) format of the NCBI database.
+
+ NCBI gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Moby:GI_Gene
+ Report format for biological functions associated with a gene name and its alternative names (synonyms, homonyms), as generated by the GeneIlluminator service.
+
+ This includes a gene name and abbreviation of the name which may be in a name space indicating the gene status and relevant organisation.
+ GeneIlluminator gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Moby:BacMapGeneCard
+ Format of a report on the DNA and protein sequences for a given gene label from a bacterial chromosome map from the BacMap database.
+
+ BacMap gene card format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on Escherichia coli genes, proteins and molecules from the CyberCell Database (CCDB).
+
+ ColiCard report format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Map of a plasmid (circular DNA) in PlasMapper TextMap format.
+
+
+ PlasMapper TextMap
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree Newick (text) format.
+ nh
+
+
+ newick
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree TreeCon (text) format.
+
+
+ TreeCon format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree Nexus (text) format.
+
+
+ Nexus format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A defined way or layout of representing and structuring data in a computer file, blob, string, message, or elsewhere.
+ Data format
+ Data model
+ Exchange format
+ File format
+
+
+ The main focus in EDAM lies on formats as means of structuring data exchanged between different tools or resources. The serialisation, compression, or encoding of concrete data formats/models is not in scope of EDAM. Format 'is format of' Data.
+ Format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Data model
+ A defined data format has its implicit or explicit data model, and EDAM does not distinguish the two. Some data models, however, do not have any standard way of serialisation into an exchange format, and those are thus not considered formats in EDAM. (Remark: even broader - or closely related - term to 'Data model' would be an 'Information model'.)
+
+
+
+
+ File format
+ File format denotes only formats of a computer file, but the same formats apply also to data blobs or exchanged messages.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data format for an individual atom.
+
+ Atomic data format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a molecular sequence record.
+
+
+ Sequence record format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for molecular sequence feature information.
+
+
+ Sequence feature annotation format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for molecular sequence alignment information.
+
+
+ Alignment format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ACEDB sequence format.
+
+
+ acedb
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Clustalw output format.
+
+ clustal sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Codata entry format.
+
+
+ codata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fasta format variant with database name before ID.
+
+
+ dbid
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBL entry format.
+ EMBL
+ EMBL sequence format
+
+
+ EMBL format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Staden experiment file format.
+
+
+ Staden experiment format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA format including NCBI-style IDs.
+ FASTA format
+ FASTA sequence format
+
+
+ FASTA
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ fastq
+ fq
+ FASTQ short read format ignoring quality scores.
+ FASTAQ
+ fq
+
+
+ FASTQ
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTQ Illumina 1.3 short read format.
+
+
+ FASTQ-illumina
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTQ short read format with phred quality.
+
+
+ FASTQ-sanger
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTQ Solexa/Illumina 1.0 short read format.
+
+
+ FASTQ-solexa
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fitch program format.
+
+
+ fitch program
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GCG sequence file format.
+ GCG SSF
+
+
+ GCG SSF (single sequence file) file format.
+ GCG
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Genbank entry format.
+ GenBank
+
+
+ GenBank format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Genpept protein entry format.
+
+
+ Currently identical to refseqp format
+ genpept
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GFF feature file format with sequence in the header.
+
+
+ GFF2-seq
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GFF3 feature file format with sequence.
+
+
+ GFF3-seq
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA sequence format including NCBI-style GIs.
+
+
+ giFASTA format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Hennig86 output sequence format.
+
+
+ hennig86
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Intelligenetics sequence format.
+
+
+ ig
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Intelligenetics sequence format (strict version).
+
+
+ igstrict
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Jackknifer interleaved and non-interleaved sequence format.
+
+
+ jackknifer
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mase program sequence format.
+
+
+ mase format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mega interleaved and non-interleaved sequence format.
+
+
+ mega-seq
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GCG MSF (multiple sequence file) file format.
+
+
+ GCG MSF
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ pir
+ NBRF/PIR entry sequence format.
+ nbrf
+ pir
+
+
+ nbrf/pir
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nexus/paup interleaved sequence format.
+
+
+ nexus-seq
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB sequence format (ATOM lines).
+
+
+ pdb format in EMBOSS.
+ pdbatom
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB nucleotide sequence format (ATOM lines).
+
+
+ pdbnuc format in EMBOSS.
+ pdbatomnuc
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB nucleotide sequence format (SEQRES lines).
+
+
+ pdbnucseq format in EMBOSS.
+ pdbseqresnuc
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB sequence format (SEQRES lines).
+
+
+ pdbseq format in EMBOSS.
+ pdbseqres
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Plain old FASTA sequence format (unspecified format for IDs).
+
+
+ Pearson format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Phylip interleaved sequence format.
+
+ phylip sequence format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ PHYLIP non-interleaved sequence format.
+
+ phylipnon sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw sequence format with no non-sequence characters.
+
+
+ raw
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Refseq protein entry sequence format.
+
+
+ Currently identical to genpept format
+ refseqp
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Selex sequence format.
+
+ selex sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+
+ Staden suite sequence format.
+
+
+ Staden format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ Stockholm multiple sequence alignment format (used by Pfam and Rfam).
+
+
+ Stockholm format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA strider output sequence format.
+
+
+ strider format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UniProtKB entry sequence format.
+ SwissProt format
+ UniProt format
+
+
+ UniProtKB format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ txt
+ Plain text sequence format (essentially unformatted).
+
+
+ plain text format (unformatted)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Treecon output sequence format.
+
+ treecon sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ NCBI ASN.1-based sequence format.
+
+
+ ASN.1 sequence format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAS sequence (XML) format (any type).
+ das sequence format
+
+
+ DAS format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAS sequence (XML) format (nucleotide-only).
+
+
+ The use of this format is deprecated.
+ dasdna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS debugging trace sequence format of full internal data content.
+
+
+ debug-seq
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Jackknifer output sequence non-interleaved format.
+
+
+ jackknifernon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Mega non-interleaved output sequence format.
+
+ meganon sequence format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ NCBI FASTA sequence format with NCBI-style IDs.
+
+
+ There are several variants of this.
+ NCBI format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nexus/paup non-interleaved sequence format.
+
+
+ nexusnon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ General Feature Format (GFF) of sequence features.
+
+
+ GFF2
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ Generic Feature Format version 3 (GFF3) of sequence features.
+
+
+ GFF3
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ PIR feature format.
+
+
+ pir
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Swiss-Prot feature format.
+
+ swiss feature
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAS GFF (XML) feature format.
+ DASGFF feature
+ das feature
+
+
+ DASGFF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS debugging trace feature format of full internal data content.
+
+
+ debug-feat
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBL feature format.
+
+ EMBL feature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Genbank feature format.
+
+ GenBank feature
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ClustalW format for (aligned) sequences.
+ clustal
+
+
+ ClustalW format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS alignment format for debugging trace of full internal data content.
+
+
+ debug
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fasta format for (aligned) sequences.
+
+
+ FASTA-aln
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX0 alignment format.
+
+
+ markx0
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX1 alignment format.
+
+
+ markx1
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX10 alignment format.
+
+
+ markx10
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX2 alignment format.
+
+
+ markx2
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX3 alignment format.
+
+
+ markx3
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment format for start and end of matches between sequence pairs.
+
+
+ match
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mega format for (typically aligned) sequences.
+
+
+ mega
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mega non-interleaved format for (typically aligned) sequences.
+
+
+ meganon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ MSF format for (aligned) sequences.
+
+ msf alignment format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Nexus/paup format for (aligned) sequences.
+
+ nexus alignment format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Nexus/paup non-interleaved format for (aligned) sequences.
+
+ nexusnon alignment format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS simple sequence pairwise alignment format.
+
+
+ pair
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.bioperl.org/wiki/PHYLIP_multiple_alignment_format
+ Phylip format for (aligned) sequences.
+ PHYLIP
+ PHYLIP interleaved format
+ ph
+ phy
+
+
+ PHYLIP format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.bioperl.org/wiki/PHYLIP_multiple_alignment_format
+ Phylip non-interleaved format for (aligned) sequences.
+ PHYLIP sequential format
+ phylipnon
+
+
+ PHYLIP sequential
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment format for score values for pairs of sequences.
+
+
+ scores format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SELEX format for (aligned) sequences.
+
+
+ selex
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS simple multiple alignment format.
+
+
+ EMBOSS simple format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Simple multiple sequence (alignment) format for SRS.
+
+
+ srs format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Simple sequence pair (alignment) format for SRS.
+
+
+ srspair
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ T-Coffee program alignment format.
+
+
+ T-Coffee format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Treecon format for (aligned) sequences.
+
+
+ TreeCon-seq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a phylogenetic tree.
+
+
+ Phylogenetic tree format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a biological pathway or network.
+
+
+ Biological pathway or network format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a sequence-profile alignment.
+
+
+ Sequence-profile alignment format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data format for a sequence-HMM profile alignment.
+
+ Sequence-profile alignment (HMM) format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for an amino acid index.
+
+
+ Amino acid index format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a full-text scientific article.
+ Literature format
+
+
+ Article format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format of a report from text mining.
+
+
+ Text mining report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for reports on enzyme kinetics.
+
+
+ Enzyme kinetics report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on a chemical compound.
+ Chemical compound annotation format
+ Chemical structure format
+ Small molecule report format
+ Small molecule structure format
+
+
+ Chemical data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on a particular locus, gene, gene system or groups of genes.
+ Gene features format
+
+
+ Gene annotation format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a workflow.
+ Programming language
+ Script format
+
+
+ Workflow format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a molecular tertiary structure.
+
+
+ Tertiary structure format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.2
+
+
+ Data format for a biological model.
+
+ Biological model format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Text format of a chemical formula.
+
+
+ Chemical formula format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of raw (unplotted) phylogenetic data.
+
+
+ Phylogenetic character data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic continuous quantitative character data.
+
+
+ Phylogenetic continuous quantitative character format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic discrete states data.
+
+
+ Phylogenetic discrete states format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic cliques data.
+
+
+ Phylogenetic tree report (cliques) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic invariants data.
+
+
+ Phylogenetic tree report (invariants) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation format for electron microscopy models.
+
+ Electron microscopy model format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for phylogenetic tree distance data.
+
+
+ Phylogenetic tree report (tree distances) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Format for sequence polymorphism data.
+
+ Polymorphism report format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for reports on a protein family.
+
+
+ Protein family report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for molecular interaction data.
+ Molecular interaction format
+
+
+ Protein interaction format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for sequence assembly data.
+
+
+ Sequence assembly format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for information about a microarray experiment per se (not the data generated from that experiment).
+
+
+ Microarray experiment data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for sequence trace data (i.e. including base call information).
+
+
+ Sequence trace format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a file of gene expression data, e.g. a gene expression matrix or profile.
+ Gene expression data format
+
+
+ Gene expression report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on genotype / phenotype information.
+
+ Genotype and phenotype annotation format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a map of (typically one) molecular sequence annotated with features.
+
+
+ Map format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on PCR primers or hybridisation oligos in a nucleic acid sequence.
+
+
+ Nucleic acid features (primers) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report of general information about a specific protein.
+
+
+ Protein report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report of general information about a specific enzyme.
+
+ Protein report (enzyme) format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a matrix of 3D-1D scores (amino acid environment probabilities).
+
+
+ 3D-1D scoring matrix format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on the quality of a protein three-dimensional model.
+
+
+ Protein structure report (quality evaluation) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on sequence hits and associated data from searching a sequence database.
+
+
+ Database hits (sequence) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a matrix of genetic distances between molecular sequences.
+
+
+ Sequence distance matrix format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a sequence motif.
+
+
+ Sequence motif format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a sequence profile.
+
+
+ Sequence profile format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a hidden Markov model.
+
+
+ Hidden Markov model format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format of a dirichlet distribution.
+
+
+ Dirichlet distribution format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for the emission and transition counts of a hidden Markov model.
+
+
+ HMM emission and transition counts format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for secondary structure (predicted or real) of an RNA molecule.
+
+
+ RNA secondary structure format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for secondary structure (predicted or real) of a protein molecule.
+
+
+ Protein secondary structure format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used to specify range(s) of sequence positions.
+
+
+ Sequence range format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for molecular sequence with possible unknown positions but without non-sequence characters.
+
+
+ pure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions but possibly with non-sequence characters.
+
+
+ unpure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions but without ambiguity characters.
+
+
+ unambiguous sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions and possible ambiguity characters.
+
+
+ ambiguous
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for map of repeats in molecular (typically nucleotide) sequences.
+
+
+ Sequence features (repeats) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for report on restriction enzyme recognition sites in nucleotide sequences.
+
+
+ Nucleic acid features (restriction sites) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.10
+
+ Format used for report on coding regions in nucleotide sequences.
+
+
+ Gene features (coding region) format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for clusters of molecular sequences.
+
+
+ Sequence cluster format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used for clusters of protein sequences.
+
+
+ Sequence cluster format (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used for clusters of nucleotide sequences.
+
+
+ Sequence cluster format (nucleic acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Format used for clusters of genes.
+
+ Gene cluster format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling EMBL entry format.
+
+
+ This concept may be used for the many non-standard EMBL-like text formats.
+ EMBL-like (text)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling FASTQ short read format.
+
+
+ This concept may be used for non-standard FASTQ short read-like formats.
+ FASTQ-like format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ true
+ XML format for EMBL entries.
+
+
+ EMBLXML
+ https://fairsharing.org/bsg-s001452/
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ true
+ Specific XML format for EMBL entries (only uses certain sections).
+
+
+ cdsxml
+ https://fairsharing.org/bsg-s001452/
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ INSDSeq provides the elements of a sequence as presented in the GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of additional structure.
+ INSD XML
+ INSDC XML
+
+
+ INSDSeq
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Geneseq sequence format.
+
+
+ geneseq
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text sequence format resembling uniprotkb entry format.
+
+
+ UniProt-like (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ UniProt entry sequence format.
+
+
+ UniProt format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ ipi sequence format.
+
+ ipi
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Abstract format used by MedLine database.
+
+
+ medline
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for ontologies.
+
+
+ Ontology format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A serialisation format conforming to the Open Biomedical Ontologies (OBO) model.
+
+
+ OBO format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling FASTA format.
+
+
+ This concept may also be used for the many non-standard FASTA-like formats.
+ FASTA-like (text)
+ http://filext.com/file-extension/FASTA
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data format for a molecular sequence record, typically corresponding to a full entry from a molecular sequence database.
+
+
+ Sequence record full format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data format for a molecular sequence record 'lite', typically molecular sequence and minimal metadata, such as an identifier of the sequence and/or a comment.
+
+
+ Sequence record lite format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An XML format for EMBL entries.
+
+
+ This is a placeholder for other more specific concepts. It should not normally be used for annotation.
+ EMBL format (XML)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling GenBank entry (plain text) format.
+
+
+ This concept may be used for the non-standard GenBank-like text formats.
+ GenBank-like format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for a sequence feature table.
+
+
+ Sequence feature table format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Format of a report on organism strain data / cell line.
+
+ Strain data format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format for a report of strain data as used for CIP database entries.
+
+ CIP strain data format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ PHYLIP file format for phylogenetic property data.
+
+ phylip property values
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format (HTML) for the STRING database of protein interaction.
+
+ STRING entry format (HTML)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format (XML) for the STRING database of protein interaction.
+
+
+ STRING entry format (XML)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GFF feature format (of indeterminate version).
+
+
+ GFF
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ Gene Transfer Format (GTF), a restricted version of GFF.
+
+
+ GTF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA format wrapped in HTML elements.
+
+
+ FASTA-HTML
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBL entry format wrapped in HTML elements.
+
+
+ EMBL-HTML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the BioCyc enzyme database.
+
+ BioCyc enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the Enzyme nomenclature database (ENZYME).
+
+ ENZYME enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on a gene from the PseudoCAP database.
+
+ PseudoCAP gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on a gene from the GeneCards database.
+
+ GeneCards gene report format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Textual format.
+ Plain text format
+ txt
+
+
+ Data in text format can be compressed into binary format, or can be a value of an XML element or attribute. Markup formats are not considered textual (or more precisely, not plain-textual).
+ Textual format
+ http://filext.com/file-extension/TXT
+ http://www.iana.org/assignments/media-types/media-types.xhtml#text
+ http://www.iana.org/assignments/media-types/text/plain
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ HTML format.
+ Hypertext Markup Language
+
+
+ HTML
+ http://filext.com/file-extension/HTML
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ xml
+
+
+
+ eXtensible Markup Language (XML) format.
+ eXtensible Markup Language
+
+
+ Data in XML format can be serialised into text, or binary format.
+ XML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Binary format.
+
+
+ Only specific native binary formats are listed under 'Binary format' in EDAM. Generic binary formats - such as any data being zipped, or any XML data being serialised into the Efficient XML Interchange (EXI) format - are not modelled in EDAM. Refer to http://wsio.org/compression_004.
+ Binary format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Typical textual representation of a URI.
+
+ URI format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the NCI-Nature pathways database.
+
+ NCI-Nature pathway entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A placeholder concept for visual navigation by dividing data formats by the content of the data that is represented.
+ Format (typed)
+
+
+ This concept exists only to assist EDAM maintenance and navigation in graphical browsers. It does not add semantic information. The concept branch under 'Format (typed)' provides an alternative organisation of the concepts nested under the other top-level branches ('Binary', 'HTML', 'RDF', 'Text' and 'XML'). All concepts under here are already included under those branches.
+ Format (by type of data)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+
+
+
+ Any ontology allowed, none mandatory. Preferably with URIs but URIs are not mandatory. Non-ontology terms are also allowed as the last resort in case of a lack of suitable ontology.
+
+
+
+ BioXSD-schema-based XML format of sequence-based data and some other common data - sequence records, alignments, feature records, references to resources, and more - optimised for integrative bioinformatics, Web services, and object-oriented programming.
+ BioJSON
+ BioXSD
+ BioXSD XML
+ BioXSD XML format
+ BioXSD data model
+ BioXSD format
+ BioXSD in XML
+ BioXSD in XML format
+ BioXSD+XML
+ BioXSD/GTrack
+ BioXSD|GTrack
+ BioYAML
+
+
+ 'BioXSD' belongs to the 'BioXSD|GTrack' ecosystem of generic formats. 'BioXSD in XML' is the XML format based on the common, unified 'BioXSD data model', a.k.a. 'BioXSD|BioJSON|BioYAML'.
+ BioXSD (XML)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A serialisation format conforming to the Resource Description Framework (RDF) model.
+ Resource Description Framework format
+ RDF
+ Resource Description Framework
+
+
+ RDF format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Genbank entry format wrapped in HTML elements.
+
+
+ GenBank-HTML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on protein features (domain composition).
+
+ Protein features (domains) format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling EMBL entry (plain text) format.
+
+
+ This concept may be used for the many non-standard EMBL-like formats.
+ EMBL-like format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling FASTQ short read format.
+
+
+ This concept may be used for non-standard FASTQ short read-like formats.
+ FASTQ-like format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling FASTA format.
+
+
+ This concept may be used for the many non-standard FASTA-like formats.
+ FASTA-like
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A sequence format resembling uniprotkb entry format.
+
+
+ uniprotkb-like format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for a sequence feature table.
+
+
+ Sequence feature table format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ OBO ontology text format.
+
+
+ OBO
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ OBO ontology XML format.
+
+
+ OBO-XML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for a molecular sequence record (text).
+
+
+ Sequence record format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for a molecular sequence record (XML).
+
+
+ Sequence record format (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ XML format for a sequence feature table.
+
+
+ Sequence feature table format (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for molecular sequence alignment information.
+
+
+ Alignment format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ XML format for molecular sequence alignment information.
+
+
+ Alignment format (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for a phylogenetic tree.
+
+
+ Phylogenetic tree format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ XML format for a phylogenetic tree.
+
+
+ Phylogenetic tree format (XML)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An XML format resembling EMBL entry format.
+
+
+ This concept may be used for any non-standard EMBL-like XML formats.
+ EMBL-like (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling GenBank entry (plain text) format.
+
+
+ This concept may be used for the non-standard GenBank-like formats.
+ GenBank-like format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the STRING database of protein interaction.
+
+ STRING entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for sequence assembly data.
+
+
+ Sequence assembly format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Text format (representation) of amino acid residues.
+
+ Amino acid identifier format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence without any unknown positions or ambiguity characters.
+
+
+ completely unambiguous
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence (characters ACGTU only) without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure nucleotide
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence (characters ACGT only) without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure dna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence (characters ACGU only) without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure rna sequence
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a raw molecular sequence (i.e. the alphabet used).
+
+
+ Raw sequence format
+ http://www.onto-med.de/ontologies/gfo.owl#Symbol_sequence
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ BAM format, the binary, BGZF-formatted compressed version of SAM format for alignment of nucleotide sequences (e.g. sequencing reads) to (a) reference sequence(s). May contain base-call and alignment qualities and other data.
+
+
+ BAM
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Sequence Alignment/Map (SAM) format for alignment of nucleotide sequences (e.g. sequencing reads) to (a) reference sequence(s). May contain base-call and alignment qualities and other data.
+
+
+ The format supports short and long reads (up to 128Mbp) produced by different sequencing platforms and is used to hold mapped data within the GATK and across the Broad Institute, the Sanger Centre, and throughout the 1000 Genomes project.
+ SAM
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Systems Biology Markup Language (SBML), the standard XML format for models of biological processes such as for example metabolism, cell signaling, and gene regulation.
+
+
+ SBML
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for any protein sequence without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure protein
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a bibliographic reference.
+
+
+ Bibliographic reference format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a sequence annotation track.
+
+
+ Sequence annotation track format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for molecular sequence alignment information that can hold sequence alignment(s) of only 2 sequences.
+
+
+ Alignment format (pair only)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of sequence variation annotation.
+
+
+ Sequence variation annotation format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some variant of Pearson MARKX alignment format.
+
+
+ markx0 variant
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some variant of Mega format for (typically aligned) sequences.
+
+
+ mega variant
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some variant of Phylip format for (aligned) sequences.
+
+
+ Phylip format variant
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AB1 binary format of raw DNA sequence reads (output of Applied Biosystems' sequencing analysis software). Contains an electropherogram and the DNA base sequence.
+
+
+ AB1 uses the generic binary Applied Biosystems, Inc. Format (ABIF).
+ AB1
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ ACE sequence assembly format including contigs, base-call qualities, and other metadata (version Aug 1998 and onwards).
+
+
+ ACE
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Browser Extensible Data (BED) format of sequence annotation track, typically to be displayed in a genome browser.
+
+
+ BED detail format includes 2 additional columns (http://genome.ucsc.edu/FAQ/FAQformat#format1.7) and BED 15 includes 3 additional columns for experiment scores (http://genomewiki.ucsc.edu/index.php/Microarray_track).
+ BED
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ bigBed format for large sequence annotation tracks, similar to textual BED format.
+
+
+ bigBed
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ wig
+
+ Wiggle format (WIG) of a sequence annotation track that consists of a value for each sequence position. Typically to be displayed in a genome browser.
+
+
+ WIG
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ bigWig format for large sequence annotation tracks that consist of a value for each sequence position. Similar to textual WIG format.
+
+
+ bigWig
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ PSL format of alignments, typically generated by BLAT or psLayout. Can be displayed in a genome browser like a sequence annotation track.
+
+
+ PSL
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Multiple Alignment Format (MAF) supporting alignments of whole genomes with rearrangements, directions, multiple pieces to the alignment, and so forth.
+
+
+ Typically generated by Multiz and TBA aligners; can be displayed in a genome browser like a sequence annotation track. This should not be confused with MIRA Assembly Format or Mutation Annotation Format.
+ MAF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ 2bit binary format of nucleotide sequences using 2 bits per nucleotide. In addition encodes unknown nucleotides and lower-case 'masking'.
+
+
+ 2bit
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ .nib (nibble) binary format of a nucleotide sequence using 4 bits per nucleotide (including unknown) and its lower-case 'masking'.
+
+
+ .nib
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ gp
+
+ genePred table format for gene prediction tracks.
+
+
+ genePred format has 3 main variations (http://genome.ucsc.edu/FAQ/FAQformat#format9 http://www.broadinstitute.org/software/igv/genePred). They reflect UCSC Browser DB tables.
+ genePred
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Personal Genome SNP (pgSnp) format for sequence variation tracks (indels and polymorphisms), supported by the UCSC Genome Browser.
+
+
+ pgSnp
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ axt format of alignments, typically produced from BLASTZ.
+
+
+ axt
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ lav
+
+ LAV format of alignments generated by BLASTZ and LASTZ.
+
+
+ LAV
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Pileup format of alignment of sequences (e.g. sequencing reads) to (a) reference sequence(s). Contains aligned bases per base of the reference sequence(s).
+
+
+ Pileup
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ vcf
+ vcf.gz
+ Variant Call Format (VCF) is a tabular format for storing genomic sequence variations.
+
+
+ 1000 Genomes Project has its own specification for encoding structural variations in VCF (https://www.internationalgenome.org/wiki/Analysis/Variant%20Call%20Format/VCF%20(Variant%20Call%20Format)%20version%204.0/encoding-structural-variants). This is based on VCF version 4.0 and not directly compatible with VCF version 4.3.
+ VCF
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Sequence Read Format (SRF) of sequence trace data. Supports submission to the NCBI Short Read Archive.
+
+
+ SRF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ ZTR format for storing chromatogram data from DNA sequencing instruments.
+
+
+ ZTR
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Genome Variation Format (GVF). A GFF3-compatible format with defined header and attribute tags for sequence variation.
+
+
+ GVF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ bcf
+ bcf.gz
+
+ BCF is the binary version of Variant Call Format (VCF) for sequence variation (indels, polymorphisms, structural variation).
+
+
+ BCF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Format of a matrix (array) of numerical values.
+
+
+ Matrix format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Format of data concerning the classification of the sequences and/or structures of protein structural domain(s).
+
+
+ Protein domain classification format
+
+
+
+
+
+
+
+
+ beta13
+ Format of raw SCOP domain classification data files.
+
+
+ These are the parsable data files provided by SCOP.
+ Raw SCOP domain classification format
+
+
+
+
+
+
+
+
+ beta13
+ Format of raw CATH domain classification data files.
+
+
+ These are the parsable data files provided by CATH.
+ Raw CATH domain classification format
+
+
+
+
+
+
+
+
+ beta13
+ Format of summary of domain classification information for a CATH domain.
+
+
+ The report (for example http://www.cathdb.info/domain/1cukA01) includes CATH codes for levels in the hierarchy for the domain, level descriptions and relevant data and links.
+ CATH domain report format
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ Systems Biology Result Markup Language (SBRML), the standard XML format for simulated or calculated results (e.g. trajectories) of systems biology models.
+
+
+ SBRML
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ BioPAX is an exchange format for pathway data, with its data model defined in OWL.
+
+
+ BioPAX
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ EBI Application Result XML is a format returned by sequence similarity search Web services at EBI.
+
+
+ EBI Application Result XML
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ XML Molecular Interaction Format (MIF), standardised by HUPO PSI MI.
+ MIF
+
+
+ PSI MI XML (MIF)
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ phyloXML is a standardised XML format for phylogenetic trees, networks, and associated data.
+
+
+ phyloXML
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ NeXML is a standardised XML format for rich phyloinformatic data.
+
+
+ NeXML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ MAGE-ML XML format for microarray expression data, standardised by MGED (now FGED).
+
+
+ MAGE-ML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ MAGE-TAB textual format for microarray expression data, standardised by MGED (now FGED).
+
+
+ MAGE-TAB
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ GCDML XML format for genome and metagenome metadata according to MIGS/MIMS/MIMARKS information standards, standardised by the Genomic Standards Consortium (GSC).
+
+
+ GCDML
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+
+
+
+
+
+
+
+ GTrack is a generic and optimised tabular format for genome or sequence feature tracks. GTrack unifies the power of other track formats (e.g. GFF3, BED, WIG), and while optimised in size, adds more flexibility, customisation, and automation ("machine understandability").
+ BioXSD/GTrack GTrack
+ BioXSD|GTrack GTrack
+ GTrack ecosystem of formats
+ GTrack format
+ GTrack|BTrack|GSuite GTrack
+ GTrack|GSuite|BTrack GTrack
+
+
+ 'GTrack' belongs to the 'BioXSD|GTrack' ecosystem of generic formats, and particular to its subset, the 'GTrack ecosystem' (GTrack, GSuite, BTrack). 'GTrack' is the tabular format for representing features of sequences and genomes.
+ GTrack
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+ true
+ Data format for a report of information derived from a biological pathway or network.
+
+
+ Biological pathway or network report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+ true
+ Data format for annotation on a laboratory experiment.
+
+
+ Experiment annotation format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ Cytoband format for chromosome cytobands.
+
+
+ Reflects a UCSC Browser DB table.
+ Cytoband format
+
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ CopasiML, the native format of COPASI.
+
+
+ CopasiML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+
+
+ CellML, the format for mathematical models of biological and other networks.
+
+
+ CellML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+
+
+
+ Tabular Molecular Interaction format (MITAB), standardised by HUPO PSI MI.
+
+
+ PSI MI TAB (MITAB)
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ Protein affinity format (PSI-PAR), standardised by HUPO PSI MI. It is compatible with PSI MI XML (MIF) and uses the same XML Schema.
+
+
+ PSI-PAR
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ mzML format for raw spectrometer output data, standardised by HUPO PSI MSS.
+
+
+ mzML is the successor and unifier of the mzData format developed by PSI and mzXML developed at the Seattle Proteome Center.
+ mzML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ true
+ Format for mass spectra and derived data, including peptide sequences etc.
+
+
+ Mass spectrometry data format
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ TraML (Transition Markup Language) is the format for mass spectrometry transitions, standardised by HUPO PSI MSS.
+
+
+ TraML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ mzIdentML is the exchange format for peptides and proteins identified from mass spectra, standardised by HUPO PSI PI. It can be used for outputs of proteomics search engines.
+
+
+ mzIdentML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ mzQuantML is the format for quantitation values associated with peptides, proteins and small molecules from mass spectra, standardised by HUPO PSI PI. It can be used for outputs of quantitation software for proteomics.
+
+
+ mzQuantML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ GelML is the format for describing the process of gel electrophoresis, standardised by HUPO PSI PS.
+
+
+ GelML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ spML is the format for describing proteomics sample processing, other than using gels, prior to mass spectrometric protein identification, standardised by HUPO PSI PS. It may also be applicable for metabolomics.
+
+
+ spML
+
+
+
+
+
+
+
+
+
+ 1.2
+ A human-readable encoding for the Web Ontology Language (OWL).
+
+
+ OWL Functional Syntax
+
+
+
+
+
+
+
+
+
+ 1.2
+ A syntax for writing OWL class expressions.
+
+
+ This format was influenced by the OWL Abstract Syntax and the DL style syntax.
+ Manchester OWL Syntax
+
+
+
+
+
+
+
+
+
+ 1.2
+ A superset of the "Description-Logic Knowledge Representation System Specification from the KRSS Group of the ARPA Knowledge Sharing Effort".
+
+
+ This format is used in Protege 4.
+ KRSS2 Syntax
+
+
+
+
+
+
+
+
+
+ 1.2
+ The Terse RDF Triple Language (Turtle) is a human-friendly serialisation format for RDF (Resource Description Framework) graphs.
+
+
+ The SPARQL Query Language incorporates a very similar syntax.
+ Turtle
+
+
+
+
+
+
+
+
+
+ 1.2
+ nt
+ A plain text serialisation format for RDF (Resource Description Framework) graphs, and a subset of the Turtle (Terse RDF Triple Language) format.
+
+
+ N-Triples should not be confused with Notation 3 which is a superset of Turtle.
+ N-Triples
+
+
+
+
+
+
+
+
+
+ 1.2
+ n3
+ A shorthand non-XML serialisation of Resource Description Framework model, designed with human-readability in mind.
+ N3
+
+
+ Notation3
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ OWL ontology XML serialisation format.
+ OWL
+
+
+ OWL/XML
+
+
+
+
+
+
+
+
+
+ 1.3
+
+
+ The A2M format is used as the primary format for multiple alignments of protein or nucleic-acid sequences in the SAM suite of tools. It is a small modification of FASTA format for sequences and is compatible with most tools that read FASTA.
+
+
+ A2M
+
+
+
+
+
+
+
+
+
+ 1.3
+
+
+ Standard flowgram format (SFF) is a binary file format used to encode results of pyrosequencing from the 454 Life Sciences platform for high-throughput sequencing.
+ Standard flowgram format
+
+
+ SFF
+
+
+
+
+
+
+
+
+ 1.3
+
+ The MAP file describes SNPs and is used by the Plink package.
+ Plink MAP
+
+
+ MAP
+
+
+
+
+
+
+
+
+ 1.3
+
+ The PED file describes individuals and genetic data and is used by the Plink package.
+ Plink PED
+
+
+ PED
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Data format for metadata on an individual and their genetic data.
+
+
+ Individual genetic data format
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ The PED/MAP file describes data used by the Plink package.
+ Plink PED/MAP
+
+
+ PED/MAP
+
+
+
+
+
+
+
+
+
+ 1.3
+
+
+ File format of a CT (Connectivity Table) file from the RNAstructure package.
+ Connect format
+ Connectivity Table file format
+
+
+ CT
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ XRNA old input style format.
+
+
+ SS
+
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ RNA Markup Language.
+
+
+ RNAML
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ Format for the Genetic Data Environment (GDE).
+
+
+ GDE
+
+
+
+
+
+
+
+
+ 1.3
+
+ A multiple alignment in vertical format, as used in the AMPS (Alignment of Multiple Protein Sequences) package.
+ Block file format
+
+
+ BLC
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Format of a data index of some type.
+
+
+ Data index format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ BAM indexing format.
+
+
+ BAI
+
+
+
+
+
+
+
+
+ 1.3
+
+ HMMER profile HMM file for HMMER versions 2.x.
+
+
+ HMMER2
+
+
+
+
+
+
+
+
+ 1.3
+
+ HMMER profile HMM file for HMMER versions 3.x.
+
+
+ HMMER3
+
+
+
+
+
+
+
+
+ 1.3
+
+ PO is the output format of Partial Order Alignment program (POA) performing Multiple Sequence Alignment (MSA).
+
+
+ PO
+
+
+
+
+
+
+
+
+
+ 1.3
+ XML format as produced by the NCBI Blast package.
+
+
+ BLAST XML results format
+
+
+
+
+
+
+
+
+
+ 1.7
+ http://www.ebi.ac.uk/ena/software/cram-usage#format_specification http://samtools.github.io/hts-specs/CRAMv2.1.pdf
+ http://www.ebi.ac.uk/ena/software/cram-usage#format_specification http://samtools.github.io/hts-specs/CRAMv2.1.pdf
+ Reference-based compression of alignment format.
+
+
+ CRAM
+
+
+
+
+
+
+
+
+
+ 1.7
+ json
+
+
+
+ JavaScript Object Notation format; a lightweight, text-based format to represent tree-structured data using key-value pairs.
+ JavaScript Object Notation
+
+
+ JSON
+
+
+
+
+
+
+
+
+
+ 1.7
+ Encapsulated PostScript format.
+
+
+ EPS
+
+
+
+
+
+
+
+
+ 1.7
+ Graphics Interchange Format.
+
+
+ GIF
+
+
+
+
+
+
+
+
+
+ 1.7
+ Microsoft Excel spreadsheet format.
+ Microsoft Excel format
+
+
+ xls
+
+
+
+
+
+
+
+
+ 1.7
+ tab
+ tsv
+
+
+
+ Tabular data represented as tab-separated values in a text file.
+ Tab-delimited
+ Tab-separated values
+ tab
+
+
+ TSV
+
+
+
+
+
+
+
+
+ 1.7
+ 1.10
+
+ Format of a file of gene expression data, e.g. a gene expression matrix or profile.
+
+
+ Gene expression data format
+ true
+
+
+
+
+
+
+
+
+
+ 1.7
+ Format of the Cytoscape input file, in which gene expression ratios or values are specified over one or more experiments.
+
+
+ Cytoscape input file format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ https://github.com/BenLangmead/bowtie/blob/master/MANUAL
+ Bowtie format for indexed reference genome for "small" genomes.
+ Bowtie index format
+
+
+ ebwt
+
+
+
+
+
+
+
+
+ 1.7
+ http://www.molbiol.ox.ac.uk/tutorials/Seqlab_GCG.pdf
+ Rich sequence format.
+ GCG RSF
+
+
+ RSF-format files contain one or more sequences that may or may not be related. In addition to the sequence data, each sequence can be annotated with descriptive sequence information (from the GCG manual).
+ RSF
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Some format based on the GCG format.
+
+
+ GCG format variant
+
+
+
+
+
+
+
+
+
+ 1.7
+ http://rothlab.ucdavis.edu/genhelp/chapter_2_using_sequences.html#_Creating_and_Editing_Single_Sequenc
+ Bioinformatics Sequence Markup Language format.
+
+
+ BSML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ https://github.com/BenLangmead/bowtie/blob/master/MANUAL
+ Bowtie format for indexed reference genome for "large" genomes.
+ Bowtie long index format
+
+
+ ebwtl
+
+
+
+
+
+
+
+
+
+ 1.8
+
+ Ensembl standard format for variation data.
+
+
+ Ensembl variation file format
+
+
+
+
+
+
+
+
+
+ 1.8
+ Microsoft Word format.
+ Microsoft Word format
+ doc
+
+
+ docx
+
+
+
+
+
+
+
+
+ 1.8
+ true
+ Format of documents including word processor, spreadsheet and presentation.
+
+
+ Document format
+
+
+
+
+
+
+
+
+
+ 1.8
+ Portable Document Format.
+
+
+ PDF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.9
+ true
+ Format used for images and image metadata.
+
+
+ Image format
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ Medical image format corresponding to the Digital Imaging and Communications in Medicine (DICOM) standard.
+
+
+ DICOM format
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ nii
+ An open file format from the Neuroimaging Informatics Technology Initiative (NIfTI) commonly used to store brain imaging data obtained using Magnetic Resonance Imaging (MRI) methods.
+ NIFTI format
+ NIfTI-1 format
+
+
+ nii
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ Text-based tagged file format for medical images generated using the MetaImage software package.
+ MetaImage format
+
+
+ mhd
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ Nearly Raw Raster Data format designed to support scientific visualisation and image processing involving N-dimensional raster data.
+
+
+ nrrd
+
+
+
+
+
+
+
+
+ 1.9
+ File format used for scripts written in the R programming language for execution within the R software environment, typically for statistical computation and graphics.
+
+
+ R file format
+
+
+
+
+
+
+
+
+ 1.9
+ File format used for scripts for the Statistical Package for the Social Sciences.
+
+
+ SPSS
+
+
+
+
+
+
+
+
+ 1.9
+
+ eml
+ mht
+ mhtml
+
+
+
+ MIME HTML format for Web pages, which can include external resources, including images, Flash animations and so on.
+ HTML email format
+ HTML email message format
+ MHT
+ MHT format
+ MHTML format
+ MIME HTML
+ MIME HTML format
+ eml
+ MIME multipart
+ MIME multipart format
+ MIME multipart message
+ MIME multipart message format
+
+
+ MHTML is not strictly an HTML format, it is encoded as an HTML email message (although with multipart/related instead of multipart/alternative). It, however, contains the main HTML block as its core, and thus it is for practical reasons included in EDAM as a specialisation of 'HTML'.
+ MHTML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.10
+ Proprietary file format for (raw) BeadArray data used by genomewide profiling platforms from Illumina Inc. This format is output directly from the scanner and stores summary intensities for each probe-type on an array.
+
+
+ IDAT
+
+
+
+
+
+
+
+
+
+ 1.10
+
+ Joint Photographic Experts Group file format for lossy graphics files.
+ JPEG
+ jpeg
+
+
+ Sequence of segments with markers. Begins with byte of 0xFF and follows by marker type.
+ JPG
+
+
+
+
+
+
+
+
+
+ 1.10
+ Reporter Code Count-A data file (.csv) output by the Nanostring nCounter Digital Analyzer, which contains gene sample information, probe information and probe counts.
+
+
+ rcc
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ ARFF (Attribute-Relation File Format) is an ASCII text file format that describes a list of instances sharing a set of attributes.
+
+
+ This file format is for machine learning.
+ arff
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ AFG is a single text-based file assembly format that holds read and consensus information together.
+
+
+ afg
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ The bedGraph format allows display of continuous-valued data in track format. This display type is useful for probability scores and transcriptome data.
+
+
+ Holds a tab-delimited chromosome /start /end / datavalue dataset.
+ bedgraph
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Browser Extensible Data (BED) format of sequence annotation track that strictly does not contain non-standard fields beyond the first 3 columns.
+
+
+ Galaxy allows BED files to contain non-standard fields beyond the first 3 columns, some other implementations do not.
+ bedstrict
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ BED file format where each feature is described by chromosome, start, end, name, score, and strand.
+
+
+ Tab delimited data in strict BED format - no non-standard columns allowed; column count forced to 6
+ bed6
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ A BED file where each feature is described by all twelve columns.
+
+
+ Tab delimited data in strict BED format - no non-standard columns allowed; column count forced to 12
+ bed12
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Tabular format of chromosome names and sizes used by Galaxy.
+
+
+ Galaxy allows BED files to contain non-standard fields beyond the first 3 columns, some other implementations do not.
+ chrominfo
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Custom Sequence annotation track format used by Galaxy.
+
+
+ Used for tracks/track views within galaxy.
+ customtrack
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Color space FASTA format sequence variant.
+
+
+ FASTA format extended for color space information.
+ csfasta
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ HDF5 is a data model, library, and file format for storing and managing data, based on Hierarchical Data Format (HDF).
+ h5
+
+
+ An HDF5 file appears to the user as a directed graph. The nodes of this graph are the higher-level HDF5 objects that are exposed by the HDF5 APIs: Groups, Datasets, Named datatypes. Currently supported by the Python MDTraj package.
+ HDF5 is the new version, according to the HDF group, a completely different technology (https://support.hdfgroup.org/products/hdf4/) compared to HDF.
+ HDF5
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ A versatile bitmap format.
+
+
+ The TIFF format is perhaps the most versatile and diverse bitmap format in existence. Its extensible nature and support for numerous data compression schemes allow developers to customize the TIFF format to fit any peculiar data storage needs.
+ TIFF
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Standard bitmap storage format in the Microsoft Windows environment.
+
+
+ Although it is based on Windows internal bitmap data structures, it is supported by many non-Windows and non-PC applications.
+ BMP
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ IM is a format used by LabEye and other applications based on the IFUNC image processing library.
+
+
+ IFUNC library reads and writes most uncompressed interchange versions of this format.
+ im
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ pcd
+ Photo CD format, which is the highest resolution format for images on a CD.
+
+
+ PCD was developed by Kodak. A PCD file contains five different resolutions (ranging from low to high) of a slide or film negative. Because of this, PCD is often used by many photographers and graphics professionals for high-end printed applications.
+ pcd
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ PCX is an image file format that uses a simple form of run-length encoding. It is lossless.
+
+
+ pcx
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ The PPM format is a lowest common denominator color image file format.
+
+
+ ppm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ PSD (Photoshop Document) is a proprietary file that allows the user to work with the images' individual layers even after the file has been saved.
+
+
+ psd
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ X BitMap is a plain text binary image format used by the X Window System used for storing cursor and icon bitmaps used in the X GUI.
+
+
+ The XBM format was replaced by XPM for X11 in 1989.
+ xbm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ X PixMap (XPM) is an image file format used by the X Window System, it is intended primarily for creating icon pixmaps, and supports transparent pixels.
+
+
+ Sequence of segments with markers. Begins with byte of 0xFF and follows by marker type.
+ xpm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ RGB file format is the native raster graphics file format for Silicon Graphics workstations.
+
+
+ rgb
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ The PBM format is a lowest common denominator monochrome file format. It serves as the common language of a large family of bitmap image conversion filters.
+
+
+ pbm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ The PGM format is a lowest common denominator grayscale file format.
+
+
+ It is designed to be extremely easy to learn and write programs for.
+ pgm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ png
+ PNG is a file format for image compression.
+
+
+ It is expected to replace the Graphics Interchange Format (GIF).
+ PNG
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Scalable Vector Graphics (SVG) is an XML-based vector image format for two-dimensional graphics with support for interactivity and animation.
+ Scalable Vector Graphics
+
+
+ The SVG specification is an open standard developed by the World Wide Web Consortium (W3C) since 1999.
+ SVG
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Sun Raster is a raster graphics file format used on SunOS by Sun Microsystems.
+
+
+ The SVG specification is an open standard developed by the World Wide Web Consortium (W3C) since 1999.
+ rast
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.11
+ true
+ Textual report format for sequence quality for reports from sequencing machines.
+
+
+ Sequence quality report format (text)
+
+
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences).
+
+
+ Phred quality scores are defined as a property which is logarithmically related to the base-calling error probabilities.
+ qual
+
+
+
+
+
+
+
+
+
+ 1.11
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) for Solexa/Illumina 1.0 format.
+
+
+ Solexa/Illumina 1.0 format can encode a Solexa/Illumina quality score from -5 to 62 using ASCII 59 to 126 (although in raw read data Solexa scores from -5 to 40 only are expected)
+ qualsolexa
+
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) from Illumina 1.5 and before Illumina 1.8.
+
+
+ Starting in Illumina 1.5 and before Illumina 1.8, the Phred scores 0 to 2 have a slightly different meaning. The values 0 and 1 are no longer used and the value 2, encoded by ASCII 66 "B", is used also at the end of reads as a Read Segment Quality Control Indicator.
+ qualillumina
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) for SOLiD data.
+
+
+ For SOLiD data, the sequence is in color space, except the first position. The quality values are those of the Sanger format.
+ qualsolid
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) from 454 sequencers.
+
+
+ qual454
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Human ENCODE peak format.
+
+
+ Format that covers both the broad peak format and narrow peak format from ENCODE.
+ ENCODE peak format
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Human ENCODE narrow peak format.
+
+
+ Format that covers both the broad peak format and narrow peak format from ENCODE.
+ ENCODE narrow peak format
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Human ENCODE broad peak format.
+
+
+ ENCODE broad peak format
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ bgz
+ Blocked GNU Zip format.
+
+
+ BAM files are compressed using a variant of GZIP (GNU ZIP), into a format called BGZF (Blocked GNU Zip Format).
+ bgzip
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ TAB-delimited genome position file index format.
+
+
+ tabix
+
+
+
+
+
+
+
+
+ 1.11
+ true
+ Data format for graph data.
+
+
+ Graph format
+
+
+
+
+
+
+
+
+ 1.11
+
+ XML-based format used to store graph descriptions within Galaxy.
+
+
+ xgmml
+
+
+
+
+
+
+
+
+ 1.11
+
+ SIF (simple interaction file) Format - a network/pathway format used for instance in cytoscape.
+
+
+ sif
+
+
+
+
+
+
+
+
+
+ 1.11
+ MS Excel spreadsheet format consisting of a set of XML documents stored in a ZIP-compressed file.
+
+
+ xlsx
+
+
+
+
+
+
+
+
+ 1.11
+
+ Data format used by the SQLite database.
+
+
+ SQLite format
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Data format used by the SQLite database conformant to the Gemini schema.
+
+
+ Gemini SQLite format
+
+
+
+
+
+
+
+
+ 1.11
+ Duplicate of http://edamontology.org/format_3326
+ 1.20
+
+
+ Format of a data index of some type.
+
+
+ Index format
+ true
+
+
+
+
+
+
+
+
+
+ 1.11
+ An index of a genome database, indexed for use by the snpeff tool.
+
+
+ snpeffdb
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Binary format used by MATLAB files to store workspace variables.
+ .mat file format
+ MAT file format
+ MATLAB file format
+
+
+ MAT
+
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Format used by netCDF software library for writing and reading chromatography-MS data files. Also used to store trajectory atom coordinates information, such as the ones obtained by Molecular Dynamics simulations.
+ ANDI-MS
+
+
+ Network Common Data Form (NetCDF) library is supported by AMBER MD package from version 9.
+ netCDF
+
+
+
+
+
+
+
+
+ 1.12
+ mgf
+ Mascot Generic Format. Encodes multiple MS/MS spectra in a single file.
+
+
+ Files include *m*/*z*, intensity pairs separated by headers; headers can contain a bit more information, including search engine instructions.
+ MGF
+
+
+
+
+
+
+
+
+ 1.12
+ Spectral data format file where each spectrum is written to a separate file.
+
+
+ Each file contains one header line for the known or assumed charge and the mass of the precursor peptide ion, calculated from the measured *m*/*z* and the charge. This one line was then followed by all the *m*/*z*, intensity pairs that represent the spectrum.
+ dta
+
+
+
+
+
+
+
+
+ 1.12
+ Spectral data file similar to dta.
+
+
+ Differ from .dta only in subtleties of the header line format and content and support the added feature of being able to.
+ pkl
+
+
+
+
+
+
+
+
+ 1.12
+ https://dx.doi.org/10.1038%2Fnbt1031
+ Common file format for proteomics mass spectrometric data developed at the Seattle Proteome Center/Institute for Systems Biology.
+
+
+ mzXML
+
+
+
+
+
+
+
+
+
+ 1.12
+ http://sashimi.sourceforge.net/schema_revision/pepXML/pepXML_v118.xsd
+ Open data format for the storage, exchange, and processing of peptide sequence assignments of MS/MS scans, intended to provide a common data output format for many different MS/MS search engines and subsequent peptide-level analyses.
+
+
+ pepXML
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Graphical Pathway Markup Language (GPML) is an XML format used for exchanging biological pathways.
+
+
+ GPML
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ oxlicg
+
+
+
+ A list of k-mers and their occurrences in a dataset. Can also be used as an implicit De Bruijn graph.
+ K-mer countgraph
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ mzTab is a tab-delimited format for mass spectrometry-based proteomics and metabolomics results.
+
+
+ mzTab
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+ imzml
+
+ imzML metadata is a data format for mass spectrometry imaging metadata.
+
+
+ imzML data are recorded in 2 files: '.imzXML' is a metadata XML file based on mzML by HUPO-PSI, and '.ibd' is a binary file containing the mass spectra. This entry is for the metadata XML file
+ imzML metadata file
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ qcML is an XML format for quality-related data of mass spectrometry and other high-throughput measurements.
+
+
+ The focus of qcML is towards mass spectrometry based proteomics, but the format is suitable for metabolomics and sequencing as well.
+ qcML
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ PRIDE XML is an XML format for mass spectra, peptide and protein identifications, and metadata about a corresponding measurement, sample, experiment.
+
+
+ PRIDE XML
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Simulation Experiment Description Markup Language (SED-ML) is an XML format for encoding simulation setups, according to the MIASE (Minimum Information About a Simulation Experiment) requirements.
+
+
+ SED-ML
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Open Modeling EXchange format (OMEX) is a ZIPped format for encapsulating all information necessary for a modeling and simulation project in systems biology.
+
+
+ An OMEX file is a ZIP container that includes a manifest file, listing the content of the archive, an optional metadata file adding information about the archive and its content, and the files describing the model. OMEX is one of the standardised formats within COMBINE (Computational Modeling in Biology Network).
+ COMBINE OMEX
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ The Investigation / Study / Assay (ISA) tab-delimited (TAB) format incorporates metadata from experiments employing a combination of technologies.
+
+
+ ISA-TAB is based on MAGE-TAB. Other than tabular, the ISA model can also be represented in RDF, and in JSON (compliant with a set of defined JSON Schemata).
+ ISA-TAB
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ SBtab is a tabular format for biochemical network models.
+
+
+ SBtab
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Biological Connection Markup Language (BCML) is an XML format for biological pathways.
+
+
+ BCML
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Biological Dynamics Markup Language (BDML) is an XML format for quantitative data describing biological dynamics.
+
+
+ BDML
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Biological Expression Language (BEL) is a textual format for representing scientific findings in life sciences in a computable form.
+
+
+ BEL
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ SBGN-ML is an XML format for Systems Biology Graphical Notation (SBGN) diagrams of biological pathways or networks.
+
+
+ SBGN-ML
+
+
+
+
+
+
+
+
+
+ 1.13
+
+ agp
+
+ AGP is a tabular format for a sequence assembly (a contig, a scaffold/supercontig, or a chromosome).
+
+
+ AGP
+
+
+
+
+
+
+
+
+ 1.13
+ PostScript format.
+ PostScript
+
+
+ PS
+
+
+
+
+
+
+
+
+ 1.13
+
+ sra
+ SRA archive format (SRA) is the archive format used for input to the NCBI Sequence Read Archive.
+ SRA
+ SRA archive format
+
+
+ SRA format
+
+
+
+
+
+
+
+
+ 1.13
+
+ VDB ('vertical database') is the native format used for export from the NCBI Sequence Read Archive.
+ SRA native format
+
+
+ VDB
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+ Index file format used by the samtools package to index TAB-delimited genome position files.
+
+
+ Tabix index file format
+
+
+
+
+
+
+
+
+ 1.13
+ A five-column, tab-delimited table of feature locations and qualifiers for importing annotation into an existing Sequin submission (an NCBI tool for submitting and updating GenBank entries).
+
+
+ Sequin format
+
+
+
+
+
+
+
+
+ 1.14
+ Proprietary mass-spectrometry format of Thermo Scientific's ProteomeDiscoverer software.
+ Magellan storage file format
+
+
+ This format corresponds to an SQLite database, and you can look into the files with e.g. SQLiteStudio3. There are also some readers (http://doi.org/10.1021/pr2005154) and converters (http://doi.org/10.1016/j.jprot.2015.06.015) for this format available, which re-engineered the database schema, but there is no official DB schema specification of Thermo Scientific for the format.
+ MSF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.14
+ true
+ Data format for biodiversity data.
+
+
+ Biodiversity data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Exchange format of the Access to Biological Collections Data (ABCD) Schema; a standard for the access to and exchange of data about specimens and observations (primary biodiversity data).
+ ABCD
+
+
+ ABCD format
+
+
+
+
+
+
+
+
+
+ 1.14
+ Tab-delimited text files of GenePattern that contain a column for each sample, a row for each gene, and an expression value for each gene in each sample.
+ GCT format
+ Res format
+
+
+ GCT/Res format
+
+
+
+
+
+
+
+
+
+ 1.14
+ wiff
+ Mass spectrum file format from QSTAR and QTRAP instruments (ABI/Sciex).
+ wiff
+
+
+ WIFF format
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Output format used by X! series search engines that is based on the XML language BIOML.
+
+
+ X!Tandem XML
+
+
+
+
+
+
+
+
+
+ 1.14
+ Proprietary file format for mass spectrometry data from Thermo Scientific.
+
+
+ Proprietary format for which documentation is not available.
+ Thermo RAW
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ "Raw" result file from Mascot database search.
+
+
+ Mascot .dat file
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Format of peak list files from Andromeda search engine (MaxQuant) that consist of arbitrarily many spectra.
+ MaxQuant APL
+
+
+ MaxQuant APL peaklist format
+
+
+
+
+
+
+
+
+ 1.14
+
+ Synthetic Biology Open Language (SBOL) is an XML format for the specification and exchange of biological design information in synthetic biology.
+
+
+ SBOL introduces a standardised format for the electronic exchange of information on the structural and functional aspects of biological designs.
+ SBOL
+
+
+
+
+
+
+
+
+ 1.14
+
+ PMML uses XML to represent mining models. The structure of the models is described by an XML Schema.
+
+
+ One or more mining models can be contained in a PMML document.
+ PMML
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Image file format used by the Open Microscopy Environment (OME).
+
+
+ An OME-TIFF dataset consists of one or more files in standard TIFF or BigTIFF format, with the file extension .ome.tif or .ome.tiff, and an identical (or in the case of multiple files, nearly identical) string of OME-XML metadata embedded in the ImageDescription tag of each file's first IFD (Image File Directory). BigTIFF file extensions are also permitted, with the file extension .ome.tf2, .ome.tf8 or .ome.btf, but note these file extensions are an addition to the original specification, and software using an older version of the specification may not be able to handle these file extensions.
+ OME develops open-source software and data format standards for the storage and manipulation of biological microscopy data. It is a joint project between universities, research establishments, industry and the software development community.
+ OME-TIFF
+
+
+
+
+
+
+
+
+ 1.14
+
+ The LocARNA PP format combines sequence or alignment information and (respectively, single or consensus) ensemble probabilities into an PP 2.0 record.
+
+
+ Format for multiple aligned or single sequences together with the probabilistic description of the (consensus) RNA secondary structure ensemble by probabilities of base pairs, base pair stackings, and base pairs and unpaired bases in the loop of base pairs.
+ LocARNA PP
+
+
+
+
+
+
+
+
+ 1.14
+
+ Input format used by the Database of Genotypes and Phenotypes (dbGaP).
+
+
+ The Database of Genotypes and Phenotypes (dbGaP) is a National Institutes of Health (NIH) sponsored repository charged to archive, curate and distribute information produced by studies investigating the interaction of genotype and phenotype.
+ dbGaP format
+
+
+
+
+
+
+
+
+
+
+ 1.15
+
+ biom
+ The BIological Observation Matrix (BIOM) is a format for representing biological sample by observation contingency tables in broad areas of comparative omics. The primary use of this format is to represent OTU tables and metagenome tables.
+ BIological Observation Matrix format
+ biom
+
+
+ BIOM is a recognised standard for the Earth Microbiome Project, and is a project supported by Genomics Standards Consortium. Supported in QIIME, Mothur, MEGAN, etc.
+ BIOM format
+
+
+
+
+
+
+
+
+
+ 1.15
+
+
+ A format for storage, exchange, and processing of protein identifications created from ms/ms-derived peptide sequence data.
+
+
+ No human-consumable information about this format is available (see http://tools.proteomecenter.org/wiki/index.php?title=Formats:protXML).
+ protXML
+ http://doi.org/10.1038/msb4100024
+ http://sashimi.sourceforge.net/schema_revision/protXML/protXML_v3.xsd
+
+
+
+
+
+
+
+
+
+
+ 1.15
+ true
+ A linked data format enables publishing structured data as linked data (Linked Data), so that the data can be interlinked and become more useful through semantic queries.
+ Semantic Web format
+
+
+ Linked data format
+
+
+
+
+
+
+
+
+
+
+
+ 1.15
+
+ jsonld
+
+
+ JSON-LD, or JavaScript Object Notation for Linked Data, is a method of encoding Linked Data using JSON.
+ JavaScript Object Notation for Linked Data
+ jsonld
+
+
+ JSON-LD
+
+
+
+
+
+
+
+
+
+ 1.15
+
+ yaml
+ yml
+
+ YAML (YAML Ain't Markup Language) is a human-readable tree-structured data serialisation language.
+ YAML Ain't Markup Language
+ yml
+
+
+ Data in YAML format can be serialised into text, or binary format.
+ YAML version 1.2 is a superset of JSON; prior versions were "not strictly compatible".
+ YAML
+
+
+
+
+
+
+
+
+
+ 1.16
+ Tabular data represented as values in a text file delimited by some character.
+ Delimiter-separated values
+ Tabular format
+
+
+ DSV
+
+
+
+
+
+
+
+
+
+ 1.16
+ csv
+
+
+
+ Tabular data represented as comma-separated values in a text file.
+ Comma-separated values
+
+
+ CSV
+
+
+
+
+
+
+
+
+
+ 1.16
+ out
+ "Raw" result file from SEQUEST database search.
+
+
+ SEQUEST .out file
+
+
+
+
+
+
+
+
+
+ 1.16
+ http://ftp.mi.fu-berlin.de/pub/OpenMS/release1.9-documentation/html/classOpenMS_1_1IdXMLFile.html
+ http://open-ms.sourceforge.net/schemas/
+ XML file format for files containing information about peptide identifications from mass spectrometry data analysis carried out with OpenMS.
+
+
+ idXML
+
+
+
+
+
+
+
+
+ 1.16
+ Data table formatted such that it can be passed/streamed within the KNIME platform.
+
+
+ KNIME datatable format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ UniProtKB XML sequence features format is an XML format available for downloading UniProt entries.
+ UniProt XML
+ UniProt XML format
+ UniProtKB XML format
+
+
+ UniProtKB XML
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ UniProtKB RDF sequence features format is an RDF format available for downloading UniProt entries (in RDF/XML).
+ UniProt RDF
+ UniProt RDF format
+ UniProt RDF/XML
+ UniProt RDF/XML format
+ UniProtKB RDF format
+ UniProtKB RDF/XML
+ UniProtKB RDF/XML format
+
+
+ UniProtKB RDF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+ BioJSON is a BioXSD-schema-based JSON format of sequence-based data and some other common data - sequence records, alignments, feature records, references to resources, and more - optimised for integrative bioinformatics, web applications and APIs, and object-oriented programming.
+ BioJSON (BioXSD data model)
+ BioJSON format (BioXSD)
+ BioXSD BioJSON
+ BioXSD BioJSON format
+ BioXSD JSON
+ BioXSD JSON format
+ BioXSD in JSON
+ BioXSD in JSON format
+ BioXSD+JSON
+ BioXSD/GTrack BioJSON
+ BioXSD|BioJSON|BioYAML BioJSON
+ BioXSD|GTrack BioJSON
+
+
+ Work in progress. 'BioXSD' belongs to the 'BioXSD|GTrack' ecosystem of generic formats. 'BioJSON' is the JSON format based on the common, unified 'BioXSD data model', a.k.a. 'BioXSD|BioJSON|BioYAML'.
+ BioJSON (BioXSD)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+ BioYAML is a BioXSD-schema-based YAML format of sequence-based data and some other common data - sequence records, alignments, feature records, references to resources, and more - optimised for integrative bioinformatics, web APIs, human readability and editing, and object-oriented programming.
+ BioXSD BioYAML
+ BioXSD BioYAML format
+ BioXSD YAML
+ BioXSD YAML format
+ BioXSD in YAML
+ BioXSD in YAML format
+ BioXSD+YAML
+ BioXSD/GTrack BioYAML
+ BioXSD|BioJSON|BioYAML BioYAML
+ BioXSD|GTrack BioYAML
+ BioYAML (BioXSD data model)
+ BioYAML (BioXSD)
+ BioYAML format
+ BioYAML format (BioXSD)
+
+
+ Work in progress. 'BioXSD' belongs to the 'BioXSD|GTrack' ecosystem of generic formats. 'BioYAML' is the YAML format based on the common, unified 'BioXSD data model', a.k.a. 'BioXSD|BioJSON|BioYAML'.
+ BioYAML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ BioJSON is a JSON format of single multiple sequence alignments, with their annotations, features, and custom visualisation and application settings for the Jalview workbench.
+ BioJSON format (Jalview)
+ JSON (Jalview)
+ JSON format (Jalview)
+ Jalview BioJSON
+ Jalview BioJSON format
+ Jalview JSON
+ Jalview JSON format
+
+
+ BioJSON (Jalview)
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+
+ GSuite is a tabular format for collections of genome or sequence feature tracks, suitable for integrative multi-track analysis. GSuite contains links to genome/sequence tracks, with additional metadata.
+ BioXSD/GTrack GSuite
+ BioXSD|GTrack GSuite
+ GSuite (GTrack ecosystem of formats)
+ GSuite format
+ GTrack|BTrack|GSuite GSuite
+ GTrack|GSuite|BTrack GSuite
+
+
+ 'GSuite' belongs to the 'BioXSD|GTrack' ecosystem of generic formats, and in particular to its subset, the 'GTrack ecosystem' (GTrack, GSuite, BTrack). 'GSuite' is the tabular format for an annotated collection of individual GTrack files.
+ GSuite
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ BTrack is an HDF5-based binary format for genome or sequence feature tracks and their collections, suitable for integrative multi-track analysis. BTrack is a binary, compressed alternative to the GTrack and GSuite formats.
+ BTrack (GTrack ecosystem of formats)
+ BTrack format
+ BioXSD/GTrack BTrack
+ BioXSD|GTrack BTrack
+ GTrack|BTrack|GSuite BTrack
+ GTrack|GSuite|BTrack BTrack
+
+
+ 'BTrack' belongs to the 'BioXSD|GTrack' ecosystem of generic formats, and in particular to its subset, the 'GTrack ecosystem' (GTrack, GSuite, BTrack). 'BTrack' is the binary, optionally compressed HDF5-based version of the GTrack and GSuite formats.
+ BTrack
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+
+
+
+
+
+ The FAO/Bioversity/IPGRI Multi-Crop Passport Descriptors (MCPD) is an international standard format for exchange of germplasm information.
+ Bioversity MCPD
+ FAO MCPD
+ IPGRI MCPD
+ MCPD V.1
+ MCPD V.2
+ MCPD format
+ Multi-Crop Passport Descriptors
+ Multi-Crop Passport Descriptors format
+
+
+ Multi-Crop Passport Descriptors is a format available in 2 successive versions, V.1 (FAO/IPGRI 2001) and V.2 (FAO/Bioversity 2012).
+ MCPD
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ true
+ Data format of an annotated text, e.g. with recognised entities, concepts, and relations.
+
+
+ Annotated text format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ JSON format of annotated scientific text used by PubAnnotations and other tools.
+
+
+ PubAnnotation format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ BioC is a standardised XML format for sharing and integrating text data and annotations.
+
+
+ BioC
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ Native textual export format of annotated scientific text from PubTator.
+
+
+ PubTator format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ A format of text annotation using the linked-data Open Annotation Data Model, serialised typically in RDF or JSON-LD.
+
+
+ Open Annotation format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+
+
+
+
+
+
+
+ A family of similar formats of text annotation, used by BRAT and other tools, known as BioNLP Shared Task format (BioNLP 2009 Shared Task on Event Extraction, BioNLP Shared Task 2011, BioNLP Shared Task 2013), BRAT format, BRAT standoff format, and similar.
+ BRAT format
+ BRAT standoff format
+
+
+ BioNLP Shared Task format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ true
+ A query language (format) for structured database queries.
+ Query format
+
+
+ Query language
+
+
+
+
+
+
+
+
+ 1.16
+ sql
+
+
+
+ SQL (Structured Query Language) is the de-facto standard query language (format of queries) for querying and manipulating data in relational databases.
+ Structured Query Language
+
+
+ SQL
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ xq
+ xquery
+ xqy
+
+ XQuery (XML Query) is a query language (format of queries) for querying and manipulating structured and unstructured data, usually in the form of XML, text, and with vendor-specific extensions for other data formats (JSON, binary, etc.).
+ XML Query
+ xq
+ xqy
+
+
+ XQuery
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ SPARQL (SPARQL Protocol and RDF Query Language) is a semantic query language for querying and manipulating data stored in Resource Description Framework (RDF) format.
+ SPARQL Protocol and RDF Query Language
+
+
+ SPARQL
+
+
+
+
+
+
+
+
+
+ 1.17
+ XML format for XML Schema.
+
+
+ xsd
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ XMFA format stands for eXtended Multi-FastA format and is used to store collinear sub-alignments that constitute a single genome alignment.
+ eXtended Multi-FastA format
+
+
+ XMFA
+
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ The GEN file format contains genetic data and describes SNPs.
+ Genotype file format
+
+
+ GEN
+
+
+
+
+
+
+
+
+ 1.20
+
+ The SAMPLE file format contains information about each individual i.e. individual IDs, covariates, phenotypes and missing data proportions, from a GWAS study.
+
+
+ SAMPLE file format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ SDF is one of a family of chemical-data file formats developed by MDL Information Systems; it is intended especially for structural information.
+
+
+ SDF
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ An MDL Molfile is a file format for holding information about the atoms, bonds, connectivity and coordinates of a molecule.
+
+
+ Molfile
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Complete, portable representation of a SYBYL molecule. ASCII file which contains all the information needed to reconstruct a SYBYL molecule.
+
+
+ Mol2
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Format for the LaTeX document preparation system.
+ LaTeX format
+
+
+ uses the TeX typesetting program format
+ latex
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Tab-delimited text file format used by Eland - the read-mapping program distributed by Illumina with its sequencing analysis pipeline - which maps short Solexa sequence reads to the human reference genome.
+ ELAND
+ eland
+
+
+ ELAND format
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ Phylip multiple alignment sequence format, less stringent than PHYLIP format.
+ PHYLIP Interleaved format
+
+
+ It differs from Phylip Format (format_1997) on length of the ID sequence. There are no length restrictions on the ID, but whitespaces aren't allowed in the sequence ID/Name because one space separates the longest ID and the beginning of the sequence. Sequence IDs must be padded to the longest ID length.
+ Relaxed PHYLIP Interleaved
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ Phylip multiple alignment sequence format, less stringent than PHYLIP sequential format (format_1998).
+ Relaxed PHYLIP non-interleaved
+ Relaxed PHYLIP non-interleaved format
+ Relaxed PHYLIP sequential format
+
+
+ It differs from Phylip sequential format (format_1997) on length of the ID sequence. There are no length restrictions on the ID, but whitespaces aren't allowed in the sequence ID/Name because one space separates the longest ID and the beginning of the sequence. Sequence IDs must be padded to the longest ID length.
+ Relaxed PHYLIP Sequential
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Default XML format of VisANT, containing all the network information.
+ VisANT xml
+ VisANT xml format
+
+
+ VisML
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ GML (Graph Modeling Language) is a text file format supporting network data with a very easy syntax. It is used by Graphlet, Pajek, yEd, LEDA and NetworkX.
+ GML format
+
+
+ GML
+
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ FASTG is a format for faithfully representing genome assemblies in the face of allelic polymorphism and assembly uncertainty.
+ FASTG assembly graph format
+
+
+ It is called FASTG, like FASTA, but the G stands for "graph".
+ FASTG
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.20
+ true
+ Data format for raw data from a nuclear magnetic resonance (NMR) spectroscopy experiment.
+ NMR peak assignment data format
+ NMR processed data format
+ NMR raw data format
+ Nuclear magnetic resonance spectroscopy data format
+ Processed NMR data format
+ Raw NMR data format
+
+
+ NMR data format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ nmrML is an MSI supported XML-based open access format for metabolomics NMR raw and processed spectral data. It is accompanied by an nmrCV (controlled vocabulary) to allow ontology-based annotations.
+
+
+ nmrML
+
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ proBAM is an adaptation of BAM (format_2572), which was extended to meet specific requirements entailed by proteomics data.
+
+
+ proBAM
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ proBED is an adaptation of BED (format_3003), which was extended to meet specific requirements entailed by proteomics data.
+
+
+ proBED
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.20
+ true
+ Data format for raw microarray data.
+ Microarray data format
+
+
+ Raw microarray data format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ GenePix Results (GPR) text file format developed by Axon Instruments that is used to save GenePix Results data.
+
+
+ GPR
+
+
+
+
+
+
+
+
+
+ 1.20
+ Binary format used by the ARB software suite.
+ ARB binary format
+
+
+ ARB
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://ftp.mi.fu-berlin.de/pub/OpenMS/release1.9-documentation/html/classOpenMS_1_1ConsensusXMLFile.html
+ OpenMS format for grouping features in one map or across several maps.
+
+
+ consensusXML
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://ftp.mi.fu-berlin.de/pub/OpenMS/release1.9-documentation/html/classOpenMS_1_1FeatureXMLFile.html
+ OpenMS format for quantitation results (LC/MS features).
+
+
+ featureXML
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://www.psidev.info/mzdata-1_0_5-docs
+ Now deprecated data format of the HUPO Proteomics Standards Initiative. Replaced by mzML (format_3244).
+
+
+ mzData
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://cruxtoolkit.sourceforge.net/tide-search.html
+ Format supported by the Tide tool for identifying peptides from tandem mass spectra.
+
+
+ TIDE TXT
+
+
+
+
+
+
+
+
+
+ 1.20
+ ftp://ftp.ncbi.nlm.nih.gov/blast/documents/NEWXML/ProposedBLASTXMLChanges.pdf
+ ftp://ftp.ncbi.nlm.nih.gov/blast/documents/NEWXML/xml2.pdf
+ http://www.ncbi.nlm.nih.gov/data_specs/schema/NCBI_BlastOutput2.mod.xsd
+ XML format as produced by the NCBI Blast package v2.
+
+
+ BLAST XML v2 results format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ Microsoft Powerpoint format.
+
+
+ pptx
+
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ ibd
+
+ ibd is a data format for mass spectrometry imaging data.
+
+
+ imzML data is recorded in 2 files: '.imzXML' is a metadata XML file based on mzML by HUPO-PSI, and '.ibd' is a binary file containing the mass spectra.
+ ibd
+
+
+
+
+
+
+
+
+ 1.21
+ Data format used in Natural Language Processing.
+ Natural Language Processing format
+
+
+ NLP format
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ XML input file format for BEAST Software (Bayesian Evolutionary Analysis Sampling Trees).
+
+
+ BEAST
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ Chado-XML format is a direct mapping of the Chado relational schema into XML.
+
+
+ Chado-XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ An alignment format generated by PRANK/PRANKSTER consisting of four elements: newick, nodes, selection and model.
+
+
+ HSAML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ Output xml file from the InterProScan sequence analysis application.
+
+
+ InterProScan XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ The KEGG Markup Language (KGML) is an exchange format of the KEGG pathway maps, which is converted from internally used KGML+ (KGML+SVG) format.
+ KEGG Markup Language
+
+
+ KGML
+
+
+
+
+
+
+
+
+
+ 1.21
+ XML format for collected entries from bibliographic databases MEDLINE and PubMed.
+ MEDLINE XML
+
+
+ PubMed XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ A set of XML compliant markup components for describing multiple sequence alignments.
+
+
+ MSAML
+
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ OrthoXML is designed broadly to allow the storage and comparison of orthology data from any ortholog database. It establishes a structure for describing orthology relationships while still allowing flexibility for database-specific information to be encapsulated in the same format.
+
+
+ OrthoXML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ Tree structure of Protein Sequence Database Markup Language generated using Matra software.
+
+
+ PSDML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ SeqXML is an XML Schema to describe biological sequences, developed by the Stockholm Bioinformatics Centre.
+
+
+ SeqXML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ XML format for the UniParc database.
+
+
+ UniParc XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ XML format for the UniRef reference clusters.
+
+
+ UniRef XML
+
+
+
+
+
+
+
+
+
+
+ 1.21
+
+
+
+
+ cwl
+
+
+
+ Common Workflow Language (CWL) format for description of command-line tools and workflows.
+ Common Workflow Language
+ CommonWL
+
+
+ CWL
+
+
+
+
+
+
+
+
+
+ 1.21
+ Proprietary file format for mass spectrometry data from Waters.
+
+
+ Proprietary format for which documentation is not available, but used by multiple tools.
+ Waters RAW
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ A standardized file format for data exchange in mass spectrometry, initially developed for infrared spectrometry.
+
+
+ JCAMP-DX is an ASCII based format and therefore not very compact even though it includes standards for file compression.
+ JCAMP-DX
+
+
+
+
+
+
+
+
+
+ 1.21
+ An NLP format used for annotated textual documents.
+
+
+ NLP annotation format
+
+
+
+
+
+
+
+
+ 1.21
+ NLP format used by a specific type of corpus (collection of texts).
+
+
+ NLP corpus format
+
+
+
+
+
+
+
+
+
+
+ 1.21
+
+
+
+ mirGFF3 is a common format for microRNA data resulting from small-RNA RNA-Seq workflows.
+ miRTop format
+
+
+ mirGFF3 is a specialisation of GFF3; produced by small-RNA-Seq analysis workflows, usable and convertible with the miRTop API (https://mirtop.readthedocs.io/en/latest/), and consumable by tools for downstream analysis.
+ mirGFF3
+
+
+
+
+
+
+
+
+ 1.21
+ A "placeholder" concept for formats of annotated RNA data, including e.g. microRNA and RNA-Seq data.
+ RNA data format
+ miRNA data format
+ microRNA data format
+
+
+ RNA annotation format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ File format to store trajectory information for a 3D structure.
+ CG trajectory formats
+ MD trajectory formats
+ NA trajectory formats
+ Protein trajectory formats
+
+
+ Formats differ on what they are able to store (coordinates, velocities, topologies) and how they are storing it (raw, compressed, textual, binary).
+ Trajectory format
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ Binary file format to store trajectory information for a 3D structure.
+
+
+ Trajectory format (binary)
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ Textual file format to store trajectory information for a 3D structure.
+
+
+ Trajectory format (text)
+
+
+
+
+
+
+
+
+
+ 1.22
+ HDF is the name of a set of file formats and libraries designed to store and organize large amounts of numerical data, originally developed at the National Center for Supercomputing Applications at the University of Illinois.
+
+
+ HDF is currently supported by many commercial and non-commercial software platforms such as Java, MATLAB/Scilab, Octave, Python and R.
+ HDF
+
+
+
+
+
+
+
+
+
+ 1.22
+ PCAZip format is a binary compressed file to store atom coordinates based on Essential Dynamics (ED) and Principal Component Analysis (PCA).
+
+
+ The compression is made projecting the Cartesian snapshots collected along the trajectory into an orthogonal space defined by the most relevant eigenvectors obtained by diagonalization of the covariance matrix (PCA). In the compression/decompression process, part of the original information is lost, depending on the final number of eigenvectors chosen. However, with a reasonable choice of the set of eigenvectors the compression typically reduces the trajectory file to less than one tenth of their original size with very acceptable loss of information. Compression with PCAZip can only be applied to unsolvated structures.
+ PCAzip
+
+
+
+
+
+
+
+
+
+ 1.22
+ Portable binary format for trajectories produced by GROMACS package.
+
+
+ XTC uses the External Data Representation (xdr) routines for writing and reading data which were created for the Unix Network File System (NFS). XTC files use a reduced precision (lossy) algorithm which works multiplying the coordinates by a scaling factor (typically 1000), so converting them to pm (GROMACS standard distance unit is nm). This allows an integer rounding of the values. Several other tricks are performed, such as making use of atom proximity information: atoms close in sequence are usually close in space (e.g. water molecules). That makes XTC format the most efficient in terms of disk usage, in most cases reducing by a factor of 2 the size of any other binary trajectory format.
+ XTC
+
+
+
+
+
+
+
+
+
+ 1.22
+ Trajectory Next Generation (TNG) is a format for storage of molecular simulation data. It is designed and implemented by the GROMACS development group, and it is intended to replace the XTC format.
+ Trajectory Next Generation format
+
+
+ Fully architecture-independent format, regarding both endianness and the ability to mix single/double precision trajectories and I/O libraries. Self-sufficient, it should not require any other files for reading, and all the data should be contained in a single file for easy transport. Temporal compression of data, improving the compression rate of the previous XTC format. Possibility to store meta-data with information about the simulation. Direct access to a particular frame. Efficient parallel I/O.
+ TNG
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ The XYZ chemical file format is widely supported by many programs, although many slightly different XYZ file formats coexist (Tinker XYZ, UniChem XYZ, etc.). Basic information stored for each atom in the system are x, y and z coordinates and atom element/atomic number.
+
+
+ XYZ files are structured in this way: First line contains the number of atoms in the file. Second line contains a title, comment, or filename. Remaining lines contain atom information. Each line starts with the element symbol, followed by x, y and z coordinates in angstroms separated by whitespace. Multiple molecules or frames can be contained within one file, so it supports trajectory storage. XYZ files can be directly represented by a molecular viewer, as they contain all the basic information needed to build the 3D model.
+ XYZ
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER trajectory (also called mdcrd), with 10 coordinates per line and format F8.3 (fixed point notation with field width 8 and 3 decimal places).
+ AMBER trajectory format
+ inpcrd
+
+
+ mdcrd
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ Format of topology files; containing the static information of a structure molecular system that is needed for a molecular simulation.
+ CG topology format
+ MD topology format
+ NA topology format
+ Protein topology format
+
+
+ Many different file formats exist describing structural molecular topology. Typically, each MD package or simulation software works with their own implementation (e.g. GROMACS top, CHARMM psf, AMBER prmtop).
+ Topology format
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ GROMACS MD package top textual files define an entire structure system topology, either directly, or by including itp files.
+
+
+ There is currently no tool available for conversion between GROMACS topology format and other formats, due to the internal differences in both approaches. There is, however, a method to convert small molecules parameterized with AMBER force-field into GROMACS format, allowing simulations of these systems with GROMACS MD package.
+ GROMACS top
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER Prmtop file (version 7) is a structure topology text file divided in several sections designed to be parsed easily using simple Fortran code. Each section contains particular topology information, such as atom name, charge, mass, angles, dihedrals, etc.
+ AMBER Parm
+ AMBER Parm7
+ Parm7
+ Prmtop
+ Prmtop7
+
+
+ It can be modified manually, but as the size of the system increases, the hand-editing becomes increasingly complex. AMBER Parameter-Topology file format is used extensively by the AMBER software suite and is referred to as the Prmtop file for short.
+ version 7 is written to distinguish it from old versions of AMBER Prmtop. Similarly to HDF5, it is a completely different format, according to AMBER group: a drastic change to the file format occurred with the 2004 release of Amber 7 (http://ambermd.org/prmtop.pdf)
+ AMBER top
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ X-Plor Protein Structure Files (PSF) are structure topology files used by NAMD and CHARMM molecular simulations programs. PSF files contain six main sections of interest: atoms, bonds, angles, dihedrals, improper dihedrals (force terms used to maintain planarity) and cross-terms.
+
+
+ The high similarity in the functional form of the two potential energy functions used by AMBER and CHARMM force-fields gives rise to the possible use of one force-field within the other MD engine. Therefore, the conversion of PSF files to AMBER Prmtop format is possible with the use of AMBER chamber (CHARMM - AMBER) program.
+ PSF
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ GROMACS itp files (include topology) contain structure topology information, and are typically included in GROMACS topology files (GROMACS top). Itp files are used to define individual (or multiple) components of a topology as a separate file. This is particularly useful if there is a molecule that is used frequently, and also reduces the size of the system topology file, splitting it in different parts.
+
+
+ GROMACS itp files are used also to define position restrictions on the molecule, or to define the force field parameters for a particular ligand.
+ GROMACS itp
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Format of force field parameter files, which store the set of parameters (charges, masses, radii, bond lengths, bond dihedrals, etc.) that are essential for the proper description and simulation of a molecular system.
+ Many different file formats exist describing force field parameters. Typically, each MD package or simulation software works with their own implementation (e.g. GROMACS itp, CHARMM rtf, AMBER off / frcmod).
+ FF parameter format
+
+
+
+
+
+
+
+
+
+ 1.22
+ Scripps Research Institute BinPos format is a binary formatted file to store atom coordinates.
+ Scripps Research Institute BinPos
+
+
+ It is basically a translation of the ASCII atom coordinate format to binary code. The only additional information stored is a magic number that identifies the BinPos format and the number of atoms per snapshot. The remainder is the chain of coordinates binary encoded. A drawback of this format is its architecture dependency. Integers and floats codification depends on the architecture, thus it needs to be converted if working in different platforms (little endian, big endian).
+ BinPos
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER coordinate/restart file with 6 coordinates per line and decimal format F12.7 (fixed point notation with field width 12 and 7 decimal places).
+ restrt
+ rst7
+
+
+ RST
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Format of CHARMM Residue Topology Files (RTF), which define groups by including the atoms, the properties of the group, and bond and charge information.
+
+
+ There is currently no tool available for conversion between GROMACS topology format and other formats, due to the internal differences in both approaches. There is, however, a method to convert small molecules parameterized with AMBER force-field into GROMACS format, allowing simulations of these systems with GROMACS MD package.
+ CHARMM rtf
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER frcmod (Force field Modification) is a file format to store any modification to the standard force field needed for a particular molecule to be properly represented in the simulation.
+
+
+ AMBER frcmod
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER Object File Format library files (OFF library files) store residue libraries (forcefield residue parameters).
+ AMBER Object File Format
+ AMBER lib
+ AMBER off
+
+
+
+
+
+
+
+
+
+ 1.22
+ NMReData is a text based data standard for processed NMR data. It is relying on SDF molecule data and allows to store assignments of NMR peaks to molecule features. The NMR-extracted data (or "NMReDATA") includes: Chemical shift, scalar coupling, 2D correlation, assignment, etc.
+
+
+ NMReData is a text based data standard for processed NMR data. It is relying on SDF molecule data and allows to store assignments of NMR peaks to molecule features. The NMR-extracted data (or "NMReDATA") includes: Chemical shift, scalar coupling, 2D correlation, assignment, etc. Find more in the paper at https://doi.org/10.1002/mrc.4527.
+ NMReDATA
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+
+
+
+ BpForms is a string format for concretely representing the primary structures of biopolymers, including DNA, RNA, and proteins that include non-canonical nucleic and amino acids. See https://www.bpforms.org for more information.
+
+
+ BpForms
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ Format of trr files that contain the trajectory of a simulation experiment used by GROMACS.
+ The first 4 bytes of any trr file contain 1993. See https://github.com/galaxyproject/galaxy/pull/6597/files#diff-409951594551183dbf886e24de6cb129R760
+ trr
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+
+
+
+
+ msh
+
+
+
+ Mash sketch is a format for sequence / sequence checksum information. To make a sketch, each k-mer in a sequence is hashed, which creates a pseudo-random identifier. By sorting these hashes, a small subset from the top of the sorted list can represent the entire sequence.
+ Mash sketch
+ min-hash sketch
+
+
+ msh
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.23
+
+
+
+ loom
+ The Loom file format is based on HDF5, a standard for storing large numerical datasets. The Loom format is designed to efficiently hold large omics datasets. Typically, such data takes the form of a large matrix of numbers, along with metadata for the rows and columns.
+ Loom
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.23
+
+
+
+ zarray
+ zgroup
+ The Zarr format is an implementation of chunked, compressed, N-dimensional arrays for storing data.
+ Zarr
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.23
+
+
+ mtx
+
+ The Matrix Market matrix (MTX) format stores numerical or pattern matrices in a dense (array format) or sparse (coordinate format) representation.
+ MTX
+
+
+
+
+
+
+
+
+
+
+ 1.24
+
+
+
+
+
+ text/plain
+
+
+ BcForms is a format for abstractly describing the molecular structure (atoms and bonds) of macromolecular complexes as a collection of subunits and crosslinks. Each subunit can be described with BpForms (http://edamontology.org/format_3909) or SMILES (http://edamontology.org/data_2301). BcForms uses an ontology of crosslinks to abstract the chemical details of crosslinks from the descriptions of complexes (see https://bpforms.org/crosslink.html).
+ BcForms is related to http://edamontology.org/format_3909. (BcForms uses BpForms to describe subunits which are DNA, RNA, or protein polymers.) However, that format isn't the parent of BcForms. BcForms is similarly related to SMILES (http://edamontology.org/data_2301).
+ BcForms
+
+
+
+
+
+
+
+
+
+ 1.24
+
+ nq
+ N-Quads is a line-based, plain text format for encoding an RDF dataset. It includes information about the graph each triple belongs to.
+
+
+ N-Quads should not be confused with N-Triples which does not contain graph information.
+ N-Quads
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ json
+ application/json
+
+ Vega is a visualization grammar, a declarative language for creating, saving, and sharing interactive visualization designs. With Vega, you can describe the visual appearance and interactive behavior of a visualization in a JSON format, and generate web-based views using Canvas or SVG.
+
+
+ Vega
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ json
+ application/json
+
+ Vega-Lite is a high-level grammar of interactive graphics. It provides a concise JSON syntax for rapidly generating visualizations to support analysis. Vega-Lite specifications can be compiled to Vega specifications.
+
+
+ Vega-lite
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ application/xml
+
+ A model description language for computational neuroscience.
+
+
+ NeuroML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ bngl
+ application/xml
+ plain/text
+
+ BioNetGen is a format for the specification and simulation of rule-based models of biochemical systems, including signal transduction, metabolic, and genetic regulatory networks.
+ BioNetGen Language
+
+
+ BNGL
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+ A Docker image is a file, comprised of multiple layers, that is used to execute code in a Docker container. An image is essentially built from the instructions for a complete and executable version of an application, which relies on the host OS kernel.
+
+
+ Docker image
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+ gfa
+
+ Graphical Fragment Assembly captures sequence graphs as the product of an assembly, a representation of variation in genomes, splice graphs in genes, or even overlap between reads from long-read sequencing technology.
+ Graphical Fragment Assembly (GFA) 1.0
+
+
+ GFA 1
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+ gfa
+
+ Graphical Fragment Assembly captures sequence graphs as the product of an assembly, a representation of variation in genomes, splice graphs in genes, or even overlap between reads from long-read sequencing technology. GFA2 is an update of GFA1 which is not compatible with GFA1.
+ Graphical Fragment Assembly (GFA) 2.0
+
+
+ GFA 2
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+ xlsx
+ application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
+
+ ObjTables is a toolkit for creating re-usable datasets that are both human and machine-readable, combining the ease of spreadsheets (e.g., Excel workbooks) with the rigor of schemas (classes, their attributes, the type of each attribute, and the possible relationships between instances of classes). ObjTables consists of a format for describing schemas for spreadsheets, numerous data types for science, a syntax for indicating the class and attribute represented by each table and column in a workbook, and software for using schemas to rigorously validate, merge, split, compare, and revision datasets.
+
+
+ ObjTables
+
+
+
+
+
+
+
+
+
+ 1.25
+ contig
+ The CONTIG format used for output of the SOAPdenovo alignment program. It contains contig sequences generated without using mate pair information.
+
+
+ CONTIG
+
+
+
+
+
+
+
+
+
+ 1.25
+ wego
+ WEGO native format used by the Web Gene Ontology Annotation Plot application. Tab-delimited format with gene names and others GO IDs (columns) with one annotation record per line.
+
+
+ WEGO
+
+
+
+
+
+
+
+
+
+ 1.25
+ rpkm
+ Tab-delimited format for gene expression levels table, calculated as Reads Per Kilobase per Million (RPKM) mapped reads.
+ Gene expression levels table format
+
+
+ For example a 1kb transcript with 1000 alignments in a sample of 10 million reads (out of which 8 million reads can be mapped) will have RPKM = 1000/(1 * 8) = 125
+ RPKM
+
+
+
+
+
+
+
+
+ 1.25
+ tar
+ TAR archive file format generated by the Unix-based utility tar.
+ TAR
+ Tarball
+ tar
+
+
+ For example a 1kb transcript with 1000 alignments in a sample of 10 million reads (out of which 8 million reads can be mapped) will have RPKM = 1000/(1 * 8) = 125
+ TAR format
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ chain
+ The CHAIN format describes a pairwise alignment that allows gaps in both sequences simultaneously and is used by the UCSC Genome Browser.
+
+
+ CHAIN
+ https://genome.ucsc.edu/goldenPath/help/chain.html
+
+
+
+
+
+
+
+
+
+ 1.25
+ net
+ The NET file format is used to describe the data that underlie the net alignment annotations in the UCSC Genome Browser.
+
+
+ NET
+ https://genome.ucsc.edu/goldenPath/help/net.html
+
+
+
+
+
+
+
+
+
+ 1.25
+ qmap
+ Format of QMAP files generated for methylation data from an internal BGI pipeline.
+
+
+ QMAP
+
+
+
+
+
+
+
+
+
+ 1.25
+ ga
+ An emerging format for high-level Galaxy workflow description.
+ Galaxy workflow format
+ GalaxyWF
+ ga
+
+
+ gxformat2
+ https://github.com/galaxyproject/gxformat2
+
+
+
+
+
+
+
+
+
+ 1.25
+ wmv
+ The proprietary native video format of various Microsoft programs such as Windows Media Player.
+ Windows Media Video format
+ Windows movie file format
+
+
+ WMV
+
+
+
+
+
+
+
+
+
+ 1.25
+ zip
+ ZIP is an archive file format that supports lossless data compression.
+ ZIP
+
+
+ A ZIP file may contain one or more files or directories that may have been compressed.
+ ZIP format
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ lsm
+ Zeiss' proprietary image format based on TIFF.
+
+
+ LSM files are the default data export for the Zeiss LSM series confocal microscopes (e.g. LSM 510, LSM 710). In addition to the image data, LSM files contain most imaging settings.
+ LSM
+
+
+
+
+
+
+
+
+ 1.25
+ gz
+ gzip
+ GNU zip compressed file format common to Unix-based operating systems.
+ GNU Zip
+ gz
+ gzip
+
+
+ GZIP format
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ avi
+ Audio Video Interleaved (AVI) format is a multimedia container format for AVI files, that allows synchronous audio-with-video playback.
+ Audio Video Interleaved
+
+
+ AVI
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ trackdb
+ A declaration file format for UCSC browsers track dataset display characteristics.
+
+
+ TrackDB
+
+
+
+
+
+
+
+
+
+ 1.25
+ cigar
+ Compact Idiosyncratic Gapped Alignment Report format is a compressed (run-length encoded) pairwise alignment format. It is useful for representing long (e.g. genomic) pairwise alignments.
+ CIGAR
+
+
+ CIGAR format
+ http://wiki.bits.vib.be/index.php/CIGAR/
+
+
+
+
+
+
+
+
+
+ 1.25
+ stl
+ STL is a file format native to the stereolithography CAD software created by 3D Systems. The format is used to save and share surface-rendered 3D images and also for 3D printing.
+ stl
+
+
+ Stereolithography format
+
+
+
+
+
+
+
+
+
+ 1.25
+ u3d
+ U3D (Universal 3D) is a compressed file format and data structure for 3D computer graphics. It contains 3D model information such as triangle meshes, lighting, shading, motion data, lines and points with color and structure.
+ Universal 3D
+ Universal 3D format
+
+
+ U3D
+
+
+
+
+
+
+
+
+
+ 1.25
+ tex
+ Bitmap image format used for storing textures.
+
+
+ Texture files can create the appearance of different surfaces and can be applied to both 2D and 3D objects. Note the file extension .tex is also used for LaTeX documents which are a completely different format and they are NOT interchangeable.
+ Texture file format
+
+
+
+
+
+
+
+
+
+ 1.25
+ py
+ Format for scripts written in Python - a widely used high-level programming language for general-purpose programming.
+ Python
+ Python program
+ py
+
+
+ Python script
+
+
+
+
+
+
+
+
+
+ 1.25
+ mp4
+ A digital multimedia container format most commonly used to store video and audio.
+ MP4
+
+
+ MPEG-4
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ pl
+ Format for scripts written in Perl - a family of high-level, general-purpose, interpreted, dynamic programming languages.
+ Perl
+ Perl program
+ pl
+
+
+ Perl script
+
+
+
+
+
+
+
+
+
+ 1.25
+ r
+ Format for scripts written in the R language - an open source programming language and software environment for statistical computing and graphics that is supported by the R Foundation for Statistical Computing.
+ R
+ R program
+
+
+ R script
+
+
+
+
+
+
+
+
+
+ 1.25
+ rmd
+ A file format for making dynamic documents (R Markdown scripts) with the R language.
+
+
+ R markdown
+ https://rmarkdown.rstudio.com/articles_intro.html
+
+
+
+
+
+
+
+
+ 1.25
+ This duplicates an existing concept (http://edamontology.org/format_3549).
+ 1.26
+
+ An open file format from the Neuroimaging Informatics Technology Initiative (NIfTI) commonly used to store brain imaging data obtained using Magnetic Resonance Imaging (MRI) methods.
+
+
+ NIFTI format
+ true
+
+
+
+
+
+
+
+
+ 1.25
+ pickle
+ Format used by Python pickle module for serializing and de-serializing a Python object structure.
+
+
+ pickle
+ https://docs.python.org/2/library/pickle.html
+
+
+
+
+
+
+
+
+ 1.25
+ npy
+ The standard binary file format used by NumPy - a fundamental package for scientific computing with Python - for persisting a single arbitrary NumPy array on disk. The format stores all of the shape and dtype information necessary to reconstruct the array correctly.
+ NumPy
+ npy
+
+
+ NumPy format
+
+
+
+
+
+
+
+
+ 1.25
+ repz
+ Format of repertoire (archive) files that can be read by SimToolbox (a MATLAB toolbox for structured illumination fluorescence microscopy) or alternatively extracted with zip file archiver software.
+
+
+ SimTools repertoire file format
+ https://pdfs.semanticscholar.org/5f25/f1cc6cdf2225fe22dc6fd4fc0296d486a85c.pdf
+
+
+
+
+
+
+
+
+ 1.25
+ cfg
+ A configuration file used by various programs to store settings that are specific to their respective software.
+
+
+ Configuration file format
+
+
+
+
+
+
+
+
+ 1.25
+ zst
+ Format used by the Zstandard real-time compression algorithm.
+ Zstandard compression format
+ Zstandard-compressed file format
+ zst
+
+
+ Zstandard format
+ https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md
+
+
+
+
+
+
+
+
+
+ 1.25
+ m
+ The file format for MATLAB scripts or functions.
+ MATLAB
+ m
+
+
+ MATLAB script
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+
+ A data format for specifying parameter estimation problems in systems biology.
+
+
+ PEtab
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ g.vcf
+ g.vcf.gz
+ Genomic Variant Call Format (gVCF) is a version of VCF that includes not only the positions that are variant when compared to a reference genome, but also the non-variant positions as ranges, including metrics of confidence that the positions in the range are actually non-variant e.g. minimum read-depth and genotype quality.
+ g.vcf
+ g.vcf.gz
+
+
+ gVCF
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ cml
+
+ Chemical Markup Language (CML) is an XML-based format for encoding detailed information about a wide range of chemical concepts.
+ ChemML
+
+
+ cml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ cif
+
+ Crystallographic Information File (CIF) is a data exchange standard file format for Crystallographic Information and related Structural Science data.
+
+
+ cif
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ json
+
+
+
+
+
+
+
+
+
+ Format for describing the capabilities of a biosimulation tool including the modeling frameworks, simulation algorithms, and modeling formats that it supports, as well as metadata such as a list of the interfaces, programming languages, and operating systems supported by the tool; a link to download the tool; a list of the authors of the tool; and the license to the tool.
+
+
+ BioSimulators format for the specifications of biosimulation tools
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+
+ Outlines the syntax and semantics of the input and output arguments for command-line interfaces for biosimulation tools.
+
+
+ BioSimulators standard for command-line interfaces for biosimulation tools
+
+
+
+
+
+
+
+
+
+ 1.26
+ Data format derived from the standard PDB format, which enables users to incorporate parameters for charge and radius to the existing PDB data file.
+
+
+ PQR
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Data format used in AutoDock 4 for storing atomic coordinates, partial atomic charges and AutoDock atom types for both receptors and ligands.
+
+
+ PDBQT
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ msp
+ MSP is a data format for mass spectrometry data.
+
+
+ NIST Text file format for storing MS∕MS spectra (m∕z and intensity of mass peaks) along with additional annotations for each spectrum. A single MSP file can thus contain single or multiple spectra. This format is frequently used to share spectra libraries.
+ MSP
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Function
+ A function that processes a set of inputs and results in a set of outputs, or associates arguments (inputs) with values (outputs).
+ Computational method
+ Computational operation
+ Computational procedure
+ Computational subroutine
+ Function (programming)
+ Lambda abstraction
+ Mathematical function
+ Mathematical operation
+ Computational tool
+ Process
+ sumo:Function
+
+
+ Special cases are: a) An operation that consumes no input (has no input arguments). Such operation is either a constant function, or an operation depending only on the underlying state. b) An operation that may modify the underlying state but has no output. c) The singular-case operation with no input or output, that still may modify the underlying state.
+ Operation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Function
+ Operation is a function that is computational. It typically has input(s) and output(s), which are always data.
+
+
+
+
+ Computational tool
+ Computational tool provides one or more operations.
+
+
+
+
+ Process
+ Process can have a function (as its quality/attribute), and can also perform an operation with inputs and outputs.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search or query a data resource and retrieve entries and / or annotation.
+ Database retrieval
+ Query
+
+
+ Query and retrieval
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Search database to retrieve all relevant references to a particular entity or entry.
+
+ Data retrieval (database cross-reference)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Annotate an entity (typically a biological or biomedical database entity) with terms from a controlled vocabulary.
+
+
+ This is a broad concept and is used as a placeholder for other, more specific concepts.
+ Annotation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Generate an index of (typically a file of) biological data.
+ Data indexing
+ Database indexing
+
+
+ Indexing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Analyse an index of biological data.
+
+ Data index analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Retrieve basic information about a molecular sequence.
+
+ Annotation retrieval (sequence)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a molecular sequence by some means.
+ Sequence generation (nucleic acid)
+ Sequence generation (protein)
+
+
+ Sequence generation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Edit or change a molecular sequence, either randomly or specifically.
+
+
+ Sequence editing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Merge two or more (typically overlapping) molecular sequences.
+ Sequence splicing
+ Paired-end merging
+ Paired-end stitching
+ Read merging
+ Read stitching
+
+
+ Sequence merging
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Convert a molecular sequence from one type to another.
+
+
+ Sequence conversion
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate sequence complexity, for example to find low-complexity regions in sequences.
+
+
+ Sequence complexity calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate sequence ambiguity, for example identify regions in protein or nucleotide sequences with many ambiguity codes.
+
+
+ Sequence ambiguity calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate character or word composition or frequency of a molecular sequence.
+
+
+ Sequence composition calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find and/or analyse repeat sequences in (typically nucleotide) sequences.
+
+
+ Repeat sequences include tandem repeats, inverted or palindromic repeats, DNA microsatellites (Simple Sequence Repeats or SSRs), interspersed repeats, maximal duplications and reverse, complemented and reverse complemented repeats etc. Repeat units can be exact or imperfect, in tandem or dispersed, of specified or unspecified length.
+ Repeat sequence analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Discover new motifs or conserved patterns in sequences or sequence alignments (de-novo discovery).
+ Motif discovery
+
+
+ Motifs and patterns might be conserved or over-represented (occur with improbable frequency).
+ Sequence motif discovery
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find (scan for) known motifs, patterns and regular expressions in molecular sequence(s).
+ Motif scanning
+ Sequence signature detection
+ Sequence signature recognition
+ Motif detection
+ Motif recognition
+ Motif search
+ Sequence motif detection
+ Sequence motif search
+ Sequence profile search
+
+
+ Sequence motif recognition
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find motifs shared by molecular sequences.
+
+
+ Sequence motif comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Analyse the sequence, conformational or physicochemical properties of transcription regulatory elements in DNA sequences.
+
+ For example transcription factor binding sites (TFBS) analysis to predict accessibility of DNA to binding factors.
+ Transcription regulatory sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Identify common, conserved (homologous) or synonymous transcriptional regulatory motifs (transcription factor binding sites).
+
+
+ Conserved transcription regulatory sequence identification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+ Extract, calculate or predict non-positional (physical or chemical) properties of a protein from processing a protein (3D) structure.
+
+
+ Protein property calculation (from structure)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse flexibility and motion in protein structure.
+ CG analysis
+ MD analysis
+ Protein Dynamics Analysis
+ Trajectory analysis
+ Nucleic Acid Dynamics Analysis
+ Protein flexibility and motion analysis
+ Protein flexibility prediction
+ Protein motion prediction
+
+
+ Use this concept for analysis of flexible and rigid residues, local chain deformability, regions undergoing conformational change, molecular vibrations or fluctuational dynamics, domain motions or other large-scale structural transitions in a protein structure.
+ Simulation analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or screen for 3D structural motifs in protein structure(s).
+ Protein structural feature identification
+ Protein structural motif recognition
+
+
+ This includes conserved substructures and conserved geometry, such as spatial arrangement of secondary structure or protein backbone. Methods might use structure alignment, structural templates, searches for similar electrostatic potential and molecular surface shape, surface-mapping of phylogenetic information etc.
+ Structural motif discovery
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify structural domains in a protein structure from first principles (for example calculations on structural compactness).
+
+
+ Protein domain recognition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the architecture (spatial arrangement of secondary structure) of protein structure(s).
+
+
+ Protein architecture analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: SymShellFiveXML
+ WHATIF: SymShellOneXML
+ WHATIF: SymShellTenXML
+ WHATIF: SymShellTwoXML
+ WHATIF:ListContactsNormal
+ WHATIF:ListContactsRelaxed
+ WHATIF:ListSideChainContactsNormal
+ WHATIF:ListSideChainContactsRelaxed
+ Calculate or extract inter-atomic, inter-residue or residue-atom contacts, distances and interactions in protein structure(s).
+
+
+ Residue interaction calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:CysteineTorsions
+ WHATIF:ResidueTorsions
+ WHATIF:ResidueTorsionsBB
+ WHATIF:ShowTauAngle
+ Calculate, visualise or analyse phi/psi angles of a protein structure.
+ Backbone torsion angle calculation
+ Cysteine torsion angle calculation
+ Tau angle calculation
+ Torsion angle calculation
+
+
+ Protein geometry calculation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Extract, calculate or predict non-positional (physical or chemical) properties of a protein, including any non-positional properties of the molecular sequence, from processing a protein sequence or 3D structure.
+ Protein property rendering
+ Protein property calculation (from sequence)
+ Protein property calculation (from structure)
+ Protein structural property calculation
+ Structural property calculation
+
+
+ This includes methods to render and visualise the properties of a protein sequence, and a residue-level search for properties such as solvent accessibility, hydropathy, secondary structure, ligand-binding etc.
+ Protein property calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Immunogen design
+ Predict antigenicity, allergenicity / immunogenicity, allergic cross-reactivity etc of peptides and proteins.
+ Antigenicity prediction
+ Immunogenicity prediction
+ B cell peptide immunogenicity prediction
+ Hopp and Woods plotting
+ MHC peptide immunogenicity prediction
+
+
+ Immunological systems are cellular or humoral. In vaccine design, to induce a cellular immune response, methods must search for antigens that can be recognized by the major histocompatibility complex (MHC) molecules present in T lymphocytes. If a humoral response is required, antigens for B cells must be identified.
+ This includes methods that generate a graphical rendering of antigenicity of a protein, such as a Hopp and Woods plot.
+ This is usually done in the development of peptide-specific antibodies or multi-epitope vaccines. Methods might use sequence data (for example motifs) and / or structural data.
+ Peptide immunogenicity prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict, recognise and identify positional features in molecular sequences such as key functional sites or regions.
+ Sequence feature prediction
+ Sequence feature recognition
+ Motif database search
+ SO:0000110
+
+
+ Look at "Protein feature detection" (http://edamontology.org/operation_3092) and "Nucleic acid feature detection" (http://edamontology.org/operation_0415) in case more specific terms are needed.
+ Sequence feature detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Extract a sequence feature table from a sequence database entry.
+
+ Data retrieval (feature table)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query the features (in a feature table) of molecular sequence(s).
+
+ Feature table query
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the feature tables of two or more molecular sequences.
+ Feature comparison
+ Feature table comparison
+
+
+ Sequence feature comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Display basic information about a sequence alignment.
+
+ Data retrieval (sequence alignment)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a molecular sequence alignment.
+
+
+ Sequence alignment analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare (typically by aligning) two molecular sequence alignments.
+
+
+ See also 'Sequence profile alignment'.
+ Sequence alignment comparison
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Convert a molecular sequence alignment from one type to another (for example amino acid to coding nucleotide sequence).
+
+
+ Sequence alignment conversion
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) physicochemical property data of nucleic acids.
+
+ Nucleic acid property processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate or predict physical or chemical properties of nucleic acid molecules, including any non-positional properties of the molecular sequence.
+
+
+ Nucleic acid property calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict splicing alternatives or transcript isoforms from analysis of sequence data.
+ Alternative splicing analysis
+ Alternative splicing detection
+ Differential splicing analysis
+ Splice transcript prediction
+
+
+ Alternative splicing prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect frameshifts in DNA sequences, including frameshift sites and signals, and frameshift errors from sequencing projects.
+ Frameshift error detection
+
+
+ Methods include sequence alignment (if related sequences are available) and word-based sequence comparison.
+ Frameshift detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect vector sequences in nucleotide sequence, typically by comparison to a set of known vector sequences.
+
+
+ Vector sequence detection
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict secondary structure of protein sequences.
+ Secondary structure prediction (protein)
+
+
+ Methods might use amino acid composition, local sequence information, multiple sequence alignments, physicochemical features, estimated energy content, statistical algorithms, hidden Markov models, support vector machines, kernel machines, neural networks etc.
+ Protein secondary structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict super-secondary structure of protein sequence(s).
+
+
+ Super-secondary structures include leucine zippers, coiled coils, Helix-Turn-Helix etc.
+ Protein super-secondary structure prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict and/or classify transmembrane proteins or transmembrane (helical) domains or regions in protein sequences.
+
+
+ Transmembrane protein prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse transmembrane protein(s), typically by processing sequence and / or structural data, and write an informative report for example about the protein and its transmembrane domains / regions.
+
+
+ Use this (or child) concept for analysis of transmembrane domains (buried and exposed faces), transmembrane helices, helix topology, orientation, inter-helical contacts, membrane dipping (re-entrant) loops and other secondary structure etc. Methods might use pattern discovery, hidden Markov models, sequence alignment, structural profiles, amino acid property analysis, comparison to known domains or some combination (hybrid methods).
+ Transmembrane protein analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ This is an "organisational class" not very useful for annotation per se.
+ 1.19
+
+
+
+
+ Predict tertiary structure of a molecular (biopolymer) sequence.
+
+ Structure prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict contacts, non-covalent interactions and distance (constraints) between amino acids in protein sequences.
+ Residue interaction prediction
+ Contact map prediction
+ Protein contact map prediction
+
+
+ Methods usually involve multiple sequence alignment analysis.
+ Residue contact prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Analyse experimental protein-protein interaction data from for example yeast two-hybrid analysis, protein microarrays, immunoaffinity chromatography followed by mass spectrometry, phage display etc.
+
+
+ Protein interaction raw data analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify or predict protein-protein interactions, interfaces, binding sites etc in protein sequences.
+
+
+ Protein-protein interaction prediction (from protein sequence)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify or predict protein-protein interactions, interfaces, binding sites etc in protein structures.
+
+
+ Protein-protein interaction prediction (from protein structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a network of protein interactions.
+ Protein interaction network comparison
+
+
+ Protein interaction network analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+ Compare two or more biological pathways or networks.
+
+ Pathway or network comparison
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict RNA secondary structure (for example knots, pseudoknots, alternative structures etc).
+ RNA shape prediction
+
+
+ Methods might use RNA motifs, predicted intermolecular contacts, or RNA sequence-structure compatibility (inverse RNA folding).
+ RNA secondary structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse some aspect of RNA/DNA folding, typically by processing sequence and/or structural data. For example, compute folding energies such as minimum folding energies for DNA or RNA sequences or energy landscape of RNA mutants.
+ Nucleic acid folding
+ Nucleic acid folding modelling
+ Nucleic acid folding prediction
+ Nucleic acid folding energy calculation
+
+
+ Nucleic acid folding analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on restriction enzymes or restriction enzyme sites.
+
+ Data retrieval (restriction enzyme annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Identify genetic markers in DNA sequences.
+
+ A genetic marker is any DNA sequence of known chromosomal location that is associated with and specific to a particular gene or trait. This includes short sequences surrounding a SNP, Sequence-Tagged Sites (STS) which are well suited for PCR amplification, a longer minisatellites sequence etc.
+ Genetic marker identification
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a genetic (linkage) map of a DNA sequence (typically a chromosome) showing the relative positions of genetic markers based on estimation of non-physical distances.
+ Functional mapping
+ Genetic cartography
+ Genetic map construction
+ Genetic map generation
+ Linkage mapping
+ QTL mapping
+
+
+ Mapping involves ordering genetic loci along a chromosome and estimating the physical distance between loci. A genetic map shows the relative (not physical) position of known genes and genetic markers.
+ This includes mapping of the genetic architecture of dynamic complex traits (functional mapping), e.g. by characterisation of the underlying quantitative trait loci (QTLs) or nucleotides (QTNs).
+ Genetic mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse genetic linkage.
+
+
+ For example, estimate how close two genes are on a chromosome by calculating how often they are transmitted together to an offspring, ascertain whether two genes are linked and parental linkage, calculate linkage map distance etc.
+ Linkage analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate codon usage statistics and create a codon usage table.
+ Codon usage table construction
+
+
+ Codon usage table generation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more codon usage tables.
+
+
+ Codon usage table comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse codon usage in molecular sequences or process codon usage data (e.g. a codon usage table).
+ Codon usage data analysis
+ Codon usage table analysis
+
+
+ Codon usage analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify and plot third base position variability in a nucleotide sequence.
+
+
+ Base position variability plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find exact character or word matches between molecular sequences without full sequence alignment.
+
+
+ Sequence word comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a sequence distance matrix or otherwise estimate genetic distances between molecular sequences.
+ Phylogenetic distance matrix generation
+ Sequence distance calculation
+ Sequence distance matrix construction
+
+
+ Sequence distance matrix generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more molecular sequences, identify and remove redundant sequences based on some criteria.
+
+
+ Sequence redundancy removal
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Build clusters of similar sequences, typically using scores from pair-wise alignment or other comparison of the sequences.
+ Sequence cluster construction
+ Sequence cluster generation
+
+
+ The clusters may be output or used internally for some other purpose.
+ Sequence clustering
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (identify equivalent sites within) molecular sequences.
+ Sequence alignment construction
+ Sequence alignment generation
+ Consensus-based sequence alignment
+ Constrained sequence alignment
+ Multiple sequence alignment (constrained)
+ Sequence alignment (constrained)
+
+
+ Includes methods that align sequence profiles (representing sequence alignments): methods might perform one-to-one, one-to-many or many-to-many comparisons. See also 'Sequence alignment comparison'.
+ See also "Read mapping"
+ Sequence alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Align two or more molecular sequences of different types (for example genomic DNA to EST, cDNA or mRNA).
+
+ Hybrid sequence alignment construction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align molecular sequences using sequence and structural information.
+ Sequence alignment (structure-based)
+
+
+ Structure-based sequence alignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (superimpose) molecular tertiary structures.
+ Structural alignment
+ 3D profile alignment
+ 3D profile-to-3D profile alignment
+ Structural profile alignment
+
+
+ Includes methods that align structural (3D) profiles or templates (representing structures or structure alignments) - including methods that perform one-to-one, one-to-many or many-to-many comparisons.
+ Structure alignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate some type of sequence profile (for example a hidden Markov model) from a sequence alignment.
+ Sequence profile construction
+
+
+ Sequence profile generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate some type of structural (3D) profile or template from a structure or structure alignment.
+ Structural profile construction
+ Structural profile generation
+
+
+ 3D profile generation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align sequence profiles (representing sequence alignments).
+
+
+ Profile-profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align structural (3D) profiles or templates (representing structures or structure alignments).
+
+
+ 3D profile-to-3D profile alignment
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align molecular sequence(s) to sequence profile(s), or profiles to other profiles. A profile typically represents a sequence alignment.
+ Profile-profile alignment
+ Profile-to-profile alignment
+ Sequence-profile alignment
+ Sequence-to-profile alignment
+
+
+ A sequence profile typically represents a sequence alignment. Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Sequence profile alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align molecular sequence(s) to structural (3D) profile(s) or template(s) (representing a structure or structure alignment).
+
+
+ Sequence-to-3D-profile alignment
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align molecular sequence to structure in 3D space (threading).
+ Sequence-structure alignment
+ Sequence-3D profile alignment
+ Sequence-to-3D-profile alignment
+
+
+ This includes sequence-to-3D-profile alignment methods, which align molecular sequence(s) to structural (3D) profile(s) or template(s) (representing a structure or structure alignment) - methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Use this concept for methods that evaluate sequence-structure compatibility by assessing residue interactions in 3D. Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Protein threading
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Recognize (predict and identify) known protein structural domains or folds in protein sequence(s) which (typically) are not accompanied by any significant sequence similarity to known structures.
+ Domain prediction
+ Fold prediction
+ Protein domain prediction
+ Protein fold prediction
+ Protein fold recognition
+
+
+ Methods use some type of mapping between sequence and fold, for example secondary structure prediction and alignment, profile comparison, sequence properties, homologous sequence search, kernel machines etc. Domains and folds might be taken from SCOP or CATH.
+ Fold recognition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Too fine-grained, the operation (Data retrieval) hasn't changed, just what is retrieved.
+ 1.17
+
+ Search for and retrieve data concerning or describing some core data, as distinct from the primary data that is being described.
+
+
+ This includes documentation, general information and other metadata on entities such as databases, database entries and tools.
+ Metadata retrieval
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Query scientific literature, in search for articles, article data, concepts, named entities, or for statistics.
+
+
+ Literature search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text analysis
+ Process and analyse text (typically scientific literature) to extract information from it.
+ Literature mining
+ Text analytics
+ Text data mining
+ Article analysis
+ Literature analysis
+
+
+ Text mining
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform in-silico (virtual) PCR.
+
+
+ Virtual PCR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Design or predict oligonucleotide primers for PCR and DNA amplification etc.
+ PCR primer prediction
+ Primer design
+ PCR primer design (based on gene structure)
+ PCR primer design (for conserved primers)
+ PCR primer design (for gene transcription profiling)
+ PCR primer design (for genotyping polymorphisms)
+ PCR primer design (for large scale sequencing)
+ PCR primer design (for methylation PCRs)
+ Primer quality estimation
+
+
+ Primer design involves predicting or selecting primers that are specific to a provided PCR template. Primers can be designed with certain properties such as size of product desired, primer size etc. The output might be a minimal or overlapping primer set.
+ This includes predicting primers based on gene structure, promoters, exon-exon junctions, predicting primers that are conserved across multiple genomes or species, primers for gene transcription profiling, for genotyping polymorphisms, for example single nucleotide polymorphisms (SNPs), for large scale sequencing, or for methylation PCRs.
+ PCR primer design
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict and/or optimize oligonucleotide probes for DNA microarrays, for example for transcription profiling of genes, or for genomes and gene families.
+ Microarray probe prediction
+
+
+ Microarray probe design
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Combine (align and merge) overlapping fragments of a DNA sequence to reconstruct the original sequence.
+ Metagenomic assembly
+ Sequence assembly editing
+
+
+ For example, assemble overlapping reads from paired-end sequencers into contigs (a contiguous sequence corresponding to read overlaps). Or assemble contigs, for example ESTs and genomic DNA fragments, depending on the detected fragment overlaps.
+ Sequence assembly
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+ Standardize or normalize microarray data.
+
+
+ Microarray data standardisation and normalisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) SAGE, MPSS or SBS experimental data.
+
+ Sequencing-based expression profile data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform cluster analysis of expression data to identify groups with similar expression profiles, for example by clustering.
+ Gene expression clustering
+ Gene expression profile clustering
+
+
+ Expression profile clustering
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The measurement of the activity (expression) of multiple genes in a cell, tissue, sample etc., in order to get an impression of biological function.
+ Feature expression analysis
+ Functional profiling
+ Gene expression profile construction
+ Gene expression profile generation
+ Gene expression quantification
+ Gene transcription profiling
+ Non-coding RNA profiling
+ Protein profiling
+ RNA profiling
+ mRNA profiling
+
+
+ Gene expression profiling generates some sort of gene expression profile, for example from microarray data.
+ Gene expression profiling
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Comparison of expression profiles.
+ Gene expression comparison
+ Gene expression profile comparison
+
+
+ Expression profile comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Interpret (in functional terms) and annotate gene expression data.
+
+
+ Functional profiling
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse EST or cDNA sequences.
+
+ For example, identify full-length cDNAs from EST sequences or detect potential EST antisense transcripts.
+ EST and cDNA sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identify and select targets for protein structural determination.
+
+ Methods will typically navigate a graph of protein families of known structure.
+ Structural genomics target selection
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign secondary structure from protein coordinate or experimental data.
+
+
+ Includes secondary structure assignment from circular dichroism (CD) spectroscopic data, and from protein coordinate data.
+ Protein secondary structure assignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign a protein tertiary structure (3D coordinates), or other aspects of protein structure, from raw experimental data.
+ NOE assignment
+ Structure calculation
+
+
+ Protein structure assignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: CorrectedPDBasXML
+ WHATIF: UseFileDB
+ WHATIF: UseResidueDB
+ Evaluate the quality or correctness of a protein three-dimensional model.
+ Protein model validation
+ Residue validation
+
+
+ Model validation might involve checks for atomic packing, steric clashes (bumps), volume irregularities, agreement with electron density maps, number of amino acid residues, percentage of residues with missing or bad atoms, irregular Ramachandran Z-scores, irregular Chi-1 / Chi-2 normality scores, RMS-Z score on bonds and angles etc.
+ The PDB file format has had difficulties, inconsistencies and errors. Corrections can include identifying a meaningful sequence, removal of alternate atoms, correction of nomenclature problems, removal of incomplete residues and spurious waters, addition or removal of water, modelling of missing side chains, optimisation of cysteine bonds, regularisation of bond lengths, bond angles and planarities etc.
+ This includes methods that calculate poor quality residues. The scoring function to identify poor quality residues may consider residues with bad atoms or atoms with high B-factor, residues in the N- or C-terminal position, adjacent to an unstructured residue, non-canonical residues, glycine and proline (or adjacent to these such residues).
+ Protein structure validation
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: CorrectedPDBasXML
+ Refine (after evaluation) a model of a molecular structure (typically a protein structure) to reduce steric clashes, volume irregularities etc.
+ Protein model refinement
+
+
+ Molecular model refinement
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree.
+ Phylogenetic tree construction
+ Phylogenetic reconstruction
+ Phylogenetic tree generation
+
+
+ Phylogenetic trees are usually constructed from a set of sequences from which an alignment (or data matrix) is calculated.
+ Phylogenetic inference
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse an existing phylogenetic tree or trees, typically to detect features or make predictions.
+ Phylogenetic tree analysis
+ Phylogenetic modelling
+
+
+ Phylogenetic modelling is the modelling of trait evolution and prediction of trait values using phylogeny as a basis.
+ Phylogenetic analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees.
+
+
+ For example, to produce a consensus tree, subtrees, supertrees, calculate distances between trees or test topological similarity between trees (e.g. a congruence index) etc.
+ Phylogenetic tree comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Edit a phylogenetic tree.
+
+
+ Phylogenetic tree editing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Comparison of a DNA sequence to orthologous sequences in different species and inference of a phylogenetic tree, in order to identify regulatory elements such as transcription factor binding sites (TFBS).
+ Phylogenetic shadowing
+
+
+ Phylogenetic shadowing is a type of footprinting where many closely related species are used. A phylogenetic 'shadow' represents the additive differences between individual sequences. By masking or 'shadowing' variable positions a conserved sequence is produced with few or none of the variations, which is then compared to the sequences of interest to identify significant regions of conservation.
+ Phylogenetic footprinting
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+ Simulate the folding of a protein.
+
+
+ Protein folding simulation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict the folding pathway(s) or non-native structural intermediates of a protein.
+
+
+ Protein folding pathway prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Map and model the effects of single nucleotide polymorphisms (SNPs) on protein structure(s).
+
+
+ Protein SNP mapping
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the effect of point mutation on a protein structure, in terms of structural effects and protein folding, stability and function.
+ Variant functional prediction
+ Protein SNP mapping
+ Protein mutation modelling
+ Protein stability change prediction
+
+
+ Protein SNP mapping maps and models the effects of single nucleotide polymorphisms (SNPs) on protein structure(s). Methods might predict silent or pathological mutations.
+ Variant effect prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Design molecules that elicit an immune response (immunogens).
+
+
+ Immunogen design
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Predict and optimise zinc finger protein domains for DNA/RNA binding (for example for transcription factors and nucleases).
+
+
+ Zinc finger prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate Km, Vmax and derived data for an enzyme reaction.
+
+
+ Enzyme kinetics calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Reformat a file of data (or equivalent entity in memory).
+ File format conversion
+ File formatting
+ File reformatting
+ Format conversion
+ Reformatting
+
+
+ Formatting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Test and validate the format and content of a data file.
+ File format validation
+
+
+ Format validation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Visualise, plot or render (graphically) biomolecular data such as molecular sequences or structures.
+ Data visualisation
+ Rendering
+ Molecular visualisation
+ Plotting
+
+
+ This includes methods to render and visualise molecules.
+ Visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a sequence database by sequence comparison and retrieve similar sequences, or retrieve sequences matching a given sequence motif or pattern, such as a Prosite pattern or regular expression.
+
+
+ This excludes direct retrieval methods (e.g. the dbfetch program).
+ Sequence database search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a tertiary structure database, typically by sequence and/or structure comparison, or some other means, and retrieve structures and associated data.
+
+
+ Structure database search
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Search a secondary protein database (of classification information) to assign a protein sequence(s) to a known protein family or group.
+
+
+ Protein secondary database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Screen a sequence against a motif or pattern database.
+
+ Motif database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Search a database of sequence profiles with a query sequence.
+
+ Sequence profile database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Search a database of transmembrane proteins, for example for sequence or structural similarities.
+
+ Transmembrane protein database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a database and retrieve sequences with a given entry code or accession number.
+
+ Sequence retrieval (by code)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a database and retrieve sequences containing a given keyword.
+
+ Sequence retrieval (by keyword)
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a sequence database and retrieve sequences that are similar to a query sequence.
+ Sequence database search (by sequence)
+ Structure database search (by sequence)
+
+
+ Sequence similarity search
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Search a sequence database and retrieve sequences matching a given sequence motif or pattern, such as a Prosite pattern or regular expression.
+
+
+ Sequence database search (by motif or pattern)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database and retrieve sequences of a given amino acid composition.
+
+ Sequence database search (by amino acid composition)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a sequence database and retrieve sequences with a specified property, typically a physicochemical or compositional property.
+
+
+ Sequence database search (by property)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database and retrieve sequences that are similar to a query sequence using a word-based method.
+
+ Word-based methods (for example BLAST, gapped BLAST, MEGABLAST, WU-BLAST etc.) are usually quicker than alignment-based methods. They may or may not handle gaps.
+ Sequence database search (by sequence using word-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database and retrieve sequences that are similar to a query sequence using a sequence profile-based method, or with a supplied profile as query.
+
+ This includes tools based on PSI-BLAST.
+ Sequence database search (by sequence using profile-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database for sequences that are similar to a query sequence using a local alignment-based method.
+
+ This includes tools based on the Smith-Waterman algorithm or FASTA.
+ Sequence database search (by sequence using local alignment-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search sequence(s) or a sequence database for sequences that are similar to a query sequence using a global alignment-based method.
+
+ This includes tools based on the Needleman and Wunsch algorithm.
+ Sequence database search (by sequence using global alignment-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a DNA database (for example a database of conserved sequence tags) for matches to Sequence-Tagged Site (STS) primer sequences.
+
+ STSs are genetic markers that are easily detected by the polymerase chain reaction (PCR) using specific primers.
+ Sequence database search (by sequence for primer sequences)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Search sequence(s) or a sequence database for sequences which match a set of peptide masses, for example a peptide mass fingerprint from mass spectrometry.
+
+
+ Sequence database search (by molecular weight)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search sequence(s) or a sequence database for sequences of a given isoelectric point.
+
+ Sequence database search (by isoelectric point)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a tertiary structure database and retrieve entries with a given entry code or accession number.
+
+ Structure retrieval (by code)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a tertiary structure database and retrieve entries containing a given keyword.
+
+ Structure retrieval (by keyword)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Search a tertiary structure database and retrieve structures with a sequence similar to a query sequence.
+
+
+ Structure database search (by sequence)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a database of molecular structure and retrieve structures that are similar to a query structure.
+ Structure database search (by structure)
+ Structure retrieval by structure
+
+
+ Structural similarity search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotate a molecular sequence record with terms from a controlled vocabulary.
+
+
+ Sequence annotation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotate a genome sequence with terms from a controlled vocabulary.
+ Functional genome annotation
+ Metagenome annotation
+ Structural genome annotation
+
+
+ Genome annotation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate the reverse and / or complement of a nucleotide sequence.
+ Nucleic acid sequence reverse and complement
+ Reverse / complement
+ Reverse and complement
+
+
+ Reverse complement
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a random sequence, for example, with a specific character composition.
+
+
+ Random sequence generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate digest fragments for a nucleotide sequence containing restriction sites.
+ Nucleic acid restriction digest
+
+
+ Restriction digest
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Cleave a protein sequence into peptide fragments (corresponding to enzymatic or chemical cleavage).
+
+
+ This is often followed by calculation of protein fragment masses (http://edamontology.org/operation_0398).
+ Protein sequence cleavage
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mutate a molecular sequence a specified amount or shuffle it to produce a randomised sequence with the same overall composition.
+
+
+ Sequence mutation and randomisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mask characters in a molecular sequence (replacing those characters with a mask character).
+
+
+ For example, SNPs or repeats in a DNA sequence might be masked.
+ Sequence masking
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Cut (remove) characters or a region from a molecular sequence.
+
+
+ Sequence cutting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Create (or remove) restriction sites in sequences, for example using silent mutations.
+
+
+ Restriction site creation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Translate a DNA sequence into protein.
+
+
+ DNA translation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Transcribe a nucleotide sequence into mRNA sequence(s).
+
+
+ DNA transcription
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Calculate base frequency or word composition of a nucleotide sequence.
+
+
+ Sequence composition calculation (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Calculate amino acid frequency or word composition of a protein sequence.
+
+
+ Sequence composition calculation (protein)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find (and possibly render) short repetitive subsequences (repeat sequences) in (typically nucleotide) sequences.
+
+
+ Repeat sequence detection
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse repeat sequence organisation such as periodicity.
+
+
+ Repeat sequence organisation analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Analyse the hydrophobic, hydrophilic or charge properties of a protein structure.
+
+
+ Protein hydropathy calculation (from structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:AtomAccessibilitySolvent
+ WHATIF:AtomAccessibilitySolventPlus
+ Calculate solvent accessible or buried surface areas in protein or other molecular structures.
+ Protein solvent accessibility calculation
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Accessible surface calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify clusters of hydrophobic or charged residues in a protein structure.
+
+
+ Protein hydropathy cluster calculation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate whether a protein structure has an unusually large net charge (dipole moment).
+
+
+ Protein dipole moment calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:AtomAccessibilityMolecular
+ WHATIF:AtomAccessibilityMolecularPlus
+ WHATIF:ResidueAccessibilityMolecular
+ WHATIF:ResidueAccessibilitySolvent
+ WHATIF:ResidueAccessibilityVacuum
+ WHATIF:ResidueAccessibilityVacuumMolecular
+ WHATIF:TotAccessibilityMolecular
+ WHATIF:TotAccessibilitySolvent
+ Calculate the molecular surface area in proteins and other macromolecules.
+ Protein atom surface calculation
+ Protein residue surface calculation
+ Protein surface and interior calculation
+ Protein surface calculation
+
+
+ Molecular surface calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify or predict catalytic residues, active sites or other ligand-binding sites in protein structures.
+
+
+ Protein binding site prediction (from structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the interaction of protein with nucleic acids, e.g. RNA or DNA-binding sites, interfaces etc.
+ Protein-nucleic acid binding site analysis
+ Protein-DNA interaction analysis
+ Protein-RNA interaction analysis
+
+
+ Protein-nucleic acid interaction analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Decompose a structure into compact or globular fragments (protein peeling).
+
+
+ Protein peeling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a matrix of distance between residues (for example the C-alpha atoms) in a protein structure.
+
+
+ Protein distance matrix calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a residue contact map (typically all-versus-all inter-residue contacts) for a protein structure.
+ Protein contact map calculation
+
+
+ Contact map calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate clusters of contacting residues in protein structures.
+
+
+ This includes for example clusters of hydrophobic or charged residues, or clusters of contacting residues which have a key structural or functional role.
+ Residue cluster calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:HasHydrogenBonds
+ WHATIF:ShowHydrogenBonds
+ WHATIF:ShowHydrogenBondsM
+ Identify potential hydrogen bonds between amino acids and other groups.
+
+
+ The output might include the atoms involved in the bond, bond geometric parameters and bond enthalpy.
+ Hydrogen bond calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+ Calculate non-canonical atomic interactions in protein structures.
+
+ Residue non-canonical interaction detection
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a Ramachandran plot of a protein structure.
+
+
+ Ramachandran plot calculation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Validate a Ramachandran plot of a protein structure.
+
+
+ Ramachandran plot validation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the molecular weight of a protein sequence or fragments.
+ Peptide mass calculation
+
+
+ Protein molecular weight calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict extinction coefficients or optical density of a protein sequence.
+
+
+ Protein extinction coefficient calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate pH-dependent properties from pKa calculations of a protein sequence.
+ Protein pH-dependent property calculation
+
+
+ Protein pKa calculation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Hydropathy calculation on a protein sequence.
+
+
+ Protein hydropathy calculation (from sequence)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Plot a protein titration curve.
+
+
+ Protein titration curve plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate isoelectric point of a protein sequence.
+
+
+ Protein isoelectric point calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Estimate hydrogen exchange rate of a protein sequence.
+
+
+ Protein hydrogen exchange rate calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate hydrophobic or hydrophilic / charged regions of a protein sequence.
+
+
+ Protein hydrophobic region calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate aliphatic index (relative volume occupied by aliphatic side chains) of a protein.
+
+
+ Protein aliphatic index calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the hydrophobic moment of a peptide sequence and recognize amphiphilicity.
+
+
+ Hydrophobic moment is a peptide's hydrophobicity measured for different angles of rotation.
+ Protein hydrophobic moment plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the stability or globularity of a protein sequence, whether it is intrinsically unfolded etc.
+
+
+ Protein globularity prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the solubility or atomic solvation energy of a protein sequence.
+
+
+ Protein solubility prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict crystallizability of a protein sequence.
+
+
+ Protein crystallizability prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Too fine-grained.
+ 1.17
+
+ Detect or predict signal peptides (and typically predict subcellular localisation) of eukaryotic proteins.
+
+
+ Protein signal peptide detection (eukaryotes)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Too fine-grained.
+ 1.17
+
+ Detect or predict signal peptides (and typically predict subcellular localisation) of bacterial proteins.
+
+
+ Protein signal peptide detection (bacteria)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Predict MHC class I or class II binding peptides, promiscuous binding peptides, immunogenicity etc.
+
+
+ MHC peptide immunogenicity prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Predict, recognise and identify positional features in protein sequences such as functional sites or regions and secondary structure.
+
+ Methods typically involve scanning for known motifs, patterns and regular expressions.
+ Protein feature prediction (from sequence)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict, recognise and identify features in nucleotide sequences such as functional sites or regions, typically by scanning for known motifs, patterns and regular expressions.
+ Sequence feature detection (nucleic acid)
+ Nucleic acid feature prediction
+ Nucleic acid feature recognition
+ Nucleic acid site detection
+ Nucleic acid site prediction
+ Nucleic acid site recognition
+
+
+ Methods typically involve scanning for known motifs, patterns and regular expressions.
+ This is a placeholder but does not comprehensively include all child concepts - please inspect other concepts under "Nucleic acid sequence analysis" for example "Gene prediction", for other feature detection operations.
+ Nucleic acid feature detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict antigenic determinant sites (epitopes) in protein sequences.
+ Antibody epitope prediction
+ Epitope prediction
+ B cell epitope mapping
+ B cell epitope prediction
+ Epitope mapping (MHC Class I)
+ Epitope mapping (MHC Class II)
+ T cell epitope mapping
+ T cell epitope prediction
+
+
+ Epitope mapping is commonly done during vaccine design.
+ Epitope mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict post-translation modification sites in protein sequences.
+ PTM analysis
+ PTM prediction
+ PTM site analysis
+ PTM site prediction
+ Post-translation modification site prediction
+ Post-translational modification analysis
+ Protein post-translation modification site prediction
+ Acetylation prediction
+ Acetylation site prediction
+ Dephosphorylation prediction
+ Dephosphorylation site prediction
+ GPI anchor prediction
+ GPI anchor site prediction
+ GPI modification prediction
+ GPI modification site prediction
+ Glycosylation prediction
+ Glycosylation site prediction
+ Hydroxylation prediction
+ Hydroxylation site prediction
+ Methylation prediction
+ Methylation site prediction
+ N-myristoylation prediction
+ N-myristoylation site prediction
+ N-terminal acetylation prediction
+ N-terminal acetylation site prediction
+ N-terminal myristoylation prediction
+ N-terminal myristoylation site prediction
+ Palmitoylation prediction
+ Palmitoylation site prediction
+ Phosphoglycerylation prediction
+ Phosphoglycerylation site prediction
+ Phosphorylation prediction
+ Phosphorylation site prediction
+ Phosphosite localization
+ Prenylation prediction
+ Prenylation site prediction
+ Pupylation prediction
+ Pupylation site prediction
+ S-nitrosylation prediction
+ S-nitrosylation site prediction
+ S-sulfenylation prediction
+ S-sulfenylation site prediction
+ Succinylation prediction
+ Succinylation site prediction
+ Sulfation prediction
+ Sulfation site prediction
+ Sumoylation prediction
+ Sumoylation site prediction
+ Tyrosine nitration prediction
+ Tyrosine nitration site prediction
+ Ubiquitination prediction
+ Ubiquitination site prediction
+
+
+ Methods might predict sites of methylation, N-terminal myristoylation, N-terminal acetylation, sumoylation, palmitoylation, phosphorylation, sulfation, glycosylation, glycosylphosphatidylinositol (GPI) modification sites (GPI lipid anchor signals) etc.
+ Post-translational modification site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect or predict signal peptides and signal peptide cleavage sites in protein sequences.
+
+
+ Methods might use sequence motifs and features, amino acid composition, profiles, machine-learned classifiers, etc.
+ Protein signal peptide detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Predict catalytic residues, active sites or other ligand-binding sites in protein sequences.
+
+
+ Protein binding site prediction (from sequence)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict or detect RNA- and DNA-binding sites in protein sequences.
+ Protein-nucleic acid binding detection
+ Protein-nucleic acid binding prediction
+ Protein-nucleic acid binding site detection
+ Protein-nucleic acid binding site prediction
+ Zinc finger prediction
+
+
+ This includes methods that predict and optimise zinc finger protein domains for DNA/RNA binding (for example for transcription factors and nucleases).
+ Nucleic acids-binding site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+ Predict protein sites that are key to protein folding, such as possible sites of nucleation or stabilisation.
+
+
+ Protein folding site prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect or predict cleavage sites (enzymatic or chemical) in protein sequences.
+
+
+ Protein cleavage site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Predict epitopes that bind to MHC class I molecules.
+
+
+ Epitope mapping (MHC Class I)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Predict epitopes that bind to MHC class II molecules.
+
+
+ Epitope mapping (MHC Class II)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Detect, predict and identify whole gene structure in DNA sequences. This includes protein coding regions, exon-intron structure, regulatory regions etc.
+
+
+ Whole gene prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Detect, predict and identify genetic elements such as promoters, coding regions, splice sites, etc in DNA sequences.
+
+
+ Methods for gene prediction might be ab initio, based on phylogenetic comparisons, use motifs, sequence features, support vector machine, alignment etc.
+ Gene component prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect or predict transposons, retrotransposons / retrotransposition signatures etc.
+
+
+ Transposon prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect polyA signals in nucleotide sequences.
+ PolyA detection
+ PolyA prediction
+ PolyA signal prediction
+ Polyadenylation signal detection
+ Polyadenylation signal prediction
+
+
+ PolyA signal detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect quadruplex-forming motifs in nucleotide sequences.
+ Quadruplex structure prediction
+
+
+ Quadruplex (4-stranded) structures are formed by guanine-rich regions and are implicated in various important biological processes and as therapeutic targets.
+ Quadruplex formation site detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find CpG rich regions in a nucleotide sequence or isochores in genome sequences.
+ CpG island and isochores detection
+ CpG island and isochores rendering
+
+
+ An isochore is a long region (> 3 KB) of DNA with very uniform GC content, in contrast to the rest of the genome. Isochores tend to have more genes, higher local melting or denaturation temperatures, and different flexibility. Methods might calculate fractional GC content or variation of GC content, predict methylation status of CpG islands etc. This includes methods that visualise CpG rich regions in a nucleotide sequence, for example plot isochores in a genome sequence.
+ CpG island and isochore detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find and identify restriction enzyme cleavage sites (restriction sites) in (typically) DNA sequences, for example to generate a restriction map.
+
+
+ Restriction site recognition
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict nucleosome exclusion sequences (nucleosome free regions) in DNA.
+ Nucleosome exclusion sequence prediction
+ Nucleosome formation sequence prediction
+
+
+ Nucleosome position prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify, predict or analyse splice sites in nucleotide sequences.
+ Splice prediction
+
+
+ Methods might require a pre-mRNA or genomic DNA sequence.
+ Splice site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict whole gene structure using a combination of multiple methods to achieve better predictions.
+
+
+ Integrated gene prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find operons (operators, promoters and genes) in bacterial genes.
+
+
+ Operon prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict protein-coding regions (CDS or exon) or open reading frames in nucleotide sequences.
+ ORF finding
+ ORF prediction
+
+
+ Coding region prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict selenocysteine insertion sequence (SECIS) in a DNA sequence.
+ Selenocysteine insertion sequence (SECIS) prediction
+
+
+ SECIS elements are around 60 nucleotides in length with a stem-loop structure that directs the cell to translate UGA codons as selenocysteines.
+ SECIS element prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict transcriptional regulatory motifs, patterns, elements or regions in DNA sequences.
+ Regulatory element prediction
+ Transcription regulatory element prediction
+ Conserved transcription regulatory sequence identification
+ Translational regulatory element prediction
+
+
+ This includes comparative genomics approaches that identify common, conserved (homologous) or synonymous transcriptional regulatory elements. For example cross-species comparison of transcription factor binding sites (TFBS). Methods might analyse co-regulated or co-expressed genes, or sets of oppositely expressed genes.
+ This includes promoters, enhancers, silencers and boundary elements / insulators, regulatory protein or transcription factor binding sites etc. Methods might be specific to a particular genome and use motifs, word-based / grammatical methods, position-specific frequency matrices, discriminative pattern analysis etc.
+ Transcriptional regulatory element prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict translation initiation sites, possibly by searching a database of sites.
+
+
+ Translation initiation site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict whole promoters or promoter elements (transcription start sites, RNA polymerase binding site, transcription factor binding sites, promoter enhancers etc) in DNA sequences.
+
+
+ Methods might recognize CG content, CpG islands, splice sites, polyA signals etc.
+ Promoter prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify, predict or analyse cis-regulatory elements in DNA sequences (TATA box, Pribnow box, SOS box, CAAT box, CCAAT box, operator etc.) or in RNA sequences (e.g. riboswitches).
+ Transcriptional regulatory element prediction (DNA-cis)
+ Transcriptional regulatory element prediction (RNA-cis)
+
+
+ Cis-regulatory elements (cis-elements) regulate the expression of genes located on the same strand from which the element was transcribed. Cis-elements are found in the 5' promoter region of the gene, in an intron, or in the 3' untranslated region. Cis-elements are often binding sites of one or more trans-acting factors. They also occur in RNA sequences, e.g. a riboswitch is a region of an mRNA molecule that binds a small target molecule that regulates the gene's activity.
+ cis-regulatory element prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Identify, predict or analyse cis-regulatory elements (for example riboswitches) in RNA sequences.
+
+
+ Transcriptional regulatory element prediction (RNA-cis)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict functional RNA sequences with a gene regulatory role (trans-regulatory elements) or targets.
+ Functional RNA identification
+ Transcriptional regulatory element prediction (trans)
+
+
+ Trans-regulatory elements regulate genes distant from the gene from which they were transcribed.
+ trans-regulatory element prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify matrix/scaffold attachment regions (MARs/SARs) in DNA sequences.
+ MAR/SAR prediction
+ Matrix/scaffold attachment site prediction
+
+
+ MAR/SAR sites often flank a gene or gene cluster and are found nearby cis-regulatory sequences. They might contribute to transcription regulation.
+ S/MAR prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict transcription factor binding sites in DNA sequences.
+
+
+ Transcription factor binding site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict exonic splicing enhancers (ESE) in exons.
+
+
+ An exonic splicing enhancer (ESE) is a 6-base DNA sequence motif in an exon that enhances or directs splicing of pre-mRNA or hetero-nuclear RNA (hnRNA) into mRNA.
+ Exonic splicing enhancer prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Evaluate molecular sequence alignment accuracy.
+ Sequence alignment quality evaluation
+
+
+ Evaluation might be purely sequence-based or use structural information.
+ Sequence alignment validation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse character conservation in a molecular sequence alignment, for example to derive a consensus sequence.
+ Residue conservation analysis
+
+
+ Use this concept for methods that calculate substitution rates, estimate relative site variability, identify sites with biased properties, derive a consensus sequence, or identify highly conserved or very poorly conserved sites, regions, blocks etc.
+ Sequence alignment analysis (conservation)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse correlations between sites in a molecular sequence alignment.
+
+
+ This is typically done to identify possible covarying positions and predict contacts or structural constraints in protein structures.
+ Sequence alignment analysis (site correlation)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect chimeric sequences (chimeras) from a sequence alignment.
+ Chimeric sequence detection
+
+
+ A chimera includes regions from two or more phylogenetically distinct sequences. They are usually artifacts of PCR and are thought to occur when a prematurely terminated amplicon reanneals to another DNA strand and is subsequently copied to completion in later PCR cycles.
+ Chimera detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect recombination (hotspots and coldspots) and identify recombination breakpoints in a sequence alignment.
+ Sequence alignment analysis (recombination detection)
+
+
+ Tools might use a genetic algorithm, quartet-mapping, bootscanning, graphical methods, random forest model and so on.
+ Recombination detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify insertion, deletion and duplication events from a sequence alignment.
+ Indel discovery
+ Sequence alignment analysis (indel detection)
+
+
+ Tools might use a genetic algorithm, quartet-mapping, bootscanning, graphical methods, random forest model and so on.
+ Indel detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Predict nucleosome formation potential of DNA sequences.
+
+ Nucleosome formation potential prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a thermodynamic property of DNA or DNA/RNA, such as melting temperature, enthalpy and entropy.
+
+
+ Nucleic acid thermodynamic property calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA melting profile.
+
+
+ A melting profile is used to visualise and analyse partly melted DNA conformations.
+ Nucleic acid melting profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA stitch profile.
+
+
+ A stitch profile represents the alternative conformations that partly melted DNA can adopt in a temperature range.
+ Nucleic acid stitch profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA melting curve.
+
+
+ Nucleic acid melting curve plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA probability profile.
+
+
+ Nucleic acid probability profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA temperature profile.
+
+
+ Nucleic acid temperature profile plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate curvature and flexibility / stiffness of a nucleotide sequence.
+
+
+ This includes properties such as curvature, flexibility and stiffness.
+ Nucleic acid curvature calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict microRNA sequences (miRNA) and precursors or microRNA targets / binding sites in a DNA sequence.
+ miRNA prediction
+ microRNA detection
+ microRNA target detection
+
+
+ miRNA target prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict tRNA genes in genomic sequences (tRNA).
+
+
+ tRNA gene prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assess binding specificity of putative siRNA sequence(s), for example for a functional assay, typically with respect to designing specific siRNA sequences.
+
+
+ siRNA binding specificity prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Predict secondary structure of protein sequence(s) using multiple methods to achieve better predictions.
+
+
+ Protein secondary structure prediction (integrated)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict helical secondary structure of protein sequences.
+
+
+ Protein secondary structure prediction (helices)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict turn structure (for example beta hairpin turns) of protein sequences.
+
+
+ Protein secondary structure prediction (turns)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict open coils, non-regular secondary structure and intrinsically disordered / unstructured regions of protein sequences.
+
+
+ Protein secondary structure prediction (coils)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict cysteine bonding state and disulfide bond partners in protein sequences.
+
+
+ Disulfide bond prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Not sustainable to have protein type-specific concepts.
+ 1.19
+
+ Predict G protein-coupled receptors (GPCR).
+
+
+ GPCR prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Not sustainable to have protein type-specific concepts.
+ 1.19
+
+ Analyse G-protein coupled receptor proteins (GPCRs).
+
+
+ GPCR analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict tertiary structure (backbone and side-chain conformation) of protein sequences.
+ Protein folding pathway prediction
+
+
+ This includes methods that predict the folding pathway(s) or non-native structural intermediates of a protein.
+ Protein structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict structure of DNA or RNA.
+
+
+ Methods might identify thermodynamically stable or evolutionarily conserved structures.
+ Nucleic acid structure prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict tertiary structure of protein sequence(s) without homologs of known structure.
+ de novo structure prediction
+
+
+ Ab initio structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Build a three-dimensional protein model based on known (for example homologs) structures.
+ Comparative modelling
+ Homology modelling
+ Homology structure modelling
+ Protein structure comparative modelling
+
+
+ The model might be of a whole, part or aspect of protein structure. Molecular modelling methods might use sequence-structure alignment, structural templates, molecular dynamics, energy minimisation etc.
+ Protein modelling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model the structure of a protein in complex with a small molecule or another macromolecule.
+ Docking simulation
+ Macromolecular docking
+
+
+ This includes protein-protein interactions, protein-nucleic acid, protein-ligand binding etc. Methods might predict whether the molecules are likely to bind in vivo, their conformation when bound, the strength of the interaction, possible mutations to achieve bonding and so on.
+ Molecular docking
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model protein backbone conformation.
+ Protein modelling (backbone)
+ Design optimization
+ Epitope grafting
+ Scaffold search
+ Scaffold selection
+
+
+ Methods might require a preliminary C(alpha) trace.
+ Scaffold selection, scaffold search, epitope grafting and design optimization are stages of backbone modelling done during rational vaccine design.
+ Backbone modelling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model, analyse or edit amino acid side chain conformation in protein structure, optimize side-chain packing, hydrogen bonding etc.
+ Protein modelling (side chains)
+ Antibody optimisation
+ Antigen optimisation
+ Antigen resurfacing
+ Rotamer likelihood prediction
+
+
+ Antibody optimisation is to optimize the antibody-interacting surface of the antigen (epitope). Antigen optimisation is to optimize the antigen-interacting surface of the antibody (paratope). Antigen resurfacing is to resurface the antigen by varying the sequence of non-epitope regions.
+ Methods might use a residue rotamer library.
+ This includes rotamer likelihood prediction: the prediction of rotamer likelihoods for all 20 amino acid types at each position in a protein structure, where output typically includes, for each residue position, the likelihoods for the 20 amino acid types with estimated reliability of the 20 likelihoods.
+ Side chain modelling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model loop conformation in protein structures.
+ Protein loop modelling
+ Protein modelling (loops)
+
+
+ Loop modelling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model protein-ligand (for example protein-peptide) binding using comparative modelling or other techniques.
+ Ligand-binding simulation
+ Protein-peptide docking
+
+
+ Methods aim to predict the position and orientation of a ligand bound to a protein receptor or enzyme.
+ Virtual screening is used in drug discovery to search libraries of small molecules in order to identify those molecules which are most likely to bind to a drug target (typically a protein receptor or enzyme).
+ Protein-ligand docking
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict or optimise RNA sequences (sequence pools) with likely secondary and tertiary structure for in vitro selection.
+ Nucleic acid folding family identification
+ Structured RNA prediction and optimisation
+
+
+ RNA inverse folding
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find single nucleotide polymorphisms (SNPs) - single nucleotide change in base positions - between sequences. Typically done for sequences from a high-throughput sequencing experiment that differ from a reference genome and which might, especially by reference to population frequency or functional data, indicate a polymorphism.
+ SNP calling
+ SNP discovery
+ Single nucleotide polymorphism detection
+
+
+ This includes functional SNPs for large-scale genotyping purposes, disease-associated non-synonymous SNPs etc.
+ SNP detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a physical (radiation hybrid) map of genetic markers in a DNA sequence using provided radiation hybrid (RH) scores for one or more markers.
+
+
+ Radiation Hybrid Mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Map the genetic architecture of dynamic complex traits.
+
+ This can involve characterisation of the underlying quantitative trait loci (QTLs) or nucleotides (QTNs).
+ Functional mapping
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Infer haplotypes, either alleles at multiple loci that are transmitted together on the same chromosome, or a set of single nucleotide polymorphisms (SNPs) on a single chromatid that are statistically associated.
+ Haplotype inference
+ Haplotype map generation
+ Haplotype reconstruction
+
+
+ Haplotype inference can help in population genetic studies and the identification of complex disease genes, and is typically based on aligned single nucleotide polymorphism (SNP) fragments. Haplotype comparison is a useful way to characterize the genetic variation between individuals. An individual's haplotype describes which nucleotide base occurs at each position for a set of common SNPs. Tools might use combinatorial functions (for example parsimony) or a likelihood function or model with optimisation such as minimum error correction (MEC) model, expectation-maximisation algorithm (EM), genetic algorithm or Markov chain Monte Carlo (MCMC).
+ Haplotype mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate linkage disequilibrium; the non-random association of alleles or polymorphisms at two or more loci (not necessarily on the same chromosome).
+
+
+ Linkage disequilibrium is identified where a combination of alleles (or genetic markers) occurs more or less frequently in a population than expected by chance formation of haplotypes.
+ Linkage disequilibrium calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict genetic code from analysis of codon usage data.
+
+
+ Genetic code prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a representation of a distribution that consists of a group of data points plotted on a simple scale.
+ Categorical plot plotting
+ Dotplot plotting
+
+
+ Dot plots are useful when there are not too many (e.g. 20) data points for each category. Example: draw a dotplot of sequence similarities identified from word-matching or character comparison.
+ Dot plot plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align exactly two molecular sequences.
+ Pairwise alignment
+
+
+ Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Pairwise sequence alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align more than two molecular sequences.
+ Multiple alignment
+
+
+ This includes methods that use an existing alignment, for example to incorporate sequences into an alignment, or combine several multiple alignments into a single, improved alignment.
+ Multiple sequence alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Locally align exactly two molecular sequences.
+
+ Local alignment methods identify regions of local similarity.
+ Pairwise sequence alignment generation (local)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Globally align exactly two molecular sequences.
+
+ Global alignment methods identify similarity across the entire length of the sequences.
+ Pairwise sequence alignment generation (global)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Locally align two or more molecular sequences.
+ Local sequence alignment
+ Sequence alignment (local)
+ Smith-Waterman
+
+
+ Local alignment methods identify regions of local similarity.
+ Local alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Globally align two or more molecular sequences.
+ Global sequence alignment
+ Sequence alignment (global)
+
+
+ Global alignment methods identify similarity across the entire length of the sequences.
+ Global alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align two or more molecular sequences with user-defined constraints.
+
+
+ Constrained sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+ Align two or more molecular sequences using multiple methods to achieve higher quality.
+
+
+ Consensus-based sequence alignment
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align multiple sequences using relative gap costs calculated from neighbors in a supplied phylogenetic tree.
+ Multiple sequence alignment (phylogenetic tree-based)
+ Multiple sequence alignment construction (phylogenetic tree-based)
+ Phylogenetic tree-based multiple sequence alignment construction
+ Sequence alignment (phylogenetic tree-based)
+ Sequence alignment generation (phylogenetic tree-based)
+
+
+ This is supposed to give a more biologically meaningful alignment than standard alignments.
+ Tree-based sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Align molecular secondary structure (represented as a 1D string).
+
+ Secondary structure alignment generation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Align protein secondary structures.
+
+
+ Protein secondary structure alignment generation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align RNA secondary structures.
+ RNA secondary structure alignment construction
+ RNA secondary structure alignment generation
+ Secondary structure alignment construction (RNA)
+
+
+ RNA secondary structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (superimpose) exactly two molecular tertiary structures.
+ Structure alignment (pairwise)
+ Pairwise protein structure alignment
+
+
+ Pairwise structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (superimpose) more than two molecular tertiary structures.
+ Structure alignment (multiple)
+ Multiple protein structure alignment
+
+
+ This includes methods that use an existing alignment.
+ Multiple structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Align protein tertiary structures.
+
+ Structure alignment (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Align RNA tertiary structures.
+
+ Structure alignment (RNA)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Locally align (superimpose) exactly two molecular tertiary structures.
+
+ Local alignment methods identify regions of local similarity, common substructures etc.
+ Pairwise structure alignment generation (local)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Globally align (superimpose) exactly two molecular tertiary structures.
+
+ Global alignment methods identify similarity across the entire structures.
+ Pairwise structure alignment generation (global)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Locally align (superimpose) two or more molecular tertiary structures.
+ Structure alignment (local)
+ Local protein structure alignment
+
+
+ Local alignment methods identify regions of local similarity, common substructures etc.
+ Local structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Globally align (superimpose) two or more molecular tertiary structures.
+ Structure alignment (global)
+ Global protein structure alignment
+
+
+ Global alignment methods identify similarity across the entire structures.
+ Global structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+
+ Align exactly two molecular profiles.
+
+ Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Profile-profile alignment (pairwise)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Align two or more molecular profiles.
+
+ Sequence alignment generation (multiple profile)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+
+
+ Align exactly two molecular structural (3D) profiles.
+
+ 3D profile-to-3D profile alignment (pairwise)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+
+ Align two or more molecular 3D profiles.
+
+ Structural profile alignment generation (multiple)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search and retrieve names of or documentation on bioinformatics tools, for example by keyword or which perform a particular function.
+
+ Data retrieval (tool metadata)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search and retrieve names of or documentation on bioinformatics databases or query terms, for example by keyword.
+
+ Data retrieval (database metadata)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for large scale sequencing.
+
+
+ PCR primer design (for large scale sequencing)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for genotyping polymorphisms, for example single nucleotide polymorphisms (SNPs).
+
+
+ PCR primer design (for genotyping polymorphisms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for gene transcription profiling.
+
+
+ PCR primer design (for gene transcription profiling)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers that are conserved across multiple genomes or species.
+
+
+ PCR primer design (for conserved primers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers based on gene structure.
+
+
+ PCR primer design (based on gene structure)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for methylation PCRs.
+
+
+ PCR primer design (for methylation PCRs)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence assembly by combining fragments using an existing backbone sequence, typically a reference genome.
+ Sequence assembly (mapping assembly)
+
+
+ The final sequence will resemble the backbone sequence. Mapping assemblers are usually much faster and less memory intensive than de-novo assemblers.
+ Mapping assembly
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence assembly by combining fragments without the aid of a reference sequence or genome.
+ De Bruijn graph
+ Sequence assembly (de-novo assembly)
+
+
+ De-novo assemblers are much slower and more memory intensive than mapping assemblers.
+ De-novo assembly
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The process of assembling many short DNA sequences together such that they represent the original chromosomes from which the DNA originated.
+ Genomic assembly
+ Sequence assembly (genome assembly)
+ Breakend assembly
+
+
+ Genome assembly
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence assembly for EST sequences (transcribed mRNA).
+ Sequence assembly (EST assembly)
+
+
+ Assemblers must handle (or be complicated by) alternative splicing, trans-splicing, single-nucleotide polymorphism (SNP), recoding, and post-transcriptional modification.
+ EST assembly
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Make sequence tag to gene assignments (tag mapping) of SAGE, MPSS and SBS data.
+ Tag to gene assignment
+
+
+ Sequence tag mapping assigns experimentally obtained sequence tags to known transcripts or annotate potential virtual sequence tags in a genome.
+ Sequence tag mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) serial analysis of gene expression (SAGE) data.
+
+ SAGE data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) massively parallel signature sequencing (MPSS) data.
+
+ MPSS data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) sequencing by synthesis (SBS) data.
+
+ SBS data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a heat map of expression data from e.g. microarray data.
+ Heat map construction
+ Heatmap generation
+
+
+ The heat map usually uses a coloring scheme to represent expression values. They can show how quantitative measurements were influenced by experimental conditions.
+ Heat map generation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Analyse one or more gene expression profiles, typically to interpret them in functional terms.
+
+ Gene expression profile analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Map an expression profile to known biological pathways, for example, to identify or reconstruct a pathway.
+ Pathway mapping
+ Gene expression profile pathway mapping
+ Gene to pathway mapping
+ Gene-to-pathway mapping
+
+
+ Expression profile pathway mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Assign secondary structure from protein coordinate data.
+
+
+ Protein secondary structure assignment (from coordinate data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Assign secondary structure from circular dichroism (CD) spectroscopic data.
+
+
+ Protein secondary structure assignment (from CD data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ Assign a protein tertiary structure (3D coordinates) from raw X-ray crystallography data.
+
+
+ Protein structure assignment (from X-ray crystallographic data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ Assign a protein tertiary structure (3D coordinates) from raw NMR spectroscopy data.
+
+
+ Protein structure assignment (from NMR data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Construct a phylogenetic tree from a specific type of data.
+ Phylogenetic tree construction (data centric)
+ Phylogenetic tree generation (data centric)
+
+
+ Subconcepts of this concept reflect different types of data used to generate a tree, and provide an alternate axis for curation.
+ Phylogenetic inference (data centric)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Construct a phylogenetic tree using a specific method.
+ Phylogenetic tree construction (method centric)
+ Phylogenetic tree generation (method centric)
+
+
+ Subconcepts of this concept reflect different computational methods used to generate a tree, and provide an alternate axis for curation.
+ Phylogenetic inference (method centric)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from molecular sequences.
+ Phylogenetic tree construction (from molecular sequences)
+ Phylogenetic tree generation (from molecular sequences)
+
+
+ Methods typically compare multiple molecular sequences and estimate evolutionary distances and relationships to infer gene families or make functional predictions.
+ Phylogenetic inference (from molecular sequences)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from continuous quantitative character data.
+ Phylogenetic tree construction (from continuous quantitative characters)
+ Phylogenetic tree generation (from continuous quantitative characters)
+
+
+ Phylogenetic inference (from continuous quantitative characters)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from gene frequency data.
+ Phylogenetic tree construction (from gene frequencies)
+ Phylogenetic tree generation (from gene frequencies)
+
+
+ Phylogenetic inference (from gene frequencies)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from polymorphism data including microsatellites, RFLP (restriction fragment length polymorphisms), RAPD (random-amplified polymorphic DNA) and AFLP (amplified fragment length polymorphisms) data.
+ Phylogenetic tree construction (from polymorphism data)
+ Phylogenetic tree generation (from polymorphism data)
+
+
+ Phylogenetic inference (from polymorphism data)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic species tree, for example, from a genome-wide sequence comparison.
+ Phylogenetic species tree construction
+ Phylogenetic species tree generation
+
+
+ Species tree construction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by computing a sequence alignment and searching for the tree with the fewest number of character-state changes from the alignment.
+ Phylogenetic tree construction (parsimony methods)
+ Phylogenetic tree generation (parsimony methods)
+
+
+ This includes evolutionary parsimony (invariants) methods.
+ Phylogenetic inference (parsimony methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by computing (or using precomputed) distances between sequences and searching for the tree with minimal discrepancies between pairwise distances.
+ Phylogenetic tree construction (minimum distance methods)
+ Phylogenetic tree generation (minimum distance methods)
+
+
+ This includes neighbor joining (NJ) clustering method.
+ Phylogenetic inference (minimum distance methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by relating sequence data to a hypothetical tree topology using a model of sequence evolution.
+ Phylogenetic tree construction (maximum likelihood and Bayesian methods)
+ Phylogenetic tree generation (maximum likelihood and Bayesian methods)
+
+
+ Maximum likelihood methods search for a tree that maximizes a likelihood function, i.e. that is most likely given the data and model. Bayesian analysis estimates the probability of a tree for branch lengths and topology, typically using a Monte Carlo algorithm.
+ Phylogenetic inference (maximum likelihood and Bayesian methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by computing four-taxon trees (4-trees) and searching for the phylogeny that matches most closely.
+ Phylogenetic tree construction (quartet methods)
+ Phylogenetic tree generation (quartet methods)
+
+
+ Phylogenetic inference (quartet methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by using artificial-intelligence methods, for example genetic algorithms.
+ Phylogenetic tree construction (AI methods)
+ Phylogenetic tree generation (AI methods)
+
+
+ Phylogenetic inference (AI methods)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify a plausible model of DNA substitution that explains a molecular (DNA or protein) sequence alignment.
+ Nucleotide substitution modelling
+
+
+ DNA substitution modelling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the shape (topology) of a phylogenetic tree.
+ Phylogenetic tree analysis (shape)
+
+
+ Phylogenetic tree topology analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Apply bootstrapping or other measures to estimate confidence of a phylogenetic tree.
+
+
+ Phylogenetic tree bootstrapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a "gene tree" which represents the evolutionary history of the genes included in the study. This can be used to predict families of genes and gene function based on their position in a phylogenetic tree.
+ Phylogenetic tree analysis (gene family prediction)
+
+
+ Gene trees can provide evidence for gene duplication events, as well as speciation events. Where sequences from different homologs are included in a gene tree, subsequent clustering of the orthologs can demonstrate evolutionary history of the orthologs.
+ Gene tree construction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a phylogenetic tree to identify allele frequency distribution and change that is subject to evolutionary pressures (natural selection, genetic drift, mutation and gene flow). Identify type of natural selection (such as stabilizing, balancing or disruptive).
+ Phylogenetic tree analysis (natural selection)
+
+
+ Stabilizing/purifying (directional) selection favors a single phenotype and tends to decrease genetic diversity as a population stabilizes on a particular trait, selecting out trait extremes or deleterious mutations. In contrast, balancing selection maintains genetic polymorphisms (or multiple alleles), whereas disruptive (or diversifying) selection favors individuals at both extremes of a trait.
+ Allele frequency distribution analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees to produce a consensus tree.
+ Phylogenetic tree construction (consensus)
+ Phylogenetic tree generation (consensus)
+
+
+ Methods typically test for topological similarity between trees using for example a congruence index.
+ Consensus tree construction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees to detect subtrees or supertrees.
+ Phylogenetic sub/super tree detection
+ Subtree construction
+ Supertree construction
+
+
+ Phylogenetic sub/super tree construction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees to calculate distances between trees.
+
+
+ Phylogenetic tree distances calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotate a phylogenetic tree with terms from a controlled vocabulary.
+
+
+ Phylogenetic tree annotation
+ http://www.evolutionaryontology.org/cdao.owl#CDAOAnnotation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Predict and optimise peptide ligands that elicit an immunological response.
+
+
+ Immunogenicity prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict or optimise DNA to elicit (via DNA vaccination) an immunological response.
+
+
+ DNA vaccine design
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Reformat (a file or other report of) molecular sequence(s).
+
+
+ Sequence formatting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Reformat (a file or other report of) molecular sequence alignment(s).
+
+
+ Sequence alignment formatting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Reformat a codon usage table.
+
+
+ Codon usage table formatting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise, format or render a molecular sequence or sequences such as a sequence alignment, possibly with sequence features or properties shown.
+ Sequence rendering
+ Sequence alignment visualisation
+
+
+ Sequence visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.15
+
+ Visualise, format or print a molecular sequence alignment.
+
+
+ Sequence alignment visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise, format or render sequence clusters.
+ Sequence cluster rendering
+
+
+ Sequence cluster visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render or visualise a phylogenetic tree.
+ Phylogenetic tree rendering
+
+
+ Phylogenetic tree visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.15
+
+ Visualise RNA secondary structure, knots, pseudoknots etc.
+
+
+ RNA secondary structure visualisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.15
+
+ Render and visualise protein secondary structure.
+
+
+ Protein secondary structure visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise or render molecular 3D structure, for example a high-quality static picture or animation.
+ Structure rendering
+ Protein secondary structure visualisation
+ RNA secondary structure visualisation
+
+
+ This includes visualisation of protein secondary structure such as knots, pseudoknots etc. as well as tertiary and quaternary structure.
+ Structure visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise microarray or other expression data.
+ Expression data rendering
+ Gene expression data visualisation
+ Microarray data rendering
+
+
+ Expression data visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Identify and analyse networks of protein interactions.
+
+
+ Protein interaction network visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Draw or visualise a DNA map.
+ DNA map drawing
+ Map rendering
+
+
+ Map drawing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Render a sequence with motifs.
+
+ Sequence motif rendering
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Draw or visualise restriction maps in DNA sequences.
+
+
+ Restriction map drawing
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Draw a linear map of DNA.
+
+ DNA linear map rendering
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA circular map rendering
+ Draw a circular map of DNA, for example a plasmid map.
+
+
+ Plasmid map drawing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise operon structure etc.
+ Operon rendering
+
+
+ Operon drawing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identify folding families of related RNAs.
+
+ Nucleic acid folding family identification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+ Compute energies of nucleic acid folding, e.g. minimum folding energies for DNA or RNA sequences or energy landscape of RNA mutants.
+
+
+ Nucleic acid folding energy calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Retrieve existing annotation (or documentation), typically annotation on a database entity.
+
+ Use this concept for tools which retrieve pre-existing annotations, not for example prediction methods that might make annotations.
+ Annotation retrieval
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the biological or biochemical role of a protein, or other aspects of a protein function.
+ Protein function analysis
+ Protein functional analysis
+
+
+ For functional properties that can be mapped to a sequence, use 'Sequence feature detection (protein)' instead.
+ Protein function prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the functional properties of two or more proteins.
+
+
+ Protein function comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Submit a molecular sequence to a database.
+
+ Sequence submission
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a known network of gene regulation.
+ Gene regulatory network comparison
+ Gene regulatory network modelling
+ Regulatory network comparison
+ Regulatory network modelling
+
+
+ Gene regulatory network analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:UploadPDB
+ Parse, prepare or load a user-specified data file so that it is available for use.
+ Data loading
+ Loading
+
+
+ Parsing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a sequence data resource (typically a database) and retrieve sequences and / or annotation.
+
+ This includes direct retrieval methods (e.g. the dbfetch program) but not those that perform calculations on the sequence.
+ Sequence retrieval
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ WHATIF:DownloadPDB
+ WHATIF:EchoPDB
+ Query a tertiary structure data resource (typically a database) and retrieve structures, structure-related data and annotation.
+
+ This includes direct retrieval methods but not those that perform calculations on the sequence or structure.
+ Structure retrieval
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:GetSurfaceDots
+ Calculate the positions of dots that are homogeneously distributed over the surface of a molecule.
+
+
+ A dot has three coordinates (x,y,z) and (typically) a color.
+ Surface rendering
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible surface') for each atom in a structure.
+
+
+ Waters are not considered.
+ Protein atom surface calculation (accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible molecular surface') for each atom in a structure.
+
+
+ Waters are not considered.
+ Protein atom surface calculation (accessible molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible surface') for each residue in a structure.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('vacuum accessible surface') for each residue in a structure. This is the accessibility of the residue when taken out of the protein together with the backbone atoms of any residue it is covalently bound to.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (vacuum accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible molecular surface') for each residue in a structure.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (accessible molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('vacuum molecular surface') for each residue in a structure. This is the accessibility of the residue when taken out of the protein together with the backbone atoms of any residue it is covalently bound to.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (vacuum molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible molecular surface') for a structure as a whole.
+
+
+ Protein surface calculation (accessible molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible surface') for a structure as a whole.
+
+
+ Protein surface calculation (accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate for each residue in a protein structure all its backbone torsion angles.
+
+
+ Backbone torsion angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate for each residue in a protein structure all its torsion angles.
+
+
+ Full torsion angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate for each cysteine (bridge) all its torsion angles.
+
+
+ Cysteine torsion angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ For each amino acid in a protein structure calculate the backbone angle tau.
+
+
+ Tau is the backbone angle N-Calpha-C (angle over the C-alpha).
+ Tau angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowCysteineBridge
+ Detect cysteine bridges (from coordinate data) in a protein structure.
+
+
+ Cysteine bridge detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowCysteineFree
+ Detect free cysteines in a protein structure.
+
+
+ A free cysteine is neither involved in a cysteine bridge, nor functions as a ligand to a metal.
+ Free cysteine detection
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowCysteineMetal
+ Detect cysteines that are bound to metal in a protein structure.
+
+
+ Metal-bound cysteine detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate protein residue contacts with nucleic acids in a structure.
+
+
+ Residue contact calculation (residue-nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate protein residue contacts with metal in a structure.
+ Residue-metal contact calculation
+
+
+ Protein-metal contact calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate ion contacts in a structure (all ions for all side chain atoms).
+
+
+ Residue contact calculation (residue-negative ion)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowBumps
+ Detect 'bumps' between residues in a structure, i.e. those with pairs of atoms whose Van der Waals' radii interpenetrate more than a defined distance.
+
+
+ Residue bump detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF:SymmetryContact
+ Calculate the number of symmetry contacts made by residues in a protein structure.
+
+
+ A symmetry contact is a contact between two atoms in different asymmetric units.
+ Residue symmetry contact calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate contacts between residues and ligands in a protein structure.
+
+
+ Residue contact calculation (residue-ligand)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:HasSaltBridge
+ WHATIF:HasSaltBridgePlus
+ WHATIF:ShowSaltBridges
+ WHATIF:ShowSaltBridgesH
+ Calculate (and possibly score) salt bridges in a protein structure.
+
+
+ Salt bridges are interactions between oppositely charged atoms in different residues. The output might include the inter-atomic distance.
+ Salt bridge calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF:ShowLikelyRotamers
+ WHATIF:ShowLikelyRotamers100
+ WHATIF:ShowLikelyRotamers200
+ WHATIF:ShowLikelyRotamers300
+ WHATIF:ShowLikelyRotamers400
+ WHATIF:ShowLikelyRotamers500
+ WHATIF:ShowLikelyRotamers600
+ WHATIF:ShowLikelyRotamers700
+ WHATIF:ShowLikelyRotamers800
+ WHATIF:ShowLikelyRotamers900
+ Predict rotamer likelihoods for all 20 amino acid types at each position in a protein structure.
+
+
+ Output typically includes, for each residue position, the likelihoods for the 20 amino acid types with estimated reliability of the 20 likelihoods.
+ Rotamer likelihood prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF:ProlineMutationValue
+ Calculate for each position in a protein structure the chance that a proline, when introduced at this position, would increase the stability of the whole protein.
+
+
+ Proline mutation value calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: PackingQuality
+ Identify poorly packed residues in protein structures.
+
+
+ Residue packing validation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: ImproperQualityMax
+ WHATIF: ImproperQualitySum
+ Validate protein geometry, for example bond lengths, bond angles, torsion angles, chiralities, planarities etc. An example is validation of a Ramachandran plot of a protein structure.
+ Ramachandran plot validation
+
+
+ Protein geometry validation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ WHATIF: PDB_sequence
+ Extract a molecular sequence from a PDB file.
+
+
+ PDB file sequence retrieval
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify HET groups in PDB files.
+
+
+ A HET group usually corresponds to ligands, lipids, but might also (not consistently) include groups that are attached to amino acids. Each HET group is supposed to have a unique three letter code and a unique name which might be given in the output.
+ HET group detection
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Determine for residue the DSSP determined secondary structure in three-state (HSC).
+
+ DSSP secondary structure assignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF: PDBasXML
+ Reformat (a file or other report of) tertiary structure data.
+
+
+ Structure formatting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign cysteine bonding state and disulfide bond partners in protein structures.
+
+
+ Protein cysteine and disulfide bond assignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify poor quality amino acid positions in protein structures.
+
+
+ Residue validation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ WHATIF:MovedWaterPDB
+ Query a tertiary structure database and retrieve water molecules.
+
+ Structure retrieval (water)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict siRNA duplexes in RNA.
+
+
+ siRNA duplex prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Refine an existing sequence alignment.
+
+
+ Sequence alignment refinement
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process an EMBOSS listfile (list of EMBOSS Uniform Sequence Addresses).
+
+ Listfile processing
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform basic (non-analytical) operations on a report or file of sequences (which might include features), such as file concatenation, removal or ordering of sequences, creation of subset or a new file of sequences.
+
+
+ Sequence file editing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Perform basic (non-analytical) operations on a sequence alignment file, such as copying or removal and ordering of sequences.
+
+ Sequence alignment file processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) physicochemical property data for small molecules.
+
+ Small molecule data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Search and retrieve documentation on a bioinformatics ontology.
+
+ Data retrieval (ontology annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query an ontology and retrieve concepts or relations.
+
+ Data retrieval (ontology concept)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify a representative sequence from a set of sequences, typically using scores from pair-wise alignment or other comparison of the sequences.
+
+
+ Representative sequence identification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Perform basic (non-analytical) operations on a file of molecular tertiary structural data.
+
+ Structure file processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query a profile data resource and retrieve one or more profile(s) and / or associated annotation.
+
+ This includes direct retrieval methods that retrieve a profile by, e.g. the profile name.
+ Data retrieval (sequence profile)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform a statistical data operation of some type, e.g. calibration or validation.
+ Significance testing
+ Statistical analysis
+ Statistical test
+ Statistical testing
+ Expectation maximisation
+ Gibbs sampling
+ Hypothesis testing
+ Omnibus test
+
+
+ Statistical calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a 3D-1D scoring matrix from analysis of protein sequence and structural data.
+ 3D-1D scoring matrix construction
+
+
+ A 3D-1D scoring matrix scores the probability of amino acids occurring in different structural environments.
+ 3D-1D scoring matrix generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise transmembrane proteins, typically the transmembrane regions within a sequence.
+ Transmembrane protein rendering
+
+
+ Transmembrane protein visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ An operation performing purely illustrative (pedagogical) purposes.
+
+ Demonstration
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query a biological pathways database and retrieve annotation on one or more pathways.
+
+ Data retrieval (pathway or network)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query a database and retrieve one or more data identifiers.
+
+ Data retrieval (identifier)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a density plot (of base composition) for a nucleotide sequence.
+
+
+ Nucleic acid density plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse one or more known molecular sequences.
+ Sequence analysis (general)
+
+
+ Sequence analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse molecular sequence motifs.
+ Sequence motif processing
+
+
+ Sequence motif analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) protein interaction data.
+
+ Protein interaction data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse protein structural data.
+ Structure analysis (protein)
+
+
+ Protein structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) annotation of some type, typically annotation on an entry from a biological or biomedical database entity.
+
+ Annotation processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse features in molecular sequences.
+
+ Sequence feature analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Basic (non-analytical) operations of some data, either a file or equivalent entity in memory, such that the same basic type of data is consumed as input and generated as output.
+ File handling
+ File processing
+ Report handling
+ Utility operation
+ Processing
+
+
+ Data handling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse gene expression and regulation data.
+
+ Gene expression analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) one or more structural (3D) profile(s) or template(s) of some type.
+
+ Structural profile processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) an index of (typically a file of) biological data.
+
+ Data index processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) some type of sequence profile.
+
+ Sequence profile processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Analyse protein function, typically by processing protein sequence and/or structural data, and generate an informative report.
+
+
+ Protein function analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse, simulate or predict protein folding, typically by processing sequence and / or structural data. For example, predict sites of nucleation or stabilisation key to protein folding.
+ Protein folding modelling
+ Protein folding simulation
+ Protein folding site prediction
+
+
+ Protein folding analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse protein secondary structure data.
+ Secondary structure analysis (protein)
+
+
+ Protein secondary structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) data on the physicochemical property of a molecule.
+
+ Physicochemical property data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict oligonucleotide primers or probes.
+ Primer and probe prediction
+
+
+ Primer and probe design
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Process (read and / or write) data of a specific type, for example applying analytical methods.
+
+
+ Operation (typed)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a database (or other data resource) with a supplied query and retrieve entries (or parts of entries) that are similar to the query.
+ Search
+
+
+ Typically the query is compared to each entry and high scoring matches (hits) are returned. For example, a BLAST search of a sequence database.
+ Database search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Retrieve an entry (or part of an entry) from a data resource that matches a supplied query. This might include some primary data and annotation. The query is a data identifier or other indexed term. For example, retrieve a sequence record with the specified accession number, or matching supplied keywords.
+ Data extraction
+ Retrieval
+ Data retrieval (metadata)
+ Metadata retrieval
+
+
+ Data retrieval
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Predict, recognise, detect or identify some properties of a biomolecule.
+ Detection
+ Prediction
+ Recognition
+
+
+ Prediction and recognition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Compare two or more things to identify similarities.
+
+
+ Comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Refine or optimise some data model.
+
+
+ Optimisation and refinement
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Model or simulate some biological entity or system, typically using mathematical techniques including dynamical systems, statistical models, differential equations, and game theoretic models.
+ Mathematical modelling
+
+
+ Modelling and simulation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Perform basic operations on some data or a database.
+
+
+ Data handling
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Validate some data.
+ Quality control
+
+
+ Validation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Map properties to positions on a biological entity (typically a molecular sequence or structure), or assemble such an entity from constituent parts.
+ Cartography
+
+
+ Mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Design a biological entity (typically a molecular sequence or structure) with specific properties.
+
+
+ Design
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) microarray data.
+
+ Microarray data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Process (read and / or write) a codon usage table.
+
+ Codon usage table processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve a codon usage table and / or associated annotation.
+
+ Data retrieval (codon usage table)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a gene expression profile.
+
+ Gene expression profile processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gene set testing
+ Identify classes of genes or proteins that are over or under-represented in a large set of genes or proteins. For example, analysis of a set of genes corresponding to a gene expression profile, annotated with Gene Ontology (GO) concepts, where eventual over-/under-representation of certain GO concepts within the studied set of genes is revealed.
+ Functional enrichment analysis
+ GSEA
+ Gene-set over-representation analysis
+ Gene set analysis
+ GO-term enrichment
+ Gene Ontology concept enrichment
+ Gene Ontology term enrichment
+
+
+ "Gene set analysis" (often used interchangeably or in an overlapping sense with "gene-set enrichment analysis") refers to the functional analysis (term enrichment) of a differentially expressed set of genes, rather than all genes analysed.
+ Analyse gene expression patterns to identify sets of genes that are associated with a specific trait, condition, clinical outcome etc.
+ Gene sets can be defined beforehand by biological function, chromosome locations and so on.
+ The Gene Ontology (GO) is typically used, the input is a set of Gene IDs, and the output of the analysis is typically a ranked list of GO concepts, each associated with a p-value.
+ Gene-set enrichment analysis
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict a network of gene regulation.
+
+
+ Gene regulatory network prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Generate, analyse or handle a biological pathway or network.
+
+ Pathway or network processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Process (read and / or write) RNA secondary structure data.
+
+
+ RNA secondary structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ Process (read and / or write) RNA tertiary structure data.
+
+
+ Structure processing (RNA)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict RNA tertiary structure.
+
+
+ RNA structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict DNA tertiary structure.
+
+
+ DNA structure prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Generate, process or analyse phylogenetic tree or trees.
+
+
+ Phylogenetic tree processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) protein secondary structure data.
+
+ Protein secondary structure processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a network of protein interactions.
+
+ Protein interaction network processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) one or more molecular sequences and associated annotation.
+
+ Sequence processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Process (read and / or write) a protein sequence and associated annotation.
+
+
+ Sequence processing (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a nucleotide sequence and associated annotation.
+
+ Sequence processing (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more molecular sequences.
+
+
+ Sequence comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a sequence cluster.
+
+ Sequence cluster processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a sequence feature table.
+
+ Feature table processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect, predict and identify genes or components of genes in DNA sequences, including promoters, coding regions, splice sites, etc.
+ Gene calling
+ Gene finding
+ Whole gene prediction
+
+
+ Includes methods that predict whole gene structure using a combination of multiple methods to achieve better predictions.
+ Methods for gene prediction might be ab initio, based on phylogenetic comparisons, use motifs, sequence features, support vector machine, alignment etc.
+ Gene prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+ Classify G-protein coupled receptors (GPCRs) into families and subfamilies.
+
+
+ GPCR classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Not sustainable to have protein type-specific concepts.
+ 1.19
+
+
+ Predict G-protein coupled receptor (GPCR) coupling selectivity.
+
+ GPCR coupling selectivity prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Process (read and / or write) a protein tertiary structure.
+
+
+ Structure processing (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility for each atom in a structure.
+
+
+ Waters are not considered.
+ Protein atom surface calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility for each residue in a structure.
+
+
+ Protein residue surface calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility of a structure as a whole.
+
+
+ Protein surface calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular sequence alignment.
+
+ Sequence alignment processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict protein-protein binding sites.
+ Protein-protein binding site detection
+
+
+ Protein-protein binding site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular tertiary structure.
+
+ Structure processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Annotate a DNA map of some type with terms from a controlled vocabulary.
+
+ Map annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a protein.
+
+ Data retrieval (protein annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve a phylogenetic tree from a data resource.
+
+ Data retrieval (phylogenetic tree)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a protein interaction.
+
+ Data retrieval (protein interaction annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a protein family.
+
+ Data retrieval (protein family annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on an RNA family.
+
+ Data retrieval (RNA family annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a specific gene.
+
+ Data retrieval (gene annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a specific genotype or phenotype.
+
+ Data retrieval (genotype and phenotype annotation)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the architecture of two or more protein structures.
+
+
+ Protein architecture comparison
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify the architecture of a protein structure.
+
+
+ Includes methods that try to suggest the most likely biological unit for a given protein X-ray crystal structure based on crystal symmetry and scoring of putative protein-protein interfaces.
+ Protein architecture recognition
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The simulation of molecular (typically protein) conformation using a computational model of physical forces and computer simulation.
+ Molecular dynamics simulation
+ Protein dynamics
+
+
+ Molecular dynamics
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a nucleic acid sequence (using methods that are only applicable to nucleic acid sequences).
+ Sequence analysis (nucleic acid)
+ Nucleic acid sequence alignment analysis
+ Sequence alignment analysis (nucleic acid)
+
+
+ Nucleic acid sequence analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a protein sequence (using methods that are only applicable to protein sequences).
+ Sequence analysis (protein)
+ Protein sequence alignment analysis
+ Sequence alignment analysis (protein)
+
+
+ Protein sequence analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse known molecular tertiary structures.
+
+
+ Structure analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse nucleic acid tertiary structural data.
+
+
+ Nucleic acid structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular secondary structure.
+
+ Secondary structure processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more molecular tertiary structures.
+
+
+ Structure comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a helical wheel representation of protein secondary structure.
+ Helical wheel rendering
+
+
+ Helical wheel drawing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a topology diagram of protein secondary structure.
+ Topology diagram rendering
+
+
+ Topology diagram drawing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare protein tertiary structures.
+ Structure comparison (protein)
+
+
+ Methods might identify structural neighbors, find structural similarities or define a structural core.
+ Protein structure comparison
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare protein secondary structures.
+ Protein secondary structure
+ Secondary structure comparison (protein)
+ Protein secondary structure alignment
+
+
+ Protein secondary structure comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the subcellular localisation of a protein sequence.
+ Protein cellular localization prediction
+ Protein subcellular localisation prediction
+ Protein targeting prediction
+
+
+ The prediction might include subcellular localisation (nuclear, cytoplasmic, mitochondrial, chloroplast, plastid, membrane etc) or export (extracellular proteins) of a protein.
+ Subcellular localisation prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate contacts between residues in a protein structure.
+
+
+ Residue contact calculation (residue-residue)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify potential hydrogen bonds between amino acid residues.
+
+
+ Hydrogen bond calculation (inter-residue)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the interactions of proteins with other proteins.
+ Protein-protein interaction detection
+ Protein-protein binding prediction
+ Protein-protein interaction prediction
+
+
+ Protein interaction prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) codon usage data.
+
+ Codon usage data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Process (read and/or write) expression data from experiments measuring molecules (e.g. omics data), including analysis of one or more expression profiles, typically to interpret them in functional terms.
+ Expression data analysis
+ Gene expression analysis
+ Gene expression data analysis
+ Gene expression regulation analysis
+ Metagenomic inference
+ Microarray data analysis
+ Protein expression analysis
+
+
+ Metagenomic inference is the profiling of phylogenetic marker genes in order to predict metagenome function.
+ Expression analysis
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Process (read and / or write) a network of gene regulation.
+
+
+ Gene regulatory network processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+ Generate, process or analyse a biological pathway or network.
+
+ Pathway or network analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse SAGE, MPSS or SBS experimental data, typically to identify or quantify mRNA transcripts.
+
+ Sequencing-based expression profile data analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict, analyse, characterize or model splice sites, splicing events and so on, typically by comparing multiple nucleic acid sequences.
+ Splicing model analysis
+
+
+ Splicing analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse raw microarray data.
+
+ Microarray raw data analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+
+
+ Process (read and / or write) nucleic acid sequence or structural data.
+
+ Nucleic acid analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+
+
+ Process (read and / or write) protein sequence or structural data.
+
+ Protein analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ Process (read and / or write) molecular sequence data.
+
+
+ Sequence data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) molecular structural data.
+
+ Structural data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Process (read and / or write) text.
+
+ Text processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Analyse a protein sequence alignment, typically to detect features or make predictions.
+
+
+ Protein sequence alignment analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Analyse a nucleic acid sequence alignment, typically to detect features or make predictions.
+
+
+ Nucleic acid sequence alignment analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Compare two or more nucleic acid sequences.
+
+
+ Nucleic acid sequence comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Compare two or more protein sequences.
+
+
+ Protein sequence comparison
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Back-translate a protein sequence into DNA.
+
+
+ DNA back-translation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Edit or change a nucleic acid sequence, either randomly or specifically.
+
+
+ Sequence editing (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Edit or change a protein sequence, either randomly or specifically.
+
+
+ Sequence editing (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Generate a nucleic acid sequence by some means.
+
+
+ Sequence generation (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Generate a protein sequence by some means.
+
+
+ Sequence generation (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Visualise, format or render a nucleic acid sequence.
+
+
+ Various nucleic acid sequence analysis methods might generate a sequence rendering but are not (for brevity) listed under here.
+ Nucleic acid sequence visualisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Visualise, format or render a protein sequence.
+
+
+ Various protein sequence analysis methods might generate a sequence rendering but are not (for brevity) listed under here.
+ Protein sequence visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare nucleic acid tertiary structures.
+ Structure comparison (nucleic acid)
+
+
+ Nucleic acid structure comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) nucleic acid tertiary structure data.
+
+ Structure processing (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a map of a DNA sequence annotated with positional or non-positional features of some type.
+
+
+ DNA mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a DNA map of some type.
+
+ Map data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the hydrophobic, hydrophilic or charge properties of a protein (from analysis of sequence or structural information).
+
+
+ Protein hydropathy calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict catalytic residues, active sites or other ligand-binding sites in protein sequences or structures.
+ Protein binding site detection
+ Protein binding site prediction
+
+
+ Binding site prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Build clusters of similar structures, typically using scores from structural alignment methods.
+ Structural clustering
+
+
+ Structure clustering
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a physical DNA map (sequence map) from analysis of sequence tagged sites (STS).
+ Sequence mapping
+
+
+ An STS is a short subsequence of known sequence and location that occurs only once in the chromosome or genome that is being mapped. Sources of STSs include expressed sequence tags (ESTs), simple sequence length polymorphisms (SSLPs), and random genomic sequences from cloned genomic DNA or database sequences.
+ Sequence tagged site (STS) mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Compare two or more entities, typically the sequence or structure (or derivatives) of macromolecules, to identify equivalent subunits.
+ Alignment construction
+ Alignment generation
+
+
+ Alignment
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the molecular weight of a protein (or fragments) and compare it to another protein or reference data. Generally used for protein identification.
+ PMF
+ Peptide mass fingerprinting
+ Protein fingerprinting
+
+
+ Protein fragment weight comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the physicochemical properties of two or more proteins (or reference data).
+
+
+ Protein property comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Compare two or more molecular secondary structures.
+
+ Secondary structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Generate a Hopp and Woods plot of antigenicity of a protein.
+
+
+ Hopp and Woods plotting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Generate a view of clustered quantitative data, annotated with textual information.
+
+
+ Cluster textual view generation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise clustered quantitative data as a set of different profiles, where each profile is plotted versus different entities or samples on the X-axis.
+ Clustered quantitative data plotting
+ Clustered quantitative data rendering
+ Wave graph plotting
+ Microarray cluster temporal graph rendering
+ Microarray wave graph plotting
+ Microarray wave graph rendering
+
+
+ In the case of microarray data, visualise clustered gene expression data as a set of profiles, where each profile shows the gene expression values of a cluster across samples on the X-axis.
+ Clustering profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Generate a dendrograph of raw, preprocessed or clustered expression (e.g. microarray) data.
+
+
+ Dendrograph plotting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a plot of distances (distance or correlation matrix) between expression values.
+ Distance map rendering
+ Distance matrix plotting
+ Distance matrix rendering
+ Proximity map rendering
+ Correlation matrix plotting
+ Correlation matrix rendering
+ Microarray distance map rendering
+ Microarray proximity map plotting
+ Microarray proximity map rendering
+
+
+ Proximity map plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise clustered expression data using a tree diagram.
+ Dendrogram plotting
+ Dendrograph plotting
+ Dendrograph visualisation
+ Expression data tree or dendrogram rendering
+ Expression data tree visualisation
+ Microarray 2-way dendrogram rendering
+ Microarray checks view rendering
+ Microarray matrix tree plot rendering
+ Microarray tree or dendrogram rendering
+
+
+ Dendrogram visualisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualize the results of a principal component analysis (orthogonal data transformation). For example, visualization of the principal components (essential subspace) coming from a Principal Component Analysis (PCA) on the trajectory atomistic coordinates of a molecular structure.
+ PCA plotting
+ Principal component plotting
+ ED visualization
+ Essential Dynamics visualization
+ Microarray principal component plotting
+ Microarray principal component rendering
+ PCA visualization
+ Principal modes visualization
+
+
+ Examples for visualization are the distribution of variance over the components, loading and score plots.
+ The use of Principal Component Analysis (PCA), a multivariate statistical analysis to obtain collective variables on the atomic positional fluctuations, helps to separate the configurational space in two subspaces: an essential subspace containing relevant motions, and another one containing irrelevant local fluctuations.
+ Principal component visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a graph in which the values of two variables are plotted along two axes; the pattern of the points reveals any correlation.
+ Scatter chart plotting
+ Microarray scatter plot plotting
+ Microarray scatter plot rendering
+
+
+ Comparison of two sets of quantitative data such as two samples of gene expression values.
+ Scatter plot plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Visualise gene expression data where each band (or line graph) corresponds to a sample.
+
+
+ Whole microarray graph plotting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise gene expression data after hierarchical clustering for representing hierarchical relationships.
+ Expression data tree-map rendering
+ Treemapping
+ Microarray tree-map rendering
+
+
+ Treemap visualisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a box plot, i.e. a depiction of groups of numerical data through their quartiles.
+ Box plot plotting
+ Microarray Box-Whisker plot plotting
+
+
+ In the case of microarray data, visualise raw and pre-processed gene expression data, via a plot showing over- and under-expression along with mean, upper and lower quartiles.
+ Box-Whisker plot plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a physical (sequence) map of a DNA sequence showing the physical distance (base pairs) between features or landmarks such as restriction sites, cloned DNA fragments, genes and other genetic markers.
+ Physical cartography
+
+
+ Physical mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Apply analytical methods to existing data of a specific type.
+
+
+ This excludes non-analytical methods that read and write the same basic type of data (for that, see 'Data handling').
+ Analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Process or analyse an alignment of molecular sequences or structures.
+
+ Alignment analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+
+ Analyse a body of scientific text (typically a full text article from a scientific journal).
+
+ Article analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Analyse the interactions of two or more molecules (or parts of molecules) that are known to interact.
+
+ Molecular interaction analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the interactions of proteins with other proteins.
+ Protein interaction analysis
+ Protein interaction raw data analysis
+ Protein interaction simulation
+
+
+ Includes analysis of raw experimental protein-protein interaction data from for example yeast two-hybrid analysis, protein microarrays, immunoaffinity chromatography followed by mass spectrometry, phage display etc.
+ Protein-protein interaction analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: HETGroupNames
+ WHATIF:HasMetalContacts
+ WHATIF:HasMetalContactsPlus
+ WHATIF:HasNegativeIonContacts
+ WHATIF:HasNegativeIonContactsPlus
+ WHATIF:HasNucleicContacts
+ WHATIF:ShowDrugContacts
+ WHATIF:ShowDrugContactsShort
+ WHATIF:ShowLigandContacts
+ WHATIF:ShowProteiNucleicContacts
+ Calculate contacts between residues, or between residues and other groups, in a protein structure, on the basis of distance calculations.
+ HET group detection
+ Residue contact calculation (residue-ligand)
+ Residue contact calculation (residue-metal)
+ Residue contact calculation (residue-negative ion)
+ Residue contact calculation (residue-nucleic acid)
+ WHATIF:SymmetryContact
+
+
+ This includes identifying HET groups, which usually correspond to ligands, lipids, but might also (not consistently) include groups that are attached to amino acids. Each HET group is supposed to have a unique three letter code and a unique name which might be given in the output. It can also include calculation of symmetry contacts, i.e. a contact between two atoms in different asymmetric units.
+ Residue distance calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Process (read and / or write) an alignment of two or more molecular sequences, structures or derived data.
+
+ Alignment processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular tertiary (3D) structure alignment.
+
+ Structure alignment processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate codon usage bias, e.g. generate a codon usage bias plot.
+ Codon usage bias plotting
+
+
+ Codon usage bias calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Generate a codon usage bias plot.
+
+
+ Codon usage bias plotting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the differences in codon usage fractions between two sequences, sets of sequences, codon usage tables etc.
+
+
+ Codon usage fraction calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Assign molecular sequences, structures or other biological data to a specific group or category according to qualities it shares with that group or category.
+
+
+ Classification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) molecular interaction data.
+
+ Molecular interaction data processing
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign molecular sequence(s) to a group or category.
+
+
+ Sequence classification
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign molecular structure(s) to a group or category.
+
+
+ Structure classification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more proteins (or some aspect) to identify similarities.
+
+
+ Protein comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more nucleic acids to identify similarities.
+
+
+ Nucleic acid comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict, recognise, detect or identify some properties of proteins.
+
+
+ Prediction and recognition (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict, recognise, detect or identify some properties of nucleic acids.
+
+
+ Prediction and recognition (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ Edit, convert or otherwise change a molecular tertiary structure, either randomly or specifically.
+
+
+ Structure editing
+
+
+
+
+
+
+
+
+ beta13
+ Edit, convert or otherwise change a molecular sequence alignment, either randomly or specifically.
+
+
+ Sequence alignment editing
+
+
+
+
+
+
+
+
+ beta13
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+ Render (visualise) a biological pathway or network.
+
+ Pathway or network visualisation
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.6
+
+
+ Predict general (non-positional) functional properties of a protein from analysing its sequence.
+
+ For functional properties that are positional, use 'Protein site detection' instead.
+ Protein function prediction (from sequence)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ (jison)This is a distinction made on the basis of input; all features that exist can be mapped to a sequence so this isn't needed (consolidate with "Protein feature detection").
+ 1.17
+
+
+
+ Predict, recognise and identify functional or other key sites within protein sequences, typically by scanning for known motifs, patterns and regular expressions.
+
+
+ Protein sequence feature detection
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.18
+
+
+ Calculate (or predict) physical or chemical properties of a protein, including any non-positional properties of the molecular sequence, from processing a protein sequence.
+
+
+ Protein property calculation (from sequence)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.6
+
+
+ Predict, recognise and identify positional features in proteins from analysing protein structure.
+
+ Protein feature prediction (from structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ Predict, recognise and identify positional features in proteins from analysing protein sequences or structures.
+ Protein feature prediction
+ Protein feature recognition
+ Protein secondary database search
+ Protein site detection
+ Protein site prediction
+ Protein site recognition
+ Sequence feature detection (protein)
+ Sequence profile database search
+
+
+ Features include functional sites or regions, secondary structure, structural domains and so on. Methods might use fingerprints, motifs, profiles, hidden Markov models, sequence alignment etc to provide a mapping of a query protein sequence to a discriminatory element. This includes methods that search a secondary protein database (Prosite, Blocks, ProDom, Prints, Pfam etc.) to assign a protein sequence(s) to a known protein family or group.
+ Protein feature detection
+
+
+
+
+
+
+
+
+ beta13
+ 1.6
+
+
+ Screen a molecular sequence(s) against a database (of some type) to identify similarities between the sequence and database entries.
+
+ Database search (by sequence)
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+ Predict a network of protein interactions.
+
+
+ Protein interaction network prediction
+
+
+
+
+
+
+
+
+
+ beta13
+ Design (or predict) nucleic acid sequences with specific chemical or physical properties.
+ Gene design
+
+
+ Nucleic acid design
+
+
+
+
+
+
+
+
+
+ beta13
+ Edit a data entity, either randomly or specifically.
+
+
+ Editing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Evaluate a DNA sequence assembly, typically for purposes of quality control.
+ Assembly QC
+ Assembly quality evaluation
+ Sequence assembly QC
+ Sequence assembly quality evaluation
+
+
+ Sequence assembly validation
+
+
+
+
+
+
+
+
+
+ 1.1
+ Align two or more (typically huge) molecular sequences that represent genomes.
+ Genome alignment construction
+ Whole genome alignment
+
+
+ Genome alignment
+
+
+
+
+
+
+
+
+ 1.1
+ Reconstruction of a sequence assembly in a localised area.
+
+
+ Localised reassembly
+
+
+
+
+
+
+
+
+ 1.1
+ Render and visualise a DNA sequence assembly.
+ Assembly rendering
+ Assembly visualisation
+ Sequence assembly rendering
+
+
+ Sequence assembly visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Identify base (nucleobase) sequence from fluorescence 'trace' data generated by an automated DNA sequencer.
+ Base calling
+ Phred base calling
+ Phred base-calling
+
+
+ Base-calling
+
+
+
+
+
+
+
+
+
+ 1.1
+ The mapping of methylation sites in a DNA (genome) sequence. Typically, the mapping of high-throughput bisulfite reads to the reference genome.
+ Bisulfite read mapping
+ Bisulfite sequence alignment
+ Bisulfite sequence mapping
+
+
+ Bisulfite mapping follows high-throughput sequencing of DNA which has undergone bisulfite treatment followed by PCR amplification; unmethylated cytosines are specifically converted to thymine, allowing the methylation status of cytosine in the DNA to be detected.
+ Bisulfite mapping
+
+
+
+
+
+
+
+
+ 1.1
+ Identify and filter a (typically large) sequence data set to remove sequences from contaminants in the sample that was sequenced.
+
+
+ Sequence contamination filtering
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Trim sequences (typically from an automated DNA sequencer) to remove misleading ends.
+
+
+ For example trim polyA tails, introns and primer sequence flanking the sequence of amplified exons, or other unwanted sequence.
+ Trim ends
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Trim sequences (typically from an automated DNA sequencer) to remove sequence-specific end regions, typically contamination from vector sequences.
+
+
+ Trim vector
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Trim sequences (typically from an automated DNA sequencer) to remove the sequence ends that extend beyond an assembled reference sequence.
+
+
+ Trim to reference
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ Cut (remove) the end from a molecular sequence.
+ Trimming
+ Barcode sequence removal
+ Trim ends
+ Trim to reference
+ Trim vector
+
+
+ This includes end trimming, -- Trim sequences (typically from an automated DNA sequencer) to remove misleading ends. For example trim polyA tails, introns and primer sequence flanking the sequence of amplified exons, or other unwanted sequence.-- trimming to a reference sequence, --Trim sequences (typically from an automated DNA sequencer) to remove the sequence ends that extend beyond an assembled reference sequence. -- vector trimming -- Trim sequences (typically from an automated DNA sequencer) to remove sequence-specific end regions, typically contamination from vector sequences.
+ Sequence trimming
+
+
+
+
+
+
+
+
+
+ 1.1
+ Compare the features of two genome sequences.
+
+
+ Genomic elements that might be compared include genes, indels, single nucleotide polymorphisms (SNPs), retrotransposons, tandem repeats and so on.
+ Genome feature comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Detect errors in DNA sequences generated from sequencing projects.
+ Short read error correction
+ Short-read error correction
+
+
+ Sequencing error detection
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse DNA sequence data to identify differences between the genetic composition (genotype) of an individual compared to other individuals or a reference sequence.
+
+
+ Methods might consider cytogenetic analyses, copy number polymorphism (and calculate copy number calls for copy-number variation (CNV) regions), single nucleotide polymorphism (SNP), rare copy number variation (CNV) identification, loss of heterozygosity data and so on.
+ Genotyping
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse a genetic variation, for example to annotate its location, alleles, classification, and effects on individual transcripts predicted for a gene model.
+ Genetic variation annotation
+ Sequence variation analysis
+ Variant analysis
+ Transcript variant analysis
+
+
+ Genetic variation annotation provides contextual interpretation of coding SNP consequences in transcripts. It allows comparisons to be made between variation data in different populations or strains for the same transcript.
+ Genetic variation analysis
+
+
+
+
+
+
+
+
+
+ 1.1
+ Align short oligonucleotide sequences (reads) to a larger (genomic) sequence.
+ Oligonucleotide alignment
+ Oligonucleotide alignment construction
+ Oligonucleotide alignment generation
+ Oligonucleotide mapping
+ Read alignment
+ Short oligonucleotide alignment
+ Short read alignment
+ Short read mapping
+ Short sequence read mapping
+
+
+ The purpose of read mapping is to identify the location of sequenced fragments within a reference genome and assumes that there is, in fact, at least local similarity between the fragment and reference sequences.
+ Read mapping
+
+
+
+
+
+
+
+
+ 1.1
+ A variant of oligonucleotide mapping where a read is mapped to two separate locations because of possible structural variation.
+ Split-read mapping
+
+
+ Split read mapping
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse DNA sequences in order to identify a DNA 'barcode'; marker genes or any short fragment(s) of DNA that are useful to diagnose the taxa of biological organisms.
+ Community profiling
+ Sample barcoding
+
+
+ DNA barcoding
+
+
+
+
+
+
+
+
+
+ 1.1
+ 1.19
+
+ Identify single nucleotide change in base positions in sequencing data that differ from a reference genome and which might, especially by reference to population frequency or functional data, indicate a polymorphism.
+
+
+ SNP calling
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ "Polymorphism detection" and "Variant calling" are essentially the same thing - keeping the latter as a more prevalent term nowadays.
+ 1.24
+
+
+ Detect mutations in multiple DNA sequences, for example, from the alignment and comparison of the fluorescent traces produced by DNA sequencing hardware.
+
+
+ Polymorphism detection
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ Visualise, format or render an image of a Chromatogram.
+ Chromatogram viewing
+
+
+ Chromatogram visualisation
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse cytosine methylation states in nucleic acid sequences.
+ Methylation profile analysis
+
+
+ Methylation analysis
+
+
+
+
+
+
+
+
+ 1.1
+ 1.19
+
+ Determine cytosine methylation status of specific positions in nucleic acid sequences.
+
+
+ Methylation calling
+ true
+
+
+
+
+
+
+
+
+
+ 1.1
+ Measure the overall level of methyl cytosines in a genome from analysis of experimental data, typically from chromatographic methods and methyl accepting capacity assay.
+ Genome methylation analysis
+ Global methylation analysis
+ Methylation level analysis (global)
+
+
+ Whole genome methylation analysis
+
+
+
+
+
+
+
+
+ 1.1
+ Analysing the DNA methylation of specific genes or regions of interest.
+ Gene-specific methylation analysis
+ Methylation level analysis (gene-specific)
+
+
+ Gene methylation analysis
+
+
+
+
+
+
+
+
+
+ 1.1
+ Visualise, format or render a nucleic acid sequence that is part of (and in context of) a complete genome sequence.
+ Genome browser
+ Genome browsing
+ Genome rendering
+ Genome viewing
+
+
+ Genome visualisation
+
+
+
+
+
+
+
+
+
+ 1.1
+ Compare the sequence or features of two or more genomes, for example, to find matching regions.
+ Genomic region matching
+
+
+ Genome comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Generate an index of a genome sequence.
+ Burrows-Wheeler
+ Genome indexing (Burrows-Wheeler)
+ Genome indexing (suffix arrays)
+ Suffix arrays
+
+
+ Many sequence alignment tasks involving many or very large sequences rely on a precomputed index of the sequence to accelerate the alignment. The Burrows-Wheeler Transform (BWT) is a permutation of the genome based on a suffix array algorithm. A suffix array consists of the lexicographically sorted list of suffixes of a genome.
+ Genome indexing
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Generate an index of a genome sequence using the Burrows-Wheeler algorithm.
+
+
+ The Burrows-Wheeler Transform (BWT) is a permutation of the genome based on a suffix array algorithm.
+ Genome indexing (Burrows-Wheeler)
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Generate an index of a genome sequence using a suffix arrays algorithm.
+
+
+ A suffix array consists of the lexicographically sorted list of suffixes of a genome.
+ Genome indexing (suffix arrays)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse one or more spectra from mass spectrometry (or other) experiments.
+ Mass spectrum analysis
+ Spectrum analysis
+
+
+ Spectral analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Identify peaks in a spectrum from a mass spectrometry, NMR, or some other spectrum-generating experiment.
+ Peak assignment
+ Peak finding
+
+
+ Peak detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Link together a non-contiguous series of genomic sequences into a scaffold, consisting of sequences separated by gaps of known length. The sequences that are linked are typically contigs; contiguous sequences corresponding to read overlaps.
+ Scaffold construction
+ Scaffold generation
+
+
+ Scaffold may be positioned along a chromosome physical map to create a "golden path".
+ Scaffolding
+
+
+
+
+
+
+
+
+ 1.1
+ Fill the gaps in a sequence assembly (scaffold) by merging in additional sequences.
+
+
+ Different techniques are used to generate gap sequences to connect contigs, depending on the size of the gap. For small (5-20kb) gaps, PCR amplification and sequencing is used. For large (>20kb) gaps, fragments are cloned (e.g. in BAC (Bacterial artificial chromosomes) vectors) and then sequenced.
+ Scaffold gap completion
+
+
+
+
+
+
+
+
+
+ 1.1
+ Raw sequence data quality control.
+ Sequencing QC
+ Sequencing quality assessment
+
+
+ Analyse raw sequence data from a sequencing pipeline and identify (and possibly fix) problems.
+ Sequencing quality control
+
+
+
+
+
+
+
+
+
+ 1.1
+ Pre-process sequence reads to ensure (or improve) quality and reliability.
+ Sequence read pre-processing
+
+
+ For example process paired end reads to trim low quality ends, remove short sequences, identify sequence inserts, detect chimeric reads, or remove low quality sequences including vector, adaptor, low complexity and contaminant sequences. Sequences might come from genomic DNA library, EST libraries, SSH library and so on.
+ Read pre-processing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Estimate the frequencies of different species from analysis of the molecular sequences, typically of DNA recovered from environmental samples.
+
+
+ Species frequency estimation
+
+
+
+
+
+
+
+
+ 1.1
+ Identify putative protein-binding regions in a genome sequence from analysis of Chip-sequencing data or ChIP-on-chip data.
+ Protein binding peak detection
+ Peak-pair calling
+
+
+ Chip-sequencing combines chromatin immunoprecipitation (ChIP) with massively parallel DNA sequencing to generate a set of reads, which are aligned to a genome sequence. The enriched areas contain the binding sites of DNA-associated proteins. For example, a transcription factor binding site. ChIP-on-chip in contrast combines chromatin immunoprecipitation ('ChIP') with microarray ('chip'). "Peak-pair calling" is similar to "Peak calling" in the context of ChIP-exo.
+ Peak calling
+
+
+
+
+
+
+
+
+ 1.1
+ Identify from molecular sequence analysis (typically from analysis of microarray or RNA-seq data) genes whose expression levels are significantly different between two sample groups.
+ Differential expression analysis
+ Differential gene analysis
+ Differential gene expression analysis
+ Differentially expressed gene identification
+
+
+ Differential gene expression analysis is used, for example, to identify which genes are up-regulated (increased expression) or down-regulated (decreased expression) between a group treated with a drug and a control group.
+ Differential gene expression profiling
+
+
+
+
+
+
+
+
+ 1.1
+ 1.21
+
+ Analyse gene expression patterns (typically from DNA microarray datasets) to identify sets of genes that are associated with a specific trait, condition, clinical outcome etc.
+
+
+ Gene set testing
+ true
+
+
+
+
+
+
+
+
+
+ 1.1
+ Classify variants based on their potential effect on genes, especially functional effects on the expressed proteins.
+
+
+ Variants are typically classified by their position (intronic, exonic, etc.) in a gene transcript and (for variants in coding exons) by their effect on the protein sequence (synonymous, non-synonymous, frameshifting, etc.)
+ Variant classification
+
+
+
+
+
+
+
+
+ 1.1
+ Identify biologically interesting variants by prioritizing individual variants, for example, homozygous variants absent in control genomes.
+
+
+ Variant prioritisation can be used for example to produce a list of variants responsible for 'knocking out' genes in specific genomes. Methods include amino acid substitution, aggregative approaches, probabilistic approach, inheritance and unified likelihood-frameworks.
+ Variant prioritisation
+
+
+
+
+
+
+
+
+
+ 1.1
+ Detect, identify and map mutations, such as single nucleotide polymorphisms, short indels and structural variants, in multiple DNA sequences. Typically the alignment and comparison of the fluorescent traces produced by DNA sequencing hardware, to study genomic alterations.
+ Variant mapping
+ Allele calling
+ Exome variant detection
+ Genome variant detection
+ Germ line variant calling
+ Mutation detection
+ Somatic variant calling
+ de novo mutation detection
+
+
+ Methods often utilise a database of aligned reads.
+ Somatic variant calling is the detection of variations established in somatic cells and hence not inherited as a germ line variant.
+ Variant detection
+ Variant calling
+
+
+
+
+
+
+
+
+ 1.1
+ Detect large regions in a genome subject to copy-number variation, or other structural variations in genome(s).
+ Structural variation discovery
+
+
+ Methods might involve analysis of whole-genome array comparative genome hybridisation or single-nucleotide polymorphism arrays, paired-end mapping of sequencing data, or from analysis of short reads from new sequencing technologies.
+ Structural variation detection
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse sequencing data from experiments aiming to selectively sequence the coding regions of the genome.
+ Exome sequence analysis
+
+
+ Exome assembly
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse mapping density (read depth) of (typically) short reads from sequencing platforms, for example, to detect deletions and duplications.
+
+
+ Read depth analysis
+
+
+
+
+
+
+
+
+ 1.1
+ Combine classical quantitative trait loci (QTL) analysis with gene expression profiling, for example, to describe cis- and trans-controlling elements for the expression of phenotype associated genes.
+ Gene expression QTL profiling
+ Gene expression quantitative trait loci profiling
+ eQTL profiling
+
+
+ Gene expression QTL analysis
+
+
+
+
+
+
+
+
+ 1.1
+ Estimate the number of copies of loci of particular gene(s) in DNA sequences typically from gene-expression profiling technology based on microarray hybridisation-based experiments. For example, estimate copy number (or marker dosage) of a dominant marker in samples from polyploid plant cells or tissues, or chromosomal gains and losses in tumors.
+ Transcript copy number estimation
+
+
+ Methods typically implement some statistical model for hypothesis testing, and methods estimate total copy number, i.e. do not distinguish the two inherited chromosomes quantities (specific copy number).
+ Copy number estimation
+
+
+
+
+
+
+
+
+ 1.2
+ Adapter removal
+ Remove forward and/or reverse primers from nucleic acid sequences (typically PCR products).
+
+
+ Primer removal
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ Infer a transcriptome sequence by analysis of short sequence reads.
+
+
+ Transcriptome assembly
+
+
+
+
+
+
+
+
+ 1.2
+ 1.6
+
+
+ Infer a transcriptome sequence without the aid of a reference genome, i.e. by comparing short sequences (reads) to each other.
+
+ Transcriptome assembly (de novo)
+ true
+
+
+
+
+
+
+
+
+ 1.2
+ 1.6
+
+
+ Infer a transcriptome sequence by mapping short reads to a reference genome.
+
+ Transcriptome assembly (mapping)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ Convert one set of sequence coordinates to another, e.g. convert coordinates of one assembly to another, cDNA to genomic, CDS to genomic, protein translation to genomic etc.
+
+
+ Sequence coordinate conversion
+
+
+
+
+
+
+
+
+ 1.3
+ Calculate similarity between 2 or more documents.
+
+
+ Document similarity calculation
+
+
+
+
+
+
+
+
+
+ 1.3
+ Cluster (group) documents on the basis of their calculated similarity.
+
+
+ Document clustering
+
+
+
+
+
+
+
+
+
+ 1.3
+ Recognise named entities, ontology concepts, tags, events, and dictionary terms within documents.
+ Concept mining
+ Entity chunking
+ Entity extraction
+ Entity identification
+ Event extraction
+ NER
+ Named-entity recognition
+
+
+ Named-entity and concept recognition
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ Map data identifiers to one another for example to establish a link between two biological databases for the purposes of data integration.
+ Accession mapping
+ Identifier mapping
+
+
+ The mapping can be achieved by comparing identifier values or some other means, e.g. exact matches to a provided sequence.
+ ID mapping
+
+
+
+
+
+
+
+
+ 1.3
+ Process data in such a way that makes it hard to trace to the person which the data concerns.
+ Data anonymisation
+
+
+ Anonymisation
+
+
+
+
+
+
+
+
+ 1.3
+ (jison)Too fine-grained, the operation (Data retrieval) hasn't changed, just what is retrieved.
+ 1.17
+
+ Search for and retrieve a data identifier of some kind, e.g. a database entry accession.
+
+
+ ID retrieval
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Generate a checksum of a molecular sequence.
+
+
+ Sequence checksum generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Construct a bibliography from the scientific literature.
+ Bibliography construction
+
+
+ Bibliography generation
+
+
+
+
+
+
+
+
+ 1.4
+ Predict the structure of a multi-subunit protein and particularly how the subunits fit together.
+
+
+ Protein quaternary structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Analyse the surface properties of proteins or other macromolecules, including surface accessible pockets, interior inaccessible cavities etc.
+
+
+ Molecular surface analysis
+
+
+
+
+
+
+
+
+ 1.4
+ Compare two or more ontologies, e.g. identify differences.
+
+
+ Ontology comparison
+
+
+
+
+
+
+
+
+ 1.4
+ 1.9
+
+ Compare two or more ontologies, e.g. identify differences.
+
+
+ Ontology comparison
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Recognition of which format the given data is in.
+ Format identification
+ Format inference
+ Format recognition
+
+
+ 'Format recognition' is not a bioinformatics-specific operation, but of great relevance in bioinformatics. Should be removed from EDAM if/when captured satisfactorily in a suitable domain-generic ontology.
+ Format detection
+
+
+
+
+
+ The has_input "Data" (data_0006) may cause visualisation or other problems although ontologically correct. But on the other hand it may be useful to distinguish from nullary operations without inputs.
+
+
+
+
+
+
+
+
+ 1.4
+ Split a file containing multiple data items into many files, each containing one item.
+ File splitting
+
+
+ Splitting
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Construct some data entity.
+ Construction
+
+
+ For non-analytical operations, see the 'Processing' branch.
+ Generation
+
+
+
+
+
+
+
+
+ 1.6
+ (jison)This is a distinction made on basis of input; all features exist can be mapped to a sequence so this isn't needed.
+ 1.17
+
+
+ Predict, recognise and identify functional or other key sites within nucleic acid sequences, typically by scanning for known motifs, patterns and regular expressions.
+
+
+ Nucleic acid sequence feature detection
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ Deposit some data in a database or some other type of repository or software system.
+ Data deposition
+ Data submission
+ Database deposition
+ Database submission
+ Submission
+
+
+ For non-analytical operations, see the 'Processing' branch.
+ Deposition
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Group together some data entities on the basis of similarities such that entities in the same group (cluster) are more similar to each other than to those in other groups (clusters).
+
+
+ Clustering
+
+
+
+
+
+
+
+
+ 1.6
+ 1.19
+
+ Construct some entity (typically a molecule sequence) from component pieces.
+
+
+ Assembly
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Convert a data set from one form to another.
+
+
+ Conversion
+
+
+
+
+
+
+
+
+ 1.6
+ Standardize or normalize data by some statistical method.
+ Normalisation
+ Standardisation
+
+
+ In the simplest normalisation means adjusting values measured on different scales to a common scale (often between 0.0 and 1.0), but can refer to more sophisticated adjustment whereby entire probability distributions of adjusted values are brought into alignment. Standardisation typically refers to an operation whereby a range of values are standardised to measure how many standard deviations a value is from its mean.
+ Standardisation and normalisation
+
+
+
+
+
+
+
+
+ 1.6
+ Combine multiple files or data items into a single file or object.
+
+
+ Aggregation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.6
+ Compare two or more scientific articles.
+
+
+ Article comparison
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Mathematical determination of the value of something, typically a property of a molecule.
+
+
+ Calculation
+
+
+
+
+
+
+
+
+ 1.6
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+
+ Predict a molecular pathway or network.
+
+ Pathway or network prediction
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ 1.12
+
+ The process of assembling many short DNA sequences together such that they represent the original chromosomes from which the DNA originated.
+
+
+ Genome assembly
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ 1.19
+
+ Generate a graph, or other visual representation, of data, showing the relationship between two or more variables.
+
+
+ Plotting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Image processing
+ The analysis of an image (typically a digital image) of some type in order to extract information from it.
+
+
+ Image analysis
+
+
+
+
+
+
+
+
+
+ 1.7
+ Analysis of data from a diffraction experiment.
+
+
+ Diffraction data analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Analysis of cell migration images in order to study cell migration, typically in order to study the processes that play a role in the disease progression.
+
+
+ Cell migration analysis
+
+
+
+
+
+
+
+
+
+ 1.7
+ Processing of diffraction data into a corrected, ordered, and simplified form.
+
+
+ Diffraction data reduction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Measurement of neurites; projections (axons or dendrites) from the cell body of a neuron, from analysis of neuron images.
+
+
+ Neurite measurement
+
+
+
+
+
+
+
+
+ 1.7
+ The evaluation of diffraction intensities and integration of diffraction maxima from a diffraction experiment.
+ Diffraction profile fitting
+ Diffraction summation integration
+
+
+ Diffraction data integration
+
+
+
+
+
+
+
+
+ 1.7
+ Phase a macromolecular crystal structure, for example by using molecular replacement or experimental phasing methods.
+
+
+ Phasing
+
+
+
+
+
+
+
+
+ 1.7
+ A technique used to construct an atomic model of an unknown structure from diffraction data, based upon an atomic model of a known structure, either a related protein or the same protein from a different crystal form.
+
+
+ The technique solves the phase problem, i.e. retrieves information concerning the phases of the structure.
+ Molecular replacement
+
+
+
+
+
+
+
+
+ 1.7
+ A method used to refine a structure by moving the whole molecule or parts of it as a rigid unit, rather than moving individual atoms.
+
+
+ Rigid body refinement usually follows molecular replacement in the assignment of a structure from diffraction data.
+ Rigid body refinement
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ An image processing technique that combines and analyzes multiple images of a particulate sample, in order to produce an image with clearer features that are more easily interpreted.
+
+
+ Single particle analysis is used to improve the information that can be obtained by relatively low resolution techniques, e.g. an image of a protein or virus from transmission electron microscopy (TEM).
+ Single particle analysis
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ true
+ This is two related concepts.
+ Compare (align and classify) multiple particle images from a micrograph in order to produce a representative image of the particle.
+
+
+ A micrograph can include particles in multiple different orientations and/or conformations. Particles are compared and organised into sets based on their similarity. Typically iterations of classification and alignment are performed to optimise the final 3D EM map.
+ Single particle alignment and classification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Clustering of molecular sequences on the basis of their function, typically using information from an ontology of gene function, or some other measure of functional phenotype.
+ Functional sequence clustering
+
+
+ Functional clustering
+
+
+
+
+
+
+
+
+ 1.7
+ Classification (typically of molecular sequences) by assignment to some taxonomic hierarchy.
+ Taxonomy assignment
+ Taxonomic profiling
+
+
+ Taxonomic classification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ The prediction of the degree of pathogenicity of a microorganism from analysis of molecular sequences.
+ Pathogenicity prediction
+
+
+ Virulence prediction
+
+
+
+
+
+
+
+
+
+ 1.7
+ Analyse the correlation patterns among features/molecules across a variety of experiments, samples etc.
+ Co-expression analysis
+ Gene co-expression network analysis
+ Gene expression correlation
+ Gene expression correlation analysis
+
+
+ Expression correlation analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ true
+ Identify a correlation, i.e. a statistical relationship between two random variables or two sets of data.
+
+
+ Correlation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Compute the covariance model for (a family of) RNA secondary structures.
+
+
+ RNA structure covariance model generation
+
+
+
+
+
+
+
+
+ 1.7
+ 1.18
+
+ Predict RNA secondary structure by analysis, e.g. probabilistic analysis, of the shape of RNA folds.
+
+
+ RNA secondary structure prediction (shape-based)
+ true
+
+
+
+
+
+
+
+
+ 1.7
+ 1.18
+
+ Prediction of nucleic-acid folding using sequence alignments as a source of data.
+
+
+ Nucleic acid folding prediction (alignment-based)
+ true
+
+
+
+
+
+
+
+
+ 1.7
+ Count k-mers (substrings of length k) in DNA sequence data.
+
+
+ k-mer counting is used in genome and transcriptome assembly, metagenomic sequencing, and for error correction of sequence reads.
+ k-mer counting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Reconstructing the inner node labels of a phylogenetic tree from its leaves.
+ Phylogenetic tree reconstruction
+ Gene tree reconstruction
+ Species tree reconstruction
+
+
+ Note that this is somewhat different from simply analysing an existing tree or constructing a completely new one.
+ Phylogenetic reconstruction
+
+
+
+
+
+
+
+
+ 1.7
+ Generate some data from a chosen probabilistic model, possibly to evaluate algorithms.
+
+
+ Probabilistic data generation
+
+
+
+
+
+
+
+
+
+ 1.7
+ Generate sequences from some probabilistic model, e.g. a model that simulates evolution.
+
+
+ Probabilistic sequence generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Identify or predict causes for antibiotic resistance from molecular sequence analysis.
+
+
+ Antimicrobial resistance prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ Analysis of a set of objects, such as genes, annotated with given categories, where eventual over-/under-representation of certain categories within the studied set of objects is revealed.
+ Enrichment
+ Over-representation analysis
+ Functional enrichment
+
+
+ Categories from a relevant ontology can be used. The input is typically a set of genes or other biological objects, possibly represented by their identifiers, and the output of the analysis is typically a ranked list of categories, each associated with a statistical metric of over-/under-representation within the studied data.
+ Enrichment analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ Analyse a dataset with respect to concepts from an ontology of chemical structure, leveraging chemical similarity information.
+ Chemical class enrichment
+
+
+ Chemical similarity enrichment
+
+
+
+
+
+
+
+
+ 1.8
+ Plot an incident curve such as a survival curve, death curve, mortality curve.
+
+
+ Incident curve plotting
+
+
+
+
+
+
+
+
+ 1.8
+ Identify and map patterns of genomic variations.
+
+
+ Methods often utilise a database of aligned reads.
+ Variant pattern analysis
+
+
+
+
+
+
+
+
+ 1.8
+ 1.12
+
+ Model some biological system using mathematical techniques including dynamical systems, statistical models, differential equations, and game theoretic models.
+
+
+ Mathematical modelling
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.9
+ Visualise images resulting from various types of microscopy.
+
+
+ Microscope image visualisation
+
+
+
+
+
+
+
+
+ 1.9
+ Annotate an image of some sort, typically with terms from a controlled vocabulary.
+
+
+ Image annotation
+
+
+
+
+
+
+
+
+ 1.9
+ Replace missing data with substituted values, usually by using some statistical or other mathematical approach.
+ Data imputation
+
+
+ Imputation
+
+
+
+
+
+
+
+
+
+ 1.9
+ Visualise, format or render data from an ontology, typically a tree of terms.
+ Ontology browsing
+
+
+ Ontology visualisation
+
+
+
+
+
+
+
+
+ 1.9
+ A method for making numerical assessments about the maximum percent of time that a conformer of a flexible macromolecule can exist and still be compatible with the experimental data.
+
+
+ Maximum occurrence analysis
+
+
+
+
+
+
+
+
+
+ 1.9
+ Compare the models or schemas used by two or more databases, or any other general comparison of databases rather than a detailed comparison of the entries themselves.
+ Data model comparison
+ Schema comparison
+
+
+ Database comparison
+
+
+
+
+
+
+
+
+ 1.9
+ 1.24
+
+
+
+ Simulate the behaviour of a biological pathway or network.
+
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ Network simulation
+ true
+
+
+
+
+
+
+
+
+ 1.9
+ Analyze read counts from RNA-seq experiments.
+
+
+ RNA-seq read count analysis
+
+
+
+
+
+
+
+
+ 1.9
+ Identify and remove redundancy from a set of small molecule structures.
+
+
+ Chemical redundancy removal
+
+
+
+
+
+
+
+
+ 1.9
+ Analyze time series data from an RNA-seq experiment.
+
+
+ RNA-seq time series data analysis
+
+
+
+
+
+
+
+
+ 1.9
+ Simulate gene expression data, e.g. for purposes of benchmarking.
+
+
+ Simulated gene expression data generation
+
+
+
+
+
+
+
+
+ 1.12
+ Identify semantic relations among entities and concepts within a text, using text mining techniques.
+ Relation discovery
+ Relation inference
+ Relationship discovery
+ Relationship extraction
+ Relationship inference
+
+
+ Relation extraction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Re-adjust the output of mass spectrometry experiments with shifted ppm values.
+
+
+ Mass spectra calibration
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Align multiple data sets using information from chromatography and/or peptide identification, from mass spectrometry experiments.
+
+
+ Chromatographic alignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ The removal of isotope peaks in a spectrum, to represent the fragment ion as one data point.
+ Deconvolution
+
+
+ Deisotoping is commonly done to reduce complexity, and done in conjunction with the charge state deconvolution.
+ Deisotoping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Technique for determining the amount of proteins in a sample.
+ Protein quantitation
+
+
+ Protein quantification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Determination of peptide sequence from mass spectrum.
+ Peptide-spectrum-matching
+
+
+ Peptide identification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Calculate the isotope distribution of a given chemical species.
+
+
+ Isotopic distributions calculation
+
+
+
+
+
+
+
+
+ 1.12
+ Prediction of retention time in a mass spectrometry experiment based on compositional and structural properties of the separated species.
+ Retention time calculation
+
+
+ Retention time prediction
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification without the use of chemical tags.
+
+
+ Label-free quantification
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification based on the use of chemical tags.
+
+
+ Labeled quantification
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification by Selected/multiple Reaction Monitoring workflow (XIC quantitation of precursor / fragment mass pair).
+
+
+ MRM/SRM
+
+
+
+
+
+
+
+
+ 1.12
+ Calculate number of identified MS2 spectra as approximation of peptide / protein quantity.
+
+
+ Spectral counting
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using stable isotope labeling by amino acids in cell culture.
+
+
+ SILAC
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using the AB SCIEX iTRAQ isobaric labelling workflow, wherein 2-8 reporter ions are measured in MS2 spectra near 114 m/z.
+
+
+ iTRAQ
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using labeling based on 18O-enriched H2O.
+
+
+ 18O labeling
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using the Thermo Fisher tandem mass tag labelling workflow.
+
+
+ TMT-tag
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using chemical labeling by stable isotope dimethylation.
+
+
+ Stable isotope dimethyl labelling
+
+
+
+
+
+
+
+
+ 1.12
+ Peptide sequence tags are used as a piece of information about a peptide obtained by tandem mass spectrometry.
+
+
+ Tag-based peptide identification
+
+
+
+
+
+
+
+
+
+ 1.12
+ Analytical process that derives a peptide's amino acid sequence from its tandem mass spectrum (MS/MS) without the assistance of a sequence database.
+
+
+ de Novo sequencing
+
+
+
+
+
+
+
+
+ 1.12
+ Identification of post-translational modifications (PTMs) of peptides/proteins in mass spectrum.
+
+
+ PTM identification
+
+
+
+
+
+
+
+
+
+ 1.12
+ Determination of best matches between MS/MS spectrum and a database of protein or nucleic acid sequences.
+
+
+ Peptide database search
+
+
+
+
+
+
+
+
+ 1.12
+ Peptide database search for identification of known and unknown PTMs looking for mass difference mismatches.
+ Modification-tolerant peptide database search
+ Unrestricted peptide database search
+
+
+ Blind peptide database search
+
+
+
+
+
+
+
+
+ 1.12
+ 1.19
+
+
+ Statistical estimation of false discovery rate from score distribution for peptide-spectrum-matches, following a peptide database search.
+
+
+ Validation of peptide-spectrum matches
+ true
+
+
+
+
+
+
+
+
+
+ 1.12
+ Validation of peptide-spectrum matches
+ Statistical estimation of false discovery rate from score distribution for peptide-spectrum-matches, following a peptide database search, and by comparison to search results with a database containing incorrect information.
+
+
+ Target-Decoy
+
+
+
+
+
+
+
+
+ 1.12
+ Analyse data in order to deduce properties of an underlying distribution or population.
+ Empirical Bayes
+
+
+ Statistical inference
+
+
+
+
+
+
+
+
+
+ 1.12
+ A statistical calculation to estimate the relationships among variables.
+ Regression
+
+
+ Regression analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Model a metabolic network. This can include 1) reconstruction to break down metabolic pathways into reactions, enzymes, and other relevant information, and compilation of this into a mathematical model and 2) simulations of metabolism based on the model.
+
+
+ Metabolic network reconstruction
+ Metabolic network simulation
+ Metabolic pathway simulation
+ Metabolic reconstruction
+
+
+ The terms and synonyms here reflect that for practical intents and purposes, "pathway" and "network" can be treated the same.
+ Metabolic network modelling
+
+
+
+
+
+
+
+
+
+ 1.12
+ Predict the effect or function of an individual single nucleotide polymorphism (SNP).
+
+
+ SNP annotation
+
+
+
+
+
+
+
+
+ 1.12
+ Prediction of genes or gene components from first principles, i.e. without reference to existing genes.
+ Gene prediction (ab-initio)
+
+
+ Ab-initio gene prediction
+
+
+
+
+
+
+
+
+
+ 1.12
+ Prediction of genes or gene components by reference to homologous genes.
+ Empirical gene finding
+ Empirical gene prediction
+ Evidence-based gene prediction
+ Gene prediction (homology-based)
+ Similarity-based gene prediction
+ Homology prediction
+ Orthology prediction
+
+
+ Homology-based gene prediction
+
+
+
+
+
+
+
+
+
+ 1.12
+ Construction of a statistical model, or a set of assumptions around some observed data, usually by describing a set of probability distributions which approximate the distribution of data.
+
+
+ Statistical modelling
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Compare two or more molecular surfaces.
+
+
+ Molecular surface comparison
+
+
+
+
+
+
+
+
+ 1.12
+ Annotate one or more sequences with functional information, such as cellular processes or metabolic pathways, by reference to a controlled vocabulary - invariably the Gene Ontology (GO).
+ Sequence functional annotation
+
+
+ Gene functional annotation
+
+
+
+
+
+
+
+
+ 1.12
+ Variant filtering is used to eliminate false positive variants based for example on base calling quality, strand and position information, and mapping info.
+
+
+ Variant filtering
+
+
+
+
+
+
+
+
+ 1.12
+ Identify binding sites in nucleic acid sequences that are statistically significantly differentially bound between sample groups.
+
+
+ Differential binding analysis
+
+
+
+
+
+
+
+
+
+ 1.13
+ Analyze data from RNA-seq experiments.
+
+
+ RNA-Seq analysis
+
+
+
+
+
+
+
+
+ 1.13
+ Visualise, format or render a mass spectrum.
+
+
+ Mass spectrum visualisation
+
+
+
+
+
+
+
+
+ 1.13
+ Filter a set of files or data items according to some property.
+ Sequence filtering
+ rRNA filtering
+
+
+ Filtering
+
+
+
+
+
+
+
+
+ 1.14
+ Identification of the best reference for mapping for a specific dataset from a list of potential references, when performing genetic variation analysis.
+
+
+ Reference identification
+
+
+
+
+
+
+
+
+ 1.14
+ Label-free quantification by integration of ion current (ion counting).
+ Ion current integration
+
+
+ Ion counting
+
+
+
+
+
+
+
+
+ 1.14
+ Chemical tagging free amino groups of intact proteins with stable isotopes.
+ ICPL
+
+
+ Isotope-coded protein label
+
+
+
+
+
+
+
+
+ 1.14
+ Labeling all proteins and (possibly) all amino acids using C-13 or N-15 enriched grown medium or feed.
+ C-13 metabolic labeling
+ N-15 metabolic labeling
+
+
+ This includes N-15 metabolic labeling (labeling all proteins and (possibly) all amino acids using N-15 enriched grown medium or feed) and C-13 metabolic labeling (labeling all proteins and (possibly) all amino acids using C-13 enriched grown medium or feed).
+ Metabolic labeling
+
+
+
+
+
+
+
+
+ 1.15
+ Construction of a single sequence assembly of all reads from different samples, typically as part of a comparative metagenomic analysis.
+ Sequence assembly (cross-assembly)
+
+
+ Cross-assembly
+
+
+
+
+
+
+
+
+ 1.15
+ The comparison of samples from a metagenomics study, for example, by comparison of metagenome shotgun reads or assembled contig sequences, by comparison of functional profiles, or some other method.
+
+
+ Sample comparison
+
+
+
+
+
+
+
+
+
+ 1.15
+ Differential protein analysis
+ The analysis, using proteomics techniques, to identify proteins whose encoding genes are differentially expressed under a given experimental setup.
+ Differential protein expression analysis
+
+
+ Differential protein expression profiling
+
+
+
+
+
+
+
+
+ 1.15
+ 1.17
+
+ The analysis, using any of diverse techniques, to identify genes that are differentially expressed under a given experimental setup.
+
+
+ Differential gene expression analysis
+ true
+
+
+
+
+
+
+
+
+ 1.15
+ Visualise, format or render data arising from an analysis of multiple samples from a metagenomics/community experiment.
+
+
+ Multiple sample visualisation
+
+
+
+
+
+
+
+
+ 1.15
+ The extrapolation of empirical characteristics of individuals or populations, backwards in time, to their common ancestors.
+ Ancestral sequence reconstruction
+ Character mapping
+ Character optimisation
+
+
+ Ancestral reconstruction is often used to recover possible ancestral character states of ancient, extinct organisms.
+ Ancestral reconstruction
+
+
+
+
+
+
+
+
+ 1.16
+ Site localisation of post-translational modifications in peptide or protein mass spectra.
+ PTM scoring
+ Site localisation
+
+
+ PTM localisation
+
+
+
+
+
+
+
+
+ 1.16
+ Operations concerning the handling and use of other tools.
+ Endpoint management
+
+
+ Service management
+
+
+
+
+
+
+
+
+ 1.16
+ An operation supporting the browsing or discovery of other tools and services.
+
+
+ Service discovery
+
+
+
+
+
+
+
+
+ 1.16
+ An operation supporting the aggregation of other services (at least two) into a functional unit, for the automation of some task.
+
+
+ Service composition
+
+
+
+
+
+
+
+
+ 1.16
+ An operation supporting the calling (invocation) of other tools and services.
+
+
+ Service invocation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ A data mining method typically used for studying biological networks based on pairwise correlations between variables.
+ WGCNA
+ Weighted gene co-expression network analysis
+
+
+ Weighted correlation network analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ Identification of protein, for example from one or more peptide identifications by tandem mass spectrometry.
+ Protein inference
+
+
+ Protein identification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ Text annotation is the operation of adding notes, data and metadata, recognised entities and concepts, and their relations to a text (such as a scientific article).
+ Article annotation
+ Literature annotation
+
+
+ Text annotation
+
+
+
+
+
+
+
+
+
+ 1.17
+ A method whereby data on several variants are "collapsed" into a single covariate based on regions such as genes.
+
+
+ Genome-wide association studies (GWAS) analyse a genome-wide set of genetic variants in different individuals to see if any variant is associated with a trait. Traditional association techniques can lack the power to detect the significance of rare variants individually, or measure their compound effect (rare variant burden). "Collapsing methods" were developed to overcome these problems.
+ Collapsing methods
+
+
+
+
+
+
+
+
+ 1.17
+ miRNA analysis
+ The analysis of microRNAs (miRNAs) : short, highly conserved small noncoding RNA molecules that occur naturally in plant and animal genomes.
+ miRNA expression profiling
+
+
+ miRNA expression analysis
+
+
+
+
+
+
+
+
+ 1.17
+ Counting and summarising the number of short sequence reads that map to genomic features.
+
+
+ Read summarisation
+
+
+
+
+
+
+
+
+ 1.17
+ A technique whereby molecules with desired properties and function are isolated from libraries of random molecules, through iterative cycles of selection, amplification, and mutagenesis.
+
+
+ In vitro selection
+
+
+
+
+
+
+
+
+ 1.17
+ The calculation of species richness for a number of individual samples, based on plots of the number of species as a function of the number of samples (rarefaction curves).
+ Species richness assessment
+
+
+ Rarefaction
+
+
+
+
+
+
+
+
+
+ 1.17
+ An operation which groups reads or contigs and assigns them to operational taxonomic units.
+ Binning
+ Binning shotgun reads
+
+
+ Binning methods use one or a combination of compositional features or sequence similarity.
+ Read binning
+
+
+
+
+
+
+
+
+
+ 1.17
+ true
+ Counting and measuring experimentally determined observations into quantities.
+ Quantitation
+
+
+ Quantification
+
+
+
+
+
+
+
+
+ 1.17
+ Quantification of data arising from RNA-Seq high-throughput sequencing, typically the quantification of transcript abundances during transcriptome analysis in a gene expression study.
+ RNA-Seq quantitation
+
+
+ RNA-Seq quantification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.17
+ Match experimentally measured mass spectrum to a spectrum in a spectral library or database.
+
+
+ Spectral library search
+
+
+
+
+
+
+
+
+ 1.17
+ Sort a set of files or data items according to some property.
+
+
+ Sorting
+
+
+
+
+
+
+
+
+ 1.17
+ Mass spectra identification of compounds that are produced by living systems. Including polyketides, terpenoids, phenylpropanoids, alkaloids and antibiotics.
+ De novo metabolite identification
+ Fragmentation tree generation
+ Metabolite identification
+
+
+ Natural product identification
+
+
+
+
+
+
+
+
+ 1.19
+ Identify and assess specific genes or regulatory regions of interest that are differentially methylated.
+ Differentially-methylated region identification
+
+
+ DMR identification
+
+
+
+
+
+
+
+
+ 1.21
+
+
+ Genotyping of multiple loci, typically characterizing microbial species isolates using internal fragments of multiple housekeeping genes.
+ MLST
+
+
+ Multilocus sequence typing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.21
+ Calculate theoretical mass spectrometry spectra for given sequences.
+ Spectrum prediction
+
+
+ Spectrum calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ 3D visualization of a molecular trajectory.
+
+
+ Trajectory visualization
+
+
+
+
+
+
+
+
+
+ 1.22
+ Compute Essential Dynamics (ED) on a simulation trajectory: an analysis of molecule dynamics using PCA (Principal Component Analysis) applied to the atomic positional fluctuations.
+ ED
+ PCA
+ Principal modes
+
+
+ Principal Component Analysis (PCA) is a multivariate statistical analysis to obtain collective variables and reduce the dimensionality of the system.
+ Essential dynamics
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Obtain force field parameters (charge, bonds, dihedrals, etc.) from a molecule, to be used in molecular simulations.
+ Ligand parameterization
+ Molecule parameterization
+
+
+ Forcefield parameterisation
+
+
+
+
+
+
+
+
+ 1.22
+ Analyse DNA sequences in order to determine an individual's DNA characteristics, for example in criminal forensics, parentage testing and so on.
+ DNA fingerprinting
+ DNA profiling
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect active sites in proteins; the region of an enzyme which binds a substrate and catalyses a reaction.
+ Active site detection
+
+
+ Active site prediction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect ligand-binding sites in proteins; a region of a protein which reversibly binds a ligand for some biochemical purpose, such as transport or regulation of protein function.
+ Ligand-binding site detection
+ Peptide-protein binding prediction
+
+
+ Ligand-binding site prediction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect metal ion-binding sites in proteins.
+ Metal-binding site detection
+ Protein metal-binding site prediction
+
+
+ Metal-binding site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Model or simulate protein-protein binding using comparative modelling or other techniques.
+ Protein docking
+
+
+ Protein-protein docking
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict DNA-binding proteins.
+ DNA-binding protein detection
+ DNA-protein interaction prediction
+ Protein-DNA interaction prediction
+
+
+ DNA-binding protein prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict RNA-binding proteins.
+ Protein-RNA interaction prediction
+ RNA-binding protein detection
+ RNA-protein interaction prediction
+
+
+ RNA-binding protein prediction
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect RNA-binding sites in protein sequences.
+ Protein-RNA binding site detection
+ Protein-RNA binding site prediction
+ RNA binding site detection
+
+
+ RNA binding site prediction
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect DNA-binding sites in protein sequences.
+ Protein-DNA binding site detection
+ Protein-DNA binding site prediction
+ DNA binding site detection
+
+
+ DNA binding site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Identify or predict intrinsically disordered regions in proteins.
+
+
+ Protein disorder prediction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Extract structured information from unstructured ("free") or semi-structured textual documents.
+ IE
+
+
+ Information extraction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Retrieve resources from information systems matching a specific information need.
+
+
+ Information retrieval
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Study of genomic feature structure, variation, function and evolution at a genomic scale.
+ Genomic analysis
+ Genome analysis
+
+
+
+
+
+
+
+
+ 1.24
+ The determination of cytosine methylation status of specific positions in nucleic acid sequences (usually reads from a bisulfite sequencing experiment).
+
+
+ Methylation calling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The identification of changes in DNA sequence or chromosome structure, usually in the context of diagnostic tests for disease, or to study ancestry or phylogeny.
+ Genetic testing
+
+
+ This can include indirect methods which reveal the results of genetic changes, such as RNA analysis to indicate gene expression, or biochemical analysis to identify expressed proteins.
+ DNA testing
+
+
+
+
+
+
+
+
+
+ 1.24
+ The processing of reads from high-throughput sequencing machines.
+
+
+ Sequence read processing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Render (visualise) a network - typically a biological network of some sort.
+ Network rendering
+ Protein interaction network rendering
+ Protein interaction network visualisation
+ Network visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Render (visualise) a biological pathway.
+ Pathway rendering
+
+
+ Pathway visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Generate, process or analyse a biological network.
+ Biological network analysis
+ Biological network modelling
+ Biological network prediction
+ Network comparison
+ Network modelling
+ Network prediction
+ Network simulation
+ Network topology simulation
+
+
+ Network analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Generate, process or analyse a biological pathway.
+ Biological pathway analysis
+ Biological pathway modelling
+ Biological pathway prediction
+ Functional pathway analysis
+ Pathway comparison
+ Pathway modelling
+ Pathway prediction
+ Pathway simulation
+
+
+ Pathway analysis
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Predict a metabolic pathway.
+
+
+ Metabolic pathway prediction
+
+
+
+
+
+
+
+
+ 1.24
+ Assigning sequence reads to separate groups / files based on their index tag (sample origin).
+ Sequence demultiplexing
+
+
+ NGS sequence runs are often performed with multiple samples pooled together. In such cases, an index tag (or "barcode") - a unique sequence of between 6 and 12bp - is ligated to each sample's genetic material so that the sequence reads from different samples can be identified. The process of demultiplexing (dividing sequence reads into separate files for each index tag/sample) may be performed automatically by the sequencing hardware. Alternatively the reads may be lumped together in one file with barcodes still attached, requiring you to do the splitting using software. In such cases, a "mapping" file is used which indicates which barcodes correspond to which samples.
+ Demultiplexing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A process used in statistics, machine learning, and information theory that reduces the number of random variables by obtaining a set of principal variables.
+ Dimension reduction
+
+
+ Dimensionality reduction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A dimensionality reduction process that selects a subset of relevant features (variables, predictors) for use in model construction.
+ Attribute selection
+ Variable selection
+ Variable subset selection
+
+
+ Feature selection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A dimensionality reduction process which builds (ideally) informative and non-redundant values (features) from an initial set of measured data, to aid subsequent generalization, learning or interpretation.
+ Feature projection
+
+
+ Feature extraction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Virtual screening is used in drug discovery to identify potential drug compounds. It involves searching libraries of small molecules in order to identify those molecules which are most likely to bind to a drug target (typically a protein receptor or enzyme).
+ Ligand-based screening
+ Ligand-based virtual screening
+ Structure-based screening
+ Structured-based virtual screening
+ Virtual ligand screening
+
+
+ Virtual screening is widely used for lead identification, lead optimization, and scaffold hopping during drug design and discovery.
+ Virtual screening
+
+
+
+
+
+
+
+
+ 1.24
+ The application of phylogenetic and other methods to estimate paleogeographical events such as speciation.
+ Biogeographic dating
+ Speciation dating
+ Species tree dating
+ Tree-dating
+
+
+ Tree dating
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The development and use of mathematical models and systems analysis for the description of ecological processes, and applications such as the sustainable management of resources.
+
+
+ Ecological modelling
+
+
+
+
+
+
+
+
+ 1.24
+ Mapping between gene tree nodes and species tree nodes or branches, to analyse and account for possible differences between gene histories and species histories, explaining this in terms of gene-scale events such as duplication, loss, transfer etc.
+ Gene tree / species tree reconciliation
+
+
+ Methods typically test for topological similarity between trees using for example a congruence index.
+ Phylogenetic tree reconciliation
+
+
+
+
+
+
+
+
+ 1.24
+ The detection of genetic selection, or (the end result of) the process by which certain traits become more prevalent in a species than other traits.
+
+
+ Selection detection
+
+
+
+
+
+
+
+
+ 1.25
+ A statistical procedure that uses an orthogonal transformation to convert a set of observations of possibly correlated variables into a set of values of linearly uncorrelated variables called principal components.
+
+
+ Principal component analysis
+
+
+
+
+
+
+
+
+
+ 1.25
+ Identify where sections of the genome are repeated and the number of repeats in the genome varies between individuals.
+ CNV detection
+
+
+ Copy number variation detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify deletion events causing the number of repeats in the genome to vary between individuals.
+
+
+ Deletion detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify duplication events causing the number of repeats in the genome to vary between individuals.
+
+
+ Duplication detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify copy number variations which are complex, e.g. multi-allelic variations that have many structural alleles and have rearranged multiple times in the ancestral genomes.
+
+
+ Complex CNV detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify amplification events causing the number of repeats in the genome to vary between individuals.
+
+
+ Amplification detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ Predict adhesins in protein sequences.
+
+
+ An adhesin is a cell-surface component that facilitates the adherence of a microorganism to a cell or surface. Adhesins are important virulence factors during establishment of infection and thus are targeted during vaccine development approaches that seek to block adhesin function and prevent adherence to host cells.
+ Adhesin prediction
+
+
+
+
+
+
+
+
+ 1.25
+ Design new protein molecules with specific structural or functional properties.
+ Protein redesign
+ Rational protein design
+ de novo protein design
+
+
+ Protein design
+
+
+
+
+
+
+
+
+
+ 1.25
+ The design of small molecules with specific biological activity, such as inhibitors or modulators for proteins that are of therapeutic interest. This can involve the modification of individual atoms, the addition or removal of molecular fragments, and the use of reaction-based design to explore tractable synthesis options for the small molecule.
+ Drug design
+ Ligand-based drug design
+ Structure-based drug design
+ Structure-based small molecule design
+ Small molecule design can involve assessment of target druggability and flexibility, molecular docking, in silico fragment screening, molecular dynamics, and homology modeling.
+ There are two broad categories of small molecule design techniques when applied to the design of drugs: ligand-based drug design (e.g. ligand similarity) and structure-based drug design (ligand docking) methods. Ligand similarity methods exploit structural similarities to known active ligands, whereas ligand docking methods use the 3D structure of a target protein to predict the binding modes and affinities of ligands to it.
+ Small molecule design
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The estimation of the power of a test; that is the probability of correctly rejecting the null hypothesis when it is false.
+ Estimation of statistical power
+ Power analysis
+
+
+ Power test
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The prediction of DNA modifications (e.g. N4-methylcytosine and N6-Methyladenine) using, for example, statistical models.
+
+
+ DNA modification prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The analysis and simulation of disease transmission using, for example, statistical methods such as the SIR-model.
+
+
+ Disease transmission analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The correction of p-values from multiple statistical tests to correct for false positives.
+ FDR estimation
+ False discovery rate estimation
+
+
+ Multiple testing correction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A category denoting a rather broad domain or field of interest, of study, application, work, data, or technology. Topics have no clearly defined borders between each other.
+ sumo:FieldOfStudy
+
+
+ Topic
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The processing and analysis of nucleic acid sequence, structural and other data.
+ Nucleic acid bioinformatics
+ Nucleic acid informatics
+ Nucleic_acids
+ Nucleic acid physicochemistry
+ Nucleic acid properties
+
+
+ Nucleic acids
+
+ http://purl.bioontology.org/ontology/MSH/D017422
+ http://purl.bioontology.org/ontology/MSH/D017423
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Archival, processing and analysis of protein data, typically molecular sequence and structural data.
+ Protein bioinformatics
+ Protein informatics
+ Proteins
+ Protein databases
+
+
+ Proteins
+
+ http://purl.bioontology.org/ontology/MSH/D020539
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The structures of reactants or products of metabolism, for example small molecules including vitamins, polyols, nucleotides and amino acids.
+
+
+ Metabolites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The archival, processing and analysis of molecular sequences (monomer composition of polymers) including molecular sequence data resources, sequence sites, alignments, motifs and profiles.
+ Sequence_analysis
+ Biological sequences
+ Sequence databases
+
+
+
+ Sequence analysis
+
+ http://purl.bioontology.org/ontology/MSH/D017421
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The curation, processing, analysis and prediction of data about the structure of biological molecules, typically proteins and nucleic acids and other macromolecules.
+ Biomolecular structure
+ Structural bioinformatics
+ Structure_analysis
+ Computational structural biology
+ Molecular structure
+ Structure data resources
+ Structure databases
+ Structures
+
+
+
+ This includes related concepts such as structural properties, alignments and structural motifs.
+ Structure analysis
+
+ http://purl.bioontology.org/ontology/MSH/D015394
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The prediction of molecular structure, including the prediction, modelling, recognition or design of protein secondary or tertiary structure or other structural features, and the folding of nucleic acid molecules and the prediction or design of nucleic acid (typically RNA) sequences with specific conformations.
+ Structure_prediction
+ DNA structure prediction
+ Nucleic acid design
+ Nucleic acid folding
+ Nucleic acid structure prediction
+ Protein fold recognition
+ Protein structure prediction
+ RNA structure prediction
+
+
+ This includes the recognition (prediction and assignment) of known protein structural domains or folds in protein sequence(s), for example by threading, or the alignment of molecular sequences to structures, structural (3D) profiles or templates (representing a structure or structure alignment).
+ Structure prediction
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ The alignment (equivalence between sites) of molecular sequences, structures or profiles (representing a sequence or structure alignment).
+
+ Alignment
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of evolutionary relationships amongst organisms.
+ Phylogeny
+ Phylogenetic clocks
+ Phylogenetic dating
+ Phylogenetic simulation
+ Phylogenetic stratigraphy
+ Phylogeny reconstruction
+
+
+
+ This includes diverse phylogenetic methods, including phylogenetic tree construction, typically from molecular sequence or morphological data, methods that simulate DNA sequence evolution, a phylogenetic tree or the underlying data, or which estimate or use molecular clock and stratigraphic (age) data, methods for studying gene evolution etc.
+ Phylogeny
+
+ http://purl.bioontology.org/ontology/MSH/D010802
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of gene or protein functions and their interactions in totality in a given organism, tissue, cell etc.
+ Functional_genomics
+
+
+
+ Functional genomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The conceptualisation, categorisation and nomenclature (naming) of entities or phenomena within biology or bioinformatics. This includes formal ontologies, controlled vocabularies, structured glossary, symbols and terminology or other related resource.
+ Ontology_and_terminology
+ Applied ontology
+ Ontologies
+ Ontology
+ Ontology relations
+ Terminology
+ Upper ontology
+
+
+
+ Ontology and terminology
+
+ http://purl.bioontology.org/ontology/MSH/D002965
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+
+ The search and query of data sources (typically databases or ontologies) in order to retrieve entries or other information.
+
+ Information retrieval
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.6 Bioinformatics
+ The archival, curation, processing and analysis of complex biological data.
+ Bioinformatics
+
+
+
+ This includes data processing in general, including basic handling of files and databases, datatypes, workflows and annotation.
+ Bioinformatics
+
+ http://purl.bioontology.org/ontology/MSH/D016247
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Computer graphics
+ VT 1.2.5 Computer graphics
+ Rendering (drawing on a computer screen) or visualisation of molecular sequences, structures or other biomolecular data.
+ Data rendering
+ Data_visualisation
+
+
+ Data visualisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The study of the thermodynamic properties of a nucleic acid.
+
+ Nucleic acid thermodynamics
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The archival, curation, processing and analysis of nucleic acid structural information, such as whole structures, structural features and alignments, and associated annotation.
+ Nucleic acid structure
+ Nucleic_acid_structure_analysis
+ DNA melting
+ DNA structure
+ Nucleic acid denaturation
+ Nucleic acid thermodynamics
+ RNA alignment
+ RNA structure
+ RNA structure alignment
+
+
+ Includes secondary and tertiary nucleic acid structural data, nucleic acid thermodynamic, thermal and conformational properties including DNA or DNA/RNA denaturation (melting) etc.
+ Nucleic acid structure analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ RNA sequences and structures.
+ RNA
+ Small RNA
+
+
+ RNA
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Topic for the study of restriction enzymes, their cleavage sites and the restriction of nucleic acids.
+
+ Nucleic acid restriction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The mapping of complete (typically nucleotide) sequences. Mapping (in the sense of short read alignment, or more generally, just alignment) has application in RNA-Seq analysis (mapping of transcriptomics reads), variant discovery (e.g. mapping of exome capture), and re-sequencing (mapping of WGS reads).
+ Mapping
+ Genetic linkage
+ Linkage
+ Linkage mapping
+ Synteny
+
+
+ This includes resources that aim to identify, map or analyse genetic markers in DNA sequences, for example to produce a genetic (linkage) map of a chromosome or genome or to analyse genetic linkage and synteny. It also includes resources for physical (sequence) maps of a DNA sequence showing the physical distance (base pairs) between features or landmarks such as restriction sites, cloned DNA fragments, genes and other genetic markers. It also covers for example the alignment of sequences of (typically millions) of short reads to a reference genome.
+ Mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The study of codon usage in nucleotide sequence(s), genetic codes and so on.
+
+ Genetic codes and codon usage
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The translation of mRNA into protein and subsequent protein processing in the cell.
+ Protein_expression
+ Translation
+
+
+
+ Protein expression
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Methods that aim to identify, predict, model or analyse genes or gene structure in DNA sequences.
+
+ This includes the study of promoters, coding regions, splice sites, etc. Methods for gene prediction might be ab initio, based on phylogenetic comparisons, use motifs, sequence features, support vector machine, alignment etc.
+ Gene finding
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The transcription of DNA into mRNA.
+
+ Transcription
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Promoters in DNA sequences (region of DNA that facilitates the transcription of a particular gene by binding RNA polymerase and transcription factor proteins).
+
+ Promoters
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ The folding (in 3D space) of nucleic acid molecules.
+
+
+ Nucleic acid folding
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Gene structure, regions which make an RNA product and features such as promoters, coding regions, gene fusion, splice sites etc.
+ Gene features
+ Gene_structure
+ Fusion genes
+
+
+ This includes operons (operators, promoters and genes) from a bacterial genome. For example the operon leader and trailer gene, gene composition of the operon and associated information.
+ This includes the study of promoters, coding regions etc.
+ Gene structure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein and peptide identification, especially in the study of whole proteomes of organisms.
+ Proteomics
+ Bottom-up proteomics
+ Discovery proteomics
+ MS-based targeted proteomics
+ MS-based untargeted proteomics
+ Metaproteomics
+ Peptide identification
+ Protein and peptide identification
+ Quantitative proteomics
+ Targeted proteomics
+ Top-down proteomics
+
+
+
+ Includes metaproteomics: proteomics analysis of an environmental sample.
+ Proteomics includes any methods (especially high-throughput) that separate, characterize and identify expressed proteins such as mass spectrometry, two-dimensional gel electrophoresis and protein microarrays, as well as in-silico methods that perform proteolytic or mass calculations on a protein sequence and other analyses of protein production data, for example in different cells or tissues.
+ Proteomics
+
+ http://purl.bioontology.org/ontology/MSH/D040901
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The elucidation of the three dimensional structure for all (available) proteins in a given organism.
+ Structural_genomics
+
+
+
+ Structural genomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of the physical and biochemical properties of peptides and proteins, for example the hydrophobic, hydrophilic and charge properties of a protein.
+ Protein physicochemistry
+ Protein_properties
+ Protein hydropathy
+
+
+ Protein properties
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein-protein, protein-DNA/RNA and protein-ligand interactions, including analysis of known interactions and prediction of putative interactions.
+ Protein_interactions
+ Protein interaction map
+ Protein interaction networks
+ Protein interactome
+ Protein-DNA interaction
+ Protein-DNA interactions
+ Protein-RNA interaction
+ Protein-RNA interactions
+ Protein-ligand interactions
+ Protein-nucleic acid interactions
+ Protein-protein interactions
+
+
+ This includes experimental (e.g. yeast two-hybrid) and computational analysis techniques.
+ Protein interactions
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein stability, folding (in 3D space) and protein sequence-structure-function relationships. This includes for example study of inter-atomic or inter-residue interactions in protein (3D) structures, the effect of mutation, and the design of proteins with specific properties, typically by designing changes (via site-directed mutagenesis) to an existing protein.
+ Protein_folding_stability_and_design
+ Protein design
+ Protein folding
+ Protein residue interactions
+ Protein stability
+ Rational protein design
+
+
+ Protein folding, stability and design
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Two-dimensional gel electrophoresis image and related data.
+
+ Two-dimensional gel electrophoresis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ An analytical chemistry technique that measures the mass-to-charge ratio and abundance of ions in the gas phase.
+
+
+ Mass spectrometry
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Protein microarray data.
+
+ Protein microarrays
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The study of the hydrophobic, hydrophilic and charge properties of a protein.
+
+ Protein hydropathy
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The study of how proteins are transported within and without the cell, including signal peptides, protein subcellular localisation and export.
+ Protein_targeting_and_localisation
+ Protein localisation
+ Protein sorting
+ Protein targeting
+
+
+ Protein targeting and localisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Enzyme or chemical cleavage sites and proteolytic or mass calculations on a protein sequence.
+
+ Protein cleavage sites and proteolysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ The comparison of two or more protein structures.
+
+
+ Use this concept for methods that are exclusively for protein structure.
+ Protein structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The processing and analysis of inter-atomic or inter-residue interactions in protein (3D) structures.
+
+ Protein residue interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein-protein interactions, individual interactions and networks, protein complexes, protein functional coupling etc.
+
+ Protein-protein interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein-ligand (small molecule) interactions.
+
+ Protein-ligand interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein-DNA/RNA interactions.
+
+ Protein-nucleic acid interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The design of proteins with specific properties, typically by designing changes (via site-directed mutagenesis) to an existing protein.
+
+ Protein design
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ G-protein coupled receptors (GPCRs).
+
+ G protein-coupled receptors (GPCR)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Carbohydrates, typically including structural information.
+ Carbohydrates
+
+
+ Carbohydrates
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Lipids and their structures.
+ Lipidomics
+ Lipids
+
+
+ Lipids
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Small molecules of biological significance, typically archival, curation, processing and analysis of structural information.
+ Small_molecules
+ Amino acids
+ Chemical structures
+ Drug structures
+ Drug targets
+ Drugs and target structures
+ Metabolite structures
+ Peptides
+ Peptides and amino acids
+ Target structures
+ Targets
+ Toxins
+ Toxins and targets
+ CHEBI:23367
+
+
+ Small molecules include organic molecules, metal-organic compounds, small polypeptides, small polysaccharides and oligonucleotides. Structural data is usually included.
+ This concept excludes macromolecules such as proteins and nucleic acids.
+ This includes the structures of drugs, drug targets, their interactions and binding affinities. Also the structures of reactants or products of metabolism, for example small molecules such as vitamins, polyols, nucleotides and amino acids. Also the physicochemical, biochemical or structural properties of amino acids or peptides. Also structural and associated data for toxic chemical substances.
+ Small molecules
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Edit, convert or otherwise change a molecular sequence, either randomly or specifically.
+
+ Sequence editing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The archival, processing and analysis of the basic character composition of molecular sequences, for example character or word frequency, ambiguity, complexity, particularly regions of low complexity, and repeats or the repetitive nature of molecular sequences.
+ Sequence_composition_complexity_and_repeats
+ Low complexity sequences
+ Nucleic acid repeats
+ Protein repeats
+ Protein sequence repeats
+ Repeat sequences
+ Sequence complexity
+ Sequence composition
+ Sequence repeats
+
+
+ This includes repetitive elements within a nucleic acid sequence, e.g. long terminal repeats (LTRs); sequences (typically retroviral) directly repeated at both ends of a sequence and other types of repeating unit.
+ This includes short repetitive subsequences (repeat sequences) in a protein sequence.
+ Sequence composition, complexity and repeats
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Conserved patterns (motifs) in molecular sequences, that (typically) describe functional or other key sites.
+
+ Sequence motifs
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The comparison of two or more molecular sequences, for example sequence alignment and clustering.
+
+
+ The comparison might be on the basis of sequence, physico-chemical or some other properties of the sequences.
+ Sequence comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The archival, detection, prediction and analysis of positional features such as functional and other key sites, in molecular sequences and the conserved patterns (motifs, profiles etc.) that may be used to describe them.
+ Sequence_sites_features_and_motifs
+ Functional sites
+ HMMs
+ Sequence features
+ Sequence motifs
+ Sequence profiles
+ Sequence sites
+
+
+ Sequence sites, features and motifs
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Search and retrieve molecular sequences that are similar to a sequence-based query (typically a simple sequence).
+
+ The query is a sequence-based entity such as another sequence, a motif or profile.
+ Sequence database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The comparison and grouping together of molecular sequences on the basis of their similarities.
+
+
+ This includes systems that generate, process and analyse sequence clusters.
+ Sequence clustering
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Structural features or common 3D motifs within protein structures, including the surface of a protein structure, such as biological interfaces with other molecules.
+ Protein 3D motifs
+ Protein_structural_motifs_and_surfaces
+ Protein structural features
+ Protein structural motifs
+ Protein surfaces
+ Structural motifs
+
+
+ This includes conformation of conserved substructures, conserved geometry (spatial arrangement) of secondary structure or protein backbone, solvent-exposed surfaces, internal cavities, the analysis of shape, hydropathy, electrostatic patches, role and functions etc.
+ Protein structural motifs and surfaces
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The processing, analysis or use of some type of structural (3D) profile or template; a computational entity (typically a numerical matrix) that is derived from and represents a structure or structure alignment.
+
+ Structural (3D) profiles
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The prediction, modelling, recognition or design of protein secondary or tertiary structure or other structural features.
+
+
+ Protein structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The folding of nucleic acid molecules and the prediction or design of nucleic acid (typically RNA) sequences with specific conformations.
+
+
+ Nucleic acid structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The prediction of three-dimensional structure of a (typically protein) sequence from first principles, using a physics-based or empirical scoring function and without using explicit structural templates.
+
+
+ Ab initio structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ The modelling of the three-dimensional structure of a protein using known sequence and structural data.
+
+ Homology modelling
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Molecular flexibility
+ Molecular motions
+ The study and simulation of molecular (typically protein) conformation using a computational model of physical forces and computer simulation.
+ Molecular_dynamics
+ Protein dynamics
+
+
+ This includes methods such as Molecular Dynamics, Coarse-grained dynamics, metadynamics, Quantum Mechanics, QM/MM, Markov State Models, etc. This includes resources concerning flexibility and motion in protein and other molecular structures.
+ Molecular dynamics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ 1.12
+
+ The modelling of the structure of proteins in complex with small molecules or other macromolecules.
+
+
+ Molecular docking
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The prediction of secondary or supersecondary structure of protein sequences.
+
+
+ Protein secondary structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The prediction of tertiary structure of protein sequences.
+
+
+ Protein tertiary structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The recognition (prediction and assignment) of known protein structural domains or folds in protein sequence(s).
+
+
+ Protein fold recognition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The alignment of molecular sequences or sequence profiles (representing sequence alignments).
+
+
+ This includes the generation of alignments (the identification of equivalent sites), the analysis of alignments, editing, visualisation, alignment databases, the alignment (equivalence between sites) of sequence profiles (representing sequence alignments) and so on.
+ Sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The superimposition of molecular tertiary structures or structural (3D) profiles (representing a structure or structure alignment).
+
+
+ This includes the generation, storage, analysis, rendering etc. of structure alignments.
+ Structure alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The alignment of molecular sequences to structures, structural (3D) profiles or templates (representing a structure or structure alignment).
+
+
+ Threading
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Sequence profiles; typically a positional, numerical matrix representing a sequence alignment.
+
+ Sequence profiles include position-specific scoring matrix (position weight matrix), hidden Markov models etc.
+ Sequence profiles and HMMs
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The reconstruction of a phylogeny (evolutionary relatedness amongst organisms), for example, by building a phylogenetic tree.
+
+ Currently too specific for the topic sub-ontology (but might be unobsoleted).
+ Phylogeny reconstruction
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The integrated study of evolutionary relationships and whole genome data, for example, in the analysis of species trees, horizontal gene transfer and evolutionary reconstruction.
+ Phylogenomics
+
+
+
+ Phylogenomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Simulated polymerase chain reaction (PCR).
+
+ Virtual PCR
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The assembly of fragments of a DNA sequence to reconstruct the original sequence.
+ Sequence_assembly
+ Assembly
+
+
+ Assembly has two broad types, de-novo and re-sequencing. Re-sequencing is a specialised case of assembly, where an assembled (typically de-novo assembled) reference genome is available and is about 95% identical to the re-sequenced genome. All other cases of assembly are 'de-novo'.
+ Sequence assembly
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Stable, naturally occurring mutations in a nucleotide sequence including alleles, naturally occurring mutations such as single base nucleotide substitutions, deletions and insertions, RFLPs and other polymorphisms.
+ DNA variation
+ Genetic_variation
+ Genomic variation
+ Mutation
+ Polymorphism
+ Somatic mutations
+
+
+ Genetic variation
+
+ http://purl.bioontology.org/ontology/MSH/D014644
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Microarrays, for example, to process microarray data or design probes and experiments.
+
+ Microarrays
+ http://purl.bioontology.org/ontology/MSH/D046228
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.7 Pharmacology and pharmacy
+ The study of drugs and their effects or responses in living systems.
+ Pharmacology
+ Computational pharmacology
+ Pharmacoinformatics
+
+
+
+ Pharmacology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ http://edamontology.org/topic_0197
+ The analysis of levels and patterns of synthesis of gene products (proteins and functional RNA) including interpretation in functional terms of gene expression data.
+ Expression
+ Gene_expression
+ Codon usage
+ DNA chips
+ DNA microarrays
+ Gene expression profiling
+ Gene transcription
+ Gene translation
+ Transcription
+
+
+
+ Gene expression levels are analysed by identifying, quantifying or comparing mRNA transcripts, for example using microarrays, RNA-seq, northern blots, gene-indexed expression profiles etc.
+ This includes the study of codon usage in nucleotide sequence(s), genetic codes and so on.
+ Gene expression
+
+ http://purl.bioontology.org/ontology/MSH/D015870
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The regulation of gene expression.
+ Regulatory genomics
+
+
+ Gene regulation
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The influence of genotype on drug response, for example by correlating gene expression or single-nucleotide polymorphisms with drug efficacy or toxicity.
+ Pharmacogenomics
+ Pharmacogenetics
+
+
+
+ Pharmacogenomics
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.4 Medicinal chemistry
+ The design and chemical synthesis of bioactive molecules, for example drugs or potential drug compounds, for medicinal purposes.
+ Drug design
+ Medicinal_chemistry
+
+
+
+ This includes methods that search compound collections, generate or analyse drug 3D conformations, identify drug targets with structural docking etc.
+ Medicinal chemistry
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Information on a specific fish genome including molecular sequences, genes and annotation.
+
+ Fish
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Information on a specific fly genome including molecular sequences, genes and annotation.
+
+ Flies
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Information on a specific mouse or rat genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a group of mice / rats or all mice / rats.
+ Mice or rats
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Information on a specific worm genome including molecular sequences, genes and annotation.
+
+ Worms
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The processing and analysis of the bioinformatics literature and bibliographic data, such as literature search and query.
+
+
+ Literature analysis
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The processing and analysis of natural language, such as scientific literature in English, in order to extract data and information, or to enable human-computer interaction.
+ NLP
+ Natural_language_processing
+ BioNLP
+ Literature mining
+ Text analytics
+ Text data mining
+ Text mining
+
+
+
+ Natural language processing
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deposition and curation of database accessions, including annotation, typically with terms from a controlled vocabulary.
+ Data_submission_annotation_and_curation
+ Data curation
+ Data provenance
+ Database curation
+
+
+
+ Data submission, annotation, and curation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The management and manipulation of digital documents, including database records, files and reports.
+
+
+ Document, record and content management
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation of a molecular sequence.
+
+ Sequence annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+
+ Annotation of a genome.
+
+ Genome annotation
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Spectroscopy
+ An analytical technique that exploits the magnetic properties of certain atomic nuclei to provide information on the structure, dynamics, reaction state and chemical environment of molecules.
+ NMR spectroscopy
+ Nuclear magnetic resonance spectroscopy
+ NMR
+ HOESY
+ Heteronuclear Overhauser Effect Spectroscopy
+ NOESY
+ Nuclear Overhauser Effect Spectroscopy
+ ROESY
+ Rotational Frame Nuclear Overhauser Effect Spectroscopy
+
+
+
+ NMR
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The classification of molecular sequences based on some measure of their similarity.
+
+
+ Methods including sequence motifs, profile and other diagnostic elements which (typically) represent conserved patterns (of residues or properties) in molecular sequences.
+ Sequence classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ primarily the classification of proteins (from sequence or structural data) into clusters, groups, families etc.
+
+ Protein classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Sequence motifs, or sequence profiles derived from an alignment of molecular sequences of a particular type.
+
+ This includes comparison, discovery, recognition etc. of sequence motifs.
+ Sequence motif or profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein chemical modifications, e.g. post-translational modifications.
+ PTMs
+ Post-translational modifications
+ Protein post-translational modification
+ Protein_modifications
+ Post-translation modifications
+ Protein chemical modifications
+ Protein post-translational modifications
+ GO:0006464
+ MOD:00000
+
+
+ EDAM does not describe all possible protein modifications. For fine-grained annotation of protein modification use the Gene Ontology (children of concept GO:0006464) and/or the Protein Modifications ontology (children of concept MOD:00000)
+ Protein modifications
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ http://edamontology.org/topic_3076
+ Molecular interactions, biological pathways, networks and other models.
+ Molecular_interactions_pathways_and_networks
+ Biological models
+ Biological networks
+ Biological pathways
+ Cellular process pathways
+ Disease pathways
+ Environmental information processing pathways
+ Gene regulatory networks
+ Genetic information processing pathways
+ Interactions
+ Interactome
+ Metabolic pathways
+ Molecular interactions
+ Networks
+ Pathways
+ Signal transduction pathways
+ Signaling pathways
+
+
+
+ Molecular interactions, pathways and networks
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.3 Information sciences
+ VT 1.3.3 Information retrieval
+ VT 1.3.4 Information management
+ VT 1.3.5 Knowledge management
+ VT 1.3.99 Other
+ The study and practice of information processing and use of computer information systems.
+ Information management
+ Information science
+ Knowledge management
+ Informatics
+
+
+ Informatics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ Data resources for the biological or biomedical literature, either a primary source of literature or some derivative.
+
+
+ Literature data resources
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Laboratory management and resources, for example, catalogues of biological resources for use in the lab including cell lines, viruses, plasmids, phages, DNA probes and primers and so on.
+ Laboratory_Information_management
+ Laboratory resources
+
+
+
+ Laboratory information management
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ General cell culture or data on specific cell lines.
+
+ Cell and tissue culture
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.15 Ecology
+ The ecological and environmental sciences and especially the application of information technology (ecoinformatics).
+ Ecology
+ Computational ecology
+ Ecoinformatics
+ Ecological informatics
+ Ecosystem science
+
+
+
+ Ecology
+
+ http://purl.bioontology.org/ontology/MSH/D004777
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Electron diffraction experiment
+ The study of matter by studying the interference pattern from firing electrons at a sample, to analyse structures at resolutions higher than can be achieved using light.
+ Electron_microscopy
+ Electron crystallography
+ SEM
+ Scanning electron microscopy
+ Single particle electron microscopy
+ TEM
+ Transmission electron microscopy
+
+
+
+ Electron microscopy
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The cell cycle including key genes and proteins.
+
+ Cell cycle
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The physicochemical, biochemical or structural properties of amino acids or peptides.
+
+
+ Peptides and amino acids
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ A specific organelle, or organelles in general, typically the genes and proteins (or genome and proteome).
+
+ Organelles
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Ribosomes, typically of ribosome-related genes and proteins.
+
+ Ribosomes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A database about scents.
+
+ Scents
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The structures of drugs, drug target, their interactions and binding affinities.
+
+
+ Drugs and target structures
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A specific organism, or group of organisms, used to study a particular aspect of biology.
+ Organisms
+ Model_organisms
+
+
+
+ This may include information on the genome (including molecular sequences and map, genes and annotation), proteome, as well as more general information about an organism.
+ Model organisms
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Whole genomes of one or more organisms, or genomes in general, such as meta-information on genomes, genome projects, gene names etc.
+ Genomics
+ Exomes
+ Genome annotation
+ Genomes
+ Personal genomics
+ Synthetic genomics
+ Viral genomics
+ Whole genomes
+
+
+
+ Genomics
+
+ http://purl.bioontology.org/ontology/MSH/D023281
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Particular gene(s), gene family or other gene group or system and their encoded proteins. Primarily the classification of proteins (from sequence or structural data) into clusters, groups, families etc., curation of a particular protein or protein family, or any other proteins that have been classified as members of a common group.
+ Genes, gene family or system
+ Gene_and protein_families
+ Gene families
+ Gene family
+ Gene system
+ Protein families
+ Protein sequence classification
+
+
+
+ A protein families database might include the classifier (e.g. a sequence profile) used to build the classification.
+ Gene and protein families
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Study of chromosomes.
+
+
+ Chromosomes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of the genetic constitution of a living entity, such as an individual, an organism, a cell and so on, typically with respect to particular observable phenotypic traits, or resources concerning such traits, which might be an aspect of biochemistry, physiology, morphology, anatomy, development and so on.
+ Genotype and phenotype resources
+ Genotype-phenotype
+ Genotype-phenotype analysis
+ Genotype_and_phenotype
+ Genotype
+ Genotyping
+ Phenotype
+ Phenotyping
+
+
+
+ Genotype and phenotype
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Gene expression e.g. microarray data, northern blots, gene-indexed expression profiles etc.
+
+ Gene expression and microarray
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Molecular probes (e.g. a peptide probe or DNA microarray probe) or PCR primers and hybridisation oligos in a nucleic acid sequence.
+ Probes_and_primers
+ Primer quality
+ Primers
+ Probes
+
+
+ This includes the design of primers for PCR and DNA amplification or the design of molecular probes.
+ Probes and primers
+ http://purl.bioontology.org/ontology/MSH/D015335
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.6 Pathology
+ Diseases, including diseases in general and the genes, gene variations and proteins involved in one or more specific diseases.
+ Disease
+ Pathology
+
+
+
+ Pathology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ A particular protein, protein family or other group of proteins.
+
+ Specific protein resources
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.25 Taxonomy
+ Organism classification, identification and naming.
+ Taxonomy
+
+
+ Taxonomy
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Archival, processing and analysis of protein sequences and sequence-based entities such as alignments, motifs and profiles.
+
+
+ Protein sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ The archival, processing and analysis of nucleotide sequences and sequence-based entities such as alignments, motifs and profiles.
+
+
+ Nucleic acid sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The repetitive nature of molecular sequences.
+
+ Repeat sequences
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The (character) complexity of molecular sequences, particularly regions of low complexity.
+
+ Low complexity sequences
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A specific proteome including protein sequences and annotation.
+
+ Proteome
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA sequences and structure, including processes such as methylation and replication.
+ DNA analysis
+ DNA
+ Ancient DNA
+ Chromosomes
+
+
+ The DNA sequences might be coding or non-coding sequences.
+ DNA
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Protein-coding regions including coding sequences (CDS), exons, translation initiation sites and open reading frames.
+
+
+ Coding RNA
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Non-coding or functional RNA sequences, including regulatory RNA sequences, ribosomal RNA (rRNA) and transfer RNA (tRNA).
+ Functional_regulatory_and_non-coding_RNA
+ Functional RNA
+ Long ncRNA
+ Long non-coding RNA
+ Non-coding RNA
+ Regulatory RNA
+ Small and long non-coding RNAs
+ Small interfering RNA
+ Small ncRNA
+ Small non-coding RNA
+ Small nuclear RNA
+ Small nucleolar RNA
+ lncRNA
+ miRNA
+ microRNA
+ ncRNA
+ piRNA
+ piwi-interacting RNA
+ siRNA
+ snRNA
+ snoRNA
+
+
+ Non-coding RNA includes piwi-interacting RNA (piRNA), small nuclear RNA (snRNA) and small nucleolar RNA (snoRNA). Regulatory RNA includes microRNA (miRNA) - short single stranded RNA molecules that regulate gene expression, and small interfering RNA (siRNA).
+ Functional, regulatory and non-coding RNA
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ One or more ribosomal RNA (rRNA) sequences.
+
+ rRNA
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ One or more transfer RNA (tRNA) sequences.
+
+ tRNA
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Protein secondary structure or secondary structure alignments.
+
+
+ This includes assignment, analysis, comparison, prediction, rendering etc. of secondary structure data.
+ Protein secondary structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ RNA secondary or tertiary structure and alignments.
+
+ RNA structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Protein tertiary structures.
+
+
+ Protein tertiary structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Classification of nucleic acid sequences and structures.
+
+ Nucleic acid classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.14
+
+ Primarily the classification of proteins (from sequence or structural data) into clusters, groups, families etc., curation of a particular protein or protein family, or any other proteins that have been classified as members of a common group.
+
+
+ Protein families
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein tertiary structural domains and folds in a protein or polypeptide chain.
+ Protein_folds_and_structural_domains
+ Intramembrane regions
+ Protein domains
+ Protein folds
+ Protein membrane regions
+ Protein structural domains
+ Protein topological domains
+ Protein transmembrane regions
+ Transmembrane regions
+
+
+ This includes topological domains such as cytoplasmic regions in a protein.
+ This includes trans- or intra-membrane regions of a protein, typically describing physicochemical properties of the secondary structure elements. For example, the location and size of the membrane spanning segments and intervening loop regions, transmembrane region IN/OUT orientation relative to the membrane, plus the following data for each amino acid: A Z-coordinate (the distance to the membrane center), the free energy of membrane insertion (calculated in a sliding window over the sequence) and a reliability score. The z-coordinate implies information about re-entrant helices, interfacial helices, the tilt of a transmembrane helix and loop lengths.
+ Protein folds and structural domains
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ Nucleotide sequence alignments.
+
+
+ Nucleic acid sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein sequence alignments.
+
+ A sequence profile typically represents a sequence alignment.
+ Protein sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+
+ The archival, detection, prediction and analysis of positional features such as functional sites in nucleotide sequences.
+
+ Nucleic acid sites and features
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+
+ The detection, identification and analysis of positional features in proteins, such as functional sites.
+
+ Protein sites and features
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Proteins that bind to DNA and control transcription of DNA to mRNA (transcription factors) and also transcriptional regulatory sites, elements and regions (such as promoters, enhancers, silencers and boundary elements / insulators) in nucleotide sequences.
+ Transcription_factors_and_regulatory_sites
+ -10 signals
+ -35 signals
+ Attenuators
+ CAAT signals
+ CAT box
+ CCAAT box
+ CpG islands
+ Enhancers
+ GC signals
+ Isochores
+ Promoters
+ TATA signals
+ TFBS
+ Terminators
+ Transcription factor binding sites
+ Transcription factors
+ Transcriptional regulatory sites
+
+
+ This includes CpG rich regions (isochores) in a nucleotide sequence.
+ This includes promoters, CAAT signals, TATA signals, -35 signals, -10 signals, GC signals, primer binding sites for initiation of transcription or reverse transcription, enhancer, attenuator, terminators and ribosome binding sites.
+ Transcription factor proteins either promote (as an activator) or block (as a repressor) the binding to DNA of RNA polymerase. Regulatory sites including transcription factor binding site as well as promoters, enhancers, silencers and boundary elements / insulators.
+ Transcription factors and regulatory sites
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+
+ Protein phosphorylation and phosphorylation sites in protein sequences.
+
+ Phosphorylation sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Metabolic pathways.
+
+
+ Metabolic pathways
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Signaling pathways.
+
+
+ Signaling pathways
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein and peptide identification.
+
+ Protein and peptide identification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Biological or biomedical analytical workflows or pipelines.
+ Pipelines
+ Workflows
+ Software integration
+ Tool integration
+ Tool interoperability
+
+
+ Workflows
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Structuring data into basic types and (computational) objects.
+
+ Data types and objects
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Theoretical biology.
+
+ Theoretical biology
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Mitochondria, typically of mitochondrial genes and proteins.
+
+ Mitochondria
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ VT 1.5.10 Botany
+ VT 1.5.22 Plant science
+ Plants, e.g. information on a specific plant genome including molecular sequences, genes and annotation.
+ Botany
+ Plant
+ Plant science
+ Plants
+ Plant_biology
+ Plant anatomy
+ Plant cell biology
+ Plant ecology
+ Plant genetics
+ Plant physiology
+
+
+ The resource may be specific to a plant, a group of plants or all plants.
+ Plant biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ VT 1.5.28
+ Study of viruses, e.g. sequence and structural data, interactions of viral proteins, or a viral genome including molecular sequences, genes and annotation.
+ Virology
+
+
+ Virology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Fungi and molds, e.g. information on a specific fungal genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a fungus, a group of fungi or all fungi.
+ Fungi
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset). Definition is wrong anyway.
+ 1.17
+
+
+ Pathogens, e.g. information on a specific vertebrate genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a pathogen, a group of pathogens or all pathogens.
+ Pathogens
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Arabidopsis-specific data.
+
+ Arabidopsis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Rice-specific data.
+
+ Rice
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Informatics resources that aim to identify, map or analyse genetic markers in DNA sequences, for example to produce a genetic (linkage) map of a chromosome or genome or to analyse genetic linkage and synteny.
+
+ Genetic mapping and linkage
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study (typically comparison) of the sequence, structure or function of multiple genomes.
+ Comparative_genomics
+
+
+
+ Comparative genomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mobile genetic elements, such as transposons, Plasmids, Bacteriophage elements and Group II introns.
+ Mobile_genetic_elements
+ Transposons
+
+
+ Mobile genetic elements
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Human diseases, typically describing the genes, mutations and proteins implicated in disease.
+
+ Human disease
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.3 Immunology
+ The application of information technology to immunology such as immunological processes, immunological genes, proteins and peptide ligands, antigens and so on.
+ Immunology
+
+
+
+ Immunology
+
+ http://purl.bioontology.org/ontology/MSH/D007120
+ http://purl.bioontology.org/ontology/MSH/D007125
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Lipoproteins (protein-lipid assemblies), and proteins or regions of a protein that span or are associated with a membrane.
+ Membrane_and_lipoproteins
+ Lipoproteins
+ Membrane proteins
+ Transmembrane proteins
+
+
+ Membrane and lipoproteins
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Proteins that catalyze chemical reactions, the kinetics of enzyme-catalysed reactions, enzyme nomenclature etc.
+ Enzymology
+ Enzymes
+
+
+ Enzymes
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ PCR primers and hybridisation oligos in a nucleic acid sequence.
+
+
+ Primers
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Regions or sites in a eukaryotic and eukaryotic viral RNA sequence which directs endonuclease cleavage or polyadenylation of an RNA transcript.
+
+
+ PolyA signal or sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ CpG rich regions (isochores) in a nucleotide sequence.
+
+
+ CpG island and isochores
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Restriction enzyme recognition sites (restriction sites) in a nucleic acid sequence.
+
+
+ Restriction sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+
+ Splice sites in a nucleotide sequence or alternative RNA splicing events.
+
+ Splice sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Matrix/scaffold attachment regions (MARs/SARs) in a DNA sequence.
+
+
+ Matrix/scaffold attachment sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Operons (operators, promoters and genes) from a bacterial genome.
+
+
+ Operon
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Whole promoters or promoter elements (transcription start sites, RNA polymerase binding site, transcription factor binding sites, promoter enhancers etc) in a DNA sequence.
+
+
+ Promoters
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.24 Structural biology
+ The molecular structure of biological molecules, particularly macromolecules such as proteins and nucleic acids.
+ Structural_biology
+ Structural assignment
+ Structural determination
+ Structure determination
+
+
+
+ This includes experimental methods for biomolecular structure determination, such as X-ray crystallography, nuclear magnetic resonance (NMR), circular dichroism (CD) spectroscopy, microscopy etc., including the assignment or modelling of molecular structure from such data.
+ Structural biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Trans- or intra-membrane regions of a protein, typically describing physicochemical properties of the secondary structure elements.
+
+
+ Protein membrane regions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The comparison of two or more molecular structures, for example structure alignment and clustering.
+
+
+ This might involve comparison of secondary or tertiary (3D) structural information.
+ Structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of gene and protein function including the prediction of functional properties of a protein.
+ Functional analysis
+ Function_analysis
+ Protein function analysis
+ Protein function prediction
+
+
+
+ Function analysis
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Specific bacteria or archaea, e.g. information on a specific prokaryote genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a prokaryote, a group of prokaryotes or all prokaryotes.
+ Prokaryotes and Archaea
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein data resources.
+
+ Protein databases
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Experimental methods for biomolecular structure determination, such as X-ray crystallography, nuclear magnetic resonance (NMR), circular dichroism (CD) spectroscopy, microscopy etc., including the assignment or modelling of molecular structure from such data.
+
+ Structure determination
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.11 Cell biology
+ Cells, such as key genes and proteins involved in the cell cycle.
+ Cell_biology
+ Cells
+ Cellular processes
+ Protein subcellular localization
+
+
+ Cell biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Topic focused on identifying, grouping, or naming things in a structured way according to some schema based on observable relationships.
+
+ Classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Lipoproteins (protein-lipid assemblies).
+
+ Lipoproteins
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Visualise a phylogeny, for example, render a phylogenetic tree.
+
+ Phylogeny visualisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The application of information technology to chemistry in biological research environment.
+ Chemical informatics
+ Chemoinformatics
+ Cheminformatics
+
+
+
+ Cheminformatics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The holistic modelling and analysis of complex biological systems and the interactions therein.
+ Systems_biology
+ Biological modelling
+ Biological system modelling
+ Systems modelling
+
+
+
+ This includes databases of models and methods to construct or analyse a model.
+ Systems biology
+
+ http://purl.bioontology.org/ontology/MSH/D049490
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The application of statistical methods to biological problems.
+ Statistics_and_probability
+ Bayesian methods
+ Biostatistics
+ Descriptive statistics
+ Gaussian processes
+ Inferential statistics
+ Markov processes
+ Multivariate statistics
+ Probabilistic graphical model
+ Probability
+ Statistics
+
+
+
+ Statistics and probability
+
+
+
+ http://purl.bioontology.org/ontology/MSH/D056808
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Search for and retrieve molecular structures that are similar to a structure-based query (typically another structure or part of a structure).
+
+ The query is a structure-based entity such as another structure, a 3D (structural) motif, 3D profile or template.
+ Structure database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The construction, analysis, evaluation, refinement etc. of models of a molecule's properties or behaviour, including modelling the structure of proteins in complex with small molecules or other macromolecules (docking).
+ Molecular_modelling
+ Comparative modelling
+ Docking
+ Homology modeling
+ Homology modelling
+ Molecular docking
+
+
+ Molecular modelling
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.2
+
+
+ The prediction of functional properties of a protein.
+
+ Protein function prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Single nucleotide polymorphisms (SNP) and associated data, for example, the discovery and annotation of SNPs.
+
+
+ SNP
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Predict transmembrane domains and topology in protein sequences.
+
+ Transmembrane protein prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ The comparison of two or more nucleic acid (typically RNA) secondary or tertiary structures.
+
+ Use this concept for methods that are exclusively for nucleic acid structures.
+ Nucleic acid structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Exons in a nucleotide sequence.
+
+
+ Exons
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Transcription of DNA into RNA including the regulation of transcription.
+
+
+ Gene transcription
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA mutation.
+ DNA_mutation
+
+
+ DNA mutation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.2.16 Oncology
+ The study of cancer, for example, genes and proteins implicated in cancer.
+ Cancer biology
+ Oncology
+ Cancer
+ Neoplasm
+ Neoplasms
+
+
+
+ Oncology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Structural and associated data for toxic chemical substances.
+
+
+ Toxins and targets
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Introns in a nucleotide sequence.
+
+
+ Introns
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A topic concerning primarily bioinformatics software tools, typically the broad function or purpose of a tool.
+
+
+ Tool topic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A general area of bioinformatics study, typically the broad scope or category of content of a bioinformatics journal or conference proceeding.
+
+
+ Study topic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Biological nomenclature (naming), symbols and terminology.
+
+ Nomenclature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The genes, gene variations and proteins involved in one or more specific diseases.
+
+ Disease genes and proteins
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ http://edamontology.org/topic_3040
+ Protein secondary or tertiary structural data and/or associated annotation.
+ Protein structure
+ Protein_structure_analysis
+ Protein tertiary structure
+
+
+
+ Protein structure analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The study of human beings in general, including the human genome and proteome.
+ Humans
+ Human_biology
+
+
+ Human biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Informatics resource (typically a database) primarily focused on genes.
+
+ Gene resources
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Yeast, e.g. information on a specific yeast genome including molecular sequences, genes and annotation.
+
+ Yeast
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison) Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Eukaryotes or data concerning eukaryotes, e.g. information on a specific eukaryote genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a eukaryote, a group of eukaryotes or all eukaryotes.
+ Eukaryotes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Invertebrates, e.g. information on a specific invertebrate genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to an invertebrate, a group of invertebrates or all invertebrates.
+ Invertebrates
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Vertebrates, e.g. information on a specific vertebrate genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a vertebrate, a group of vertebrates or all vertebrates.
+ Vertebrates
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Unicellular eukaryotes, e.g. information on a unicellular eukaryote genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a unicellular eukaryote, a group of unicellular eukaryotes or all unicellular eukaryotes.
+ Unicellular eukaryotes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein secondary or tertiary structure alignments.
+
+ Protein structure alignment
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The study of matter and their structure by means of the diffraction of X-rays, typically the diffraction pattern caused by the regularly spaced atoms of a crystalline sample.
+ Crystallography
+ X-ray_diffraction
+ X-ray crystallography
+ X-ray microscopy
+
+
+
+ X-ray diffraction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Conceptualisation, categorisation and naming of entities or phenomena within biology or bioinformatics.
+
+ Ontologies, nomenclature and classification
+ http://purl.bioontology.org/ontology/MSH/D002965
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Immunity-related proteins and their ligands.
+ Immunoproteins_and_antigens
+ Antigens
+ Immunopeptides
+ Immunoproteins
+ Therapeutic antibodies
+
+
+
+ This includes T cell receptors (TR), major histocompatibility complex (MHC), immunoglobulin superfamily (IgSF) / antibodies, major histocompatibility complex superfamily (MhcSF), etc.
+ Immunoproteins and antigens
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Specific molecules, including large molecules built from repeating subunits (macromolecules) and small molecules of biological significance.
+ CHEBI:23367
+
+ Molecules
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.9 Toxicology
+ Toxins and the adverse effects of these chemical substances on living organisms.
+ Toxicology
+ Computational toxicology
+ Toxicoinformatics
+
+
+
+ Toxicology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Parallelised sequencing processes that are capable of sequencing many thousands of sequences simultaneously.
+
+ High-throughput sequencing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Gene regulatory networks.
+
+
+ Gene regulatory networks
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Informatics resources dedicated to one or more specific diseases (not diseases in general).
+
+ Disease (specific)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Variable number of tandem repeat (VNTR) polymorphism in a DNA sequence.
+
+
+ VNTR
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+ Microsatellite polymorphism in a DNA sequence.
+
+
+ Microsatellites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+ Restriction fragment length polymorphisms (RFLP) in a DNA sequence.
+
+
+ RFLP
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ DNA polymorphism.
+ DNA_polymorphism
+ Microsatellites
+ RFLP
+ SNP
+ Single nucleotide polymorphism
+ VNTR
+ Variable number of tandem repeat polymorphism
+ snps
+
+
+ Includes microsatellite polymorphism in a DNA sequence. A microsatellite polymorphism is a very short subsequence that is repeated a variable number of times between individuals. These repeats consist of the nucleotides cytosine and adenosine.
+ Includes restriction fragment length polymorphisms (RFLP) in a DNA sequence. An RFLP is defined by the presence or absence of a specific restriction site of a bacterial restriction enzyme.
+ Includes single nucleotide polymorphisms (SNP) and associated data, for example, the discovery and annotation of SNPs. A SNP is a DNA sequence variation where a single nucleotide differs between members of a species or paired chromosomes in an individual.
+ Includes variable number of tandem repeat (VNTR) polymorphism in a DNA sequence. VNTRs occur in non-coding regions of DNA and consist of a sub-sequence that is repeated a multiple (and varied) number of times.
+ DNA polymorphism
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Topic for the design of nucleic acid sequences with specific conformations.
+
+ Nucleic acid design
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The design of primers for PCR and DNA amplification or the design of molecular probes.
+
+ Primer or probe design
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.2
+
+
+ Molecular secondary or tertiary (3D) structural data resources, typically of proteins and nucleic acids.
+
+ Structure databases
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.2
+
+
+ Nucleic acid (secondary or tertiary) structure, such as whole structures, structural features and associated annotation.
+
+ Nucleic acid structure
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Molecular sequence data resources, including sequence sites, alignments, motifs and profiles.
+
+ Sequence databases
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Nucleotide sequences and associated concepts such as sequence sites, alignments, motifs and profiles.
+
+ Nucleic acid sequences
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+ Protein sequences and associated concepts such as sequence sites, alignments, motifs and profiles.
+
+
+ Protein sequences
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Protein interaction networks.
+
+ Protein interaction networks
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5.4 Biochemistry and molecular biology
+ The molecular basis of biological activity, particularly the macromolecules (e.g. proteins and nucleic acids) that are essential to life.
+ Molecular_biology
+ Biological processes
+
+
+
+ Molecular biology
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Mammals, e.g. information on a specific mammal genome including molecular sequences, genes and annotation.
+
+ Mammals
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5.5 Biodiversity conservation
+ The degree of variation of life forms within a given ecosystem, biome or an entire planet.
+ Biodiversity
+
+
+
+ Biodiversity
+
+ http://purl.bioontology.org/ontology/MSH/D044822
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The comparison, grouping together and classification of macromolecules on the basis of sequence similarity.
+
+ This includes the results of sequence clustering, ortholog identification, assignment to families, annotation etc.
+ Sequence clusters and classification
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The study of genes, genetic variation and heredity in living organisms.
+ Genetics
+ Genes
+ Heredity
+
+
+
+ Genetics
+
+ http://purl.bioontology.org/ontology/MSH/D005823
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The genes and genetic mechanisms such as Mendelian inheritance that underlie continuous phenotypic traits (such as height or weight).
+ Quantitative_genetics
+
+
+ Quantitative genetics
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The distribution of allele frequencies in a population of organisms and its change subject to evolutionary processes including natural selection, genetic drift, mutation and gene flow.
+ Population_genetics
+
+
+
+ Population genetics
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+ Regulatory RNA sequences including microRNA (miRNA) and small interfering RNA (siRNA).
+
+
+ Regulatory RNA
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ The documentation of resources such as tools, services and databases and how to get help.
+
+
+ Documentation and help
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The structural and functional organisation of genes and other genetic elements.
+
+ Genetic organisation
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The application of information technology to health, disease and biomedicine.
+ Biomedical informatics
+ Clinical informatics
+ Health and disease
+ Health informatics
+ Healthcare informatics
+ Medical_informatics
+
+
+
+ Medical informatics
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5.14 Developmental biology
+ How organisms grow and develop.
+ Developmental_biology
+ Development
+
+
+
+ Developmental biology
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The development of organisms between the one-cell stage (typically the zygote) and the end of the embryonic stage.
+ Embryology
+
+
+
+ Embryology
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 3.1.1 Anatomy and morphology
+ The form and function of the structures of living organisms.
+ Anatomy
+
+
+
+ Anatomy
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The scientific literature, language processing, reference information, and documentation.
+ Language
+ Literature
+ Literature_and_language
+ Bibliography
+ Citations
+ Documentation
+ References
+ Scientific literature
+
+
+
+ This includes the documentation of resources such as tools, services and databases, user support, how to get help etc.
+ Literature and language
+ http://purl.bioontology.org/ontology/MSH/D011642
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5 Biological sciences
+ VT 1.5.1 Aerobiology
+ VT 1.5.13 Cryobiology
+ VT 1.5.23 Reproductive biology
+ VT 1.5.3 Behavioural biology
+ VT 1.5.7 Biological rhythm
+ VT 1.5.8 Biology
+ VT 1.5.99 Other
+ The study of life and living organisms, including their morphology, biochemistry, physiology, development, evolution, and so on.
+ Biological science
+ Biology
+ Aerobiology
+ Behavioural biology
+ Biological rhythms
+ Chronobiology
+ Cryobiology
+ Reproductive biology
+
+
+
+ Biology
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Data stewardship
+ VT 1.3.1 Data management
+ Data management comprises the practices and principles of taking care of data, other than analysing them. This includes for example taking care of the associated metadata, formatting, storage, archiving, or access.
+ Metadata management
+
+
+
+ Data management
+
+
+ http://purl.bioontology.org/ontology/MSH/D000079803
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The detection of the positional features, such as functional and other key sites, in molecular sequences.
+
+ Sequence feature detection
+ http://purl.bioontology.org/ontology/MSH/D058977
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The detection of positional features such as functional sites in nucleotide sequences.
+
+ Nucleic acid feature detection
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The detection, identification and analysis of positional protein sequence features, such as functional sites.
+
+ Protein feature detection
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.2
+
+
+ Topic for modelling biological systems in mathematical terms.
+
+ Biological system modelling
+ true
+
+
+
+
+
+
+
+
+ beta13
+ The acquisition of data, typically measurements of physical systems using any type of sampling system, or by any other means.
+ Data collection
+
+
+ Data acquisition
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Specific genes and/or their encoded proteins or a family or other grouping of related genes and proteins.
+
+ Genes and proteins resources
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Topological domains such as cytoplasmic regions in a protein.
+
+
+ Protein topological domains
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+
+ Protein sequence variants produced e.g. from alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting.
+ Protein_variants
+
+
+ Protein variants
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.12
+
+
+ Regions within a nucleic acid sequence containing a signal that alters a biological function.
+
+ Expression signals
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+
+ Nucleic acids binding to some other molecule.
+ DNA_binding_sites
+ Matrix-attachment region
+ Matrix/scaffold attachment region
+ Nucleosome exclusion sequences
+ Restriction sites
+ Ribosome binding sites
+ Scaffold-attachment region
+
+
+ This includes ribosome binding sites (Shine-Dalgarno sequence in prokaryotes), restriction enzyme recognition sites (restriction sites) etc.
+ This includes sites involved with DNA replication and recombination. This includes binding sites for initiation of replication (origin of replication), regions where transfer is initiated during the conjugation or mobilisation (origin of transfer), starting sites for DNA duplication (origin of replication) and regions which are eliminated through any kind of recombination. Also nucleosome exclusion regions, i.e. specific patterns or regions which exclude nucleosomes (the basic structural units of eukaryotic chromatin which play a significant role in regulating gene expression).
+ DNA binding sites
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Repetitive elements within a nucleic acid sequence.
+
+
+ This includes long terminal repeats (LTRs); sequences (typically retroviral) directly repeated at both ends of a defined sequence and other types of repeating unit.
+ Nucleic acid repeats
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ DNA replication or recombination.
+ DNA_replication_and_recombination
+
+
+ DNA replication and recombination
+
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Coding sequences for a signal or transit peptide.
+
+
+ Signal or transit peptide
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Sequence tagged sites (STS) in nucleic acid sequences.
+
+
+ Sequence tagged sites
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ The determination of complete (typically nucleotide) sequences, including those of genomes (full genome sequencing, de novo sequencing and resequencing), amplicons and transcriptomes.
+ DNA-Seq
+ Sequencing
+ Chromosome walking
+ Clone verification
+ DNase-Seq
+ High throughput sequencing
+ High-throughput sequencing
+ NGS
+ NGS data analysis
+ Next gen sequencing
+ Next generation sequencing
+ Panels
+ Primer walking
+ Sanger sequencing
+ Targeted next-generation sequencing panels
+
+
+
+ Sequencing
+
+ http://purl.bioontology.org/ontology/MSH/D059014
+
+
+
+
+
+
+
+
+
+ 1.1
+ The analysis of protein-DNA interactions where chromatin immunoprecipitation (ChIP) is used in combination with massively parallel DNA sequencing to identify the binding sites of DNA-associated proteins.
+ ChIP-sequencing
+ Chip Seq
+ Chip sequencing
+ Chip-sequencing
+ ChIP-seq
+ ChIP-exo
+
+
+ ChIP-seq
+
+
+
+
+
+
+
+
+
+ 1.1
+ A topic concerning high-throughput sequencing of cDNA to measure the RNA content (transcriptome) of a sample, for example, to investigate how different alleles of a gene are expressed, detect post-transcriptional mutations or identify gene fusions.
+ RNA sequencing
+ RNA-Seq analysis
+ Small RNA sequencing
+ Small RNA-Seq
+ Small-Seq
+ Transcriptome profiling
+ WTSS
+ Whole transcriptome shotgun sequencing
+ RNA-Seq
+ MicroRNA sequencing
+ miRNA-seq
+
+
+ This includes small RNA profiling (small RNA-Seq), for example to find novel small RNAs, characterize mutations and analyze expression of small RNAs.
+ RNA-Seq
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ 1.3
+
+ DNA methylation including bisulfite sequencing, methylation sites and analysis, for example of patterns and profiles of DNA methylation in a population, tissue etc.
+
+
+ DNA methylation
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ The systematic study of metabolites, the chemical processes they are involved in, and the chemical fingerprints of specific cellular processes in a whole cell, tissue, organ or organism.
+ Metabolomics
+ Exometabolomics
+ LC-MS-based metabolomics
+ MS-based metabolomics
+ MS-based targeted metabolomics
+ MS-based untargeted metabolomics
+ Mass spectrometry-based metabolomics
+ Metabolites
+ Metabolome
+ Metabonomics
+ NMR-based metabolomics
+
+
+
+ Metabolomics
+
+ http://purl.bioontology.org/ontology/MSH/D055432
+
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ The study of the epigenetic modifications of a whole cell, tissue, organism etc.
+ Epigenomics
+
+
+
+ Epigenetics concerns the heritable changes in gene expression owing to mechanisms other than DNA sequence variation.
+ Epigenomics
+
+ http://purl.bioontology.org/ontology/MSH/D057890
+
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ Environmental DNA (eDNA)
+ Environmental sequencing
+ Biome sequencing
+ Community genomics
+ Ecogenomics
+ Environmental genomics
+ Environmental omics
+ The study of genetic material recovered from environmental samples, and associated environmental data.
+ Metagenomics
+ Shotgun metagenomics
+
+
+
+ Metagenomics
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Variation in chromosome structure including microscopic and submicroscopic types of variation such as deletions, duplications, copy-number variants, insertions, inversions and translocations.
+ DNA structural variation
+ Genomic structural variation
+ DNA_structural_variation
+ Deletion
+ Duplication
+ Insertion
+ Inversion
+ Translocation
+
+
+ Structural variation
+
+
+
+
+
+
+
+
+
+ 1.1
+ DNA-histone complexes (chromatin), organisation of chromatin into nucleosomes and packaging into higher-order structures.
+ DNA_packaging
+ Nucleosome positioning
+
+
+ DNA packaging
+
+ http://purl.bioontology.org/ontology/MSH/D042003
+
+
+
+
+
+
+
+
+ 1.1
+ 1.3
+
+
+ A topic concerning high-throughput sequencing of randomly fragmented genomic DNA, for example, to investigate whole-genome sequencing and resequencing, SNP discovery, identification of copy number variations and chromosomal rearrangements.
+
+ DNA-Seq
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.3
+
+
+ The alignment of sequences of (typically millions) of short reads to a reference genome. This is a specialised topic within sequence alignment, especially because of complications arising from RNA splicing.
+
+ RNA-Seq alignment
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ Experimental techniques that combine chromatin immunoprecipitation ('ChIP') with microarray ('chip'). ChIP-on-chip is used for high-throughput study of protein-DNA interactions.
+ ChIP-chip
+ ChIP-on-chip
+ ChiP
+
+
+ ChIP-on-chip
+
+
+
+
+
+
+
+
+
+ 1.3
+ The protection of data, such as patient health data, from damage or unwanted access from unauthorised users.
+ Data privacy
+ Data_security
+
+
+ Data security
+
+
+
+
+
+
+
+
+
+ 1.3
+ Biological samples and specimens.
+ Specimen collections
+ Sample_collections
+ biosamples
+ samples
+
+
+
+ Sample collections
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.4 Biochemistry and molecular biology
+ Chemical substances and physico-chemical processes that occur within living organisms.
+ Biological chemistry
+ Biochemistry
+ Glycomics
+ Pathobiochemistry
+ Phytochemistry
+
+
+
+ Biochemistry
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The study of evolutionary relationships amongst organisms from analysis of genetic information (typically gene or protein sequences).
+ Phylogenetics
+
+
+ Phylogenetics
+
+ http://purl.bioontology.org/ontology/MSH/D010802
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Topic concerning the study of heritable changes, for example in gene expression or phenotype, caused by mechanisms other than changes in the DNA sequence.
+ Epigenetics
+ DNA methylation
+ Histone modification
+ Methylation profiles
+
+
+
+ This includes sub-topics such as histone modification and DNA methylation (methylation sites and analysis, for example of patterns and profiles of DNA methylation in a population, tissue etc.)
+ Epigenetics
+
+ http://purl.bioontology.org/ontology/MSH/D019175
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The exploitation of biological process, structure and function for industrial purposes, for example the genetic manipulation of microorganisms for the antibody production.
+ Biotechnology
+ Applied microbiology
+
+
+
+ Biotechnology
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Phenomes, or the study of the change in phenotype (the physical and biochemical traits of organisms) in response to genetic and environmental factors.
+ Phenomics
+
+
+
+ Phenomics
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.16 Evolutionary biology
+ The evolutionary processes, from the genetic to environmental scale, that produced life in all its diversity.
+ Evolution
+ Evolutionary_biology
+
+
+
+ Evolutionary biology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.1.8 Physiology
+ The functions of living organisms and their constituent parts.
+ Physiology
+ Electrophysiology
+
+
+
+ Physiology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.20 Microbiology
+ The biology of microorganisms.
+ Microbiology
+ Antimicrobial stewardship
+ Medical microbiology
+ Microbial genetics
+ Microbial physiology
+ Microbial surveillance
+ Microbiological surveillance
+ Molecular infection biology
+ Molecular microbiology
+
+
+
+ Microbiology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The biology of parasites.
+ Parasitology
+
+
+
+ Parasitology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.1 Basic medicine
+ VT 3.2 Clinical medicine
+ VT 3.2.9 General and internal medicine
+ Research in support of healing by diagnosis, treatment, and prevention of disease.
+ Biomedical research
+ Clinical medicine
+ Experimental medicine
+ Medicine
+ General medicine
+ Internal medicine
+
+
+
+ Medicine
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Neuroscience
+ VT 3.1.5 Neuroscience
+ The study of the nervous system and brain; its anatomy, physiology and function.
+ Neurobiology
+ Molecular neuroscience
+ Neurophysiology
+ Systemetic neuroscience
+
+
+
+ Neurobiology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.3.1 Epidemiology
+ Topic concerning the patterns, cause, and effect of disease within populations.
+ Public_health_and_epidemiology
+ Epidemiology
+ Public health
+
+
+
+ Public health and epidemiology
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.9 Biophysics
+ The use of physics to study biological systems.
+ Biophysics
+ Medical physics
+
+
+
+ Biophysics
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.12 Computational biology
+ VT 1.5.19 Mathematical biology
+ VT 1.5.26 Theoretical biology
+ The development and application of theory, analytical methods, mathematical models and computational simulation of biological systems.
+ Computational_biology
+ Biomathematics
+ Mathematical biology
+ Theoretical biology
+
+
+
+ This includes the modeling and treatment of biological processes and systems in mathematical terms (theoretical biology).
+ Computational biology
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The analysis of transcriptomes, or a set of all the RNA molecules in a specific cell, tissue etc.
+ Transcriptomics
+ Comparative transcriptomics
+ Transcriptome
+
+
+
+ Transcriptomics
+
+
+
+
+
+
+
+
+
+ 1.3
+ Chemical science
+ Polymer science
+ VT 1.7.10 Polymer science
+ VT 1.7 Chemical sciences
+ VT 1.7.2 Chemistry
+ VT 1.7.3 Colloid chemistry
+ VT 1.7.5 Electrochemistry
+ VT 1.7.6 Inorganic and nuclear chemistry
+ VT 1.7.7 Mathematical chemistry
+ VT 1.7.8 Organic chemistry
+ VT 1.7.9 Physical chemistry
+ The composition and properties of matter, reactions, and the use of reactions to create new substances.
+ Chemistry
+ Inorganic chemistry
+ Mathematical chemistry
+ Nuclear chemistry
+ Organic chemistry
+ Physical chemistry
+
+
+
+ Chemistry
+
+
+
+
+
+
+
+
+
+ 1.3
+ VT 1.1.99 Other
+ VT:1.1 Mathematics
+ The study of numbers (quantity) and other topics including structure, space, and change.
+ Maths
+ Mathematics
+ Dynamic systems
+ Dynamical systems
+ Dynymical systems theory
+ Graph analytics
+ Monte Carlo methods
+ Multivariate analysis
+
+
+
+ Mathematics
+
+
+
+
+
+
+
+
+
+ 1.3
+ VT 1.2 Computer sciences
+ VT 1.2.99 Other
+ The theory and practical use of computer systems.
+ Computer_science
+ Cloud computing
+ HPC
+ High performance computing
+ High-performance computing
+
+
+
+ Computer science
+
+
+
+
+
+
+
+
+
+ 1.3
+ The study of matter, space and time, and related concepts such as energy and force.
+ Physics
+
+
+
+ Physics
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ RNA splicing; post-transcription RNA modification involving the removal of introns and joining of exons.
+ Alternative splicing
+ RNA_splicing
+ Splice sites
+
+
+ This includes the study of splice sites, splicing patterns, alternative splicing events and variants, isoforms, etc.
+ RNA splicing
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The structure and function of genes at a molecular level.
+ Molecular_genetics
+
+
+
+ Molecular genetics
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.2.25 Respiratory systems
+ The study of the respiratory system.
+ Pulmonary medicine
+ Pulmonology
+ Respiratory_medicine
+ Pulmonary disorders
+ Respiratory disease
+
+
+
+ Respiratory medicine
+
+
+
+
+
+
+
+
+
+ 1.3
+ 1.4
+
+
+ The study of metabolic diseases.
+
+ Metabolic disease
+ true
+
+
+
+
+
+
+
+
+ 1.3
+ VT 3.3.4 Infectious diseases
+ The branch of medicine that deals with the prevention, diagnosis and management of transmissible disease with clinically evident illness resulting from infection with pathogenic biological agents (viruses, bacteria, fungi, protozoa, parasites and prions).
+ Communicable disease
+ Transmissible disease
+ Infectious_disease
+
+
+
+ Infectious disease
+
+
+
+
+
+
+
+
+
+ 1.3
+ The study of rare diseases.
+ Rare_diseases
+
+
+
+ Rare diseases
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.7.4 Computational chemistry
+ Topic concerning the development and application of theory, analytical methods, mathematical models and computational simulation of chemical systems.
+ Computational_chemistry
+
+
+
+ Computational chemistry
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The branch of medicine that deals with the anatomy, functions and disorders of the nervous system.
+ Neurology
+ Neurological disorders
+
+
+
+ Neurology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.2.22 Peripheral vascular disease
+ VT 3.2.4 Cardiac and Cardiovascular systems
+ The diseases and abnormalities of the heart and circulatory system.
+ Cardiovascular medicine
+ Cardiology
+ Cardiovascular disease
+ Heart disease
+
+
+
+ Cardiology
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The discovery and design of drugs or potential drug compounds.
+ Drug_discovery
+
+
+
+ This includes methods that search compound collections, generate or analyse drug 3D conformations, identify drug targets with structural docking etc.
+ Drug discovery
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Repositories of biological samples, typically human, for basic biological and clinical research.
+ Tissue collection
+ biobanking
+ Biobank
+
+
+
+ Biobank
+
+
+
+
+
+
+
+
+
+ 1.3
+ Laboratory study of mice, for example, phenotyping, and mutagenesis of mouse cell lines.
+ Laboratory mouse
+ Mouse_clinic
+
+
+
+ Mouse clinic
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of microbial cells including bacteria, yeasts and moulds.
+ Microbial_collection
+
+
+
+ Microbial collection
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of cells grown under laboratory conditions, specifically, cells from multi-cellular eukaryotes and especially animal cells.
+ Cell_culture_collection
+
+
+
+ Cell culture collection
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of DNA, including both collections of cloned molecules, and populations of micro-organisms that store and propagate cloned DNA.
+ Clone_library
+
+
+
+ Clone library
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ 'translating' the output of basic and biomedical research into better diagnostic tools, medicines, medical procedures, policies and advice.
+ Translational_medicine
+
+
+
+ Translational medicine
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of chemicals, typically for use in high-throughput screening experiments.
+ Compound_libraries_and_screening
+ Chemical library
+ Chemical screening
+ Compound library
+ Small chemical compounds libraries
+ Small compounds libraries
+ Target identification and validation
+
+
+
+ Compound libraries and screening
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.3 Health sciences
+ Topic concerning biological science that is (typically) performed in the context of medicine.
+ Biomedical sciences
+ Health science
+ Biomedical_science
+
+
+
+ Biomedical science
+
+
+
+
+
+
+
+
+
+ 1.3
+ Topic concerning the identity of biological entities, or reports on such entities, and the mapping of entities and records in different databases.
+ Data_identity_and_mapping
+
+
+
+ Data identity and mapping
+
+
+
+
+
+
+
+
+ 1.3
+ 1.12
+
+ The search and retrieval from a database on the basis of molecular sequence similarity.
+
+
+ Sequence search
+ true
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ Objective indicators of biological state often used to assess health, and determine treatment.
+ Diagnostic markers
+ Biomarkers
+
+
+ Biomarkers
+
+
+
+
+
+
+
+
+
+ 1.4
+ The procedures used to conduct an experiment.
+ Experimental techniques
+ Lab method
+ Lab techniques
+ Laboratory method
+ Laboratory_techniques
+ Experiments
+ Laboratory experiments
+
+
+
+ Laboratory techniques
+
+
+
+
+
+
+
+
+
+ 1.4
+ The development of policies, models and standards that cover data acquisition, storage and integration, such that it can be put to use, typically through a process of systematically applying statistical and / or logical techniques to describe, illustrate, summarise or evaluate data.
+ Data_architecture_analysis_and_design
+ Data analysis
+ Data architecture
+ Data design
+
+
+
+ Data architecture, analysis and design
+
+
+
+
+
+
+
+
+
+ 1.4
+ The combination and integration of data from different sources, for example into a central repository or warehouse, to provide users with a unified view of these data.
+ Data_integration_and_warehousing
+ Data integration
+ Data warehousing
+
+
+
+ Data integration and warehousing
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Any matter, surface or construct that interacts with a biological system.
+ Biomaterials
+
+
+
+ Biomaterials
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The use of synthetic chemistry to study and manipulate biological systems.
+ Chemical_biology
+
+
+
+ Chemical biology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 1.7.1 Analytical chemistry
+ The study of the separation, identification, and quantification of the chemical components of natural and artificial materials.
+ Analytical_chemistry
+
+
+
+ Analytical chemistry
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of chemistry to create new compounds.
+ Synthetic_chemistry
+ Synthetic organic chemistry
+
+
+
+ Synthetic chemistry
+
+
+
+
+
+
+
+
+
+ 1.4
+ 1.2.12 Programming languages
+ Software engineering
+ VT 1.2.1 Algorithms
+ VT 1.2.14 Software engineering
+ VT 1.2.7 Data structures
+ The process that leads from an original formulation of a computing problem to executable programs.
+ Computer programming
+ Software development
+ Software_engineering
+ Algorithms
+ Data structures
+ Programming languages
+
+
+
+ Software engineering
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The process of bringing a new drug to market once a lead compound has been identified through drug discovery.
+ Drug development science
+ Medicine development
+ Medicines development
+ Drug_development
+
+
+
+ Drug development
+
+
+
+
+
+
+
+
+
+ 1.4
+ Drug delivery
+ Drug formulation
+ Drug formulation and delivery
+ The process of formulating and administering a pharmaceutical compound to achieve a therapeutic effect.
+ Biotherapeutics
+
+
+
+ Biotherapeutics
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The study of how a drug interacts with the body.
+ Drug_metabolism
+ ADME
+ Drug absorption
+ Drug distribution
+ Drug excretion
+ Pharmacodynamics
+ Pharmacokinetics
+ Pharmacokinetics and pharmacodynamics
+
+
+
+ Drug metabolism
+
+
+
+
+
+
+
+
+
+ 1.4
+ Health care research
+ Health care science
+ The discovery, development and approval of medicines.
+ Drug discovery and development
+ Medicines_research_and_development
+
+
+
+ Medicines research and development
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ The safety (or lack) of drugs and other medical interventions.
+ Patient safety
+ Safety_sciences
+ Drug safety
+
+
+
+ Safety sciences
+
+
+
+
+
+
+
+
+
+ 1.4
+ The detection, assessment, understanding and prevention of adverse effects of medicines.
+ Pharmacovigilence
+
+
+
+ Pharmacovigilence concerns safety once a drug has gone to market.
+ Pharmacovigilance
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ The testing of new medicines, vaccines or procedures on animals (preclinical) and humans (clinical) prior to their approval by regulatory authorities.
+ Preclinical_and_clinical_studies
+ Clinical studies
+ Clinical study
+ Clinical trial
+ Drug trials
+ Preclinical studies
+ Preclinical study
+
+
+
+ Preclinical and clinical studies
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The visual representation of an object.
+ Imaging
+ Diffraction experiment
+ Microscopy
+ Microscopy imaging
+ Optical super resolution microscopy
+ Photonic force microscopy
+ Photonic microscopy
+
+
+
+ This includes diffraction experiments that are based upon the interference of waves, typically electromagnetic waves such as X-rays or visible light, by some object being studied, typically in order to produce an image of the object or determine its structure.
+ Imaging
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of imaging techniques to understand biology.
+ Biological imaging
+ Biological_imaging
+
+
+
+ Bioimaging
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.13 Medical imaging
+ VT 3.2.14 Nuclear medicine
+ VT 3.2.24 Radiology
+ The use of imaging techniques for clinical purposes for medical research.
+ Medical_imaging
+ Neuroimaging
+ Nuclear medicine
+ Radiology
+
+
+
+ Medical imaging
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of optical instruments to magnify the image of an object.
+ Light_microscopy
+
+
+
+ Light microscopy
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of animals and alternatives in experimental research.
+ Animal experimentation
+ Animal research
+ Animal testing
+ In vivo testing
+ Laboratory_animal_science
+
+
+
+ Laboratory animal science
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 1.5.18 Marine and Freshwater biology
+ The study of organisms in the ocean or brackish waters.
+ Marine_biology
+
+
+
+ Marine biology
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The identification of molecular and genetic causes of disease and the development of interventions to correct them.
+ Molecular_medicine
+
+
+
+ Molecular medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.3.7 Nutrition and Dietetics
+ The study of the effects of food components on the metabolism, health, performance and disease resistance of humans and animals. It also includes the study of human behaviours related to food choices.
+ Nutrition
+ Nutrition science
+ Nutritional_science
+ Dietetics
+
+
+
+ Nutritional science
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The collective characterisation and quantification of pools of biological molecules that translate into the structure, function, and dynamics of an organism or organisms.
+ Omics
+
+
+
+ Omics
+
+
+
+
+
+
+
+
+
+ 1.4
+ The processes that need to be in place to ensure the quality of products for human or animal use.
+ Quality assurance
+ Quality_affairs
+ Good clinical practice
+ Good laboratory practice
+ Good manufacturing practice
+
+
+
+ Quality affairs
+
+
+
+
+
+
+
+
+ 1.4
+ The protection of public health by controlling the safety and efficacy of products in areas including pharmaceuticals, veterinary medicine, medical devices, pesticides, agrochemicals, cosmetics, and complementary medicines.
+ Healthcare RA
+ Regulatory_affairs
+
+
+
+ Regulatory affairs
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ Biomedical approaches to clinical interventions that involve the use of stem cells.
+ Stem cell research
+ Regenerative_medicine
+
+
+
+ Regenerative medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ An interdisciplinary field of study that looks at the dynamic systems of the human body as part of an integrated whole, incorporating biochemical, physiological, and environmental interactions that sustain life.
+ Systems_medicine
+
+
+
+ Systems medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ Topic concerning the branch of medicine that deals with the prevention, diagnosis, and treatment of disease, disorder and injury in animals.
+ Veterinary_medicine
+ Clinical veterinary medicine
+
+
+
+ Veterinary medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ The application of biological concepts and methods to the analytical and synthetic methodologies of engineering.
+ Biological engineering
+ Bioengineering
+
+
+
+ Bioengineering
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ Ageing
+ Aging
+ Gerontology
+ VT 3.2.10 Geriatrics and gerontology
+ The branch of medicine dealing with the diagnosis, treatment and prevention of disease in older people, and the problems specific to aging.
+ Geriatrics
+ Geriatric_medicine
+
+
+
+ Geriatric medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.1 Allergy
+ Health issues related to the immune system and their prevention, diagnosis and management.
+ Allergy_clinical_immunology_and_immunotherapeutics
+ Allergy
+ Clinical immunology
+ Immune disorders
+ Immunomodulators
+ Immunotherapeutics
+
+
+
+ Allergy, clinical immunology and immunotherapeutics
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The prevention of pain and the evaluation, treatment and rehabilitation of persons in pain.
+ Algiatry
+ Pain management
+ Pain_medicine
+
+
+
+ Pain medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.2 Anaesthesiology
+ Anaesthesia and anaesthetics.
+ Anaesthetics
+ Anaesthesiology
+
+
+
+ Anaesthesiology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.5 Critical care/Emergency medicine
+ The multidisciplinary specialty that cares for patients with acute, life-threatening illness or injury.
+ Acute medicine
+ Emergency medicine
+ Intensive care medicine
+ Critical_care_medicine
+
+
+
+ Critical care medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.7 Dermatology and venereal diseases
+ The branch of medicine that deals with prevention, diagnosis and treatment of disorders of the skin, scalp, hair and nails.
+ Dermatology
+ Dermatological disorders
+
+
+
+ Dermatology
+
+
+
+
+
+
+
+
+
+ 1.4
+ The study, diagnosis, prevention and treatments of disorders of the oral cavity, maxillofacial area and adjacent structures.
+ Dentistry
+
+
+
+ Dentistry
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.20 Otorhinolaryngology
+ The branch of medicine that deals with the prevention, diagnosis, and treatment of disorders of the ear, nose and throat.
+ Audiovestibular medicine
+ Otolaryngology
+ Otorhinolaryngology
+ Ear_nose_and_throat_medicine
+ Head and neck disorders
+
+
+
+ Ear, nose and throat medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The branch of medicine dealing with diseases of endocrine organs, hormone systems, their target organs, and disorders of the pathways of glucose and lipid metabolism.
+ Endocrinology_and_metabolism
+ Endocrine disorders
+ Endocrinology
+ Metabolic disorders
+ Metabolism
+
+
+
+ Endocrinology and metabolism
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.11 Hematology
+ The branch of medicine that deals with the blood, blood-forming organs and blood diseases.
+ Haematology
+ Blood disorders
+ Haematological disorders
+
+
+
+ Haematology
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.8 Gastroenterology and hepatology
+ The branch of medicine that deals with disorders of the oesophagus, stomach, duodenum, jejunum, ileum, large intestine, sigmoid colon and rectum.
+ Gastroenterology
+ Gastrointestinal disorders
+
+
+
+ Gastroenterology
+
+
+
+
+
+
+
+
+
+ 1.4
+ The study of the biological and physiological differences between males and females and how they effect differences in disease presentation and management.
+ Gender_medicine
+
+
+
+ Gender medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.15 Obstetrics and gynaecology
+ The branch of medicine that deals with the health of the female reproductive system, pregnancy and birth.
+ Gynaecology_and_obstetrics
+ Gynaecological disorders
+ Gynaecology
+ Obstetrics
+
+
+
+ Gynaecology and obstetrics
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The branch of medicine that deals with the liver, gallbladder, bile ducts and bile.
+ Hepatology
+ Hepatic_and_biliary_medicine
+ Liver disorders
+
+
+
+ Hepatic and biliary medicine
+
+ Hepatobiliary medicine
+
+
+
+
+
+
+
+
+ 1.4
+ 1.13
+
+ The branch of medicine that deals with the infectious diseases of the tropics.
+
+
+ Infectious tropical disease
+ true
+
+
+
+
+
+
+
+
+ 1.4
+ The branch of medicine that treats body wounds or shock produced by sudden physical injury, as from violence or accident.
+ Traumatology
+ Trauma_medicine
+
+
+
+ Trauma medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The branch of medicine that deals with the diagnosis, management and prevention of poisoning and other adverse health effects caused by medications, occupational and environmental toxins, and biological agents.
+ Medical_toxicology
+
+
+
+ Medical toxicology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.19 Orthopaedics
+ VT 3.2.26 Rheumatology
+ The branch of medicine that deals with the prevention, diagnosis, and treatment of disorders of the muscle, bone and connective tissue. It incorporates aspects of orthopaedics, rheumatology, rehabilitation medicine and pain medicine.
+ Musculoskeletal_medicine
+ Musculoskeletal disorders
+ Orthopaedics
+ Rheumatology
+
+
+
+ Musculoskeletal medicine
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Optometry
+ VT 3.2.17 Ophthalmology
+ VT 3.2.18 Optometry
+ The branch of medicine that deals with disorders of the eye, including eyelid, optic nerve/visual pathways and ocular muscles.
+ Ophthalmology
+ Eye disoders
+
+
+
+ Ophthalmology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.21 Paediatrics
+ The branch of medicine that deals with the medical care of infants, children and adolescents.
+ Child health
+ Paediatrics
+
+
+
+ Paediatrics
+
+
+
+
+
+
+
+
+
+ 1.4
+ Mental health
+ VT 3.2.23 Psychiatry
+ The branch of medicine that deals with the management of mental illness, emotional disturbance and abnormal behaviour.
+ Psychiatry
+ Psychiatric disorders
+
+
+
+ Psychiatry
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.3 Andrology
+ The health of the reproductive processes, functions and systems at all stages of life.
+ Reproductive_health
+ Andrology
+ Family planning
+ Fertility medicine
+ Reproductive disorders
+
+
+
+ Reproductive health
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.28 Transplantation
+ The use of operative, manual and instrumental techniques on a patient to investigate and/or treat a pathological condition or help improve bodily function or appearance.
+ Surgery
+ Transplantation
+
+
+
+ Surgery
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.29 Urology and nephrology
+ The branches of medicine and physiology focussing on the function and disorders of the urinary system in males and females, the reproductive system in males, and the kidney.
+ Urology_and_nephrology
+ Kidney disease
+ Nephrology
+ Urological disorders
+ Urology
+
+
+
+ Urology and nephrology
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Alternative medicine
+ Holistic medicine
+ Integrative medicine
+ VT 3.2.12 Integrative and Complementary medicine
+ Medical therapies that fall beyond the scope of conventional medicine but may be used alongside it in the treatment of disease and ill health.
+ Complementary_medicine
+
+
+
+ Complementary medicine
+
+
+
+
+
+
+
+
+
+ 1.7
+ Techniques that use magnetic fields and radiowaves to form images, typically to investigate the anatomy and physiology of the human body.
+ MRT
+ Magnetic resonance imaging
+ Magnetic resonance tomography
+ NMRI
+ Nuclear magnetic resonance imaging
+ MRI
+
+
+ MRI
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ The study of matter by studying the diffraction pattern from firing neutrons at a sample, typically to determine atomic and/or magnetic structure.
+ Neutron diffraction experiment
+ Neutron_diffraction
+ Elastic neutron scattering
+ Neutron microscopy
+
+
+ Neutron diffraction
+
+
+
+
+
+
+
+
+
+ 1.7
+ Imaging in sections (sectioning), through the use of a wave-generating device (tomograph) that generates an image (a tomogram).
+ CT
+ Computed tomography
+ TDM
+ Tomography
+ Electron tomography
+ PET
+ Positron emission tomography
+ X-ray tomography
+
+
+ Tomography
+
+
+
+
+
+
+
+
+
+ 1.7
+ true
+ KDD
+ Knowledge discovery in databases
+ VT 1.3.2 Data mining
+ The discovery of patterns in large data sets and the extraction and transformation of those patterns into a useful format.
+ Data_mining
+ Pattern recognition
+
+
+ Data mining
+
+
+
+
+
+
+
+
+
+ 1.7
+ Artificial Intelligence
+ VT 1.2.2 Artificial Intelligence (expert systems, machine learning, robotics)
+ A topic concerning the application of artificial intelligence methods to algorithms, in order to create methods that can learn from data in order to generate an output, rather than relying on explicitly encoded information only.
+ Machine_learning
+ Active learning
+ Ensembl learning
+ Kernel methods
+ Knowledge representation
+ Neural networks
+ Recommender system
+ Reinforcement learning
+ Supervised learning
+ Unsupervised learning
+
+
+ Machine learning
+
+
+
+
+
+
+
+
+
+ 1.8
+ Database administration
+ Information systems
+ Databases
+ The general handling of data stored in digital archives such as databases, databanks, web portals, and other data resources.
+ Database_management
+ Content management
+ Document management
+ File management
+ Record management
+
+
+ This includes databases for the results of scientific experiments, the application of high-throughput technology, computational analysis and the scientific literature. It covers the management and manipulation of digital documents, including database records, files, and reports.
+ Database management
+
+
+
+
+
+
+
+
+
+ 1.8
+ VT 1.5.29 Zoology
+ Animals, e.g. information on a specific animal genome including molecular sequences, genes and annotation.
+ Animal
+ Animal biology
+ Animals
+ Metazoa
+ Zoology
+ Animal genetics
+ Animal physiology
+ Entomology
+
+
+ The study of the animal kingdom.
+ Zoology
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ The biology, archival, detection, prediction and analysis of positional features such as functional and other key sites, in protein sequences and the conserved patterns (motifs, profiles etc.) that may be used to describe them.
+ Protein_sites_features_and_motifs
+ Protein sequence features
+ Signal peptide cleavage sites
+
+
+ A signal peptide coding sequence encodes an N-terminal domain of a secreted protein, which is involved in attaching the polypeptide to a membrane leader sequence. A transit peptide coding sequence encodes an N-terminal domain of a nuclear-encoded organellar protein; which is involved in import of the protein into the organelle.
+ Protein sites, features and motifs
+
+
+
+
+
+
+
+
+
+ 1.8
+ The biology, archival, detection, prediction and analysis of positional features such as functional and other key sites, in nucleic acid sequences and the conserved patterns (motifs, profiles etc.) that may be used to describe them.
+ Nucleic_acid_sites_features_and_motifs
+ Nucleic acid functional sites
+ Nucleic acid sequence features
+ Primer binding sites
+ Sequence tagged sites
+
+
+ Sequence tagged sites are short DNA sequences that are unique within a genome and serve as a mapping landmark; detectable by PCR, they allow a genome to be mapped via an ordering of STSs.
+ Nucleic acid sites, features and motifs
+
+
+
+
+
+
+
+
+
+ 1.8
+ Transcription of DNA into RNA and features of a messenger RNA (mRNA) molecules including precursor RNA, primary (unprocessed) transcript and fully processed molecules.
+ Gene_transcripts
+ Coding RNA
+ EST
+ Exons
+ Fusion transcripts
+ Gene transcript features
+ Introns
+ PolyA signal
+ PolyA site
+ Signal peptide coding sequence
+ Transit peptide coding sequence
+ cDNA
+ mRNA
+ mRNA features
+
+
+ This includes 5'untranslated region (5'UTR), coding sequences (CDS), exons, intervening sequences (intron) and 3'untranslated regions (3'UTR).
+ This includes Introns, and protein-coding regions including coding sequences (CDS), exons, translation initiation sites and open reading frames. Also expressed sequence tag (EST) or complementary DNA (cDNA) sequences.
+ This includes coding sequences for a signal or transit peptide. A signal peptide coding sequence encodes an N-terminal domain of a secreted protein, which is involved in attaching the polypeptide to a membrane leader sequence. A transit peptide coding sequence encodes an N-terminal domain of a nuclear-encoded organellar protein; which is involved in import of the protein into the organelle.
+ This includes regions or sites in a eukaryotic and eukaryotic viral RNA sequence which directs endonuclease cleavage or polyadenylation of an RNA transcript. A polyA signal is required for endonuclease cleavage of an RNA transcript that is followed by polyadenylation. A polyA site is a site on an RNA transcript to which adenine residues will be added during post-transcriptional polyadenylation.
+ Gene transcripts
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-ligand (small molecule) interaction(s).
+
+
+ Protein-ligand interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-drug interaction(s).
+
+
+ Protein-drug interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Genotype experiment including case control, population, and family studies. These might use array based methods and re-sequencing methods.
+ Genotyping_experiment
+
+
+ Genotyping experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ Genome-wide association study experiments.
+ GWAS
+ GWAS analysis
+ Genome-wide association study
+ GWAS_study
+
+
+ GWAS study
+
+
+
+
+
+
+
+
+
+ 1.8
+ Microarray experiments including conditions, protocol, sample:data relationships etc.
+ Microarrays
+ Microarray_experiment
+ Gene expression microarray
+ Genotyping array
+ Methylation array
+ MicroRNA array
+ Multichannel microarray
+ One channel microarray
+ Proprietary platform microarray
+ RNA chips
+ RNA microarrays
+ Reverse phase protein array
+ SNP array
+ Tiling arrays
+ Tissue microarray
+ Two channel microarray
+ aCGH microarray
+ mRNA microarray
+ miRNA array
+
+
+ This might specify which raw data file relates to which sample and information on hybridisations, e.g. which are technical and which are biological replicates.
+ Microarray experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ PCR experiments, e.g. quantitative real-time PCR.
+ Polymerase chain reaction
+ PCR_experiment
+ Quantitative PCR
+ RT-qPCR
+ Real Time Quantitative PCR
+
+
+ PCR experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ Proteomics experiments.
+ Proteomics_experiment
+ 2D PAGE experiment
+ DIA
+ Data-independent acquisition
+ MS
+ MS experiments
+ Mass spectrometry
+ Mass spectrometry experiments
+ Northern blot experiment
+ Spectrum demultiplexing
+
+
+ This includes two-dimensional gel electrophoresis (2D PAGE) experiments, gels or spots in a gel. Also mass spectrometry - an analytical chemistry technique that measures the mass-to-charge ratio and abundance of ions in the gas phase. Also Northern blot experiments.
+ Proteomics experiment
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Two-dimensional gel electrophoresis experiments, gels or spots in a gel.
+
+
+ 2D PAGE experiment
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Northern Blot experiments.
+
+
+ Northern blot experiment
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ RNAi experiments.
+ RNAi_experiment
+
+
+ RNAi experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ Biological computational model experiments (simulation), for example the minimum information required in order to permit its correct interpretation and reproduction.
+ Simulation_experiment
+
+
+ Simulation experiment
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-DNA/RNA interaction(s).
+
+
+ Protein-nucleic acid interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-protein interaction(s), including interactions between protein domains.
+
+
+ Protein-protein interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Cellular process pathways.
+
+
+ Cellular process pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Disease pathways, typically of human disease.
+
+
+ Disease pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Environmental information processing pathways.
+
+
+ Environmental information processing pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Genetic information processing pathways.
+
+
+ Genetic information processing pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Super-secondary structure of protein sequence(s).
+
+
+ Protein super-secondary structure
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Catalytic residues (active site) of an enzyme.
+
+
+ Protein active sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Binding sites in proteins, including cleavage sites (for a proteolytic enzyme or agent), key residues involved in protein folding, catalytic residues (active site) of an enzyme, ligand-binding (non-catalytic) residues of a protein, such as sites that bind metal, prosthetic groups or lipids, RNA and DNA-binding proteins and binding sites etc.
+ Protein_binding_sites
+ Enzyme active site
+ Protein cleavage sites
+ Protein functional sites
+ Protein key folding sites
+ Protein-nucleic acid binding sites
+
+
+ Protein binding sites
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ RNA and DNA-binding proteins and binding sites in protein sequences.
+
+
+ Protein-nucleic acid binding sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Cleavage sites (for a proteolytic enzyme or agent) in a protein sequence.
+
+
+ Protein cleavage sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Chemical modification of a protein.
+
+
+ Protein chemical modifications
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Disordered structure in a protein.
+ Protein features (disordered structure)
+ Protein_disordered_structure
+
+
+ Protein disordered structure
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Structural domains or 3D folds in a protein or polypeptide chain.
+
+
+ Protein domains
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Key residues involved in protein folding.
+
+
+ Protein key folding sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Post-translation modifications in a protein sequence, typically describing the specific sites involved.
+
+
+ Protein post-translational modifications
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Secondary structure (predicted or real) of a protein, including super-secondary structure.
+ Protein features (secondary structure)
+ Protein_secondary_structure
+ Protein super-secondary structure
+
+
+ Super-secondary structures include leucine zippers, coiled coils, Helix-Turn-Helix etc.
+ The location and size of the secondary structure elements and intervening loop regions is typically given. The report can include disulphide bonds and post-translationally formed peptide bonds (crosslinks).
+ Protein secondary structure
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Short repetitive subsequences (repeat sequences) in a protein sequence.
+
+
+ Protein sequence repeats
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Signal peptides or signal peptide cleavage sites in protein sequences.
+
+
+ Protein signal peptides
+ true
+
+
+
+
+
+
+
+
+ 1.10
+ VT 1.1.1 Applied mathematics
+ The application of mathematics to specific problems in science, typically by the formulation and analysis of mathematical models.
+ Applied_mathematics
+
+
+ Applied mathematics
+
+
+
+
+
+
+
+
+
+ 1.10
+ VT 1.1.1 Pure mathematics
+ The study of abstract mathematical concepts.
+ Pure_mathematics
+ Linear algebra
+
+
+ Pure mathematics
+
+
+
+
+
+
+
+
+
+ 1.10
+ The control of data entry and maintenance to ensure the data meets defined standards, qualities or constraints.
+ Data_governance
+ Data stewardship
+
+
+ Data governance
+
+ http://purl.bioontology.org/ontology/MSH/D030541
+
+
+
+
+
+
+
+
+ 1.10
+ The quality, integrity, and cleaning up of data.
+ Data_quality_management
+ Data clean-up
+ Data cleaning
+ Data integrity
+ Data quality
+
+
+ Data quality management
+
+
+
+
+
+
+
+
+
+ 1.10
+ Freshwater science
+ VT 1.5.18 Marine and Freshwater biology
+ The study of organisms in freshwater ecosystems.
+ Freshwater_biology
+
+
+
+ Freshwater biology
+
+
+
+
+
+
+
+
+
+ 1.10
+ true
+ VT 3.1.2 Human genetics
+ The study of inheritance in human beings.
+ Human_genetics
+
+
+
+ Human genetics
+
+
+
+
+
+
+
+
+
+ 1.10
+ VT 3.3.14 Tropical medicine
+ Health problems that are prevalent in tropical and subtropical regions.
+ Tropical_medicine
+
+
+
+ Tropical medicine
+
+
+
+
+
+
+
+
+
+ 1.10
+ true
+ VT 3.3.14 Tropical medicine
+ VT 3.4 Medical biotechnology
+ VT 3.4.1 Biomedical devices
+ VT 3.4.2 Health-related biotechnology
+ Biotechnology applied to the medical sciences and the development of medicines.
+ Medical_biotechnology
+ Pharmaceutical biotechnology
+
+
+
+ Medical biotechnology
+
+
+
+
+
+
+
+
+
+ 1.10
+ true
+ VT 3.4.5 Molecular diagnostics
+ An approach to medicine whereby decisions and practices are tailored to the individual patient based on their predicted response or risk of disease.
+ Precision medicine
+ Personalised_medicine
+ Molecular diagnostics
+
+
+
+ Personalised medicine
+
+
+
+
+
+
+
+
+
+ 1.12
+ Experimental techniques to purify a protein-DNA crosslinked complex. Usually sequencing follows e.g. in the techniques ChIP-chip, ChIP-seq and MeDIP-seq.
+ Chromatin immunoprecipitation
+ Immunoprecipitation_experiment
+
+
+ Immunoprecipitation experiment
+
+
+
+
+
+
+
+
+
+ 1.12
+ Laboratory technique to sequence the complete DNA sequence of an organism's genome at a single time.
+ Genome sequencing
+ WGS
+ Whole_genome_sequencing
+ De novo genome sequencing
+ Whole genome resequencing
+
+
+ Whole genome sequencing
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Laboratory technique to sequence the methylated regions in DNA.
+ MeDIP-chip
+ MeDIP-seq
+ mDIP
+ Methylated_DNA_immunoprecipitation
+ BS-Seq
+ Bisulfite sequencing
+ MeDIP
+ Methylated DNA immunoprecipitation (MeDIP)
+ Methylation sequencing
+ WGBS
+ Whole-genome bisulfite sequencing
+ methy-seq
+ methyl-seq
+
+
+ Methylated DNA immunoprecipitation
+
+
+
+
+
+
+
+
+
+ 1.12
+ Laboratory technique to sequence all the protein-coding regions in a genome, i.e., the exome.
+ Exome
+ Exome analysis
+ Exome capture
+ Targeted exome capture
+ WES
+ Whole exome sequencing
+ Exome_sequencing
+
+
+ Exome sequencing is considered a cheap alternative to whole genome sequencing.
+ Exome sequencing
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ true
+ The design of an experiment intended to test a hypothesis, and describe or explain empirical data obtained under various experimental conditions.
+ Design of experiments
+ Experimental design
+ Studies
+ Experimental_design_and_studies
+
+
+ Experimental design and studies
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ The design of an experiment involving non-human animals.
+ Animal_study
+ Challenge study
+
+
+ Animal study
+
+
+
+
+
+
+
+
+
+
+ 1.13
+ true
+ The ecology of microorganisms including their relationship with one another and their environment.
+ Environmental microbiology
+ Microbial_ecology
+ Community analysis
+ Microbiome
+ Molecular community analysis
+
+
+ Microbial ecology
+
+
+
+
+
+
+
+
+
+ 1.17
+ An antibody-based technique used to map in vivo RNA-protein interactions.
+ RIP
+ RNA_immunoprecipitation
+ CLIP
+ CLIP-seq
+ HITS-CLIP
+ PAR-CLIP
+ iCLIP
+
+
+ RNA immunoprecipitation
+
+
+
+
+
+
+
+
+
+ 1.17
+ Large-scale study (typically comparison) of DNA sequences of populations.
+ Population_genomics
+
+
+
+ Population genomics
+
+
+
+
+
+
+
+
+
+ 1.20
+ Agriculture
+ Agroecology
+ Agronomy
+ Multidisciplinary study, research and development within the field of agriculture.
+ Agricultural_science
+ Agricultural biotechnology
+ Agricultural economics
+ Animal breeding
+ Animal husbandry
+ Animal nutrition
+ Farming systems research
+ Food process engineering
+ Food security
+ Horticulture
+ Phytomedicine
+ Plant breeding
+ Plant cultivation
+ Plant nutrition
+ Plant pathology
+ Soil science
+
+
+ Agricultural science
+
+
+
+
+
+
+
+
+
+ 1.20
+ Approach which samples, in parallel, all genes in all organisms present in a given sample, e.g. to provide insight into biodiversity and function.
+ Shotgun metagenomic sequencing
+ Metagenomic_sequencing
+
+
+ Metagenomic sequencing
+
+
+
+
+
+
+
+
+
+ 1.21
+ Environment
+ Study of the environment, the interactions between its physical, chemical, and biological components and its effect on life. Also how humans impact upon the environment, and how we can manage and utilise natural resources.
+ Environmental_science
+
+
+ Environmental sciences
+
+
+
+
+
+
+
+
+
+ 1.22
+ The study and simulation of molecular conformations using a computational model and computer simulations.
+
+
+ This includes methods such as Molecular Dynamics, Coarse-grained dynamics, metadynamics, Quantum Mechanics, QM/MM, Markov State Models, etc.
+ Biomolecular simulation
+
+
+
+
+
+
+
+
+
+ 1.22
+ The application of multi-disciplinary science and technology for the construction of artificial biological systems for diverse applications.
+ Biomimetic chemistry
+
+
+ Synthetic biology
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ The application of biotechnology to directly manipulate an organism's genes.
+ Genetic manipulation
+ Genetic modification
+ Genetic_engineering
+ Genome editing
+ Genome engineering
+
+
+ Genetic engineering
+
+
+
+
+
+
+
+
+
+ 1.24
+ A field of biological research focused on the discovery and identification of peptides, typically by comparing mass spectra against a protein database.
+ Proteogenomics
+
+
+ Proteogenomics
+
+
+
+
+
+
+
+
+
+ 1.24
+ Amplicon panels
+ Resequencing
+ Laboratory experiment to identify the differences between a specific genome (of an individual) and a reference genome (developed typically from many thousands of individuals). WGS re-sequencing is used as golden standard to detect variations compared to a given reference genome, including small variants (SNP and InDels) as well as larger genome re-organisations (CNVs, translocations, etc.).
+ Highly targeted resequencing
+ Whole genome resequencing (WGR)
+ Whole-genome re-sequencing (WGSR)
+ Amplicon sequencing
+ Amplicon-based sequencing
+ Ultra-deep sequencing
+ Amplicon sequencing is the ultra-deep sequencing of PCR products (amplicons), usually for the purpose of efficient genetic variant identification and characterisation in specific genomic regions.
+ Genome resequencing
+
+
+
+
+
+
+
+
+
+ 1.24
+ A biomedical field that bridges immunology and genetics, to study the genetic basis of the immune system.
+ Immune system genetics
+ Immungenetics
+ Immunology and genetics
+ Immunogenetics
+ Immunogenes
+
+
+ This involves the study of often complex genetic traits underlying diseases involving defects in the immune system. For example, identifying target genes for therapeutic approaches, or genetic variations involved in immunological pathology.
+ Immunogenetics
+
+
+
+
+
+
+
+
+
+ 1.24
+ Interdisciplinary science focused on extracting information from chemical systems by data analytical approaches, for example multivariate statistics, applied mathematics, and computer science.
+ Chemometrics
+
+
+ Chemometrics
+
+
+
+
+
+
+
+
+
+ 1.24
+ Cytometry is the measurement of the characteristics of cells.
+ Cytometry
+ Flow cytometry
+ Image cytometry
+ Mass cytometry
+
+
+ Cytometry
+
+
+
+
+
+
+
+
+
+ 1.24
+ Biotechnology approach that seeks to optimize cellular genetic and regulatory processes in order to increase the cells' production of a certain substance.
+
+
+ Metabolic engineering
+
+
+
+
+
+
+
+
+
+ 1.24
+ Molecular biology methods used to analyze the spatial organization of chromatin in a cell.
+ 3C technologies
+ 3C-based methods
+ Chromosome conformation analysis
+ Chromosome_conformation_capture
+ Chromatin accessibility
+ Chromatin accessibility assay
+ Chromosome conformation capture
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The study of microbe gene expression within natural environments (i.e. the metatranscriptome).
+ Metatranscriptomics
+
+
+ Metatranscriptomics methods can be used for whole gene expression profiling of complex microbial communities.
+ Metatranscriptomics
+
+
+
+
+
+
+
+
+
+ 1.24
+ The reconstruction and analysis of genomic information in extinct species.
+ Paleogenomics
+ Ancestral genomes
+ Paleogenetics
+ Paleogenomics
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The biological classification of organisms by categorizing them in groups ("clades") based on their most recent common ancestor.
+ Cladistics
+ Tree of life
+
+
+ Cladistics
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The study of the process and mechanism of change of biomolecules such as DNA, RNA, and proteins across generations.
+ Molecular_evolution
+
+
+ Molecular evolution
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Immunoinformatics is the field of computational biology that deals with the study of immunological questions. Immunoinformatics is at the interface between immunology and computer science. It takes advantage of computational, statistical, mathematical approaches and enhances the understanding of immunological knowledge.
+ Computational immunology
+ Immunoinformatics
+ This involves the study of often complex genetic traits underlying diseases involving defects in the immune system. For example, identifying target genes for therapeutic approaches, or genetic variations involved in immunological pathology.
+ Immunoinformatics
+
+
+
+
+
+
+
+
+
+ 1.24
+ A diagnostic imaging technique based on the application of ultrasound.
+ Standardized echography
+ Ultrasound imaging
+ Echography
+ Diagnostic sonography
+ Medical ultrasound
+ Standard echography
+ Ultrasonography
+
+
+ Echography
+
+
+
+
+
+
+
+
+
+ 1.24
+ Experimental approaches to determine the rates of metabolic reactions - the metabolic fluxes - within a biological entity.
+ Fluxomics
+ The "fluxome" is the complete set of metabolic fluxes in a cell, and is a dynamic aspect of phenotype.
+ Fluxomics
+
+
+
+
+
+
+
+
+
+ 1.12
+ An experiment for studying protein-protein interactions.
+ Protein_interaction_experiment
+ Co-immunoprecipitation
+ Phage display
+ Yeast one-hybrid
+ Yeast two-hybrid
+
+
+ This used to have the ID http://edamontology.org/topic_3557 but the numerical part (owing to an error) duplicated http://edamontology.org/operation_3557 ('Imputation'). ID of this concept set to http://edamontology.org/topic_3957 in EDAM 1.24.
+ Protein interaction experiment
+
+
+
+
+
+
+
+
+
+ 1.25
+ A DNA structural variation, specifically a duplication or deletion event, resulting in sections of the genome to be repeated, or the number of repeats in the genome to vary between individuals.
+ Copy_number_variation
+ CNV deletion
+ CNV duplication
+ CNV insertion / amplification
+ Complex CNV
+ Copy number variant
+ Copy number variation
+
+
+
+
+
+
+
+
+
+ 1.25
+ The branch of genetics concerned with the relationships between chromosomes and cellular behaviour, especially during mitosis and meiosis.
+
+
+ Cytogenetics
+
+
+
+
+
+
+
+
+
+ 1.25
+ The design of vaccines to protect against a particular pathogen, including antigens, delivery systems, and adjuvants to elicit a predictable immune response against specific epitopes.
+ Vaccinology
+ Rational vaccine design
+ Reverse vaccinology
+ Structural vaccinology
+ Structure-based immunogen design
+ Vaccine design
+
+
+ Vaccinology
+
+
+
+
+
+
+
+
+
+ 1.25
+ The study of immune system as a whole, its regulation and response to pathogens using genome-wide approaches.
+
+
+ Immunomics
+
+
+
+
+
+
+
+
+
+ 1.25
+ Epistasis can be defined as the ability of the genotype at one locus to supersede the phenotypic effect of a mutation at another locus. This interaction between genes can occur at different levels: gene expression, protein levels, etc.
+ Epistatic genetic interaction
+ Epistatic interactions
+
+
+ Epistasis
+
+ http://purl.bioontology.org/ontology/MSH/D004843
+
+
+
+
+
+
+
+
+ 1.26
+ Open science encompasses the practices of making scientific research transparent and participatory, and its outputs publicly accessible.
+
+
+ Open science
+
+
+
+
+
+
+
+
+
+ 1.26
+ Data rescue denotes digitalisation, formatting, archival, and publication of data that were not available in accessible or usable form. Examples are data from private archives, data inside publications, or in paper records stored privately or publicly.
+
+
+ Data rescue
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ FAIR data principles
+ FAIRification
+ FAIR data is data that meets the principles of being findable, accessible, interoperable, and reusable.
+ Findable, accessible, interoperable, reusable data
+ Open data
+
+
+ A substantially overlapping term is 'open data', i.e. publicly available data that is free to use, distribute, and create derivative work from, without restrictions. Open data does not automatically have to be FAIR (e.g. findable or interoperable), while FAIR data does in some cases not have to be publicly available without restrictions (especially sensitive personal data).
+ FAIR data
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Microbial mechanisms for protecting microorganisms against antimicrobial agents.
+ AMR
+ Antifungal resistance
+ Antiprotozoal resistance
+ Antiviral resistance
+ Extensive drug resistance (XDR)
+ Multidrug resistance
+ Multiple drug resistance (MDR)
+ Multiresistance
+ Pandrug resistance (PDR)
+ Total drug resistance (TDR)
+
+
+ Antimicrobial Resistance
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The monitoring method for measuring electrical activity in the brain.
+ EEG
+
+
+ Electroencephalography
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The monitoring method for measuring electrical activity in the heart.
+ ECG
+ EKG
+
+
+ Electrocardiography
+
+
+
+
+
+
+
+
+
+ 1.26
+ A method for studying biomolecules and other structures at very low (cryogenic) temperature using electron microscopy.
+ cryo-EM
+
+
+ Cryogenic electron microscopy
+
+
+
+
+
+
+
+
+
+ 1.26
+ Biosciences, or life sciences, include fields of study related to life, living beings, and biomolecules.
+ Life sciences
+
+
+ Biosciences
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Biogeochemical cycle
+ The carbon cycle is the biogeochemical pathway of carbon moving through the different parts of the Earth (such as ocean, atmosphere, soil), or eventually another planet.
+
+
+ Note that the carbon-nitrogen-oxygen (CNO) cycle (https://en.wikipedia.org/wiki/CNO_cycle) is a completely different, thermonuclear reaction in stars.
+ Carbon cycle
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Multiomics concerns integration of data from multiple omics (e.g. transcriptomics, proteomics, epigenomics).
+ Integrative omics
+ Multi-omics
+ Pan-omics
+ Panomics
+
+
+ Multiomics
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ With ribosome profiling, ribosome-protected mRNA fragments are analyzed with RNA-seq techniques leading to a genome-wide measurement of the translation landscape.
+ RIBO-seq
+ Ribo-Seq
+ RiboSeq
+ ribo-seq
+ ribosomal footprinting
+ translation footprinting
+
+
+ Ribosome Profiling
+
+
+
+
+
+
+
+
+
+ 1.26
+ Combined with NGS (Next Generation Sequencing) technologies, single-cell sequencing allows the study of genetic information (DNA, RNA, epigenome...) at a single cell level. It is often used for differential analysis and gene expression profiling.
+ Single Cell Genomics
+
+
+ Single-Cell Sequencing
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The study of mechanical waves in liquids, solids, and gases.
+
+
+ Acoustics
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Interdisciplinary study of behavior, precise control, and manipulation of low (microlitre) volume fluids in constrained space.
+ Fluidics
+
+
+ Microfluidics
+
+
+
+
+
+
+
+
+
+ 1.26
+ Genomic imprinting is a gene regulation mechanism by which a subset of genes are expressed from one of the two parental chromosomes only. Imprinted genes are organized in clusters, their silencing/activation of the imprinted loci involves epigenetic marks (DNA methylation, etc) and so-called imprinting control regions (ICR). It has been described in mammals, but also plants and insects.
+ Gene imprinting
+
+
+ Genomic imprinting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Environmental DNA (eDNA)
+ Environmental RNA (eRNA)
+ Environmental sequencing
+ Taxonomic profiling
+ Metabarcoding is the barcoding of (environmental) DNA or RNA to identify multiple taxa from the same sample.
+ DNA metabarcoding
+ Environmental metabarcoding
+ RNA metabarcoding
+ eDNA metabarcoding
+ eRNA metabarcoding
+
+
+ Typically, high-throughput sequencing is performed and the resulting sequence reads are matched to DNA barcodes in a reference database.
+ Metabarcoding
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+
+ An obsolete concept (redefined in EDAM).
+
+ Needed for conversion to the OBO format.
+ Obsolete concept (EDAM)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A serialisation format conforming to the Web Ontology Language (OWL) model.
+
+
+ OWL format
+
+
+ 1.2
+ rdf
+
+ Resource Description Framework (RDF) XML format.
+
+
+ RDF/XML can be used as a standard serialisation syntax for OWL DL, but not for OWL Full.
+ RDF/XML
+ http://www.ebi.ac.uk/SWO/data/SWO_3000006
+
+
+
+
+
+
+
diff --git a/edamfu/tests/edamontology.org.owl b/edamfu/tests/edamontology.org.owl
new file mode 100644
index 0000000..af5b2c7
--- /dev/null
+++ b/edamfu/tests/edamontology.org.owl
@@ -0,0 +1,61094 @@
+
+
+
+
+ 4040
+
+ 03.10.2023 11:14 UTC
+ EDAM http://edamontology.org/ "EDAM relations, concept properties, and subsets"
+ EDAM_data http://edamontology.org/data_ "EDAM types of data"
+ EDAM_format http://edamontology.org/format_ "EDAM data formats"
+ EDAM_operation http://edamontology.org/operation_ "EDAM operations"
+ EDAM_topic http://edamontology.org/topic_ "EDAM topics"
+ EDAM is a community project and its development can be followed and contributed to at https://github.com/edamontology/edamontology.
+ EDAM is particularly suitable for semantic annotations and categorisation of diverse resources related to data analysis and management: e.g. tools, workflows, learning materials, or standards. EDAM is also useful in data management itself, for recording provenance metadata of processed data.
+ https://github.com/edamontology/edamontology/graphs/contributors and many more!
+ Hervé Ménager
+ Jon Ison
+ Matúš Kalaš
+ EDAM is a domain ontology of data analysis and data management in bio- and other sciences, and science-based applications. It comprises concepts related to analysis, modelling, optimisation, and data life-cycle. Targeting usability by diverse users, the structure of EDAM is relatively simple, divided into 4 main sections: Topic, Operation, Data (incl. Identifier), and Format.
+ application/rdf+xml
+ EDAM - The ontology of data analysis and management
+
+
+ 1.26_dev
+
+
+
+
+
+
+
+
+
+ Matúš Kalaš
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+ true
+ Publication reference
+ 'Citation' concept property ('citation' metadata tag) contains a dereferenceable URI, preferably including a DOI, pointing to a citeable publication of the given data format.
+ Publication
+
+ Citation
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ Version in which a concept was created.
+
+ Created in
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ A comment explaining why the concept should be or was deprecated, including name of person commenting (jison, mkalas etc.).
+
+ deprecation_comment
+
+
+
+
+
+
+
+ true
+ 'Documentation' trailing modifier (qualifier, 'documentation') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to a page with explanation, description, documentation, or specification of the given data format.
+ Specification
+
+ Documentation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ 'Example' concept property ('example' metadata tag) lists examples of valid values of types of identifiers (accessions). Applicable to some other types of data, too.
+
+ Separated by bar ('|'). For more complex data and data formats, it can be a link to a website with examples, instead.
+ Example
+
+
+
+
+
+
+
+ true
+ 'File extension' concept property ('file_extension' metadata tag) lists examples of usual file extensions of formats.
+
+ N.B.: File extensions that are not correspondingly defined at http://filext.com are recorded in EDAM only if not in conflict with http://filext.com, and/or unique and usual within life-science computing.
+ Separated by bar ('|'), without a dot ('.') prefix, preferably not all capital characters.
+ File extension
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ 'Information standard' trailing modifier (qualifier, 'information_standard') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to an information standard supported by the given data format.
+ Minimum information checklist
+ Minimum information standard
+
+ "Supported by the given data format" here means, that the given format enables representation of data that satisfies the information standard.
+ Information standard
+
+
+
+
+
+
+
+ true
+ When 'true', the concept has been proposed to be deprecated.
+
+ deprecation_candidate
+
+
+
+
+
+
+
+ true
+ When 'true', the concept has been proposed to be refactored.
+
+ refactor_candidate
+
+
+
+
+
+
+
+ true
+ When 'true', the concept has been proposed or is supported within Debian as a tag.
+
+ isdebtag
+
+
+
+
+
+
+
+ true
+ 'Media type' trailing modifier (qualifier, 'media_type') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to a page specifying a media type of the given data format.
+ MIME type
+
+ Media type
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ Whether terms associated with this concept are recommended for use in annotation.
+
+ notRecommendedForAnnotation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ Version in which a concept was made obsolete.
+
+ Obsolete since
+
+
+
+
+
+
+
+ true
+ EDAM concept URI of the erstwhile "parent" of a now deprecated concept.
+
+ Old parent
+
+
+
+
+
+
+
+ true
+ EDAM concept URI of an erstwhile related concept (by has_input, has_output, has_topic, is_format_of, etc.) of a now deprecated concept.
+
+ Old related
+
+
+
+
+
+
+
+ true
+ 'Ontology used' concept property ('ontology_used' metadata tag) of format concepts links to a domain ontology that is used inside the given data format, or contains a note about ontology use within the format.
+
+ Ontology used
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ 'Organisation' trailing modifier (qualifier, 'organisation') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to an organisation that developed, standardised, and maintains the given data format.
+ Organization
+
+ Organisation
+
+
+
+
+
+
+
+ true
+ A comment explaining the proposed refactoring, including name of person commenting (jison, mkalas etc.).
+
+ refactor_comment
+
+
+
+
+
+
+
+ true
+ 'Regular expression' concept property ('regex' metadata tag) specifies the allowed values of types of identifiers (accessions). Applicable to some other types of data, too.
+
+ Regular expression
+
+
+
+
+
+
+
+ 'Related term' concept property ('related_term'; supposedly a synonym modifier in OBO format) states a related term - not necessarily closely semantically related - that users (also non-specialists) may use when searching.
+
+ Related term
+
+
+
+
+
+
+
+
+ true
+ 'Repository' trailing modifier (qualifier, 'repository') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to the public source-code repository where the given data format is developed or maintained.
+ Public repository
+ Source-code repository
+
+ Repository
+
+
+
+
+
+
+
+ true
+ Name of thematic editor (http://biotools.readthedocs.io/en/latest/governance.html#registry-editors) responsible for this concept and its children.
+
+ thematic_editor
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_format B' defines for the subject A, that it has the object B as its data format.
+
+ false
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is (or is in a role of) 'Data', or an input, output, input or output argument of an 'Operation'. Object B can either be a concept that is a 'Format', or in unexpected cases an entity outside of an ontology that is a 'Format' or is in the role of a 'Format'. In EDAM, 'has_format' is not explicitly defined between EDAM concepts, only the inverse 'is_format_of'.
+ has format
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_function B' defines for the subject A, that it has the object B as its function.
+ OBO_REL:bearer_of
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is (or is in a role of) a function, or an entity outside of an ontology that is (or is in a role of) a function specification. In the scope of EDAM, 'has_function' serves only for relating annotated entities outside of EDAM with 'Operation' concepts.
+ has function
+
+
+
+
+
+
+
+ OBO_REL:bearer_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:bearer_of' is narrower in the sense that it only relates ontological categories (concepts) that are an 'independent_continuant' (snap:IndependentContinuant) with ontological categories that are a 'specifically_dependent_continuant' (snap:SpecificallyDependentContinuant), and broader in the sense that it relates with any borne objects not just functions of the subject.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_identifier B' defines for the subject A, that it has the object B as its identifier.
+
+ false
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is an 'Identifier', or an entity outside of an ontology that is an 'Identifier' or is in the role of an 'Identifier'. In EDAM, 'has_identifier' is not explicitly defined between EDAM concepts, only the inverse 'is_identifier_of'.
+ has identifier
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_input B' defines for the subject A, that it has the object B as a necessary or actual input or input argument.
+ OBO_REL:has_participant
+
+ true
+ Subject A can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that has an 'Operation' function or is an 'Operation'. Object B can be any concept or entity. In EDAM, only 'has_input' is explicitly defined between EDAM concepts ('Operation' 'has_input' 'Data'). The inverse, 'is_input_of', is not explicitly defined.
+ has input
+
+
+
+
+
+
+ OBO_REL:has_participant
+ 'OBO_REL:has_participant' is narrower in the sense that it only relates ontological categories (concepts) that are a 'process' (span:Process) with ontological categories that are a 'continuant' (snap:Continuant), and broader in the sense that it relates with any participating objects not just inputs or input arguments of the subject.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_output B' defines for the subject A, that it has the object B as a necessary or actual output or output argument.
+ OBO_REL:has_participant
+
+ true
+ Subject A can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that has an 'Operation' function or is an 'Operation'. Object B can be any concept or entity. In EDAM, only 'has_output' is explicitly defined between EDAM concepts ('Operation' 'has_output' 'Data'). The inverse, 'is_output_of', is not explicitly defined.
+ has output
+
+
+
+
+
+
+ OBO_REL:has_participant
+ 'OBO_REL:has_participant' is narrower in the sense that it only relates ontological categories (concepts) that are a 'process' (span:Process) with ontological categories that are a 'continuant' (snap:Continuant), and broader in the sense that it relates with any participating objects not just outputs or output arguments of the subject. It is also not clear whether an output (result) actually participates in the process that generates it.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_topic B' defines for the subject A, that it has the object B as its topic (A is in the scope of a topic B).
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is a 'Topic', or in unexpected cases an entity outside of an ontology that is a 'Topic' or is in the role of a 'Topic'. In EDAM, only 'has_topic' is explicitly defined between EDAM concepts ('Operation' or 'Data' 'has_topic' 'Topic'). The inverse, 'is_topic_of', is not explicitly defined.
+ has topic
+
+
+
+
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_format_of B' defines for the subject A, that it is a data format of the object B.
+ OBO_REL:quality_of
+
+ false
+ Subject A can either be a concept that is a 'Format', or in unexpected cases an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is a 'Format' or is in the role of a 'Format'. Object B can be any concept or entity outside of an ontology that is (or is in a role of) 'Data', or an input, output, input or output argument of an 'Operation'. In EDAM, only 'is_format_of' is explicitly defined between EDAM concepts ('Format' 'is_format_of' 'Data'). The inverse, 'has_format', is not explicitly defined.
+ is format of
+
+
+
+
+
+ OBO_REL:quality_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:quality_of' might be seen narrower in the sense that it only relates subjects that are a 'quality' (snap:Quality) with objects that are an 'independent_continuant' (snap:IndependentContinuant), and is broader in the sense that it relates any qualities of the object.
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_function_of B' defines for the subject A, that it is a function of the object B.
+ OBO_REL:function_of
+ OBO_REL:inheres_in
+
+ true
+ Subject A can either be a concept that is (or is in a role of) a function, or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is (or is in a role of) a function specification. Object B can be any concept or entity. Within EDAM itself, 'is_function_of' is not used.
+ is function of
+
+
+
+
+
+
+ OBO_REL:function_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:function_of' only relates subjects that are a 'function' (snap:Function) with objects that are an 'independent_continuant' (snap:IndependentContinuant), so for example no processes. It does not define explicitly that the subject is a function of the object.
+
+
+
+
+ OBO_REL:inheres_in
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:inheres_in' is narrower in the sense that it only relates ontological categories (concepts) that are a 'specifically_dependent_continuant' (snap:SpecificallyDependentContinuant) with ontological categories that are an 'independent_continuant' (snap:IndependentContinuant), and broader in the sense that it relates any borne subjects not just functions.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_identifier_of B' defines for the subject A, that it is an identifier of the object B.
+
+ false
+ Subject A can either be a concept that is an 'Identifier', or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is an 'Identifier' or is in the role of an 'Identifier'. Object B can be any concept or entity outside of an ontology. In EDAM, only 'is_identifier_of' is explicitly defined between EDAM concepts (only 'Identifier' 'is_identifier_of' 'Data'). The inverse, 'has_identifier', is not explicitly defined.
+ is identifier of
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_input_of B' defines for the subject A, that it is a necessary or actual input or input argument of the object B.
+ OBO_REL:participates_in
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology that has an 'Operation' function or is an 'Operation'. In EDAM, 'is_input_of' is not explicitly defined between EDAM concepts, only the inverse 'has_input'.
+ is input of
+
+
+
+
+
+
+ OBO_REL:participates_in
+ 'OBO_REL:participates_in' is narrower in the sense that it only relates ontological categories (concepts) that are a 'continuant' (snap:Continuant) with ontological categories that are a 'process' (span:Process), and broader in the sense that it relates any participating subjects not just inputs or input arguments.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_output_of B' defines for the subject A, that it is a necessary or actual output or output argument of the object B.
+ OBO_REL:participates_in
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology that has an 'Operation' function or is an 'Operation'. In EDAM, 'is_output_of' is not explicitly defined between EDAM concepts, only the inverse 'has_output'.
+ is output of
+
+
+
+
+
+
+ OBO_REL:participates_in
+ 'OBO_REL:participates_in' is narrower in the sense that it only relates ontological categories (concepts) that are a 'continuant' (snap:Continuant) with ontological categories that are a 'process' (span:Process), and broader in the sense that it relates any participating subjects not just outputs or output arguments. It is also not clear whether an output (result) actually participates in the process that generates it.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_topic_of B' defines for the subject A, that it is a topic of the object B (a topic A is the scope of B).
+ OBO_REL:quality_of
+
+ true
+ Subject A can either be a concept that is a 'Topic', or in unexpected cases an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is a 'Topic' or is in the role of a 'Topic'. Object B can be any concept or entity outside of an ontology. In EDAM, 'is_topic_of' is not explicitly defined between EDAM concepts, only the inverse 'has_topic'.
+ is topic of
+
+
+
+
+
+ OBO_REL:quality_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:quality_of' might be seen narrower in the sense that it only relates subjects that are a 'quality' (snap:Quality) with objects that are an 'independent_continuant' (snap:IndependentContinuant), and is broader in the sense that it relates any qualities of the object.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A type of computational resource used in bioinformatics.
+
+ Resource type
+ true
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Information, represented in an information artefact (data record) that is 'understandable' by dedicated computational tools that can use the data as input or produce it as output.
+ Data record
+ Data set
+ Datum
+
+
+ Data
+
+
+
+
+
+
+
+
+
+
+
+
+ Data record
+ EDAM does not distinguish a data record (a tool-understandable information artefact) from data or datum (its content, the tool-understandable encoding of an information).
+
+
+
+
+ Data set
+ EDAM does not distinguish the multiplicity of data, such as one data item (datum) versus a collection of data (data set).
+
+
+
+
+ Datum
+ EDAM does not distinguish the multiplicity of data, such as one data item (datum) versus a collection of data (data set).
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A bioinformatics package or tool, e.g. a standalone application or web service.
+
+
+ Tool
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A digital data archive typically based around a relational model but sometimes using an object-oriented, tree or graph-based model.
+
+
+ Database
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An ontology of biological or bioinformatics concepts and relations, a controlled vocabulary, structured glossary etc.
+
+
+ Ontology
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A directory on disk from which files are read.
+
+ Directory metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controlled vocabulary from National Library of Medicine. The MeSH thesaurus is used to index articles in biomedical journals for the Medline/PubMED databases.
+
+ MeSH vocabulary
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controlled vocabulary for gene names (symbols) from HUGO Gene Nomenclature Committee.
+
+ HGNC vocabulary
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Compendium of controlled vocabularies for the biomedical domain (Unified Medical Language System).
+
+ UMLS vocabulary
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A text token, number or something else which identifies an entity, but which may not be persistent (stable) or unique (the same identifier may identify multiple things).
+ ID
+
+
+
+ Identifier
+
+
+
+
+
+
+
+
+ Almost exact but limited to identifying resources, and being unambiguous.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An entry (retrievable via URL) from a biological database.
+
+ Database entry
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mass of a molecule.
+
+
+ Molecular mass
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_formal_charge
+ Net charge of a molecule.
+
+
+ Molecular charge
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A specification of a chemical structure.
+ Chemical structure specification
+
+
+ Chemical formula
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR quantitative descriptor (name-value pair) of chemical structure.
+
+
+ QSAR descriptors have numeric values that quantify chemical information encoded in a symbolic representation of a molecule. They are used in quantitative structure activity relationship (QSAR) applications. Many subtypes of individual descriptors (not included in EDAM) cover various types of protein properties.
+ QSAR descriptor
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possible in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw molecular sequence (string of characters) which might include ambiguity, unknown positions and non-sequence characters.
+
+
+ Non-sequence characters may be used for example for gaps and translation stop.
+ Raw sequence
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SO:2000061
+ A molecular sequence and associated metadata.
+
+
+ Sequence record
+ http://purl.bioontology.org/ontology/MSH/D058977
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A collection of one or typically multiple molecular sequences (which can include derived data or metadata) that do not (typically) correspond to molecular sequence database records or entries and which (typically) are derived from some analytical method.
+ Alignment reference
+ SO:0001260
+
+
+ An example is an alignment reference; one or a set of reference molecular sequences, structures, or profiles used for alignment of genomic, transcriptomic, or proteomic experimental data.
+ This concept may be used for arbitrary sequence sets and associated data arising from processing.
+ Sequence set
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A character used to replace (mask) other characters in a molecular sequence.
+
+ Sequence mask character
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of sequence masking to perform.
+
+ Sequence masking is where specific characters or positions in a molecular sequence are masked (replaced) with another (mask character). The mask type indicates what is masked, for example regions that are not of interest or which are information-poor including acidic protein regions, basic protein regions, proline-rich regions, low compositional complexity regions, short-periodicity internal repeats, simple repeats and low complexity regions. Masked sequences are used in database search to eliminate statistically significant but biologically uninteresting hits.
+ Sequence mask type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+
+ The strand of a DNA sequence (forward or reverse).
+
+ The forward or 'top' strand might specify a sequence is to be used as given, the reverse or 'bottom' strand specifying the reverse complement of the sequence is to be used.
+ DNA sense specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A specification of sequence length(s).
+
+ Sequence length specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Basic or general information concerning molecular sequences.
+
+ This is used for such things as a report including the sequence identifier, type and length.
+ Sequence metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ How the annotation of a sequence feature (for example in EMBL or Swiss-Prot) was derived.
+
+
+ This might be the name and version of a software tool, the name of a database, or 'curated' to indicate a manual annotation (made by a human).
+ Sequence feature source
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of sequence hits and associated data from searching a database of sequences (for example a BLAST search). This will typically include a list of scores (often with statistical evaluation) and a set of alignments for the hits.
+ Database hits (sequence)
+ Sequence database hits
+ Sequence database search results
+ Sequence search hits
+
+
+ The score list includes the alignment score, percentage of the query sequence matched, length of the database sequence entry in this alignment, identifier of the database sequence entry, excerpt of the database sequence entry description etc.
+ Sequence search results
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on the location of matches ("hits") between sequences, sequence profiles, motifs (conserved or functional patterns) and other types of sequence signatures.
+ Profile-profile alignment
+ Protein secondary database search results
+ Search results (protein secondary database)
+ Sequence motif hits
+ Sequence motif matches
+ Sequence profile alignment
+ Sequence profile hits
+ Sequence profile matches
+ Sequence-profile alignment
+
+
+ A "profile-profile alignment" is an alignment of two sequence profiles, each profile typically representing a sequence alignment.
+ A "sequence-profile alignment" is an alignment of one or more molecular sequence(s) to one or more sequence profile(s) (each profile typically representing a sequence alignment).
+ This includes reports of hits from a search of a protein secondary or domain database. Data associated with the search or alignment might also be included, e.g. ranked list of best-scoring sequences, a graphical representation of scores etc.
+ Sequence signature matches
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data files used by motif or profile methods.
+
+ Sequence signature model
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning specific or conserved patterns in molecular sequences and the classifiers used for their identification, including sequence motifs, profiles or other diagnostic elements.
+
+
+ This can include metadata about a motif or sequence profile such as its name, length, technical details about the profile construction, and so on.
+ Sequence signature data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment of exact matches between subsequences (words) within two or more molecular sequences.
+
+ Sequence alignment (words)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A dotplot of sequence similarities identified from word-matching or character comparison.
+
+
+ Dotplot
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple molecular sequences.
+ Multiple sequence alignment
+ msa
+
+
+ Sequence alignment
+
+ http://purl.bioontology.org/ontology/MSH/D016415
+ http://semanticscience.org/resource/SIO_010066
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Some simple value controlling a sequence alignment (or similar 'match') operation.
+
+ Sequence alignment parameter
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A value representing molecular sequence similarity.
+
+
+ Sequence similarity score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Report of general information on a sequence alignment, typically including a description, sequence identifiers and alignment score.
+
+ Sequence alignment metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report of molecular sequence alignment-derived data or metadata.
+ Sequence alignment metadata
+
+
+ Use this for any computer-generated reports on sequence alignments, and for general information (metadata) on a sequence alignment, such as a description, sequence identifiers and alignment score.
+ Sequence alignment report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ "Sequence-profile alignment" and "Profile-profile alignment" are synonymous with "Sequence signature matches" which was already stated as including matches (alignment) and other data.
+ 1.25 or earlier
+
+ A profile-profile alignment (each profile typically representing a sequence alignment).
+
+
+ Profile-profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ "Sequence-profile alignment" and "Profile-profile alignment" are synonymous with "Sequence signature matches" which was already stated as including matches (alignment) and other data.
+ 1.24
+
+ Alignment of one or more molecular sequence(s) to one or more sequence profile(s) (each profile typically representing a sequence alignment).
+
+
+ Sequence-profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:phylogenetic_distance_matrix
+ A matrix of estimated evolutionary distance between molecular sequences, such as is suitable for phylogenetic tree calculation.
+ Phylogenetic distance matrix
+
+
+ Methods might perform character compatibility analysis or identify patterns of similarity in an alignment or data matrix.
+ Sequence distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic character data from which a phylogenetic tree may be generated.
+
+
+ As defined, this concept would also include molecular sequences, microsatellites, polymorphisms (RAPDs, RFLPs, or AFLPs), restriction sites and fragments
+ Phylogenetic character data
+ http://www.evolutionaryontology.org/cdao.owl#Character
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Tree
+ Moby:myTree
+ Moby:phylogenetic_tree
+ The raw data (not just an image) from which a phylogenetic tree is directly generated or plotted, such as topology, lengths (in time or in expected amounts of variance) and a confidence interval for each length.
+ Phylogeny
+
+
+ A phylogenetic tree is usually constructed from a set of sequences from which an alignment (or data matrix) is calculated. See also 'Phylogenetic tree image'.
+ Phylogenetic tree
+ http://purl.bioontology.org/ontology/MSH/D010802
+ http://www.evolutionaryontology.org/cdao.owl#Tree
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of integer or floating point numbers for amino acid or nucleotide sequence comparison.
+ Substitution matrix
+
+
+ The comparison matrix might include matrix name, optional comment, height and width (or size) of matrix, an index row/column (of characters) and data rows/columns (of integers or floats).
+ Comparison matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Predicted or actual protein topology represented as a string of protein secondary structure elements.
+
+
+ The location and size of the secondary structure elements and intervening loop regions is usually indicated.
+ Protein topology
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Secondary structure (predicted or real) of a protein.
+
+
+ Protein features report (secondary structure)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Super-secondary structure of protein sequence(s).
+
+
+ Super-secondary structures include leucine zippers, coiled coils, Helix-Turn-Helix etc.
+ Protein features report (super-secondary)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Alignment of the (1D representations of) secondary structure of two or more proteins.
+ Secondary structure alignment (protein)
+
+
+ Protein secondary structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on protein secondary structure alignment-derived data or metadata.
+
+ Secondary structure alignment metadata (protein)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:RNAStructML
+ An informative report of secondary structure (predicted or real) of an RNA molecule.
+ Secondary structure (RNA)
+
+
+ This includes thermodynamically stable or evolutionarily conserved structures such as knots, pseudoknots etc.
+ RNA secondary structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Moby:RNAStructAlignmentML
+ Alignment of the (1D representations of) secondary structure of two or more RNA molecules.
+ Secondary structure alignment (RNA)
+
+
+ RNA secondary structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report of RNA secondary structure alignment-derived data or metadata.
+
+ Secondary structure alignment metadata (RNA)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a macromolecular tertiary (3D) structure or part of a structure.
+ Coordinate model
+ Structure data
+
+
+ The coordinate data may be predicted or real.
+ Structure
+ http://purl.bioontology.org/ontology/MSH/D015394
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An entry from a molecular tertiary (3D) structure database.
+
+ Tertiary structure record
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Results (hits) from searching a database of tertiary structure.
+
+ Structure database search results
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of molecular tertiary (3D) structures.
+
+
+ A tertiary structure alignment will include the untransformed coordinates of one macromolecule, followed by the second (or subsequent) structure(s) with all the coordinates transformed (by rotation / translation) to give a superposition.
+ Structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report of molecular tertiary structure alignment-derived data.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Structure alignment report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A value representing molecular structure similarity, measured from structure alignment or some other type of structure comparison.
+
+
+ Structure similarity score
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some type of structural (3D) profile or template (representing a structure or structure alignment).
+ 3D profile
+ Structural (3D) profile
+
+
+ Structural profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A 3D profile-3D profile alignment (each profile representing structures or a structure alignment).
+ Structural profile alignment
+
+
+ Structural (3D) profile alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An alignment of a sequence to a 3D profile (representing structures or a structure alignment).
+
+ Sequence-3D profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of values used for scoring sequence-structure compatibility.
+
+
+ Protein sequence-structure scoring matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An alignment of molecular sequence to structure (from threading sequence(s) through 3D structure or representation of structure(s)).
+
+
+ Sequence-structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific amino acid.
+
+ Amino acid annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific peptide.
+
+ Peptide annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative human-readable report about one or more specific protein molecules or protein structural domains, derived from analysis of primary (sequence or structural) data.
+ Gene product annotation
+
+
+ Protein report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of primarily non-positional data describing intrinsic physical, chemical or other properties of a protein molecule or model.
+ Protein physicochemical property
+ Protein properties
+ Protein sequence statistics
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. Data may be based on analysis of nucleic acid sequence or structural data, for example reports on the surface properties (shape, hydropathy, electrostatic patches etc) of a protein structure, protein flexibility or motion, and protein architecture (spatial arrangement of secondary structure).
+ Protein property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ 3D structural motifs in a protein.
+
+ Protein structural motifs and surfaces
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Data concerning the classification of the sequences and/or structures of protein structural domain(s).
+
+ Protein domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ structural domains or 3D folds in a protein or polypeptide chain.
+
+
+ Protein features report (domains)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on architecture (spatial arrangement of secondary structure) of a protein structure.
+
+ Protein architecture report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report on an analysis or model of protein folding properties, folding pathways, residues or sites that are key to protein folding, nucleation or stabilisation centers etc.
+
+
+ Protein folding report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data on the effect of (typically point) mutation on protein folding, stability, structure and function.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein features (mutation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein-protein interaction data from for example yeast two-hybrid analysis, protein microarrays, immunoaffinity chromatography followed by mass spectrometry, phage display etc.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein interaction raw data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the interactions (predicted or known) within or between a protein, structural domain or part of a protein. This includes intra- and inter-residue contacts and distances, as well as interactions with other proteins and non-protein entities such as nucleic acid, metal atoms, water, ions etc.
+ Protein interaction record
+ Protein interaction report
+ Protein report (interaction)
+ Protein-protein interaction data
+ Atom interaction data
+ Protein non-covalent interactions report
+ Residue interaction data
+
+
+ Protein interaction data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein classification data
+ An informative report on a specific protein family or other classification or group of protein sequences or structures.
+ Protein family annotation
+
+
+ Protein family report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The maximum initial velocity or rate of a reaction. It is the limiting velocity as substrate concentrations get very large.
+
+
+ Vmax
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Km is the concentration (usually in Molar units) of substrate that leads to half-maximal velocity of an enzyme-catalysed reaction.
+
+
+ Km
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific nucleotide base.
+
+ Nucleotide base annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of primarily non-positional data describing intrinsic physical, chemical or other properties of a nucleic acid molecule.
+ Nucleic acid physicochemical property
+ GC-content
+ Nucleic acid property (structural)
+ Nucleic acid structural property
+
+
+ Nucleic acid structural properties stiffness, curvature, twist/roll data or other conformational parameters or properties.
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Nucleic acid property
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data derived from analysis of codon usage (typically a codon usage table) of DNA sequences.
+ Codon usage report
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Codon usage data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GeneInfo
+ Moby:gene
+ Moby_namespace:Human_Readable_Description
+ A report on predicted or actual gene structure, regions which make an RNA product and features such as promoters, coding regions, splice sites etc.
+ Gene and transcript structure (report)
+ Gene annotation
+ Gene features report
+ Gene function (report)
+ Gene structure (report)
+ Nucleic acid features (gene and transcript structure)
+
+
+ This includes any report on a particular locus or gene. This might include the gene name, description, summary and so on. It can include details about the function of a gene, such as its encoded protein or a functional classification of the gene sequence according to the encoded protein(s).
+ Gene report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on the classification of nucleic acid / gene sequences according to the functional classification of their gene products.
+
+ Gene classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ stable, naturally occurring mutations in a nucleotide sequence including alleles, naturally occurring mutations such as single base nucleotide substitutions, deletions and insertions, RFLPs and other polymorphisms.
+
+
+ DNA variation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific chromosome.
+
+
+ This includes basic information, e.g. chromosome number, length, karyotype features, chromosome sequence etc.
+ Chromosome report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about the set of genes (or allelic forms) present in an individual, organism or cell and associated with a specific physical characteristic, or a report concerning an organism's traits and phenotypes.
+ Genotype/phenotype annotation
+
+
+ Genotype/phenotype report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ PCR experiments, e.g. quantitative real-time PCR.
+
+
+ PCR experiment report
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fluorescence trace data generated by an automated DNA sequencer, which can be interpreted as a molecular sequence (reads), given associated sequencing metadata such as base-call quality scores.
+
+
+ This is the raw data produced by a DNA sequencing machine.
+ Sequence trace
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An assembly of fragments of a (typically genomic) DNA sequence.
+ Contigs
+ SO:0000353
+ SO:0001248
+
+
+ Typically, an assembly is a collection of contigs (for example ESTs and genomic DNA fragments) that are ordered, aligned and merged. Annotation of the assembled sequence might be included.
+ Sequence assembly
+
+
+
+
+
+ SO:0001248
+ Perhaps surprisingly, the definition of 'SO:assembly' is narrower than the 'SO:sequence_assembly'.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Radiation hybrid (RH) scores for one or more markers.
+ Radiation Hybrid (RH) scores
+
+
+ Radiation Hybrid (RH) scores are used in Radiation Hybrid mapping.
+ RH scores
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about the linkage of alleles.
+ Gene annotation (linkage)
+ Linkage disequilibrium (report)
+
+
+ This includes linkage disequilibrium; the non-random association of alleles or polymorphisms at two or more loci (not necessarily on the same chromosome).
+ Genetic linkage report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data quantifying the level of expression of (typically) multiple genes, derived for example from microarray experiments.
+ Gene expression pattern
+
+
+ Gene expression profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ microarray experiments including conditions, protocol, sample:data relationships etc.
+
+
+ Microarray experiment report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data on oligonucleotide probes (typically for use with DNA microarrays).
+
+ Oligonucleotide probe data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Output from a serial analysis of gene expression (SAGE) experiment.
+
+ SAGE experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Massively parallel signature sequencing (MPSS) data.
+
+ MPSS experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Sequencing by synthesis (SBS) data.
+
+ SBS experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.14
+
+ Tag to gene assignments (tag mapping) of SAGE, MPSS and SBS data. Typically this is the sequencing-based expression profile annotated with gene identifiers.
+
+
+ Sequence tag profile (with gene assignment)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein X-ray crystallographic data
+ X-ray crystallography data.
+
+
+ Electron density map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nuclear magnetic resonance (NMR) raw data, typically for a protein.
+ Protein NMR data
+
+
+ Raw NMR data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein secondary structure from protein coordinate or circular dichroism (CD) spectroscopic data.
+ CD spectrum
+ Protein circular dichroism (CD) spectroscopic data
+
+
+ CD spectra
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Volume map data from electron microscopy.
+ 3D volume map
+ EM volume map
+ Electron microscopy volume map
+
+
+ Volume map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Annotation on a structural 3D model (volume map) from electron microscopy.
+
+
+ Electron microscopy model
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Two-dimensional gel electrophoresis image.
+
+
+ 2D PAGE image
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Spectra from mass spectrometry.
+ Mass spectrometry spectra
+
+
+ Mass spectrum
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A set of peptide masses (peptide mass fingerprint) from mass spectrometry.
+ Peak list
+ Protein fingerprint
+ Molecular weights standard fingerprint
+
+
+ A molecular weight standard fingerprint is standard protonated molecular masses e.g. from trypsin (modified porcine trypsin, Promega) and keratin peptides.
+ Peptide mass fingerprint
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein or peptide identifications with evidence supporting the identifications, for example from comparing a peptide mass fingerprint (from mass spectrometry) to a sequence database, or the set of typical spectra one obtains when running a protein through a mass spectrometer.
+ 'Protein identification'
+ Peptide spectrum match
+
+
+ Peptide identification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report about a specific biological pathway or network, typically including a map (diagram) of the pathway.
+
+ Pathway or network annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A map (typically a diagram) of a biological pathway.
+
+ Biological pathway map
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A definition of a data resource serving one or more types of data, including metadata and links to the resource or data proper.
+
+ Data resource definition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information, annotation or documentation concerning a workflow (but not the workflow itself).
+
+
+ Workflow metadata
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A biological model represented in mathematical terms.
+ Biological model
+
+
+ Mathematical model
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A value representing estimated statistical significance of some observed data; typically sequence database hits.
+
+
+ Statistical estimate score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Resource definition for an EMBOSS database.
+
+ EMBOSS database resource definition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a version of software or data, for example name, version number and release date.
+
+ Development status / maturity may be part of the version information, for example in case of tools, standards, or some data records.
+ Version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A mapping of the accession numbers (or other database identifier) of entries between (typically) two biological or biomedical databases.
+
+
+ The cross-mapping is typically a table where each row is an accession number and each column is a database being cross-referenced. The cells give the accession number or identifier of the corresponding entry in a database. If a cell in the table is not filled then no mapping could be found for the database. Additional information might be given on version, date etc.
+ Database cross-mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An index of data of biological relevance.
+
+
+ Data index
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information concerning an analysis of an index of biological data.
+ Database index annotation
+
+
+ Data index report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information on bioinformatics database(s) or other data sources such as name, type, description, URL etc.
+
+
+ Database metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information about one or more bioinformatics applications or packages, such as name, type, description, or other documentation.
+
+
+ Tool metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Textual metadata on a submitted or completed job.
+
+ Job metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Textual metadata on a software author or end-user, for example a person or other software.
+
+
+ User metadata
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific chemical compound.
+ Chemical compound annotation
+ Chemical structure report
+ Small molecule annotation
+
+
+ Small molecule report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a particular strain of organism cell line including plants, virus, fungi and bacteria. The data typically includes strain number, organism type, growth conditions, source and so on.
+ Cell line annotation
+ Organism strain data
+
+
+ Cell line report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific scent.
+
+ Scent annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A term (name) from an ontology.
+ Ontology class name
+ Ontology terms
+
+
+ Ontology term
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning or derived from a concept from a biological ontology.
+ Ontology class metadata
+ Ontology term metadata
+
+
+ Ontology concept data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:BooleanQueryString
+ Moby:Global_Keyword
+ Moby:QueryString
+ Moby:Wildcard_Query
+ Keyword(s) or phrase(s) used (typically) for text-searching purposes.
+ Phrases
+ Term
+
+
+ Boolean operators (AND, OR and NOT) and wildcard characters may be allowed.
+ Keyword
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GCP_SimpleCitation
+ Moby:Publication
+ Bibliographic data that uniquely identifies a scientific article, book or other published material.
+ Bibliographic reference
+ Reference
+
+
+ A bibliographic reference might include information such as authors, title, journal name, date and (possibly) a link to the abstract or full-text of the article if available.
+ Citation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A scientific text, typically a full text article from a scientific journal.
+ Article text
+ Scientific article
+
+
+ Article
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information resulting from text mining.
+ Text mining output
+
+
+ A text mining abstract will typically include an annotated list of words or sentences extracted from one or more scientific articles.
+ Text mining report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of a biological entity or phenomenon.
+
+ Entity identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of a data resource.
+
+ Data resource identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier that identifies a particular type of data.
+ Identifier (typed)
+
+
+
+ This concept exists only to assist EDAM maintenance and navigation in graphical browsers. It does not add semantic information. This branch provides an alternative organisation of the concepts nested under 'Accession' and 'Name'. All concepts under here are already included under 'Accession' or 'Name'.
+ Identifier (by type of entity)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a bioinformatics tool, e.g. an application or web service.
+
+
+
+ Tool identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a discrete entity (any biological thing with a distinct, discrete physical existence).
+
+ Discrete entity identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of an entity feature (a physical part or region of a discrete biological entity, or a feature that can be mapped to such a thing).
+
+ Entity feature identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a collection of discrete biological entities.
+
+ Entity collection identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a physical, observable biological occurrence or event.
+
+ Phenomenon identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name or other identifier of a molecule.
+
+
+
+ Molecule identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier (e.g. character symbol) of a specific atom.
+ Atom identifier
+
+
+
+ Atom ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name of a specific molecule.
+
+
+
+ Molecule name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a molecule.
+
+ For example, 'Protein', 'DNA', 'RNA' etc.
+ Molecule type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Unique identifier of a chemical compound.
+
+ Chemical identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a chromosome.
+
+
+
+ Chromosome name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a peptide chain.
+
+
+
+ Peptide identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein.
+
+
+
+ Protein identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name of a chemical compound.
+ Chemical name
+
+
+
+ Compound name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique registry number of a chemical compound.
+
+
+
+ Chemical registry number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Code word for a ligand, for example from a PDB file.
+
+ Ligand identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a drug.
+
+
+
+ Drug identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an amino acid.
+ Residue identifier
+
+
+
+ Amino acid identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name or other identifier of a nucleotide.
+
+
+
+ Nucleotide identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a monosaccharide.
+
+
+
+ Monosaccharide identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name from Chemical Entities of Biological Interest (ChEBI) of a chemical compound.
+ ChEBI chemical name
+
+
+
+ This is the recommended chemical name for use for example in database annotation.
+ Chemical name (ChEBI)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ IUPAC recommended name of a chemical compound.
+ IUPAC chemical name
+
+
+
+ Chemical name (IUPAC)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ International Non-proprietary Name (INN or 'generic name') of a chemical compound, assigned by the World Health Organisation (WHO).
+ INN chemical name
+
+
+
+ Chemical name (INN)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Brand name of a chemical compound.
+ Brand chemical name
+
+
+
+ Chemical name (brand)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Synonymous name of a chemical compound.
+ Synonymous chemical name
+
+
+
+ Chemical name (synonymous)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CAS registry number of a chemical compound; a unique numerical identifier of chemicals in the scientific literature, as assigned by the Chemical Abstracts Service.
+ CAS chemical registry number
+ Chemical registry number (CAS)
+
+
+
+ CAS number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Beilstein registry number of a chemical compound.
+ Beilstein chemical registry number
+
+
+
+ Chemical registry number (Beilstein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gmelin registry number of a chemical compound.
+ Gmelin chemical registry number
+
+
+
+ Chemical registry number (Gmelin)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3-letter code word for a ligand (HET group) from a PDB file, for example ATP.
+ Component identifier code
+ Short ligand name
+
+
+
+ HET group name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ String of one or more ASCII characters representing an amino acid.
+
+
+
+ Amino acid name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ String of one or more ASCII characters representing a nucleotide.
+
+
+
+ Nucleotide code
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_strand_id
+ WHATIF: chain
+ Identifier of a polypeptide chain from a protein.
+ Chain identifier
+ PDB chain identifier
+ PDB strand id
+ Polypeptide chain identifier
+ Protein chain identifier
+
+
+
+ This is typically a character (for the chain) appended to a PDB identifier, e.g. 1cukA
+ Polypeptide chain ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a protein.
+
+
+
+ Protein name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name or other identifier of an enzyme or record from a database of enzymes.
+
+
+
+ Enzyme identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+\.-\.-\.-|[0-9]+\.[0-9]+\.-\.-|[0-9]+\.[0-9]+\.[0-9]+\.-|[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+
+ Moby:Annotated_EC_Number
+ Moby:EC_Number
+ An Enzyme Commission (EC) number of an enzyme.
+ EC
+ EC code
+ Enzyme Commission number
+
+
+
+ EC number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an enzyme.
+
+
+
+ Enzyme name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a restriction enzyme.
+
+
+
+ Restriction enzyme name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A specification (partial or complete) of one or more positions or regions of a molecular sequence or map.
+
+ Sequence position specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a molecular sequence feature, for example an ID of a feature that is unique within the scope of the GFF file.
+
+
+
+ Sequence feature ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:_atom_site.id
+ WHATIF: PDBx_atom_site
+ WHATIF: number
+ A position of one or more points (base or residue) in a sequence, or part of such a specification.
+ SO:0000735
+
+
+ Sequence position
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Specification of range(s) of sequence positions.
+
+
+ Sequence range
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a nucleic acid feature.
+
+ Nucleic acid feature identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a protein feature.
+
+ Protein feature identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The type of a sequence feature, typically a term or accession from the Sequence Ontology, for example an EMBL or Swiss-Prot sequence feature key.
+ Sequence feature method
+ Sequence feature type
+
+
+ A feature key indicates the biological nature of the feature or information about changes to or versions of the sequence.
+ Sequence feature key
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Typically one of the EMBL or Swiss-Prot feature qualifiers.
+
+
+ Feature qualifiers hold information about a feature beyond that provided by the feature key and location.
+ Sequence feature qualifier
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A name of a sequence feature, e.g. the name of a feature to be displayed to an end-user. Typically an EMBL or Swiss-Prot feature label.
+ Sequence feature name
+
+
+ A feature label identifies a feature of a sequence database entry. When used with the database name and the entry's primary accession number, it is a unique identifier of that feature.
+ Sequence feature label
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a sequence feature-containing entity adhering to the standard feature naming scheme used by all EMBOSS applications.
+ UFO
+
+
+ EMBOSS Uniform Feature Object
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ String of one or more ASCII characters representing a codon.
+
+ Codon name
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Moby:GeneAccessionList
+ An identifier of a gene, such as a name/symbol or a unique identifier of a gene in a database.
+
+
+
+ Gene identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:Global_GeneCommonName
+ Moby_namespace:Global_GeneSymbol
+ The short name of a gene; a single word that does not contain white space characters. It is typically derived from the gene name.
+
+
+
+ Gene symbol
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs:LocusID
+ http://www.geneontology.org/doc/GO.xrf_abbs:NCBI_Gene
+ An NCBI unique identifier of a gene.
+ Entrez gene ID
+ Gene identifier (Entrez)
+ Gene identifier (NCBI)
+ NCBI gene ID
+ NCBI geneid
+
+
+
+ Gene ID (NCBI)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An NCBI RefSeq unique identifier of a gene.
+
+ Gene identifier (NCBI RefSeq)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An NCBI UniGene unique identifier of a gene.
+
+ Gene identifier (NCBI UniGene)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An Entrez unique identifier of a gene.
+
+ Gene identifier (Entrez)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a gene or feature from the CGD database.
+ CGD ID
+
+
+
+ Gene ID (CGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a gene from DictyBase.
+
+
+
+ Gene ID (DictyBase)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a gene (or other feature) from the Ensembl database.
+ Gene ID (Ensembl)
+
+
+
+ Ensembl gene ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ S[0-9]+
+ Identifier of an entry from the SGD database.
+ SGD identifier
+
+
+
+ Gene ID (SGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9\.-]*
+ Moby_namespace:GeneDB
+ Identifier of a gene from the GeneDB database.
+ GeneDB identifier
+
+
+
+ Gene ID (GeneDB)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the TIGR database.
+
+
+
+ TIGR identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gene:[0-9]{7}
+ Identifier of a gene from the TAIR database.
+
+
+
+ TAIR accession (gene)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein structural domain.
+
+
+
+ This is typically a character or string concatenated with a PDB identifier and a chain identifier.
+ Protein domain ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a protein domain (or other node) from the SCOP database.
+
+
+
+ SCOP domain identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1nr3A00
+ Identifier of a protein domain from CATH.
+ CATH domain identifier
+
+
+
+ CATH domain ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A SCOP concise classification string (sccs) is a compact representation of a SCOP domain classification.
+
+
+
+ An sccs includes the class (alphabetical), fold, superfamily and family (all numerical) to which a given domain belongs.
+ SCOP concise classification string (sccs)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 33229
+ Unique identifier (number) of an entry in the SCOP hierarchy, for example 33229.
+ SCOP unique identifier
+ sunid
+
+
+
+ A sunid uniquely identifies an entry in the SCOP hierarchy, including leaves (the SCOP domains) and higher level nodes including entries corresponding to the protein level.
+ SCOP sunid
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3.30.1190.10.1.1.1.1.1
+ A code number identifying a node from the CATH database.
+ CATH code
+ CATH node identifier
+
+
+
+ CATH node ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a biological kingdom (Bacteria, Archaea, or Eukaryotes).
+
+
+
+ Kingdom name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a species (typically a taxonomic group) of organism.
+ Organism species
+
+
+
+ Species name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a strain of an organism variant, typically a plant, virus or bacterium.
+
+
+
+ Strain name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A string of characters that name or otherwise identify a resource on the Internet.
+ URIs
+
+
+ URI
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a biological or bioinformatics database.
+ Database identifier
+
+
+
+ Database ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a directory.
+
+
+
+ Directory name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name (or part of a name) of a file (of any type).
+
+
+
+ File name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an ontology of biological or bioinformatics concepts and relations.
+
+
+
+ Ontology name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Link
+ Moby:URL
+ A Uniform Resource Locator (URL).
+
+
+ URL
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A Uniform Resource Name (URN).
+
+
+ URN
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A Life Science Identifier (LSID) - a unique identifier of some data.
+ Life Science Identifier
+
+
+ LSIDs provide a standard way to locate and describe data. An LSID is represented as a Uniform Resource Name (URN) with the following format: URN:LSID:<Authority>:<Namespace>:<ObjectID>[:<Version>]
+ LSID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a biological or bioinformatics database.
+
+
+
+ Database name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a molecular sequence database.
+
+ Sequence database name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a file (of any type) with restricted possible values.
+
+
+
+ Enumerated file name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The extension of a file name.
+
+
+
+ A file extension is the characters appearing after the final '.' in the file name.
+ File name extension
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The base name of a file.
+
+
+
+ A file base name is the file name stripped of its directory specification and extension.
+ File base name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a QSAR descriptor.
+
+
+
+ QSAR descriptor name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of an entry from a database where the same type of identifier is used for objects (data) of different semantic type.
+
+ This concept is required for completeness. It should never have child concepts.
+ Database entry identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of molecular sequence(s) or entries from a molecular sequence database.
+
+
+
+ Sequence identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a set of molecular sequence(s).
+
+
+
+ Sequence set ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Identifier of a sequence signature (motif or profile) for example from a database of sequence patterns.
+
+ Sequence signature identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a molecular sequence alignment, for example a record from an alignment database.
+
+
+
+ Sequence alignment ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of a phylogenetic distance matrix.
+
+ Phylogenetic distance matrix identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a phylogenetic tree for example from a phylogenetic tree database.
+
+
+
+ Phylogenetic tree ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a comparison matrix.
+ Substitution matrix identifier
+
+
+
+ Comparison matrix identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique and persistent identifier of a molecular tertiary structure, typically an entry from a structure database.
+
+
+
+ Structure ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier or name of a structural (3D) profile or template (representing a structure or structure alignment).
+ Structural profile identifier
+
+
+
+ Structural (3D) profile ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of tertiary structure alignments.
+
+
+
+ Structure alignment ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an index of amino acid physicochemical and biochemical property data.
+
+
+
+ Amino acid index ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Molecular interaction ID
+ Identifier of a report of protein interactions from a protein interaction database (typically).
+
+
+
+ Protein interaction ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein family.
+ Protein secondary database record identifier
+
+
+
+ Protein family identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name of a codon usage table.
+
+
+
+ Codon usage table name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a transcription factor (or a TF binding site).
+
+
+
+ Transcription factor identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of microarray data.
+
+
+
+ Experiment annotation ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of electron microscopy data.
+
+
+
+ Electron microscopy model ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a report of gene expression (e.g. a gene expression profile) from a database.
+ Gene expression profile identifier
+
+
+
+ Gene expression report ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of genotypes and phenotypes.
+
+
+
+ Genotype and phenotype annotation ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of biological pathways or networks.
+
+
+
+ Pathway or network identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a biological or biomedical workflow, typically from a database of workflows.
+
+
+
+ Workflow ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a data type definition from some provider.
+ Data resource definition identifier
+
+
+
+ Data resource definition ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a mathematical model, typically an entry from a database.
+ Biological model identifier
+
+
+
+ Biological model ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of chemicals.
+ Chemical compound identifier
+ Compound ID
+ Small molecule identifier
+
+
+
+ Compound identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique (typically numerical) identifier of a concept in an ontology of biological or bioinformatics concepts and relations.
+
+
+
+ Ontology concept ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a scientific article.
+ Article identifier
+
+
+
+ Article ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FB[a-zA-Z_0-9]{2}[0-9]{7}
+ Identifier of an object from the FlyBase database.
+
+
+
+ FlyBase ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an object from the WormBase database, usually a human-readable name.
+
+
+
+ WormBase name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Class of an object from the WormBase database.
+
+
+
+ A WormBase class describes the type of object such as 'sequence' or 'protein'.
+ WormBase class
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A persistent, unique identifier of a molecular sequence database entry.
+ Sequence accession number
+
+
+
+ Sequence accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing a type of molecular sequence.
+
+ Sequence type might reflect the molecule (protein, nucleic acid etc) or the sequence itself (gapped, ambiguous etc).
+ Sequence type
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a sequence-based entity adhering to the standard sequence naming scheme used by all EMBOSS applications.
+ EMBOSS USA
+
+
+
+ EMBOSS Uniform Sequence Address
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a protein sequence database entry.
+ Protein sequence accession number
+
+
+
+ Sequence accession (protein)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a nucleotide sequence database entry.
+ Nucleotide sequence accession number
+
+
+
+ Sequence accession (nucleic acid)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (NC|AC|NG|NT|NW|NZ|NM|NR|XM|XR|NP|AP|XP|YP|ZP)_[0-9]+
+ Accession number of a RefSeq database entry.
+ RefSeq ID
+
+
+
+ RefSeq accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Accession number of a UniProt (protein sequence) database entry. May contain version or isoform number.
+
+ UniProt accession (extended)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a PIR sequence database entry.
+ PIR ID
+ PIR accession number
+
+
+
+ PIR identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.2
+
+ Identifier of a TREMBL sequence database entry.
+
+
+ TREMBL accession
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Primary identifier of a Gramene database entry.
+ Gramene primary ID
+
+
+
+ Gramene primary identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a (nucleic acid) entry from the EMBL/GenBank/DDBJ databases.
+
+
+
+ EMBL/GenBank/DDBJ ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of an entry (gene cluster) from the NCBI UniGene database.
+ UniGene ID
+ UniGene cluster ID
+ UniGene identifier
+
+
+
+ Sequence cluster ID (UniGene)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a dbEST database entry.
+ dbEST ID
+
+
+
+ dbEST accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a dbSNP database entry.
+ dbSNP identifier
+
+
+
+ dbSNP ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The EMBOSS type of a molecular sequence.
+
+ See the EMBOSS documentation (http://emboss.sourceforge.net/) for a definition of what this includes.
+ EMBOSS sequence type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ List of EMBOSS Uniform Sequence Addresses (EMBOSS listfile).
+
+ EMBOSS listfile
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a cluster of molecular sequence(s).
+
+
+
+ Sequence cluster ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the COG database.
+ COG ID
+
+
+
+ Sequence cluster ID (COG)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a sequence motif, for example an entry from a motif database.
+
+
+
+ Sequence motif identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a sequence profile.
+
+
+
+ A sequence profile typically represents a sequence alignment.
+ Sequence profile ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the ELMdb database of protein functional sites.
+
+
+
+ ELM ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PS[0-9]{5}
+ Accession number of an entry from the Prosite database.
+ Prosite ID
+
+
+
+ Prosite accession number
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier or name of a HMMER hidden Markov model.
+
+
+
+ HMMER hidden Markov model ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier or name of a profile from the JASPAR database.
+
+
+
+ JASPAR profile ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a sequence alignment.
+
+ Possible values include for example the EMBOSS alignment types, BLAST alignment types and so on.
+ Sequence alignment type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The type of a BLAST sequence alignment.
+
+ BLAST sequence alignment type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a phylogenetic tree.
+
+ For example 'nj', 'upgmp' etc.
+ Phylogenetic tree type
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the TreeBASE database.
+
+
+
+ TreeBASE study accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the TreeFam database.
+
+
+
+ TreeFam accession number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a comparison matrix.
+
+ For example 'blosum', 'pam', 'gonnet', 'id' etc. Comparison matrix type may be required where a series of matrices of a certain type are used.
+ Comparison matrix type
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name or identifier of a comparison matrix.
+ Substitution matrix name
+
+
+
+ See for example http://www.ebi.ac.uk/Tools/webservices/help/matrix.
+ Comparison matrix name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9][a-zA-Z_0-9]{3}
+ An identifier of an entry from the PDB database.
+ PDB identifier
+ PDBID
+
+
+
+ A PDB identification code which consists of 4 characters, the first of which is a digit in the range 0 - 9; the remaining 3 are alphanumeric, and letters are upper case only. (source: https://cdn.rcsb.org/wwpdb/docs/documentation/file-format/PDB_format_1996.pdf)
+ PDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the AAindex database.
+
+
+
+ AAindex ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the BIND database.
+
+
+
+ BIND accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EBI\-[0-9]+
+ Accession number of an entry from the IntAct database.
+
+
+
+ IntAct accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a protein family.
+
+
+
+ Protein family name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an InterPro entry, usually indicating the type of protein matches for that entry.
+
+
+
+ InterPro entry name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ IPR015590
+ IPR[0-9]{6}
+ Primary accession number of an InterPro entry.
+ InterPro primary accession
+ InterPro primary accession number
+
+
+
+ Every InterPro entry has a unique accession number to provide a persistent citation of database records.
+ InterPro accession
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Secondary accession number of an InterPro entry.
+ InterPro secondary accession number
+
+
+
+ InterPro secondary accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the Gene3D database.
+
+
+
+ Gene3D ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PIRSF[0-9]{6}
+ Unique identifier of an entry from the PIRSF database.
+
+
+
+ PIRSF ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PR[0-9]{5}
+ The unique identifier of an entry in the PRINTS database.
+
+
+
+ PRINTS code
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PF[0-9]{5}
+ Accession number of a Pfam entry.
+
+
+
+ Pfam accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SM[0-9]{5}
+ Accession number of an entry from the SMART database.
+
+
+
+ SMART accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier (number) of a hidden Markov model from the Superfamily database.
+
+
+
+ Superfamily hidden Markov model number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry (family) from the TIGRFam database.
+ TIGRFam accession number
+
+
+
+ TIGRFam ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PD[0-9]+
+ A ProDom domain family accession number.
+
+
+
+ ProDom is a protein domain family database.
+ ProDom accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the TRANSFAC database.
+
+
+
+ TRANSFAC accession number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [AEP]-[a-zA-Z_0-9]{4}-[0-9]+
+ Accession number of an entry from the ArrayExpress database.
+ ArrayExpress experiment ID
+
+
+
+ ArrayExpress accession number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ PRIDE experiment accession number.
+
+
+
+ PRIDE experiment accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the EMDB electron microscopy database.
+
+
+
+ EMDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [GDS|GPL|GSE|GSM][0-9]+
+ Accession number of an entry from the GEO database.
+
+
+
+ GEO accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the GermOnline database.
+
+
+
+ GermOnline ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the EMAGE database.
+
+
+
+ EMAGE ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of an entry from a database of disease.
+
+
+
+ Disease ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the HGVbase database.
+
+
+
+ HGVbase ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry from the HIVDB database.
+
+ HIVDB identifier
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [*#+%^]?[0-9]{6}
+ Identifier of an entry from the OMIM database.
+
+
+
+ OMIM ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an object from one of the KEGG databases (excluding the GENES division).
+
+
+
+ KEGG object identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ REACT_[0-9]+(\.[0-9]+)?
+ Identifier of an entry from the Reactome database.
+ Reactome ID
+
+
+
+ Pathway ID (reactome)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry from the aMAZE database.
+
+ Pathway ID (aMAZE)
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a pathway from the BioCyc biological pathways database.
+ BioCyc pathway ID
+
+
+
+ Pathway ID (BioCyc)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the INOH database.
+ INOH identifier
+
+
+
+ Pathway ID (INOH)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the PATIKA database.
+ PATIKA ID
+
+
+
+ Pathway ID (PATIKA)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the CPDB (ConsensusPathDB) biological pathways database, which is an identifier from an external database integrated into CPDB.
+ CPDB ID
+
+
+
+ This concept refers to identifiers used by the databases collated in CPDB; CPDB identifiers are not independently defined.
+ Pathway ID (CPDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PTHR[0-9]{5}
+ Identifier of a biological pathway from the Panther Pathways database.
+ Panther Pathways ID
+
+
+
+ Pathway ID (Panther)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ MIR:00100005
+ MIR:[0-9]{8}
+ Unique identifier of a MIRIAM data resource.
+
+
+
+ This is the identifier used internally by MIRIAM for a data type.
+ MIRIAM identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a data type from the MIRIAM database.
+
+
+
+ MIRIAM data type name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ urn:miriam:pubmed:16333295|urn:miriam:obo.go:GO%3A0045202
+ The URI (URL or URN) of a data entity from the MIRIAM database.
+ identifiers.org synonym
+
+
+
+ A MIRIAM URI consists of the URI of the MIRIAM data type (PubMed, UniProt etc) followed by the identifier of an element of that data type, for example PMID for a publication or an accession number for a GO term.
+ MIRIAM URI
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UniProt|Enzyme Nomenclature
+ The primary name of a data type from the MIRIAM database.
+
+
+
+ The primary name of a MIRIAM data type is taken from a controlled vocabulary.
+ MIRIAM data type primary name
+
+
+
+
+ UniProt|Enzyme Nomenclature
+ A protein entity has the MIRIAM data type 'UniProt', and an enzyme has the MIRIAM data type 'Enzyme Nomenclature'.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A synonymous name of a data type from the MIRIAM database.
+
+
+
+ A synonymous name for a MIRIAM data type taken from a controlled vocabulary.
+ MIRIAM data type synonymous name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a Taverna workflow.
+
+
+
+ Taverna workflow ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a biological (mathematical) model.
+
+
+
+ Biological model name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (BIOMD|MODEL)[0-9]{10}
+ Unique identifier of an entry from the BioModel database.
+
+
+
+ BioModel ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Chemical structure specified in PubChem Compound Identification (CID), a non-zero integer identifier for a unique chemical structure.
+ PubChem compound accession identifier
+
+
+
+ PubChem CID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the ChemSpider database.
+
+
+
+ ChemSpider ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CHEBI:[0-9]+
+ Identifier of an entry from the ChEBI database.
+ ChEBI IDs
+ ChEBI identifier
+
+
+
+ ChEBI ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the BioPax ontology.
+
+
+
+ BioPax concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a concept from The Gene Ontology.
+ GO concept identifier
+
+
+
+ GO concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the MeSH vocabulary.
+
+
+
+ MeSH concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the HGNC controlled vocabulary.
+
+
+
+ HGNC concept ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 9662|3483|182682
+ [1-9][0-9]{0,8}
+ A stable unique identifier for each taxon (for a species, a family, an order, or any other group) in the NCBI taxonomy database.
+ NCBI tax ID
+ NCBI taxonomy identifier
+
+
+
+ NCBI taxonomy ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the Plant Ontology (PO).
+
+
+
+ Plant Ontology concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the UMLS vocabulary.
+
+
+
+ UMLS concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FMA:[0-9]+
+ An identifier of a concept from Foundational Model of Anatomy.
+
+
+
+ Classifies anatomical entities according to their shared characteristics (genus) and distinguishing characteristics (differentia). Specifies the part-whole and spatial relationships of the entities, morphological transformation of the entities during prenatal development and the postnatal life cycle and principles, rules and definitions according to which classes and relationships in the other three components of FMA are represented.
+ FMA concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the EMAP mouse ontology.
+
+
+
+ EMAP concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the ChEBI ontology.
+
+
+
+ ChEBI concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the MGED ontology.
+
+
+
+ MGED concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the myGrid ontology.
+
+
+
+ The ontology is provided as two components, the service ontology and the domain ontology. The domain ontology provides concepts for core bioinformatics data types and their relations. The service ontology describes the physical and operational features of web services.
+ myGrid concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 4963447
+ [1-9][0-9]{0,8}
+ PubMed unique identifier of an article.
+ PMID
+
+
+
+ PubMed ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (doi\:)?[0-9]{2}\.[0-9]{4}/.*
+ Digital Object Identifier (DOI) of a published article.
+ Digital Object Identifier
+
+
+
+ DOI
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Medline UI (unique identifier) of an article.
+ Medline unique identifier
+
+
+
+ The use of Medline UI has been replaced by the PubMed unique identifier.
+ Medline UI
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a computer package, application, method or function.
+
+
+
+ Tool name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The unique name of a signature (sequence classifier) method.
+
+
+
+ Signature methods from http://www.ebi.ac.uk/Tools/InterProScan/help.html#results include BlastProDom, FPrintScan, HMMPIR, HMMPfam, HMMSmart, HMMTigr, ProfileScan, ScanRegExp, SuperFamily and HAMAP.
+ Tool name (signature)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a BLAST tool.
+ BLAST name
+
+
+
+ This includes 'blastn', 'blastp', 'blastx', 'tblastn' and 'tblastx'.
+ Tool name (BLAST)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a FASTA tool.
+
+
+
+ This includes 'fasta3', 'fastx3', 'fasty3', 'fastf3', 'fasts3' and 'ssearch'.
+ Tool name (FASTA)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of an EMBOSS application.
+
+
+
+ Tool name (EMBOSS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of an EMBASSY package.
+
+
+
+ Tool name (EMBASSY package)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR constitutional descriptor.
+ QSAR constitutional descriptor
+
+
+ QSAR descriptor (constitutional)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR electronic descriptor.
+ QSAR electronic descriptor
+
+
+ QSAR descriptor (electronic)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR geometrical descriptor.
+ QSAR geometrical descriptor
+
+
+ QSAR descriptor (geometrical)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR topological descriptor.
+ QSAR topological descriptor
+
+
+ QSAR descriptor (topological)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR molecular descriptor.
+ QSAR molecular descriptor
+
+
+ QSAR descriptor (molecular)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any collection of multiple protein sequences and associated metadata that do not (typically) correspond to common sequence database records or database entries.
+
+
+ Sequence set (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any collection of multiple nucleotide sequences and associated metadata that do not (typically) correspond to common sequence database records or database entries.
+
+
+ Sequence set (nucleic acid)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A set of sequences that have been clustered or otherwise classified as belonging to a group including (typically) sequence cluster information.
+
+
+ The cluster might include sequence identifiers, short descriptions, alignment and summary information.
+ Sequence cluster
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A file of intermediate results from a PSIBLAST search that is used for priming the search in the next PSIBLAST iteration.
+
+ A Psiblast checkpoint file uses ASN.1 Binary Format and usually has the extension '.asn'.
+ Psiblast checkpoint file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Sequences generated by HMMER package in FASTA-style format.
+
+ HMMER synthetic sequences set
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A protein sequence cleaved into peptide fragments (by enzymatic or chemical cleavage) with fragment masses.
+
+
+ Proteolytic digest
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SO:0000412
+ Restriction digest fragments from digesting a nucleotide sequence with restriction sites using a restriction endonuclease.
+
+
+ Restriction digest
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Oligonucleotide primer(s) for PCR and DNA amplification, for example a minimal primer set.
+
+
+ PCR primers
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ File of sequence vectors used by EMBOSS vectorstrip application, or any file in same format.
+
+ vectorstrip cloning vector definition file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A library of nucleotide sequences to avoid during hybridisation events. Hybridisation of the internal oligo to sequences in this library is avoided, rather than priming from them. The file is in a restricted FASTA format.
+
+ Primer3 internal oligo mishybridizing library
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A nucleotide sequence library of sequences to avoid during amplification (for example repetitive sequences, or possibly the sequences of genes in a gene family that should not be amplified). The file must be in a restricted FASTA format.
+
+ Primer3 mispriming library file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ File of one or more pairs of primer sequences, as used by EMBOSS primersearch application.
+
+ primersearch primer pairs sequence record
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A cluster of protein sequences.
+ Protein sequence cluster
+
+
+ The sequences are typically related, for example a family of sequences.
+ Sequence cluster (protein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A cluster of nucleotide sequences.
+ Nucleotide sequence cluster
+
+
+ The sequences are typically related, for example a family of sequences.
+ Sequence cluster (nucleic acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The size (length) of a sequence, subsequence or region in a sequence, or range(s) of lengths.
+
+
+ Sequence length
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Size of a sequence word.
+
+ Word size is used for example in word-based sequence database search methods.
+ Word size
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Size of a sequence window.
+
+ A window is a region of fixed size but not fixed position over a molecular sequence. It is typically moved (computationally) over a sequence during scoring.
+ Window size
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Specification of range(s) of length of sequences.
+
+ Sequence length range
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Report on basic information about a molecular sequence such as name, accession number, type (nucleic or protein), length, description etc.
+
+
+ Sequence information report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report about non-positional sequence features, typically a report on general molecular sequence properties derived from sequence analysis.
+ Sequence properties report
+
+
+ Sequence property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotation of positional features of molecular sequence(s), i.e. that can be mapped to position(s) in the sequence.
+ Feature record
+ Features
+ General sequence features
+ Sequence features report
+ SO:0000110
+
+
+ This includes annotation of positional sequence features, organised into a standard feature table, or any other report of sequence features. General feature reports are a source of sequence feature table information although internal conversion would be required.
+ Sequence features
+ http://purl.bioontology.org/ontology/MSH/D058977
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Comparative data on sequence features such as statistics, intersections (and data on intersections), differences etc.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Sequence features (comparative)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report of general sequence properties derived from protein sequence data.
+
+ Sequence property (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report of general sequence properties derived from nucleotide sequence data.
+
+ Sequence property (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on sequence complexity, for example low-complexity or repeat regions in sequences.
+ Sequence property (complexity)
+
+
+ Sequence complexity report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on ambiguity in molecular sequence(s).
+ Sequence property (ambiguity)
+
+
+ Sequence ambiguity report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report (typically a table) on character or word composition / frequency of a molecular sequence(s).
+ Sequence composition
+ Sequence property (composition)
+
+
+ Sequence composition report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on peptide fragments of certain molecular weight(s) in one or more protein sequences.
+
+
+ Peptide molecular weight hits
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of third base position variability in a nucleotide sequence.
+
+
+ Base position variability plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A table of character or word composition / frequency of a molecular sequence.
+
+ Sequence composition table
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of base frequencies of a nucleotide sequence.
+
+
+ Base frequencies table
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of word composition of a nucleotide sequence.
+
+
+ Base word frequencies table
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of amino acid frequencies of a protein sequence.
+ Sequence composition (amino acid frequencies)
+
+
+ Amino acid frequencies table
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of amino acid word composition of a protein sequence.
+ Sequence composition (amino acid words)
+
+
+ Amino acid word frequencies table
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation of a molecular sequence in DAS format.
+
+ DAS sequence feature annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotation of positional sequence features, organised into a standard feature table.
+ Sequence feature table
+
+
+ Feature table
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map of (typically one) DNA sequence annotated with positional or non-positional features.
+ DNA map
+
+
+ Map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report on intrinsic positional features of a nucleotide sequence, formatted to be machine-readable.
+ Feature table (nucleic acid)
+ Nucleic acid feature table
+ Genome features
+ Genomic features
+
+
+ This includes nucleotide sequence feature annotation in any known sequence feature table format and any other report of nucleic acid features.
+ Nucleic acid features
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report on intrinsic positional features of a protein sequence.
+ Feature table (protein)
+ Protein feature table
+
+
+ This includes protein sequence feature annotation in any known sequence feature table format and any other report of protein features.
+ Protein features
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GeneticMap
+ A map showing the relative positions of genetic markers in a nucleic acid sequence, based on estimation of non-physical distance such as recombination frequencies.
+ Linkage map
+
+
+ A genetic (linkage) map indicates the proximity of two genes on a chromosome, whether two genes are linked and the frequency they are transmitted together to an offspring. They are limited to genetic markers of traits observable only in whole organisms.
+ Genetic map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map of genetic markers in a contiguous, assembled genomic sequence, with the sizes and separation of markers measured in base pairs.
+
+
+ A sequence map typically includes annotation on significant subsequences such as contigs, haplotypes and genes. The contigs shown will (typically) be a set of small overlapping clones representing a complete chromosomal segment.
+ Sequence map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map of DNA (linear or circular) annotated with physical features or landmarks such as restriction sites, cloned DNA fragments, genes or genetic markers, along with the physical distances between them.
+
+
+ Distance in a physical map is measured in base pairs. A physical map might be ordered relative to a reference map (typically a genetic map) in the process of genome sequencing.
+ Physical map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image of a sequence with matches to signatures, motifs or profiles.
+
+
+ Sequence signature map
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map showing banding patterns derived from direct observation of a stained chromosome.
+ Chromosome map
+ Cytogenic map
+ Cytologic map
+
+
+ This is the lowest-resolution physical map and can provide only rough estimates of physical (base pair) distances. Like a genetic map, they are limited to genetic markers of traits observable only in whole organisms.
+ Cytogenetic map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A gene map showing distances between loci based on relative cotransduction frequencies.
+
+
+ DNA transduction map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence map of a single gene annotated with genetic features such as introns, exons, untranslated regions, polyA signals, promoters, enhancers and (possibly) mutations defining alleles of a gene.
+
+
+ Gene map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence map of a plasmid (circular DNA).
+
+
+ Plasmid map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence map of a whole genome.
+
+
+ Genome map
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of the restriction enzyme cleavage sites (restriction sites) in a nucleic acid sequence.
+
+
+ Restriction map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image showing matches between protein sequence(s) and InterPro Entries.
+
+
+ The sequence(s) might be screened against InterPro, or be the sequences from the InterPro entry itself. Each protein is represented as a scaled horizontal line with colored bars indicating the position of the matches.
+ InterPro compact match image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image showing detailed information on matches between protein sequence(s) and InterPro Entries.
+
+
+ The sequence(s) might be screened against InterPro, or be the sequences from the InterPro entry itself.
+ InterPro detailed match image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image showing the architecture of InterPro domains in a protein sequence.
+
+
+ The sequence(s) might be screened against InterPro, or be the sequences from the InterPro entry itself. Domain architecture is shown as a series of non-overlapping domains in the protein.
+ InterPro architecture image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ SMART protein schematic in PNG format.
+
+ SMART protein schematic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Images based on GlobPlot prediction of intrinsic disordered regions and globular domains in protein sequences.
+
+
+ GlobPlot domain image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Report on the location of matches to profiles, motifs (conserved or functional patterns) or other signatures in one or more sequences.
+
+
+ Sequence motif matches
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Location of short repetitive subsequences (repeat sequences) in (typically nucleotide) sequences.
+
+ The report might include derived data map such as classification, annotation, organisation, periodicity etc.
+ Sequence features (repeats)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on predicted or actual gene structure, regions which make an RNA product and features such as promoters, coding regions, splice sites etc.
+
+ Gene and transcript structure (report)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ regions of a nucleic acid sequence containing mobile genetic elements.
+
+
+ Mobile genetic elements
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on quadruplex-forming motifs in a nucleotide sequence.
+
+ Nucleic acid features (quadruplexes)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Report on nucleosome formation potential or exclusion sequence(s).
+
+
+ Nucleosome exclusion sequences
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ A report on exonic splicing enhancers (ESE) in an exon.
+
+
+ Gene features (exonic splicing enhancer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on microRNA sequence (miRNA) or precursor, microRNA targets, miRNA binding sites in an RNA sequence etc.
+
+ Nucleic acid features (microRNA)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ protein-coding regions including coding sequences (CDS), exons, translation initiation sites and open reading frames.
+
+
+ Coding region
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A report on selenocysteine insertion sequence (SECIS) element in a DNA sequence.
+
+ Gene features (SECIS element)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ transcription factor binding sites (TFBS) in a DNA sequence.
+
+
+ Transcription factor binding sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on predicted or known key residue positions (sites) in a protein sequence, such as binding or functional sites.
+
+ Use this concept for collections of specific sites which are not necessarily contiguous, rather than contiguous stretches of amino acids.
+ Protein features (sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ signal peptides or signal peptide cleavage sites in protein sequences.
+
+
+ Protein features report (signal peptides)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ cleavage sites (for a proteolytic enzyme or agent) in a protein sequence.
+
+
+ Protein features report (cleavage sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ post-translation modifications in a protein sequence, typically describing the specific sites involved.
+
+
+ Protein features (post-translation modifications)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ catalytic residues (active site) of an enzyme.
+
+
+ Protein features report (active sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ ligand-binding (non-catalytic) residues of a protein, such as sites that bind metal, prosthetic groups or lipids.
+
+
+ Protein features report (binding sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ A report on antigenic determinant sites (epitopes) in proteins, from sequence and / or structural data.
+
+
+ Epitope mapping is commonly done during vaccine design.
+ Protein features (epitopes)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ RNA and DNA-binding proteins and binding sites in protein sequences.
+
+
+ Protein features report (nucleic acid binding sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on epitopes that bind to MHC class I molecules.
+
+ MHC Class I epitopes report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on predicted epitopes that bind to MHC class II molecules.
+
+ MHC Class II epitopes report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ A report or plot of PEST sites in a protein sequence.
+
+
+ 'PEST' motifs target proteins for proteolytic degradation and reduce the half-lives of proteins dramatically.
+ Protein features (PEST sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Scores from a sequence database search (for example a BLAST search).
+
+ Sequence database hits scores list
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignments from a sequence database search (for example a BLAST search).
+
+ Sequence database hits alignments list
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on the evaluation of the significance of sequence similarity scores from a sequence database search (for example a BLAST search).
+
+ Sequence database hits evaluation data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alphabet for the motifs (patterns) that MEME will search for.
+
+ MEME motif alphabet
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ MEME background frequencies file.
+
+ MEME background frequencies file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ File of directives for ordering and spacing of MEME motifs.
+
+ MEME motifs directive file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dirichlet distribution used by hidden Markov model analysis programs.
+
+
+ Dirichlet distribution
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+
+ Emission and transition counts of a hidden Markov model, generated once HMM has been determined, for example after residues/gaps have been assigned to match, delete and insert states.
+
+ HMM emission and transition counts
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Regular expression pattern.
+
+
+ Regular expression
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any specific or conserved pattern (typically expressed as a regular expression) in a molecular sequence.
+
+
+ Sequence motif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some type of statistical model representing a (typically multiple) sequence alignment.
+
+
+ Sequence profile
+ http://semanticscience.org/resource/SIO_010531
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report about a specific or conserved protein sequence pattern.
+ InterPro entry
+ Protein domain signature
+ Protein family signature
+ Protein region signature
+ Protein repeat signature
+ Protein site signature
+
+
+ Protein signature
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A nucleotide regular expression pattern from the Prosite database.
+
+ Prosite nucleotide pattern
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A protein regular expression pattern from the Prosite database.
+
+ Prosite protein pattern
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (typically representing a sequence alignment) that is a simple matrix of nucleotide (or amino acid) counts per position.
+ PFM
+
+
+ Position frequency matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (typically representing a sequence alignment) that is a weighted matrix of nucleotide (or amino acid) counts per position.
+ PWM
+
+
+ Contributions of individual sequences to the matrix might be uneven (weighted).
+ Position weight matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (typically representing a sequence alignment) derived from a matrix of nucleotide (or amino acid) counts per position that reflects information content at each position.
+ ICM
+
+
+ Information content matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A statistical Markov model of a system which is assumed to be a Markov process with unobserved (hidden) states. For example, a hidden Markov model representation of a set or alignment of sequences.
+ HMM
+
+
+ Hidden Markov model
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more fingerprints (sequence classifiers) as used in the PRINTS database.
+
+
+ Fingerprint
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A protein signature of the type used in the EMBASSY Signature package.
+
+ Domainatrix signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ NULL hidden Markov model representation used by the HMMER package.
+
+ HMMER NULL hidden Markov model
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein family signature (sequence classifier) from the InterPro database.
+
+ Protein family signatures cover all domains in the matching proteins and span >80% of the protein length and with no adjacent protein domain signatures or protein region signatures.
+ Protein family signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein domain signature (sequence classifier) from the InterPro database.
+
+ Protein domain signatures identify structural or functional domains or other units with defined boundaries.
+ Protein domain signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein region signature (sequence classifier) from the InterPro database.
+
+ A protein region signature defines a region which cannot be described as a protein family or domain signature.
+ Protein region signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein repeat signature (sequence classifier) from the InterPro database.
+
+ A protein repeat signature is a repeated protein motif, that is not in single copy expected to independently fold into a globular domain.
+ Protein repeat signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein site signature (sequence classifier) from the InterPro database.
+
+ A protein site signature is a classifier for a specific site in a protein.
+ Protein site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein conserved site signature (sequence classifier) from the InterPro database.
+
+ A protein conserved site signature is any short sequence pattern that may contain one or more unique residues and cannot be described as an active site, binding site or post-translational modification.
+ Protein conserved site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein active site signature (sequence classifier) from the InterPro database.
+
+ A protein active site signature corresponds to an enzyme catalytic pocket. An active site typically includes non-contiguous residues, therefore multiple signatures may be required to describe an active site. It comprises residues involved in enzymatic reactions for which mutational data is typically available.
+ Protein active site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein binding site signature (sequence classifier) from the InterPro database.
+
+ A protein binding site signature corresponds to a site that reversibly binds chemical compounds, which are not themselves substrates of the enzymatic reaction. This includes enzyme cofactors and residues involved in electron transport or protein structure modification.
+ Protein binding site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein post-translational modification signature (sequence classifier) from the InterPro database.
+
+ A protein post-translational modification signature corresponds to sites that undergo modification of the primary structure, typically to activate or de-activate a function. For example, methylation, sumoylation, glycosylation etc. The modification might be permanent or reversible.
+ Protein post-translational modification signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Alignment of exactly two molecular sequences.
+ Sequence alignment (pair)
+
+
+ Pair sequence alignment
+ http://semanticscience.org/resource/SIO_010068
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of more than two molecular sequences.
+
+ Sequence alignment (multiple)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple nucleotide sequences.
+ Sequence alignment (nucleic acid)
+ DNA sequence alignment
+ RNA sequence alignment
+
+
+ Nucleic acid sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple protein sequences.
+ Sequence alignment (protein)
+
+
+ Protein sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple molecular sequences of different types.
+ Sequence alignment (hybrid)
+
+
+ Hybrid sequence alignments include for example genomic DNA to EST, cDNA or mRNA.
+ Hybrid sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment of exactly two nucleotide sequences.
+
+ Sequence alignment (nucleic acid pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment of exactly two protein sequences.
+
+ Sequence alignment (protein pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of exactly two molecular sequences of different types.
+
+ Hybrid sequence alignment (pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of more than two nucleotide sequences.
+
+ Multiple nucleotide sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of more than two protein sequences.
+
+ Multiple protein sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple floating point number defining the penalty for opening or extending a gap in an alignment.
+
+
+ Alignment score or penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Whether end gaps are scored or not.
+
+ Score end gaps control
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controls the order of sequences in an output sequence alignment.
+
+ Aligned sequence order
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for opening a gap in an alignment.
+
+
+ Gap opening penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for extending a gap in an alignment.
+
+
+ Gap extension penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for gaps that are close together in an alignment.
+
+
+ Gap separation penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ A penalty for gaps at the termini of an alignment, either from the N/C terminal of protein or 5'/3' terminal of nucleotide sequences.
+
+ Terminal gap penalty
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The score for a 'match' used in various sequence database search applications with simple scoring schemes.
+
+
+ Match reward score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The score (penalty) for a 'mismatch' used in various alignment and sequence database search applications with simple scoring schemes.
+
+
+ Mismatch penalty score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ This is the threshold drop in score at which extension of word alignment is halted.
+
+
+ Drop off score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for opening a gap in an alignment.
+
+ Gap opening penalty (integer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for opening a gap in an alignment.
+
+ Gap opening penalty (float)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for extending a gap in an alignment.
+
+ Gap extension penalty (integer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for extending a gap in an alignment.
+
+ Gap extension penalty (float)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for gaps that are close together in an alignment.
+
+ Gap separation penalty (integer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for gaps that are close together in an alignment.
+
+ Gap separation penalty (float)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A number defining the penalty for opening gaps at the termini of an alignment, either from the N/C terminal of protein or 5'/3' terminal of nucleotide sequences.
+
+
+ Terminal gap opening penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A number defining the penalty for extending gaps at the termini of an alignment, either from the N/C terminal of protein or 5'/3' terminal of nucleotide sequences.
+
+
+ Terminal gap extension penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence identity is the number (%) of matches (identical characters) in positions from an alignment of two molecular sequences.
+
+
+ Sequence identity
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence similarity is the similarity (expressed as a percentage) of two molecular sequences calculated from their alignment, a scoring matrix for scoring characters substitutions and penalties for gap insertion and extension.
+
+
+ Data Type is float probably.
+ Sequence similarity
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data on molecular sequence alignment quality (estimated accuracy).
+
+ Sequence alignment metadata (quality report)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on character conservation in a molecular sequence alignment.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation. Use this concept for calculated substitution rates, relative site variability, data on sites with biased properties, highly conserved or very poorly conserved sites, regions, blocks etc.
+ Sequence alignment report (site conservation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on correlations between sites in a molecular sequence alignment, typically to identify possible covarying positions and predict contacts or structural constraints in protein structures.
+
+ Sequence alignment report (site correlation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of molecular sequences to a Domainatrix signature (representing a sequence alignment).
+
+ Sequence-profile alignment (Domainatrix signature)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment of molecular sequence(s) to a hidden Markov model(s).
+
+ Sequence-profile alignment (HMM)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment of molecular sequences to a protein fingerprint from the PRINTS database.
+
+ Sequence-profile alignment (fingerprint)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Continuous quantitative data that may be read during phylogenetic tree calculation.
+ Phylogenetic continuous quantitative characters
+ Quantitative traits
+
+
+ Phylogenetic continuous quantitative data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Character data with discrete states that may be read during phylogenetic tree calculation.
+ Discrete characters
+ Discretely coded characters
+ Phylogenetic discrete states
+
+
+ Phylogenetic discrete data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more cliques of mutually compatible characters that are generated, for example from analysis of discrete character data, and are used to generate a phylogeny.
+ Phylogenetic report (cliques)
+
+
+ Phylogenetic character cliques
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic invariants data for testing alternative tree topologies.
+ Phylogenetic report (invariants)
+
+
+ Phylogenetic invariants
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report of data concerning or derived from a phylogenetic tree, or from comparing two or more phylogenetic trees.
+
+ This is a broad data type and is used for example for reports on confidence, shape or stratigraphic (age) data derived from phylogenetic tree analysis.
+ Phylogenetic report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A model of DNA substitution that explains a DNA sequence alignment, derived from phylogenetic tree analysis.
+ Phylogenetic tree report (DNA substitution model)
+ Sequence alignment report (DNA substitution model)
+ Substitution model
+
+
+ DNA substitution model
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data about the shape of a phylogenetic tree.
+
+ Phylogenetic tree report (tree shape)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on the confidence of a phylogenetic tree.
+
+ Phylogenetic tree report (tree evaluation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Distances, such as Branch Score distance, between two or more phylogenetic trees.
+ Phylogenetic tree report (tree distances)
+
+
+ Phylogenetic tree distances
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Molecular clock and stratigraphic (age) data derived from phylogenetic tree analysis.
+
+ Phylogenetic tree report (tree stratigraphic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Independent contrasts for characters used in a phylogenetic tree, or covariances, regressions and correlations between characters for those contrasts.
+ Phylogenetic report (character contrasts)
+
+
+ Phylogenetic character contrasts
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of integer numbers for sequence comparison.
+
+ Comparison matrix (integers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of floating point numbers for sequence comparison.
+
+ Comparison matrix (floats)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of integer or floating point numbers for nucleotide comparison.
+ Nucleotide comparison matrix
+ Nucleotide substitution matrix
+
+
+ Comparison matrix (nucleotide)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of integer or floating point numbers for amino acid comparison.
+ Amino acid comparison matrix
+ Amino acid substitution matrix
+
+
+ Comparison matrix (amino acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of integer numbers for nucleotide comparison.
+
+ Nucleotide comparison matrix (integers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of floating point numbers for nucleotide comparison.
+
+ Nucleotide comparison matrix (floats)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of integer numbers for amino acid comparison.
+
+ Amino acid comparison matrix (integers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of floating point numbers for amino acid comparison.
+
+ Amino acid comparison matrix (floats)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a nucleic acid tertiary (3D) structure.
+
+
+ Nucleic acid structure
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a protein tertiary (3D) structure, or part of a structure, possibly in complex with other molecules.
+ Protein structures
+
+
+ Protein structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The structure of a protein in complex with a ligand, typically a small molecule such as an enzyme substrate or cofactor, but possibly another macromolecule.
+
+
+ This includes interactions of proteins with atoms, ions and small molecules or macromolecules such as nucleic acids or other polypeptides. For stable inter-polypeptide interactions use 'Protein complex' instead.
+ Protein-ligand complex
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a carbohydrate (3D) structure.
+
+
+ Carbohydrate structure
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the (3D) structure of a small molecule, such as any common chemical compound.
+ CHEBI:23367
+
+
+ Small molecule structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a DNA tertiary (3D) structure.
+
+
+ DNA structure
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for an RNA tertiary (3D) structure.
+
+
+ RNA structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a tRNA tertiary (3D) structure, including tmRNA, snoRNAs etc.
+
+
+ tRNA structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the tertiary (3D) structure of a polypeptide chain.
+
+
+ Protein chain
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the tertiary (3D) structure of a protein domain.
+
+
+ Protein domain
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ 3D coordinate and associated data for a protein tertiary (3D) structure (all atoms).
+
+ Protein structure (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a protein tertiary (3D) structure (typically C-alpha atoms only).
+ Protein structure (C-alpha atoms)
+
+
+ C-beta atoms from amino acid side-chains may be included.
+ C-alpha trace
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a polypeptide chain tertiary (3D) structure (all atoms).
+
+ Protein chain (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a polypeptide chain tertiary (3D) structure (typically C-alpha atoms only).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Protein chain (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a protein domain tertiary (3D) structure (all atoms).
+
+ Protein domain (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a protein domain tertiary (3D) structure (typically C-alpha atoms only).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Protein domain (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of exactly two molecular tertiary (3D) structures.
+ Pair structure alignment
+
+
+ Structure alignment (pair)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two molecular tertiary (3D) structures.
+
+ Structure alignment (multiple)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of protein tertiary (3D) structures.
+ Structure alignment (protein)
+
+
+ Protein structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of nucleic acid tertiary (3D) structures.
+ Structure alignment (nucleic acid)
+
+
+ Nucleic acid structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment (superimposition) of exactly two protein tertiary (3D) structures.
+
+ Structure alignment (protein pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two protein tertiary (3D) structures.
+
+ Multiple protein tertiary structure alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment (superimposition) of protein tertiary (3D) structures (all atoms considered).
+
+ Structure alignment (protein all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment (superimposition) of protein tertiary (3D) structures (typically C-alpha atoms only considered).
+
+ C-beta atoms from amino acid side-chains may be considered.
+ Structure alignment (protein C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of exactly two protein tertiary (3D) structures (all atoms considered).
+
+ Pairwise protein tertiary structure alignment (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of exactly two protein tertiary (3D) structures (typically C-alpha atoms only considered).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Pairwise protein tertiary structure alignment (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two protein tertiary (3D) structures (all atoms considered).
+
+ Multiple protein tertiary structure alignment (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two protein tertiary (3D) structures (typically C-alpha atoms only considered).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Multiple protein tertiary structure alignment (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment (superimposition) of exactly two nucleic acid tertiary (3D) structures.
+
+ Structure alignment (nucleic acid pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two nucleic acid tertiary (3D) structures.
+
+ Multiple nucleic acid tertiary structure alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of RNA tertiary (3D) structures.
+ Structure alignment (RNA)
+
+
+ RNA structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix to transform (rotate/translate) 3D coordinates, typically the transformation necessary to superimpose two molecular structures.
+
+
+ Structural transformation matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ DaliLite hit table of protein chain tertiary structure alignment data.
+
+ The significant and top-scoring hits for regions of the compared structures is shown. Data such as Z-Scores, number of aligned residues, root-mean-square deviation (RMSD) of atoms and sequence identity are given.
+ DaliLite hit table
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A score reflecting structural similarities of two molecules.
+
+ Molecular similarity score
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Root-mean-square deviation (RMSD) is calculated to measure the average distance between superimposed macromolecular coordinates.
+ RMSD
+
+
+ Root-mean-square deviation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A measure of the similarity between two ligand fingerprints.
+
+
+ A ligand fingerprint is derived from ligand structural data from a Protein DataBank file. It reflects the elements or groups present or absent, covalent bonds and bond orders and the bonded environment in terms of SATIS codes and BLEEP atom types.
+ Tanimoto similarity score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A matrix of 3D-1D scores reflecting the probability of amino acids to occur in different tertiary structural environments.
+
+
+ 3D-1D scoring matrix
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of 20 numerical values which quantify a property (e.g. physicochemical or biochemical) of the common amino acids.
+
+
+ Amino acid index
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Chemical classification (small, aliphatic, aromatic, polar, charged etc) of amino acids.
+ Chemical classes (amino acids)
+
+
+ Amino acid index (chemical classes)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Statistical protein contact potentials.
+ Contact potentials (amino acid pair-wise)
+
+
+ Amino acid pair-wise contact potentials
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Molecular weights of amino acids.
+ Molecular weight (amino acids)
+
+
+ Amino acid index (molecular weight)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Hydrophobic, hydrophilic or charge properties of amino acids.
+ Hydropathy (amino acids)
+
+
+ Amino acid index (hydropathy)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Experimental free energy values for the water-interface and water-octanol transitions for the amino acids.
+ White-Wimley data (amino acids)
+
+
+ Amino acid index (White-Wimley data)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Van der Waals radii of atoms for different amino acid residues.
+ van der Waals radii (amino acids)
+
+
+ Amino acid index (van der Waals radii)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on a specific enzyme.
+
+ Enzyme report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on a specific restriction enzyme such as enzyme reference data.
+
+ This might include name of enzyme, organism, isoschizomers, methylation, source, suppliers, literature references, or data on restriction enzyme patterns such as name of enzyme, recognition site, length of pattern, number of cuts made by enzyme, details of blunt or sticky end cut etc.
+ Restriction enzyme report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ List of molecular weight(s) of one or more proteins or peptides, for example cut by proteolytic enzymes or reagents.
+
+
+ The report might include associated data such as frequency of peptide fragment molecular weights.
+ Peptide molecular weights
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on the hydrophobic moment of a polypeptide sequence.
+
+
+ Hydrophobic moment is a peptide's hydrophobicity measured for different angles of rotation.
+ Peptide hydrophobic moment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The aliphatic index of a protein.
+
+
+ The aliphatic index is the relative protein volume occupied by aliphatic side chains.
+ Protein aliphatic index
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A protein sequence with annotation on hydrophobic or hydrophilic / charged regions, hydrophobicity plot etc.
+
+
+ Hydrophobic moment is a peptide's hydrophobicity measured for different angles of rotation.
+ Protein sequence hydropathy plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of the mean charge of the amino acids within a window of specified length as the window is moved along a protein sequence.
+
+
+ Protein charge plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The solubility or atomic solvation energy of a protein sequence or structure.
+ Protein solubility data
+
+
+ Protein solubility
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the crystallizability of a protein sequence.
+ Protein crystallizability data
+
+
+ Protein crystallizability
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the stability, intrinsic disorder or globularity of a protein sequence.
+ Protein globularity data
+
+
+ Protein globularity
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The titration curve of a protein.
+
+
+ Protein titration curve
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The isoelectric point of a protein.
+
+
+ Protein isoelectric point
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The pKa value of a protein.
+
+
+ Protein pKa value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The hydrogen exchange rate of a protein.
+
+
+ Protein hydrogen exchange rate
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The extinction coefficient of a protein.
+
+
+ Protein extinction coefficient
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The optical density of a protein.
+
+
+ Protein optical density
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ An informative report on protein subcellular localisation (nuclear, cytoplasmic, mitochondrial, chloroplast, plastid, membrane etc) or destination (exported / extracellular proteins).
+
+ Protein subcellular localisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on allergenicity / immunogenicity of peptides and proteins.
+ Peptide immunogenicity
+ Peptide immunogenicity report
+
+
+ This includes data on peptide ligands that elicit an immune response (immunogens), allergic cross-reactivity, predicted antigenicity (Hopp and Woods plot) etc. These data are useful in the development of peptide-specific antibodies or multi-epitope vaccines. Methods might use sequence data (for example motifs) and / or structural data.
+ Peptide immunogenicity data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A report on the immunogenicity of MHC class I or class II binding peptides.
+
+ MHC peptide immunogenicity report
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific protein 3D structure(s) or structural domains.
+ Protein property (structural)
+ Protein report (structure)
+ Protein structural property
+ Protein structure report (domain)
+ Protein structure-derived report
+
+
+ Protein structure report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on the quality of a protein three-dimensional model.
+ Protein property (structural quality)
+ Protein report (structural quality)
+ Protein structure report (quality evaluation)
+ Protein structure validation report
+
+
+ Model validation might involve checks for atomic packing, steric clashes, agreement with electron density maps etc.
+ Protein structural quality report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Data on inter-atomic or inter-residue contacts, distances and interactions in protein structure(s) or on the interactions of protein atoms or residues with non-protein groups.
+
+
+ Protein non-covalent interactions report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Informative report on flexibility or motion of a protein structure.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein flexibility or motion report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the solvent accessible or buried surface area of a protein structure.
+
+
+ This concept covers definitions of the protein surface, interior and interfaces, accessible and buried residues, surface accessible pockets, interior inaccessible cavities etc.
+ Protein solvent accessibility
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on the surface properties (shape, hydropathy, electrostatic patches etc) of a protein structure.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein surface report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phi/psi angle data or a Ramachandran plot of a protein structure.
+
+
+ Ramachandran plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the net charge distribution (dipole moment) of a protein structure.
+
+
+ Protein dipole moment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A matrix of distances between amino acid residues (for example the C-alpha atoms) in a protein structure.
+
+
+ Protein distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An amino acid residue contact map for a protein structure.
+
+
+ Protein contact map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on clusters of contacting residues in protein structures such as a key structural residue network.
+
+
+ Protein residue 3D cluster
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Patterns of hydrogen bonding in protein structures.
+
+
+ Protein hydrogen bonds
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Non-canonical atomic interactions in protein structures.
+
+ Protein non-canonical interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a node from the CATH database.
+
+ The report (for example http://www.cathdb.info/cathnode/1.10.10.10) includes CATH code (of the node and upper levels in the hierarchy), classification text (of appropriate levels in hierarchy), list of child nodes, representative domain and other relevant data and links.
+ CATH node
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a node from the SCOP database.
+
+ SCOP node
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ An EMBASSY domain classification file (DCF) of classification and other data for domains from SCOP or CATH, in EMBL-like format.
+
+
+ EMBASSY domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'class' node from the CATH database.
+
+ CATH class
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'architecture' node from the CATH database.
+
+ CATH architecture
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'topology' node from the CATH database.
+
+ CATH topology
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'homologous superfamily' node from the CATH database.
+
+ CATH homologous superfamily
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'structurally similar group' node from the CATH database.
+
+ CATH structurally similar group
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'functional category' node from the CATH database.
+
+ CATH functional category
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on known protein structural domains or folds that are recognised (identified) in protein sequence(s).
+
+ Methods use some type of mapping between sequence and fold, for example secondary structure prediction and alignment, profile comparison, sequence properties, homologous sequence search, kernel machines etc. Domains and folds might be taken from SCOP or CATH.
+ Protein fold recognition report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ protein-protein interaction(s), including interactions between protein domains.
+
+
+ Protein-protein interaction report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report on protein-ligand (small molecule) interaction(s).
+ Protein-drug interaction report
+
+
+ Protein-ligand interaction report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ protein-DNA/RNA interaction(s).
+
+
+ Protein-nucleic acid interactions report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the dissociation characteristics of a double-stranded nucleic acid molecule (DNA or a DNA/RNA hybrid) during heating.
+ Nucleic acid stability profile
+ Melting map
+ Nucleic acid melting curve
+
+
+ A melting (stability) profile calculated the free energy required to unwind and separate the nucleic acid strands, plotted for sliding windows over a sequence.
+ Nucleic acid melting curve: a melting curve of a double-stranded nucleic acid molecule (DNA or DNA/RNA). Shows the proportion of nucleic acid which are double-stranded versus temperature.
+ Nucleic acid probability profile: a probability profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA). Shows the probability of a base pair not being melted (i.e. remaining as double-stranded DNA) at a specified temperature
+ Nucleic acid stitch profile: stitch profile of hybridised or double stranded nucleic acid (DNA or RNA/DNA). A stitch profile diagram shows partly melted DNA conformations (with probabilities) at a range of temperatures. For example, a stitch profile might show possible loop openings with their location, size, probability and fluctuations at a given temperature.
+ Nucleic acid temperature profile: a temperature profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA). Plots melting temperature versus base position.
+ Nucleic acid melting profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Enthalpy of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+
+ Nucleic acid enthalpy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entropy of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+
+ Nucleic acid entropy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Melting temperature of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+ Nucleic acid melting temperature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Stitch profile of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+
+ Nucleic acid stitch profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA base pair stacking energies data.
+
+
+ DNA base pair stacking energies data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA base pair twist angle data.
+
+
+ DNA base pair twist angle data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA base trimer roll angles data.
+
+
+ DNA base trimer roll angles data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ RNA parameters used by the Vienna package.
+
+ Vienna RNA parameters
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Structure constraints used by the Vienna package.
+
+ Vienna RNA structure constraints
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ RNA concentration data used by the Vienna package.
+
+ Vienna RNA concentration data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ RNA calculated energy data generated by the Vienna package.
+
+ Vienna RNA calculated energy
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dotplot of RNA base pairing probability matrix.
+
+
+ Such as generated by the Vienna package.
+ Base pairing probability matrix dotplot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about RNA/DNA folding, minimum folding energies for DNA or RNA sequences, energy landscape of RNA mutants etc.
+ Nucleic acid report (folding model)
+ Nucleic acid report (folding)
+ RNA secondary structure folding classification
+ RNA secondary structure folding probabilities
+
+
+ Nucleic acid folding report
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Table of codon usage data calculated from one or more nucleic acid sequences.
+
+
+ A codon usage table might include the codon usage table name, optional comments and a table with columns for codons and corresponding codon usage data. A genetic code can be extracted from or represented by a codon usage table.
+ Codon usage table
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A genetic code for an organism.
+
+
+ A genetic code need not include detailed codon usage information.
+ Genetic code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple measure of synonymous codon usage bias often used to predict gene expression levels.
+
+ Codon adaptation index
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of the synonymous codon usage calculated for windows over a nucleotide sequence.
+ Synonymous codon usage statistic plot
+
+
+ Codon usage bias plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The effective number of codons used in a gene sequence. This reflects how far codon usage of a gene departs from equal usage of synonymous codons.
+
+ Nc statistic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The differences in codon usage fractions between two codon usage tables.
+
+
+ Codon usage fraction difference
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about the influence of genotype on drug response.
+
+
+ The report might correlate gene expression or single-nucleotide polymorphisms with drug efficacy or toxicity.
+ Pharmacogenomic test report
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific disease.
+
+
+ For example, an informative report on a specific tumor including nature and origin of the sample, anatomic site, organ or tissue, tumor type, including morphology and/or histologic type, and so on.
+ Disease report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report on linkage disequilibrium; the non-random association of alleles or polymorphisms at two or more loci (not necessarily on the same chromosome).
+
+
+ Linkage disequilibrium (report)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A graphical 2D tabular representation of expression data, typically derived from an omics experiment. A heat map is a table where rows and columns correspond to different features and contexts (for example, cells or samples) and the cell colour represents the level of expression of a gene in that context.
+
+
+ Heat map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Affymetrix library file of information about which probes belong to which probe set.
+
+ Affymetrix probe sets library file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Affymetrix library file of information about the probe sets such as the gene name with which the probe set is associated.
+ GIN file
+
+ Affymetrix probe sets information library file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Standard protonated molecular masses from trypsin (modified porcine trypsin, Promega) and keratin peptides, used in EMBOSS.
+
+
+ Molecular weights standard fingerprint
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report typically including a map (diagram) of a metabolic pathway.
+
+
+ This includes carbohydrate, energy, lipid, nucleotide, amino acid, glycan, PK/NRP, cofactor/vitamin, secondary metabolite, xenobiotics etc.
+ Metabolic pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ genetic information processing pathways.
+
+
+ Genetic information processing pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ environmental information processing pathways.
+
+
+ Environmental information processing pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report typically including a map (diagram) of a signal transduction pathway.
+
+
+ Signal transduction pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Topic concerning cellular process pathways.
+
+
+ Cellular process pathways report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ disease pathways, typically of human disease.
+
+
+ Disease pathway or network report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ A report typically including a map (diagram) of drug structure relationships.
+
+
+ Drug structure relationship map
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ networks of protein interactions.
+
+ Protein interaction networks
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An entry (data type) from the Minimal Information Requested in the Annotation of Biochemical Models (MIRIAM) database of data resources.
+
+ A MIRIAM entry describes a MIRIAM data type including the official name, synonyms, root URI, identifier pattern (regular expression applied to a unique identifier of the data type) and documentation. Each data type can be associated with several resources. Each resource is a physical location of a service (typically a database) providing information on the elements of a data type. Several resources may exist for each data type, provided the same (mirrors) or different information. MIRIAM provides a stable and persistent reference to its data types.
+ MIRIAM datatype
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple floating point number defining the lower or upper limit of an expectation value (E-value).
+ Expectation value
+
+
+ An expectation value (E-Value) is the expected number of observations which are at least as extreme as observations expected to occur by random chance. The E-value describes the number of hits with a given score or better that are expected to occur at random when searching a database of a particular size. It decreases exponentially with the score (S) of a hit. A low E value indicates a more significant score.
+ E-value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The z-value is the number of standard deviations a data value is above or below a mean value.
+
+
+ A z-value might be specified as a threshold for reporting hits from database searches.
+ Z-value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The P-value is the probability of obtaining by random chance a result that is at least as extreme as an observed result, assuming a NULL hypothesis is true.
+
+
+ A P-value might be specified as a threshold for reporting hits from database searches.
+ P-value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a database (or ontology) version, for example name, version number and release date.
+
+ Database version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on an application version, for example name, version number and release date.
+
+ Tool version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Information on a version of the CATH database.
+
+ CATH version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Cross-mapping of Swiss-Prot codes to PDB identifiers.
+
+ Swiss-Prot to PDB mapping
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Cross-references from a sequence record to other databases.
+
+ Sequence database cross-references
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Metadata on the status of a submitted job.
+
+ Values for EBI services are 'DONE' (job has finished and the results can then be retrieved), 'ERROR' (the job failed or no results were found), 'NOT_FOUND' (the job id is no longer available; job results might be deleted), 'PENDING' (the job is in a queue waiting processing), 'RUNNING' (the job is currently being processed).
+ Job status
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ The (typically numeric) unique identifier of a submitted job.
+
+ Job ID
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of job, for example interactive or non-interactive.
+
+ Job type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report of tool-specific metadata on some analysis or process performed, for example a log of diagnostic or error messages.
+
+ Tool log
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ DaliLite log file describing all the steps taken by a DaliLite alignment of two protein structures.
+
+ DaliLite log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ STRIDE log file.
+
+ STRIDE log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ NACCESS log file.
+
+ NACCESS log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS wordfinder log file.
+
+ EMBOSS wordfinder log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS (EMBASSY) domainatrix application log file.
+
+ EMBOSS domainatrix log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS (EMBASSY) sites application log file.
+
+ EMBOSS sites log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS (EMBASSY) supermatcher error file.
+
+ EMBOSS supermatcher error file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS megamerger log file.
+
+ EMBOSS megamerger log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS whichdb log file.
+
+ EMBOSS whichdb log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS vectorstrip log file.
+
+ EMBOSS vectorstrip log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A username on a computer system or a website.
+
+
+
+ Username
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A password on a computer system, or a website.
+
+
+
+ Password
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Email
+ Moby:EmailAddress
+ A valid email address of an end-user.
+
+
+
+ Email address
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a person.
+
+
+
+ Person name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Number of iterations of an algorithm.
+
+ Number of iterations
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Number of entities (for example database hits, sequences, alignments etc) to write to an output file.
+
+ Number of output entities
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controls the order of hits (reported matches) in an output file from a database search.
+
+ Hit sort order
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific drug.
+ Drug annotation
+ Drug structure relationship map
+
+
+ A drug structure relationship map is a report (typically a map diagram) of drug structure relationships.
+ Drug report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image (for viewing or printing) of a phylogenetic tree including (typically) a plot of rooted or unrooted phylogenies, cladograms, circular trees or phenograms and associated information.
+
+
+ See also 'Phylogenetic tree'
+ Phylogenetic tree image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of RNA secondary structure, knots, pseudoknots etc.
+
+
+ RNA secondary structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of protein secondary structure.
+
+
+ Protein secondary structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of one or more molecular tertiary (3D) structures.
+
+
+ Structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of two or more aligned molecular sequences possibly annotated with alignment features.
+
+
+ Sequence alignment image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of the structure of a small chemical compound.
+ Small molecule structure image
+ Chemical structure sketch
+ Small molecule sketch
+
+
+ The molecular identifier and formula are typically included.
+ Chemical structure image
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A fate map is a plan of an early stage of an embryo such as a blastula, showing areas that are of significance to development.
+
+
+ Fate map
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of spots from a microarray experiment.
+
+
+ Microarray spots image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the BioPax ontology.
+
+ BioPax term
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition from The Gene Ontology (GO).
+
+ GO
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the MeSH vocabulary.
+
+ MeSH
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the HGNC controlled vocabulary.
+
+ HGNC
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the NCBI taxonomy vocabulary.
+
+ NCBI taxonomy vocabulary
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the Plant Ontology (PO).
+
+ Plant ontology term
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the UMLS vocabulary.
+
+ UMLS
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from Foundational Model of Anatomy.
+
+ Classifies anatomical entities according to their shared characteristics (genus) and distinguishing characteristics (differentia). Specifies the part-whole and spatial relationships of the entities, morphological transformation of the entities during prenatal development and the postnatal life cycle and principles, rules and definitions according to which classes and relationships in the other three components of FMA are represented.
+ FMA
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the EMAP mouse ontology.
+
+ EMAP
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the ChEBI ontology.
+
+ ChEBI
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the MGED ontology.
+
+ MGED
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the myGrid ontology.
+
+ The ontology is provided as two components, the service ontology and the domain ontology. The domain ontology provides concepts for core bioinformatics data types and their relations. The service ontology describes the physical and operational features of web services.
+ myGrid
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition for a biological process from the Gene Ontology (GO).
+
+ Data Type is an enumerated string.
+ GO (biological process)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition for a molecular function from the Gene Ontology (GO).
+
+ Data Type is an enumerated string.
+ GO (molecular function)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition for a cellular component from the Gene Ontology (GO).
+
+ Data Type is an enumerated string.
+ GO (cellular component)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A relation type defined in an ontology.
+
+ Ontology relation type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The definition of a concept from an ontology.
+ Ontology class definition
+
+
+ Ontology concept definition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A comment on a concept from an ontology.
+
+ Ontology concept comment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Reference for a concept from an ontology.
+
+ Ontology concept reference
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Information on a published article provided by the doc2loc program.
+
+ The doc2loc output includes the url, format, type and availability code of a document for every service provider.
+ doc2loc document information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:PDB_residue_no
+ WHATIF: pdb_number
+ A residue identifier (a string) from a PDB file.
+
+
+ PDB residue number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Cartesian coordinate of an atom (in a molecular structure).
+ Cartesian coordinate
+
+
+ Atomic coordinate
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Cartesian x coordinate of an atom (in a molecular structure).
+
+
+ Atomic x coordinate
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Cartesian y coordinate of an atom (in a molecular structure).
+
+
+ Atomic y coordinate
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Cartesian z coordinate of an atom (in a molecular structure).
+
+
+ Atomic z coordinate
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_atom_name
+ WHATIF: PDBx_auth_atom_id
+ WHATIF: PDBx_type_symbol
+ WHATIF: alternate_atom
+ WHATIF: atom_type
+ Identifier (a string) of a specific atom from a PDB file for a molecular structure.
+
+
+
+ PDB atom name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on a single atom from a protein structure.
+ Atom data
+ CHEBI:33250
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein atom
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on a single amino acid residue position in a protein structure.
+ Residue
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein residue
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an atom.
+
+
+
+ Atom name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: type
+ Three-letter amino acid residue names as used in PDB files.
+
+
+
+ PDB residue name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_model_num
+ WHATIF: model_number
+ Identifier of a model structure from a PDB file.
+ Model number
+
+
+
+ PDB model number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Summary of domain classification information for a CATH domain.
+
+ The report (for example http://www.cathdb.info/domain/1cukA01) includes CATH codes for levels in the hierarchy for the domain, level descriptions and relevant data and links.
+ CATH domain report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database (based on ATOM records in PDB) for CATH domains (clustered at different levels of sequence identity).
+
+ CATH representative domain sequences (ATOM)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database (based on COMBS sequence data) for CATH domains (clustered at different levels of sequence identity).
+
+ CATH representative domain sequences (COMBS)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database for all CATH domains (based on PDB ATOM records).
+
+ CATH domain sequences (ATOM)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database for all CATH domains (based on COMBS sequence data).
+
+ CATH domain sequences (COMBS)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Information on a molecular sequence version.
+ Sequence version information
+
+
+ Sequence version
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A numerical value, that is some type of scored value arising for example from a prediction method.
+
+
+ Score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Report on general functional properties of specific protein(s).
+
+ For properties that can be mapped to a sequence, use 'Sequence report' instead.
+ Protein report (function)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Aspergillus Genome Database.
+
+ Gene name (ASPGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Candida Genome Database.
+
+ Gene name (CGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from dictyBase database.
+
+ Gene name (dictyBase)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Primary name of a gene from EcoGene Database.
+
+ Gene name (EcoGene primary)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from MaizeGDB (maize genes) database.
+
+ Gene name (MaizeGDB)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Saccharomyces Genome Database.
+
+ Gene name (SGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Tetrahymena Genome Database.
+
+ Gene name (TGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene from E.coli Genetic Stock Center.
+
+ Gene name (CGSC)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene approved by the HUGO Gene Nomenclature Committee.
+
+ Gene name (HGNC)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene from the Mouse Genome Database.
+
+ Gene name (MGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene from Bacillus subtilis Genome Sequence Project.
+
+ Gene name (Bacillus subtilis)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: ApiDB_PlasmoDB
+ Identifier of a gene from PlasmoDB Plasmodium Genome Resource.
+
+
+
+ Gene ID (PlasmoDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a gene from EcoGene Database.
+ EcoGene Accession
+ EcoGene ID
+
+
+
+ Gene ID (EcoGene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: FB
+ http://www.geneontology.org/doc/GO.xrf_abbs: FlyBase
+ Gene identifier from FlyBase database.
+
+
+
+ Gene ID (FlyBase)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Gene identifier from Glossina morsitans GeneDB database.
+
+ Gene ID (GeneDB Glossina morsitans)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Gene identifier from Leishmania major GeneDB database.
+
+ Gene ID (GeneDB Leishmania major)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ http://www.geneontology.org/doc/GO.xrf_abbs: GeneDB_Pfalciparum
+ Gene identifier from Plasmodium falciparum GeneDB database.
+
+ Gene ID (GeneDB Plasmodium falciparum)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ http://www.geneontology.org/doc/GO.xrf_abbs: GeneDB_Spombe
+ Gene identifier from Schizosaccharomyces pombe GeneDB database.
+
+ Gene ID (GeneDB Schizosaccharomyces pombe)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ http://www.geneontology.org/doc/GO.xrf_abbs: GeneDB_Tbrucei
+ Gene identifier from Trypanosoma brucei GeneDB database.
+
+ Gene ID (GeneDB Trypanosoma brucei)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: GR_GENE
+ http://www.geneontology.org/doc/GO.xrf_abbs: GR_gene
+ Gene identifier from Gramene database.
+
+
+
+ Gene ID (Gramene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: PAMGO_VMD
+ http://www.geneontology.org/doc/GO.xrf_abbs: VMD
+ Gene identifier from Virginia Bioinformatics Institute microbial database.
+
+
+
+ Gene ID (Virginia microbial)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: SGN
+ Gene identifier from Sol Genomics Network.
+
+
+
+ Gene ID (SGN)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WBGene[0-9]{8}
+ http://www.geneontology.org/doc/GO.xrf_abbs: WB
+ http://www.geneontology.org/doc/GO.xrf_abbs: WormBase
+ Gene identifier used by WormBase database.
+
+
+
+ Gene ID (WormBase)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Any name (other than the recommended one) for a gene.
+
+ Gene synonym
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of an open reading frame attributed by a sequencing project.
+
+
+
+ ORF name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A component of a larger sequence assembly.
+
+ Sequence assembly component
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on a chromosome aberration such as abnormalities in chromosome structure.
+
+ Chromosome annotation (aberration)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a clone (cloned molecular sequence) from a database.
+
+
+
+ Clone ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_ins_code
+ WHATIF: insertion_code
+ An insertion code (part of the residue number) for an amino acid residue from a PDB file.
+
+
+ PDB insertion code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: PDBx_occupancy
+ The fraction of an atom type present at a site in a molecular structure.
+
+
+ The sum of the occupancies of all the atom types at a site should not normally significantly exceed 1.0.
+ Atomic occupancy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: PDBx_B_iso_or_equiv
+ Isotropic B factor (atomic displacement parameter) for an atom from a PDB file.
+
+
+ Isotropic B factor
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A cytogenetic map showing chromosome banding patterns in mutant cell lines relative to the wild type.
+ Deletion-based cytogenetic map
+
+
+ A cytogenetic map is built from a set of mutant cell lines with sub-chromosomal deletions and a reference wild-type line ('genome deletion panel'). The panel is used to map markers onto the genome by comparing mutant to wild-type banding patterns. Markers are linked (occur in the same deleted region) if they share the same banding pattern (presence or absence) as the deletion panel.
+ Deletion map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A genetic map which shows the approximate location of quantitative trait loci (QTL) between two or more markers.
+ Quantitative trait locus map
+
+
+ QTL map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Haplotyping_Study_obj
+ A map of haplotypes in a genome or other sequence, describing common patterns of genetic variation.
+
+
+ Haplotype map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Data describing a set of multiple genetic or physical maps, typically sharing a common set of features which are mapped.
+
+
+ Map set data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+
+ A feature which may be mapped (positioned) on a genetic or other type of map.
+
+ Mappable features may be based on Gramene's notion of map features; see http://www.gramene.org/db/cmap/feature_type_info.
+ Map feature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A designation of the type of map (genetic map, physical map, sequence map etc) or map set.
+
+ Map types may be based on Gramene's notion of a map type; see http://www.gramene.org/db/cmap/map_type_info.
+ Map type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a protein fold.
+
+
+
+ Protein fold name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:BriefTaxonConcept
+ Moby:PotentialTaxon
+ The name of a group of organisms belonging to the same taxonomic rank.
+ Taxonomic rank
+ Taxonomy rank
+
+
+
+ For a complete list of taxonomic ranks see https://www.phenoscape.org/wiki/Taxonomic_Rank_Vocabulary.
+ Taxon
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique identifier of a (group of) organisms.
+
+
+
+ Organism identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a genus of organism.
+
+
+
+ Genus name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GCP_Taxon
+ Moby:TaxonName
+ Moby:TaxonScientificName
+ Moby:TaxonTCS
+ Moby:iANT_organism-xml
+ The full name for a group of organisms, reflecting their biological classification and (usually) conforming to a standard nomenclature.
+ Taxonomic information
+ Taxonomic name
+
+
+
+ Name components correspond to levels in a taxonomic hierarchy (e.g. 'Genus', 'Species', etc.) Meta information such as a reference where the name was defined and a date might be included.
+ Taxonomic classification
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:iHOPorganism
+ A unique identifier for an organism used in the iHOP database.
+
+
+
+ iHOP organism ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Common name for an organism as used in the GenBank database.
+
+
+
+ Genbank common name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a taxon from the NCBI taxonomy database.
+
+
+
+ NCBI taxon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An alternative for a word.
+
+ Synonym
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A common misspelling of a word.
+
+ Misspelling
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An abbreviation of a phrase or word.
+
+ Acronym
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term which is likely to be misleading of its meaning.
+
+ Misnomer
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Author
+ Information on the authors of a published work.
+
+
+
+ Author ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier representing an author in the DragonDB database.
+
+
+
+ DragonDB author identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:DescribedLink
+ A URI along with annotation describing the data found at the address.
+
+
+ Annotated URI
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A controlled vocabulary for words and phrases that can appear in the keywords field (KW line) of entries from the UniProt database.
+
+ UniProt keywords
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:GENEFARM_GeneID
+ Identifier of a gene from the GeneFarm database.
+
+
+
+ Gene ID (GeneFarm)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:Blattner_number
+ The Blattner identifier for a gene.
+
+
+
+ Blattner number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Moby_namespace:MIPS_GE_Maize
+ Identifier for genetic elements in MIPS Maize database.
+
+ Gene ID (MIPS Maize)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Moby_namespace:MIPS_GE_Medicago
+ Identifier for genetic elements in MIPS Medicago database.
+
+ Gene ID (MIPS Medicago)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The name of an Antirrhinum Gene from the DragonDB database.
+
+ Gene name (DragonDB)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ A unique identifier for an Arabidopsis gene, which is an acronym or abbreviation of the gene name.
+
+ Gene name (Arabidopsis)
+ true
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:iHOPsymbol
+ A unique identifier of a protein or gene used in the iHOP database.
+
+
+
+ iHOP symbol
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from the GeneFarm database.
+
+ Gene name (GeneFarm)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique name or other identifier of a genetic locus, typically conforming to a scheme that names loci (such as predicted genes) depending on their position in a molecular sequence, for example a completely sequenced genome or chromosome.
+ Locus identifier
+ Locus name
+
+
+
+ Locus ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AT[1-5]G[0-9]{5}
+ http://www.geneontology.org/doc/GO.xrf_abbs:AGI_LocusCode
+ Locus identifier for Arabidopsis Genome Initiative (TAIR, TIGR and MIPS databases).
+ AGI ID
+ AGI identifier
+ AGI locus code
+ Arabidopsis gene loci number
+
+
+
+ Locus ID (AGI)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: ASPGD
+ http://www.geneontology.org/doc/GO.xrf_abbs: ASPGDID
+ Identifier for loci from ASPGD (Aspergillus Genome Database).
+
+
+
+ Locus ID (ASPGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: Broad_MGG
+ Identifier for loci from Magnaporthe grisea Database at the Broad Institute.
+
+
+
+ Locus ID (MGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: CGD
+ http://www.geneontology.org/doc/GO.xrf_abbs: CGDID
+ Identifier for loci from CGD (Candida Genome Database).
+ CGD locus identifier
+ CGDID
+
+
+
+ Locus ID (CGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: JCVI_CMR
+ http://www.geneontology.org/doc/GO.xrf_abbs: TIGR_CMR
+ Locus identifier for Comprehensive Microbial Resource at the J. Craig Venter Institute.
+
+
+
+ Locus ID (CMR)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:LocusID
+ http://www.geneontology.org/doc/GO.xrf_abbs: NCBI_locus_tag
+ Identifier for loci from NCBI database.
+ Locus ID (NCBI)
+
+
+
+ NCBI locus tag
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: SGD
+ http://www.geneontology.org/doc/GO.xrf_abbs: SGDID
+ Identifier for loci from SGD (Saccharomyces Genome Database).
+ SGDID
+
+
+
+ Locus ID (SGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:MMP_Locus
+ Identifier of loci from Maize Mapping Project.
+
+
+
+ Locus ID (MMP)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:DDB_gene
+ Identifier of locus from DictyBase (Dictyostelium discoideum).
+
+
+
+ Locus ID (DictyBase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:EntrezGene_EntrezGeneID
+ Moby_namespace:EntrezGene_ID
+ Identifier of a locus from EntrezGene database.
+
+
+
+ Locus ID (EntrezGene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:MaizeGDB_Locus
+ Identifier of locus from MaizeGDB (Maize genome database).
+
+
+
+ Locus ID (MaizeGDB)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Moby:SO_QTL
+ A stretch of DNA that is closely linked to the genes underlying a quantitative trait (a phenotype that varies in degree and depends upon the interactions between multiple genes and their environment).
+
+ A QTL sometimes but does not necessarily correspond to a gene.
+ Quantitative trait locus
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:GeneId
+ Identifier of a gene from the KOME database.
+
+
+
+ Gene ID (KOME)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Tropgene_locus
+ Identifier of a locus from the Tropgene database.
+
+
+
+ Locus ID (Tropgene)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An alignment of molecular sequences, structures or profiles derived from them.
+
+
+ Alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data for an atom (in a molecular structure).
+ General atomic property
+
+
+ Atomic property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:SP_KW
+ http://www.geneontology.org/doc/GO.xrf_abbs: SP_KW
+ A word or phrase that can appear in the keywords field (KW line) of entries from the UniProt database.
+
+
+ UniProt keyword
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A name for a genetic locus conforming to a scheme that names loci (such as predicted genes) depending on their position in a molecular sequence, for example a completely sequenced genome or chromosome.
+
+ Ordered locus name
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GCP_MapInterval
+ Moby:GCP_MapPoint
+ Moby:GCP_MapPosition
+ Moby:GenePosition
+ Moby:HitPosition
+ Moby:Locus
+ Moby:MapPosition
+ Moby:Position
+ PDBML:_atom_site.id
+ A position in a map (for example a genetic map), either a single position (point) or a region / interval.
+ Locus
+ Map position
+
+
+ This includes positions in genomes based on a reference sequence. A position may be specified for any mappable object, i.e. anything that may have positional information such as a physical position in a chromosome. Data might include sequence region name, strand, coordinate system name, assembly name, start position and end position.
+ Sequence coordinates
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the intrinsic physical (e.g. structural) or chemical properties of one, more or all amino acids.
+ Amino acid data
+
+
+ Amino acid property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A human-readable collection of information which (typically) is generated or collated by hand and which describes a biological entity, phenomena or associated primary (e.g. sequence or structural) data, as distinct from the primary data itself and computer-generated reports derived from it.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Annotation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data describing a molecular map (genetic or physical) or a set of such maps, including various attributes of, data extracted from or derived from the analysis of them, but excluding the map(s) themselves. This includes metadata for map sets that share a common set of features which are mapped.
+ Map attribute
+ Map set data
+
+
+ Map data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data used by the Vienna RNA analysis package.
+
+ Vienna RNA structural data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Data used to replace (mask) characters in a molecular sequence.
+
+ Sequence mask parameter
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning chemical reaction(s) catalysed by enzyme(s).
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Enzyme kinetics data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot giving an approximation of the kinetics of an enzyme-catalysed reaction, assuming simple kinetics (i.e. no intermediate or product inhibition, allostericity or cooperativity). It plots initial reaction rate to the substrate concentration (S) from which the maximum rate (vmax) is apparent.
+
+
+ Michaelis Menten plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot based on the Michaelis Menten equation of enzyme kinetics plotting the ratio of the initial substrate concentration (S) against the reaction velocity (v).
+
+
+ Hanes Woolf plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Raw data from or annotation on laboratory experiments.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a genome version.
+
+ Genome version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Typically a human-readable summary of a body of facts or information indicating why a statement is true or valid. This may include a computational prediction, laboratory experiment, literature reference etc.
+
+
+ Evidence
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A molecular sequence and minimal metadata, typically an identifier of the sequence and/or a comment.
+
+
+ Sequence record lite
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more molecular sequences, possibly with associated annotation.
+ Sequences
+
+
+ This concept is a placeholder of concepts for primary sequence data including raw sequences and sequence records. It should not normally be used for derivatives such as sequence alignments, motifs or profiles.
+ Sequence
+ http://purl.bioontology.org/ontology/MSH/D008969
+ http://purl.org/biotop/biotop.owl#BioMolecularSequenceInformation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A nucleic acid sequence and minimal metadata, typically an identifier of the sequence and/or a comment.
+
+
+ Nucleic acid sequence record (lite)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A protein sequence and minimal metadata, typically an identifier of the sequence and/or a comment.
+
+
+ Protein sequence record (lite)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information including annotation on a biological entity or phenomena, computer-generated reports of analysis of primary data (e.g. sequence or structural), and metadata (data about primary data) or any other free (essentially unformatted) text, as distinct from the primary data itself.
+ Document
+ Record
+
+
+ You can use this term by default for any textual report, in case you can't find another, more specific term. Reports may be generated automatically or collated by hand and can include metadata on the origin, source, history, ownership or location of some thing.
+ Report
+ http://semanticscience.org/resource/SIO_000148
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ General data for a molecule.
+ General molecular property
+
+
+ Molecular property (general)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning molecular structural data.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Structural data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A nucleotide sequence motif.
+ Nucleic acid sequence motif
+ DNA sequence motif
+ RNA sequence motif
+
+
+ Sequence motif (nucleic acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An amino acid sequence motif.
+ Protein sequence motif
+
+
+ Sequence motif (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Some simple value controlling a search operation, typically a search of a database.
+
+ Search parameter
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of hits from searching a database of some type.
+ Database hits
+ Search results
+
+
+ Database search results
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ The secondary structure assignment (predicted or real) of a nucleic acid or protein.
+
+ Secondary structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An array of numerical values.
+ Array
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Data concerning, extracted from, or derived from the analysis of molecular alignment of some type.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Alignment data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific nucleic acid molecules.
+
+
+ Nucleic acid report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more molecular tertiary (3D) structures. It might include annotation on the structure, a computer-generated report of analysis of structural data, and metadata (data about primary data) or any other free (essentially unformatted) text, as distinct from the primary data itself.
+ Structure-derived report
+
+
+ Structure report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+
+
+ A report on nucleic acid structure-derived data, describing structural properties of a DNA molecule, or any other annotation or information about specific nucleic acid 3D structure(s).
+
+ Nucleic acid structure data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on the physical (e.g. structural) or chemical properties of molecules, or parts of a molecule.
+ Physicochemical property
+ SO:0000400
+
+
+ Molecular property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Structural data for DNA base pairs or runs of bases, such as energy or angle data.
+
+
+ DNA base structural data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a database (or ontology) entry version, such as name (or other identifier) or parent database, unique identifier of entry, data, author and so on.
+
+ Database entry version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A persistent (stable) and unique identifier, typically identifying an object (entry) from a database.
+
+
+
+ Accession
+ http://semanticscience.org/resource/SIO_000675
+ http://semanticscience.org/resource/SIO_000731
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A single nucleotide polymorphism (SNP) in a DNA sequence.
+
+
+ SNP
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Reference to a dataset (or a cross-reference between two datasets), typically one or more entries in a biological database or ontology.
+
+
+ A list of database accessions or identifiers are usually included.
+ Data reference
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a submitted job.
+
+
+
+ Job identifier
+ http://wsio.org/data_009
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+
+ A name of a thing, which need not necessarily uniquely identify it.
+ Symbolic name
+
+
+
+ Name
+ http://www.w3.org/2000/01/rdf-schema#label
+ http://semanticscience.org/resource/SIO_000116
+ http://usefulinc.com/ns/doap#name
+
+
+
+
+
+ Closely related, but focusing on labeling and human readability but not on identification.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a thing, typically an enumerated string (a string with one of a limited set of values).
+
+ Type
+ http://purl.org/dc/elements/1.1/type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Authentication data usually used to log in to an account on an information system such as a web application or a database.
+
+
+
+ Account authentication
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A three-letter code used in the KEGG databases to uniquely identify organisms.
+
+
+
+ KEGG organism code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the KEGG GENES database.
+
+ Gene name (KEGG GENES)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an object from one of the BioCyc databases.
+
+
+
+ BioCyc ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a compound from the BioCyc chemical compounds database.
+ BioCyc compound ID
+ BioCyc compound identifier
+
+
+
+ Compound ID (BioCyc)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a biological reaction from the BioCyc reactions database.
+
+
+
+ Reaction ID (BioCyc)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an enzyme from the BioCyc enzymes database.
+ BioCyc enzyme ID
+
+
+
+ Enzyme ID (BioCyc)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a biological reaction from a database.
+
+
+
+ Reaction ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier that is re-used for data objects of fundamentally different types (typically served from a single database).
+
+
+
+ This branch provides an alternative organisation of the concepts nested under 'Accession' and 'Name'. All concepts under here are already included under 'Accession' or 'Name'.
+ Identifier (hybrid)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a molecular property.
+
+
+
+ Molecular property identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a codon usage table, for example a genetic code.
+ Codon usage table identifier
+
+
+
+ Codon usage table ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Primary identifier of an object from the FlyBase database.
+
+
+
+ FlyBase primary identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an object from the WormBase database.
+
+
+
+ WormBase identifier
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CE[0-9]{5}
+ Protein identifier used by WormBase database.
+
+
+
+ WormBase wormpep ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on a trinucleotide sequence that encodes an amino acid including the triplet sequence, the encoded amino acid or whether it is a start or stop codon.
+
+ Nucleic acid features (codon)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a map of a molecular sequence.
+
+
+
+ Map identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a software end-user on a website or a database (typically a person or an entity).
+
+
+
+ Person identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name or other identifier of a nucleic acid molecule.
+
+
+
+ Nucleic acid identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+
+ Frame for translation of DNA (3 forward and 3 reverse frames relative to a chromosome).
+
+ Translation frame specification
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a genetic code.
+
+
+
+ Genetic code identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Informal name for a genetic code, typically an organism name.
+
+
+
+ Genetic code name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a file format such as HTML, PNG, PDF, EMBL, GenBank and so on.
+
+
+
+ File format name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing a type of sequence profile such as frequency matrix, Gribskov profile, hidden Markov model etc.
+
+ Sequence profile type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a computer operating system such as Linux, PC or Mac.
+
+
+
+ Operating system name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A type of point or block mutation, including insertion, deletion, change, duplication and moves.
+
+ Mutation type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A logical operator such as OR, AND, XOR, and NOT.
+
+
+
+ Logical operator
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A control of the order of data that is output, for example the order of sequences in an alignment.
+
+ Possible options including sorting by score, rank, by increasing P-value (probability, i.e. most statistically significant hits given first) and so on.
+ Results sort order
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple parameter that is a toggle (boolean value), typically a control for a modal tool.
+
+ Toggle
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The width of an output sequence or alignment.
+
+ Sequence width
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for introducing or extending a gap in an alignment.
+
+
+ Gap penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A temperature concerning nucleic acid denaturation, typically the temperature at which the two strands of a hybridised or double stranded nucleic acid (DNA or RNA/DNA) molecule separate.
+ Melting temperature
+
+
+ Nucleic acid melting temperature
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The concentration of a chemical compound.
+
+
+ Concentration
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Size of the incremental 'step' a sequence window is moved over a sequence.
+
+ Window step size
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An image of a graph generated by the EMBOSS suite.
+
+ EMBOSS graph
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An application report generated by the EMBOSS suite.
+
+ EMBOSS report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An offset for a single-point sequence position.
+
+ Sequence offset
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A value that serves as a threshold for a tool (usually to control scoring or output).
+
+ Threshold
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ An informative report on a transcription factor protein.
+
+ This might include conformational or physicochemical properties, as well as sequence information for transcription factor(s) binding sites.
+ Protein report (transcription factor)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a category of biological or bioinformatics database.
+
+ Database category name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name of a sequence profile.
+
+ Sequence profile name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Specification of one or more colors.
+
+ Color
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A parameter that is used to control rendering (drawing) to a device or image.
+
+ Rendering parameter
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any arbitrary name of a molecular sequence.
+
+
+
+ Sequence name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A temporal date.
+
+ Date
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Word composition data for a molecular sequence.
+
+ Word composition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of Fickett testcode statistic (identifying protein coding regions) in a nucleotide sequence.
+
+
+ Fickett testcode plot
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of sequence similarities identified from word-matching or character comparison.
+ Sequence conservation report
+
+
+ Use this concept for calculated substitution rates, relative site variability, data on sites with biased properties, highly conserved or very poorly conserved sites, regions, blocks etc.
+ Sequence similarity plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of peptide sequence looking down the axis of the helix for highlighting amphipathicity and other properties.
+
+
+ Helical wheel
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of peptide sequence in a simple 3,4,3,4 repeating pattern that emulates at a simple level the arrangement of residues around an alpha helix.
+
+
+ Useful for highlighting amphipathicity and other properties.
+ Helical net
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A plot of general physicochemical properties of a protein sequence.
+
+ Protein sequence properties plot
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of pK versus pH for a protein.
+
+
+ Protein ionisation curve
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of character or word composition / frequency of a molecular sequence.
+
+
+ Sequence composition plot
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Density plot (of base composition) for a nucleotide sequence.
+
+
+ Nucleic acid density plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of a sequence trace (nucleotide sequence versus probabilities of each of the 4 bases).
+
+
+ Sequence trace image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on siRNA duplexes in mRNA.
+
+ Nucleic acid features (siRNA)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A collection of multiple molecular sequences and (typically) associated metadata that is intended for sequential processing.
+
+ This concept may be used for sequence sets that are expected to be read and processed a single sequence at a time.
+ Sequence set (stream)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Secondary identifier of an object from the FlyBase database.
+
+
+
+ Secondary identifiers are used to handle entries that were merged with or split from other entries in the database.
+ FlyBase secondary identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The number of a certain thing.
+
+ Cardinality
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A single thing.
+
+ Exactly 1
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ One or more things.
+
+ 1 or more
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Exactly two things.
+
+ Exactly 2
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Two or more things.
+
+ 2 or more
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A fixed-size datum calculated (by using a hash function) for a molecular sequence, typically for purposes of error detection or indexing.
+ Hash
+ Hash code
+ Hash sum
+ Hash value
+
+
+ Sequence checksum
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ chemical modification of a protein.
+
+
+ Protein features report (chemical modifications)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Data on an error generated by computer system or tool.
+
+ Error
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information on any arbitrary database entry.
+
+
+ Database entry metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A cluster of similar genes.
+
+ Gene cluster
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A molecular sequence and comprehensive metadata (such as a feature table), typically corresponding to a full entry from a molecular sequence database.
+
+
+ Sequence record full
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a plasmid in a database.
+
+
+
+ Plasmid identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique identifier of a specific mutation catalogued in a database.
+
+
+
+ Mutation ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Information describing the mutation itself, the organ site, tissue and type of lesion where the mutation has been identified, description of the patient origin and life-style.
+
+ Mutation annotation (basic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on the prevalence of mutation(s), including data on samples and mutation prevalence (e.g. by tumour type).
+
+ Mutation annotation (prevalence)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on mutation prognostic data, such as information on patient cohort, the study settings and the results of the study.
+
+ Mutation annotation (prognostic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on the functional properties of mutant proteins including transcriptional activities, promotion of cell growth and tumorigenicity, dominant negative effects, capacity to induce apoptosis, cell-cycle arrest or checkpoints in human cells and so on.
+
+ Mutation annotation (functional)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The number of a codon, for instance, at which a mutation is located.
+
+
+ Codon number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific tumor including nature and origin of the sample, anatomic site, organ or tissue, tumor type, including morphology and/or histologic type, and so on.
+
+ Tumor annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Basic information about a server on the web, such as an SRS server.
+
+ Server metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a field in a database.
+
+
+
+ Database field name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a sequence cluster from the SYSTERS database.
+ SYSTERS cluster ID
+
+
+
+ Sequence cluster ID (SYSTERS)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning a biological ontology.
+
+
+ Ontology metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Raw SCOP domain classification data files.
+
+ These are the parsable data files provided by SCOP.
+ Raw SCOP domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Raw CATH domain classification data files.
+
+ These are the parsable data files provided by CATH.
+ Raw CATH domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on the types of small molecules or 'heterogens' (non-protein groups) that are represented in PDB files.
+
+ Heterogen annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Phylogenetic property values data.
+
+ Phylogenetic property values
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A collection of sequences output from a bootstrapping (resampling) procedure.
+
+ Bootstrapping is often performed in phylogenetic analysis.
+ Sequence set (bootstrapped)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A consensus phylogenetic tree derived from comparison of multiple trees.
+
+ Phylogenetic consensus tree
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A data schema for organising or transforming data of some type.
+
+ Schema
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A DTD (document type definition).
+
+ DTD
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An XML Schema.
+
+ XML Schema
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A relax-NG schema.
+
+ Relax-NG schema
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An XSLT stylesheet.
+
+ XSLT stylesheet
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a data type.
+
+
+
+ Data resource definition name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an OBO file format such as OBO-XML, plain and so on.
+
+
+
+ OBO file format name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for genetic elements in MIPS database.
+ MIPS genetic element identifier
+
+
+
+ Gene ID (MIPS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of protein sequence(s) or protein sequence database entries.
+
+ Sequence identifier (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of nucleotide sequence(s) or nucleotide sequence database entries.
+
+ Sequence identifier (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An accession number of an entry from the EMBL sequence database.
+ EMBL ID
+ EMBL accession number
+ EMBL identifier
+
+
+
+ EMBL accession
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a polypeptide in the UniProt database.
+ UniProt entry name
+ UniProt identifier
+ UniProtKB entry name
+ UniProtKB identifier
+
+
+
+ UniProt ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the GenBank sequence database.
+ GenBank ID
+ GenBank accession number
+ GenBank identifier
+
+
+
+ GenBank accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Secondary (internal) identifier of a Gramene database entry.
+ Gramene internal ID
+ Gramene internal identifier
+ Gramene secondary ID
+
+
+
+ Gramene secondary identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of an entry from a database of molecular sequence variation.
+
+
+
+ Sequence variation ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique (and typically persistent) identifier of a gene in a database, that is (typically) different to the gene name/symbol.
+ Gene accession
+ Gene code
+
+
+
+ Gene ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the AceView genes database.
+
+ Gene name (AceView)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: ECK
+ Identifier of an E. coli K-12 gene from EcoGene Database.
+ E. coli K-12 gene identifier
+ ECK accession
+
+
+
+ Gene ID (ECK)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for a gene approved by the HUGO Gene Nomenclature Committee.
+ HGNC ID
+
+
+
+ Gene ID (HGNC)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a gene, (typically) assigned by a person and/or according to a naming scheme. It may contain white space characters and is typically more intuitive and readable than a gene symbol. It (typically) may be used to identify similar genes in different species and to derive a gene symbol.
+ Allele name
+
+
+
+ Gene name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the NCBI genes database.
+
+ Gene name (NCBI)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A specification of a chemical structure in SMILES format.
+
+
+ SMILES string
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the STRING database of protein-protein interactions.
+
+
+
+ STRING ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific virus.
+
+ Virus annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on the taxonomy of a specific virus.
+
+ Virus annotation (taxonomy)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a biological reaction from the SABIO-RK reactions database.
+
+
+
+ Reaction ID (SABIO-RK)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific carbohydrate 3D structure(s).
+
+
+ Carbohydrate report
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A series of digits that are assigned consecutively to each sequence record processed by NCBI. The GI number bears no resemblance to the Accession number of the sequence record.
+ NCBI GI number
+
+
+
+ Nucleotide sequence GI number is shown in the VERSION field of the database record. Protein sequence GI number is shown in the CDS/db_xref field of a nucleotide database record, and the VERSION field of a protein database record.
+ GI number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier assigned to sequence records processed by NCBI, made of the accession number of the database record followed by a dot and a version number.
+ NCBI accession.version
+ accession.version
+
+
+
+ Nucleotide sequence version contains two letters followed by six digits, a dot, and a version number (or for older nucleotide sequence records, the format is one letter followed by five digits, a dot, and a version number). Protein sequence version contains three letters followed by five digits, a dot, and a version number.
+ NCBI version
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a cell line.
+
+
+
+ Cell line name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The exact name of a cell line.
+
+
+
+ Cell line name (exact)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The truncated name of a cell line.
+
+
+
+ Cell line name (truncated)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a cell line without any punctuation.
+
+
+
+ Cell line name (no punctuation)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The assonant name of a cell line.
+
+
+
+ Cell line name (assonant)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique, persistent identifier of an enzyme.
+ Enzyme accession
+
+
+
+ Enzyme ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an enzyme from the REBASE enzymes database.
+
+
+
+ REBASE enzyme number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DB[0-9]{5}
+ Unique identifier of a drug from the DrugBank database.
+
+
+
+ DrugBank ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier assigned to NCBI protein sequence records.
+ protein gi
+ protein gi number
+
+
+
+ Nucleotide sequence GI number is shown in the VERSION field of the database record. Protein sequence GI number is shown in the CDS/db_xref field of a nucleotide database record, and the VERSION field of a protein database record.
+ GI number (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A score derived from the alignment of two sequences, which is then normalised with respect to the scoring system.
+
+
+ Bit scores are normalised with respect to the scoring system and therefore can be used to compare alignment scores from different searches.
+ Bit score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+
+ Phase for translation of DNA (0, 1 or 2) relative to a fragment of the coding sequence.
+
+ Translation phase specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning or describing some core computational resource, as distinct from primary data. This includes metadata on the origin, source, history, ownership or location of some thing.
+ Provenance metadata
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Resource metadata
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any arbitrary identifier of an ontology.
+
+
+
+ Ontology identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a concept in an ontology.
+
+
+
+ Ontology concept name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a build of a particular genome.
+
+
+
+ Genome build identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a biological pathway or network.
+
+
+
+ Pathway or network name
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]{2,3}[0-9]{5}
+ Identifier of a pathway from the KEGG pathway database.
+ KEGG pathway ID
+
+
+
+ Pathway ID (KEGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+
+ Identifier of a pathway from the NCI-Nature pathway database.
+
+
+
+ Pathway ID (NCI-Nature)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a pathway from the ConsensusPathDB pathway database.
+
+
+
+ Pathway ID (ConsensusPathDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef database.
+ UniRef cluster id
+ UniRef entry accession
+
+
+
+ Sequence cluster ID (UniRef)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef100 database.
+ UniRef100 cluster id
+ UniRef100 entry accession
+
+
+
+ Sequence cluster ID (UniRef100)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef90 database.
+ UniRef90 cluster id
+ UniRef90 entry accession
+
+
+
+ Sequence cluster ID (UniRef90)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef50 database.
+ UniRef50 cluster id
+ UniRef50 entry accession
+
+
+
+ Sequence cluster ID (UniRef50)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning or derived from an ontology.
+ Ontological data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Ontology data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific RNA family or other group of classified RNA sequences.
+ RNA family annotation
+
+
+ RNA family report
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an RNA family, typically an entry from a RNA sequence classification database.
+
+
+
+ RNA family identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Stable accession number of an entry (RNA family) from the RFAM database.
+
+
+
+ RFAM accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing a type of protein family signature (sequence classifier) from the InterPro database.
+
+ Protein signature type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on protein domain-DNA/RNA interaction(s).
+
+ Domain-nucleic acid interaction report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ An informative report on protein domain-protein domain interaction(s).
+
+ Domain-domain interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data on indirect protein domain-protein domain interaction(s).
+
+ Domain-domain interaction (indirect)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a nucleotide or protein sequence database entry.
+
+
+
+ Sequence accession (hybrid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ Data concerning two-dimensional polygel electrophoresis.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ 2D PAGE data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ two-dimensional gel electrophoresis experiments, gels or spots in a gel.
+
+
+ 2D PAGE report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A persistent, unique identifier of a biological pathway or network (typically a database entry).
+
+
+
+ Pathway or network accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of the (1D representations of) secondary structure of two or more molecules.
+
+
+ Secondary structure alignment
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an object from the ASTD database.
+
+
+
+ ASTD ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an exon from the ASTD database.
+
+
+
+ ASTD ID (exon)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an intron from the ASTD database.
+
+
+
+ ASTD ID (intron)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a polyA signal from the ASTD database.
+
+
+
+ ASTD ID (polya)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a transcription start site from the ASTD database.
+
+
+
+ ASTD ID (tss)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ An informative report on individual spot(s) from a two-dimensional (2D PAGE) gel.
+
+
+ 2D PAGE spot report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a spot from a two-dimensional (protein) gel.
+
+
+
+ Spot ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a spot from a two-dimensional (protein) gel in the SWISS-2DPAGE database.
+
+
+
+ Spot serial number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a spot from a two-dimensional (protein) gel from a HSC-2DPAGE database.
+
+
+
+ Spot ID (HSC-2DPAGE)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data on the interaction of a protein (or protein domain) with specific structural (3D) and/or sequence motifs.
+
+ Protein-motif interaction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a strain of an organism variant, typically a plant, virus or bacterium.
+
+
+
+ Strain identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of an item from the CABRI database.
+
+
+
+ CABRI accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Report of genotype experiment including case control, population, and family studies. These might use array based methods and re-sequencing methods.
+
+
+ Experiment report (genotyping)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of genotype experiment metadata.
+
+
+
+ Genotype experiment ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the EGA database.
+
+
+
+ EGA accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ IPI[0-9]{8}
+ Identifier of a protein entry catalogued in the International Protein Index (IPI) database.
+
+
+
+ IPI protein ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of a protein from the RefSeq database.
+ RefSeq protein ID
+
+
+
+ RefSeq accession (protein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry (promoter) from the EPD database.
+ EPD identifier
+
+
+
+ EPD ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the TAIR database.
+
+
+
+ TAIR accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an Arabidopsis thaliana gene from the TAIR database.
+
+
+
+ TAIR accession (At gene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UniSTS database.
+
+
+
+ UniSTS accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UNITE database.
+
+
+
+ UNITE accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UTR database.
+
+
+
+ UTR accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UPI[A-F0-9]{10}
+ Accession number of a UniParc (protein sequence) database entry.
+ UPI
+ UniParc ID
+
+
+
+ UniParc accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the Rouge or HUGE databases.
+
+
+
+ mFLJ/mKIAA number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific fungus.
+
+ Fungi annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific fungus anamorph.
+
+ Fungi annotation (anamorph)
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the Ensembl database.
+ Ensembl ID (protein)
+ Protein ID (Ensembl)
+
+
+
+ Ensembl protein ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific toxin.
+
+ Toxin annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on a membrane protein.
+
+ Protein report (membrane protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ An informative report on tentative or known protein-drug interaction(s).
+
+
+ Protein-drug interaction report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning a map of molecular sequence(s).
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Map data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning phylogeny, typically of molecular sequences, including reports of information concerning or derived from a phylogenetic tree, or from comparing two or more phylogenetic trees.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Phylogenetic data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning one or more protein molecules.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Protein data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning one or more nucleic acid molecules.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Nucleic acid data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning, extracted from, or derived from the analysis of a scientific text (or texts) such as a full text article from a scientific journal.
+ Article data
+ Scientific text data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation. It includes concepts that are best described as scientific text or closely concerned with or derived from text.
+ Text data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+ Typically a simple numerical or string value that controls the operation of a tool.
+
+ Parameter
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning a specific type of molecule.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Molecular data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+
+ An informative report on a specific molecule.
+
+ Molecule report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific organism.
+ Organism annotation
+
+
+ Organism report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about how a scientific experiment or analysis was carried out that results in a specific set of data or results used for further analysis or to test a specific hypothesis.
+ Experiment annotation
+ Experiment metadata
+ Experiment report
+
+
+ Protocol
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An attribute of a molecular sequence, possibly in reference to some other sequence.
+ Sequence parameter
+
+
+ Sequence attribute
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Output from a serial analysis of gene expression (SAGE), massively parallel signature sequencing (MPSS) or sequencing by synthesis (SBS) experiment. In all cases this is a list of short sequence tags and the number of times it is observed.
+ Sequencing-based expression profile
+ Sequence tag profile (with gene assignment)
+
+
+ SAGE, MPSS and SBS experiments are usually performed to study gene expression. The sequence tags are typically subsequently annotated (after a database search) with the mRNA (and therefore gene) the tag was extracted from.
+ This includes tag to gene assignments (tag mapping) of SAGE, MPSS and SBS data. Typically this is the sequencing-based expression profile annotated with gene identifiers.
+ Sequence tag profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning a mass spectrometry measurement.
+
+
+ Mass spectrometry data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw data from experimental methods for determining protein structure.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein structure raw data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a mutation.
+
+
+
+ Mutation identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning an alignment of two or more molecular sequences, structures or derived data.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. This includes entities derived from sequences and structures such as motifs and profiles.
+ Alignment data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning an index of data.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Data index data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Single letter amino acid identifier, e.g. G.
+
+
+
+ Amino acid name (single letter)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Three letter amino acid identifier, e.g. GLY.
+
+
+
+ Amino acid name (three letter)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Full name of an amino acid, e.g. Glycine.
+
+
+
+ Amino acid name (full name)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a toxin.
+
+
+
+ Toxin identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a toxin from the ArachnoServer database.
+
+
+
+ ArachnoServer ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A simple summary of expressed genes.
+
+ Expressed gene list
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a monomer from the BindingDB database.
+
+
+
+ BindingDB Monomer ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept from the GO ontology.
+
+ GO concept name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a 'biological process' concept from the Gene Ontology.
+
+
+
+ GO concept ID (biological process)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a 'molecular function' concept from the Gene Ontology.
+
+
+
+ GO concept ID (molecular function)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept for a cellular component from the GO ontology.
+
+ GO concept name (cellular component)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image arising from a Northern Blot experiment.
+
+
+ Northern blot image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a blot from a Northern Blot.
+
+
+
+ Blot ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a blot from a Northern Blot from the BlotBase database.
+
+
+
+ BlotBase blot ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw data on a biological hierarchy, describing the hierarchy proper, hierarchy components and possibly associated annotation.
+ Hierarchy annotation
+
+
+ Hierarchy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry from a database of biological hierarchies.
+
+ Hierarchy identifier
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the Brite database of biological hierarchies.
+
+
+
+ Brite hierarchy ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A type (represented as a string) of cancer.
+
+ Cancer type
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier for an organism used in the BRENDA database.
+
+
+
+ BRENDA organism ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a taxon using the controlled vocabulary of the UniGene database.
+ UniGene organism abbreviation
+
+
+
+ UniGene taxon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a taxon using the controlled vocabulary of the UTRdb database.
+
+
+
+ UTRdb taxon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a catalogue of biological resources.
+ Catalogue identifier
+
+
+
+ Catalogue ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a catalogue of biological resources from the CABRI database.
+
+
+
+ CABRI catalogue name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on protein secondary structure alignment-derived data or metadata.
+
+ Secondary structure alignment metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Was deprecated since 1.5, but not correctly (fully) obsoleted until 1.19.
+ 1.5
+
+
+ An informative report on the physical, chemical or other information concerning the interaction of two or more molecules (or parts of molecules).
+
+ Molecule interaction report
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Primary data about a specific biological pathway or network (the nodes and connections within the pathway or network).
+ Network
+ Pathway
+
+
+ Pathway or network
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning one or more small molecules.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Small molecule data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning a particular genotype, phenotype or a genotype / phenotype relation.
+
+ Genotype and phenotype data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image, hybridisation or some other data arising from a study of feature/molecule expression, typically profiling or quantification.
+ Gene expression data
+ Gene product profile
+ Gene product quantification data
+ Gene transcription profile
+ Gene transcription quantification data
+ Metabolite expression data
+ Microarray data
+ Non-coding RNA profile
+ Non-coding RNA quantification data
+ Protein expression data
+ RNA profile
+ RNA quantification data
+ RNA-seq data
+ Transcriptome profile
+ Transcriptome quantification data
+ mRNA profile
+ mRNA quantification data
+ Protein profile
+ Protein quantification data
+ Proteome profile
+ Proteome quantification data
+
+
+ Expression data
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ C[0-9]+
+ Unique identifier of a chemical compound from the KEGG database.
+ KEGG compound ID
+ KEGG compound identifier
+
+
+
+ Compound ID (KEGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name (not necessarily stable) of an entry (RNA family) from the RFAM database.
+
+
+
+ RFAM name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ R[0-9]+
+ Identifier of a biological reaction from the KEGG reactions database.
+
+
+
+ Reaction ID (KEGG)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ D[0-9]+
+ Unique identifier of a drug from the KEGG Drug database.
+
+
+
+ Drug ID (KEGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ENS[A-Z]*[FPTG][0-9]{11}
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl database.
+ Ensembl IDs
+
+
+
+ Ensembl ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [A-Z][0-9]+(\.[-[0-9]+])?
+ An identifier of a disease from the International Classification of Diseases (ICD) database.
+
+
+
+ ICD identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9A-Za-z]+:[0-9]+:[0-9]{1,5}(\.[0-9])?
+ Unique identifier of a sequence cluster from the CluSTr database.
+ CluSTr ID
+ CluSTr cluster ID
+
+
+
+ Sequence cluster ID (CluSTr)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ G[0-9]+
+ Unique identifier of a glycan ligand from the KEGG GLYCAN database (a subset of KEGG LIGAND).
+
+
+
+ KEGG Glycan ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+\.[A-Z]\.[0-9]+\.[0-9]+\.[0-9]+
+ A unique identifier of a family from the transport classification database (TCDB) of membrane transport proteins.
+ TC number
+
+
+
+ OBO file for regular expression.
+ TCDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ MINT\-[0-9]{1,5}
+ Unique identifier of an entry from the MINT database of protein-protein interactions.
+
+
+
+ MINT ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DIP[\:\-][0-9]{3}[EN]
+ Unique identifier of an entry from the DIP database of protein-protein interactions.
+
+
+
+ DIP ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A[0-9]{6}
+ Unique identifier of a protein listed in the UCSD-Nature Signaling Gateway Molecule Pages database.
+
+
+
+ Signaling Gateway protein ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein modification catalogued in a database.
+
+
+
+ Protein modification ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AA[0-9]{4}
+ Identifier of a protein modification catalogued in the RESID database.
+
+
+
+ RESID ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{4,7}
+ Identifier of an entry from the RGD database.
+
+
+
+ RGD ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AASequence:[0-9]{10}
+ Identifier of a protein sequence from the TAIR database.
+
+
+
+ TAIR accession (protein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ HMDB[0-9]{5}
+ Identifier of a small molecule metabolite from the Human Metabolome Database (HMDB).
+ HMDB ID
+
+
+
+ Compound ID (HMDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ LM(FA|GL|GP|SP|ST|PR|SL|PK)[0-9]{4}([0-9a-zA-Z]{4})?
+ Identifier of an entry from the LIPID MAPS database.
+ LM ID
+
+
+
+ LIPID MAPS ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PAp[0-9]{8}
+ PDBML:pdbx_PDB_strand_id
+ Identifier of a peptide from the PeptideAtlas peptide databases.
+
+
+
+ PeptideAtlas ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ Identifier of a report of molecular interactions from a database (typically).
+
+
+ Molecular interaction ID
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ A unique identifier of an interaction from the BioGRID database.
+
+
+
+ BioGRID interaction ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ S[0-9]{2}\.[0-9]{3}
+ Unique identifier of a peptidase enzyme from the MEROPS database.
+ MEROPS ID
+
+
+
+ Enzyme ID (MEROPS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a mobile genetic element.
+
+
+
+ Mobile genetic element ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ mge:[0-9]+
+ An identifier of a mobile genetic element from the Aclame database.
+
+
+
+ ACLAME ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PWY[a-zA-Z_0-9]{2}\-[0-9]{3}
+ Identifier of an entry from the Saccharomyces genome database (SGD).
+
+
+
+ SGD ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a book.
+
+
+
+ Book ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (ISBN)?(-13|-10)?[:]?[ ]?([0-9]{2,3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[ -]?([0-9]|X)
+ The International Standard Book Number (ISBN) is for identifying printed books.
+
+
+
+ ISBN
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ B[0-9]{5}
+ Identifier of a metabolite from the 3DMET database.
+ 3DMET ID
+
+
+
+ Compound ID (3DMET)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ([A-NR-Z][0-9][A-Z][A-Z0-9][A-Z0-9][0-9])_.*|([OPQ][0-9][A-Z0-9][A-Z0-9][A-Z0-9][0-9]_.*)|(GAG_.*)|(MULT_.*)|(PFRAG_.*)|(LIP_.*)|(CAT_.*)
+ A unique identifier of an interaction from the MatrixDB database.
+
+
+
+ MatrixDB interaction ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ A unique identifier for pathways, reactions, complexes and small molecules from the cPath (Pathway Commons) database.
+
+
+
+ These identifiers are unique within the cPath database, however, they are not stable between releases.
+ cPath ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ [0-9]+
+ Identifier of an assay from the PubChem database.
+
+
+
+ PubChem bioassay ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the PubChem database.
+ PubChem identifier
+
+
+
+ PubChem ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ M[0-9]{4}
+ Identifier of an enzyme reaction mechanism from the MACie database.
+ MACie entry number
+
+
+
+ Reaction ID (MACie)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ MI[0-9]{7}
+ Identifier for a gene from the miRBase database.
+ miRNA ID
+ miRNA identifier
+ miRNA name
+
+
+
+ Gene ID (miRBase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ZDB\-GENE\-[0-9]+\-[0-9]+
+ Identifier for a gene from the Zebrafish information network genome (ZFIN) database.
+
+
+
+ Gene ID (ZFIN)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{5}
+ Identifier of an enzyme-catalysed reaction from the Rhea database.
+
+
+
+ Reaction ID (Rhea)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UPA[0-9]{5}
+ Identifier of a biological pathway from the Unipathway database.
+ upaid
+
+
+
+ Pathway ID (Unipathway)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a small molecule from the ChEMBL database.
+ ChEMBL ID
+
+
+
+ Compound ID (ChEMBL)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+
+ Unique identifier of an entry from the Ligand-gated ion channel (LGICdb) database.
+
+
+
+ LGICdb identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a biological reaction (kinetics entry) from the SABIO-RK reactions database.
+
+
+
+ Reaction kinetics ID (SABIO-RK)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of an entry from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ PharmGKB ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of a pathway from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ Pathway ID (PharmGKB)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of a disease from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ Disease ID (PharmGKB)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of a drug from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ Drug ID (PharmGKB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAP[0-9]+
+ Identifier of a drug from the Therapeutic Target Database (TTD).
+
+
+
+ Drug ID (TTD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ TTDS[0-9]+
+ Identifier of a target protein from the Therapeutic Target Database (TTD).
+
+
+
+ Target ID (TTD)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique identifier of a type or group of cells.
+
+
+
+ Cell type identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ A unique identifier of a neuron from the NeuronDB database.
+
+
+
+ NeuronDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+
+ A unique identifier of a neuron from the NeuroMorpho database.
+
+
+
+ NeuroMorpho ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a chemical from the ChemIDplus database.
+ ChemIDplus ID
+
+
+
+ Compound ID (ChemIDplus)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SMP[0-9]{5}
+ Identifier of a pathway from the Small Molecule Pathway Database (SMPDB).
+
+
+
+ Pathway ID (SMPDB)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the BioNumbers database of key numbers and associated data in molecular biology.
+
+
+
+ BioNumbers ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ T3D[0-9]+
+ Unique identifier of a toxin from the Toxin and Toxin Target Database (T3DB) database.
+
+
+
+ T3DB ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a carbohydrate.
+
+
+
+ Carbohydrate identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the GlycomeDB database.
+
+
+
+ GlycomeDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+[0-9]+
+ Identifier of an entry from the LipidBank database.
+
+
+
+ LipidBank ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ cd[0-9]{5}
+ Identifier of a conserved domain from the Conserved Domain Database.
+
+
+
+ CDD ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{1,5}
+ An identifier of an entry from the MMDB database.
+ MMDB accession
+
+
+
+ MMDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Unique identifier of an entry from the iRefIndex database of protein-protein interactions.
+
+
+
+ iRefIndex ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Unique identifier of an entry from the ModelDB database.
+
+
+
+ ModelDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a signaling pathway from the Database of Quantitative Cellular Signaling (DQCS).
+
+
+
+ Pathway ID (DQCS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database (Homo sapiens division).
+
+ Ensembl ID (Homo sapiens)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Bos taurus' division).
+
+ Ensembl ID ('Bos taurus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Canis familiaris' division).
+
+ Ensembl ID ('Canis familiaris')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Cavia porcellus' division).
+
+ Ensembl ID ('Cavia porcellus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Ciona intestinalis' division).
+
+ Ensembl ID ('Ciona intestinalis')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Ciona savignyi' division).
+
+ Ensembl ID ('Ciona savignyi')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Danio rerio' division).
+
+ Ensembl ID ('Danio rerio')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Dasypus novemcinctus' division).
+
+ Ensembl ID ('Dasypus novemcinctus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Echinops telfairi' division).
+
+ Ensembl ID ('Echinops telfairi')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Erinaceus europaeus' division).
+
+ Ensembl ID ('Erinaceus europaeus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Felis catus' division).
+
+ Ensembl ID ('Felis catus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Gallus gallus' division).
+
+ Ensembl ID ('Gallus gallus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Gasterosteus aculeatus' division).
+
+ Ensembl ID ('Gasterosteus aculeatus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Homo sapiens' division).
+
+ Ensembl ID ('Homo sapiens')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Loxodonta africana' division).
+
+ Ensembl ID ('Loxodonta africana')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Macaca mulatta' division).
+
+ Ensembl ID ('Macaca mulatta')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Monodelphis domestica' division).
+
+ Ensembl ID ('Monodelphis domestica')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Mus musculus' division).
+
+ Ensembl ID ('Mus musculus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Myotis lucifugus' division).
+
+ Ensembl ID ('Myotis lucifugus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Ornithorhynchus anatinus' division).
+
+ Ensembl ID ("Ornithorhynchus anatinus")
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Oryctolagus cuniculus' division).
+
+ Ensembl ID ('Oryctolagus cuniculus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Oryzias latipes' division).
+
+ Ensembl ID ('Oryzias latipes')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Otolemur garnettii' division).
+
+ Ensembl ID ('Otolemur garnettii')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Pan troglodytes' division).
+
+ Ensembl ID ('Pan troglodytes')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Rattus norvegicus' division).
+
+ Ensembl ID ('Rattus norvegicus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Spermophilus tridecemlineatus' division).
+
+ Ensembl ID ('Spermophilus tridecemlineatus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Takifugu rubripes' division).
+
+ Ensembl ID ('Takifugu rubripes')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Tupaia belangeri' division).
+
+ Ensembl ID ('Tupaia belangeri')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Xenopus tropicalis' division).
+
+ Ensembl ID ('Xenopus tropicalis')
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a protein domain (or other node) from the CATH database.
+
+
+
+ CATH identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 2.10.10.10
+ A code number identifying a family from the CATH database.
+
+
+
+ CATH node ID (family)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an enzyme from the CAZy enzymes database.
+ CAZy ID
+
+
+
+ Enzyme ID (CAZy)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier assigned by the I.M.A.G.E. consortium to a clone (cloned molecular sequence).
+ I.M.A.G.E. cloneID
+ IMAGE cloneID
+
+
+
+ Clone ID (IMAGE)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a 'cellular component' concept from the Gene Ontology.
+ GO concept identifier (cellular compartment)
+
+
+
+ GO concept ID (cellular component)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a chromosome as used in the BioCyc database.
+
+
+
+ Chromosome name (BioCyc)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a gene expression profile from the CleanEx database.
+
+
+
+ CleanEx entry name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of (typically a list of) gene expression experiments catalogued in the CleanEx database.
+
+
+
+ CleanEx dataset code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information concerning a genome as a whole.
+
+
+ Genome report
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein complex from the CORUM database.
+ CORUM complex ID
+
+
+
+ Protein ID (CORUM)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a position-specific scoring matrix from the CDD database.
+
+
+
+ CDD PSSM-ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the CuticleDB database.
+ CuticleDB ID
+
+
+
+ Protein ID (CuticleDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a predicted transcription factor from the DBD database.
+
+
+
+ DBD ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ General annotation on an oligonucleotide probe, or a set of probes.
+ Oligonucleotide probe sets annotation
+
+
+ Oligonucleotide probe annotation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an oligonucleotide from a database.
+
+
+
+ Oligonucleotide ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an oligonucleotide probe from the dbProbe database.
+
+
+
+ dbProbe ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Physicochemical property data for one or more dinucleotides.
+
+
+ Dinucleotide property
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a dinucleotide property from the DiProDB database.
+
+
+
+ DiProDB ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ disordered structure in a protein.
+
+
+ Protein features report (disordered structure)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the DisProt database.
+ DisProt ID
+
+
+
+ Protein ID (DisProt)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Annotation on an embryo or concerning embryological development.
+
+ Embryo report
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a gene transcript from the Ensembl database.
+ Transcript ID (Ensembl)
+
+
+
+ Ensembl transcript ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on one or more small molecules that are enzyme inhibitors.
+
+ Inhibitor annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Moby:GeneAccessionList
+ An identifier of a promoter of a gene that is catalogued in a database.
+
+
+
+ Promoter ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an EST sequence.
+
+
+
+ EST accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an EST sequence from the COGEME database.
+
+
+
+ COGEME EST ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a unisequence from the COGEME database.
+
+
+
+ A unisequence is a single sequence assembled from ESTs.
+ COGEME unisequence ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry (protein family) from the GeneFarm database.
+ GeneFarm family ID
+
+
+
+ Protein family ID (GeneFarm)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a family of organisms.
+
+
+
+ Family name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a genus of viruses.
+
+ Genus name (virus)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a family of viruses.
+
+ Family name (virus)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a SwissRegulon database.
+
+ Database name (SwissRegulon)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A feature identifier as used in the SwissRegulon database.
+
+
+
+ This can be name of a gene, the ID of a TFBS, or genomic coordinates in form "chr:start..end".
+ Sequence feature ID (SwissRegulon)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a gene in the NMPDR database.
+
+
+
+ A FIG ID consists of four parts: a prefix, genome id, locus type and id number.
+ FIG ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a gene in the Xenbase database.
+
+
+
+ Gene ID (Xenbase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a gene in the Genolist database.
+
+
+
+ Gene ID (Genolist)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the Genolist genes database.
+
+ Gene name (Genolist)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry (promoter) from the ABS database.
+ ABS identifier
+
+
+
+ ABS ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a transcription factor from the AraC-XylS database.
+
+
+
+ AraC-XylS ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name of an entry (gene) from the HUGO database.
+
+ Gene name (HUGO)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a locus from the PseudoCAP database.
+
+
+
+ Locus ID (PseudoCAP)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a locus from the UTR database.
+
+
+
+ Locus ID (UTR)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a monosaccharide from the MonosaccharideDB database.
+
+
+
+ MonosaccharideDB ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a subdivision of the Collagen Mutation Database (CMD) database.
+
+ Database name (CMD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a subdivision of the Osteogenesis database.
+
+ Database name (Osteogenesis)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a particular genome.
+
+
+
+ Genome identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.26
+
+
+ An identifier of a particular genome.
+
+
+ GenomeReviews ID
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the GlycoMapsDB (Glycosciences.de) database.
+
+
+
+ GlycoMap ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A conformational energy map of the glycosidic linkages in a carbohydrate molecule.
+
+
+ Carbohydrate conformational map
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a transcription factor.
+
+
+
+ Transcription factor name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a membrane transport protein from the transport classification database (TCDB).
+
+
+
+ TCID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PF[0-9]{5}
+ Name of a domain from the Pfam database.
+
+
+
+ Pfam domain name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CL[0-9]{4}
+ Accession number of a Pfam clan.
+
+
+
+ Pfam clan ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for a gene from the VectorBase database.
+ VectorBase ID
+
+
+
+ Gene ID (VectorBase)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UTRSite database of regulatory motifs in eukaryotic UTRs.
+
+
+
+ UTRSite ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report about a specific or conserved pattern in a molecular sequence, such as its context in genes or proteins, its role, origin or method of construction, etc.
+ Sequence motif report
+ Sequence profile report
+
+
+ Sequence signature report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on a particular locus.
+
+ Locus annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Official name of a protein as used in the UniProt database.
+
+
+
+ Protein name (UniProt)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ One or more terms from one or more controlled vocabularies which are annotations on an entity.
+
+ The concepts are typically provided as a persistent identifier or some other link to the source ontologies. Evidence of the validity of the annotation might be included.
+ Term ID list
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a protein family from the HAMAP database.
+
+
+
+ HAMAP ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+ Basic information concerning an identifier of data (typically including the identifier itself). For example, a gene symbol with information concerning its provenance.
+
+ Identifier with metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation about a gene symbol.
+
+ Gene symbol annotation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a RNA transcript.
+
+
+
+ Transcript ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an RNA transcript from the H-InvDB database.
+
+
+
+ HIT ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a gene cluster in the H-InvDB database.
+
+
+
+ HIX ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an antibody from the HPA database.
+
+
+
+ HPA antibody id
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a human major histocompatibility complex (HLA) or other protein from the IMGT/HLA database.
+
+
+
+ IMGT/HLA ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a gene assigned by the J. Craig Venter Institute (JCVI).
+
+
+
+ Gene ID (JCVI)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a kinase protein.
+
+
+
+ Kinase name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a physical entity from the ConsensusPathDB database.
+
+
+
+ ConsensusPathDB entity ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a physical entity from the ConsensusPathDB database.
+
+
+
+ ConsensusPathDB entity name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The number of a strain of algae and protozoa from the CCAP database.
+
+
+
+ CCAP strain number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of stock from a catalogue of biological resources.
+
+
+
+ Stock number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A stock number from The Arabidopsis information resource (TAIR).
+
+
+
+ Stock number (TAIR)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the RNA editing database (REDIdb).
+
+
+
+ REDIdb ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a domain from the SMART database.
+
+
+
+ SMART domain name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry (family) from the PANTHER database.
+ Panther family ID
+
+
+
+ Protein family ID (PANTHER)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier for a virus from the RNAVirusDB database.
+
+
+
+ Could list (or reference) other taxa here from https://www.phenoscape.org/wiki/Taxonomic_Rank_Vocabulary.
+ RNAVirusDB ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of annotation on a (group of) viruses (catalogued in a database).
+ Virus ID
+
+
+
+ Virus identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a genome project assigned by NCBI.
+
+
+
+ NCBI Genome Project ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a whole genome assigned by the NCBI.
+
+
+
+ NCBI genome accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data concerning, extracted from, or derived from the analysis of a sequence profile, such as its name, length, technical details about the profile or its construction, the biological role or annotation, and so on.
+
+
+ Sequence profile data
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a membrane protein from the TopDB database.
+ TopDB ID
+
+
+
+ Protein ID (TopDB)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a two-dimensional (protein) gel.
+ Gel identifier
+
+
+
+ Gel ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a reference map gel from the SWISS-2DPAGE database.
+
+
+
+ Reference map name (SWISS-2DPAGE)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a peroxidase protein from the PeroxiBase database.
+ PeroxiBase ID
+
+
+
+ Protein ID (PeroxiBase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the SISYPHUS database of tertiary structure alignments.
+
+
+
+ SISYPHUS ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an open reading frame (catalogued in a database).
+
+
+
+ ORF ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of an open reading frame.
+
+
+
+ ORF identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [1-9][0-9]*
+ Identifier of an entry from the GlycosciencesDB database.
+ LInear Notation for Unique description of Carbohydrate Sequences ID
+
+
+
+ LINUCS ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a ligand-gated ion channel protein from the LGICdb database.
+ LGICdb ID
+
+
+
+ Protein ID (LGICdb)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an EST sequence from the MaizeDB database.
+
+
+
+ MaizeDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of gene in the MfunGD database.
+
+
+
+ Gene ID (MfunGD)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a disease from the Orpha database.
+
+
+
+ Orpha number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the EcID database.
+
+
+
+ Protein ID (EcID)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a cDNA molecule catalogued in the RefSeq database.
+
+
+
+ Clone ID (RefSeq)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a cone snail toxin protein from the ConoServer database.
+
+
+
+ Protein ID (ConoServer)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a GeneSNP database entry.
+
+
+
+ GeneSNP ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a lipid.
+
+
+
+ Lipid identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A flat-file (textual) data archive.
+
+
+ Databank
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A web site providing data (web pages) on a common theme to a HTTP client.
+
+
+ Web portal
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for a gene from the VBASE2 database.
+ VBASE2 ID
+
+
+
+ Gene ID (VBASE2)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier for a virus from the DPVweb database.
+ DPVweb virus ID
+
+
+
+ DPVweb ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a pathway from the BioSystems pathway database.
+
+
+
+ Pathway ID (BioSystems)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data concerning a proteomics experiment.
+
+ Experimental data (proteomics)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An abstract of a scientific article.
+
+
+ Abstract
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a lipid structure.
+
+
+ Lipid structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the (3D) structure of a drug.
+
+
+ Drug structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the (3D) structure of a toxin.
+
+
+ Toxin structure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple matrix of numbers, where each value (or column of values) is derived from analysis of the corresponding position in a sequence alignment.
+ PSSM
+
+
+ Position-specific scoring matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A matrix of distances between molecular entities, where a value (distance) is (typically) derived from comparison of two entities and reflects their similarity.
+
+
+ Distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Distances (values representing similarity) between a group of molecular structures.
+
+
+ Structural distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Bibliographic data concerning scientific article(s).
+
+ Article metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A concept from a biological ontology.
+
+
+ This includes any fields from the concept definition such as concept name, definition, comments and so on.
+ Ontology concept
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A numerical measure of differences in the frequency of occurrence of synonymous codons in DNA sequences.
+
+
+ Codon usage bias
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Northern Blot experiments.
+
+
+ Northern blot report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map showing distance between genetic markers estimated by radiation-induced breaks in a chromosome.
+ RH map
+
+
+ The radiation method can break very closely linked markers providing a more detailed map. Most genetic markers and subsequences may be located to a defined map position and with more precise estimates of distance than a linkage map.
+ Radiation hybrid map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple list of data identifiers (such as database accessions), possibly with additional basic information on the addressed data.
+
+
+ ID list
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gene frequencies data that may be read during phylogenetic tree calculation.
+
+
+ Phylogenetic gene frequencies data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A set of sub-sequences displaying some type of polymorphism, typically indicating the sequence in which they occur, their position and other metadata.
+
+ Sequence set (polymorphic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An entry (resource) from the DRCAT bioinformatics resource catalogue.
+
+ DRCAT resource
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a multi-protein complex; two or more polypeptides chains in a stable, functional association with one another.
+
+
+ Protein complex
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a protein (3D) structural motif; any group of contiguous or non-contiguous amino acid residues but typically those forming a feature with a structural or functional role.
+
+
+ Protein structural motif
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific lipid 3D structure(s).
+
+
+ Lipid report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Image of one or more molecular secondary structures.
+
+ Secondary structure image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on general information, properties or features of one or more molecular secondary structures.
+
+ Secondary structure report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ DNA sequence-specific feature annotation (not in a feature table).
+
+ DNA features
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Features concerning RNA or regions of DNA that encode an RNA molecule.
+
+ RNA features report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Biological data that has been plotted as a graph of some type, or plotting instructions for rendering such a graph.
+ Graph data
+
+
+ Plot
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A protein sequence and associated metadata.
+ Sequence record (protein)
+
+
+ Protein sequence record
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A nucleic acid sequence and associated metadata.
+ Nucleotide sequence record
+ Sequence record (nucleic acid)
+ DNA sequence record
+ RNA sequence record
+
+
+ Nucleic acid sequence record
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A protein sequence and comprehensive metadata (such as a feature table), typically corresponding to a full entry from a molecular sequence database.
+
+
+ Protein sequence record (full)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A nucleic acid sequence and comprehensive metadata (such as a feature table), typically corresponding to a full entry from a molecular sequence database.
+
+
+ Nucleic acid sequence record (full)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a mathematical model, typically an entry from a database.
+
+
+
+ Biological model accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a type or group of cells.
+
+
+
+ Cell type name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a type or group of cells (catalogued in a database).
+ Cell type ID
+
+
+
+ Cell type accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of chemicals.
+ Chemical compound accession
+ Small molecule accession
+
+
+
+ Compound accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a drug.
+
+
+
+ Drug accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a toxin.
+
+
+
+ Toxin name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a toxin (catalogued in a database).
+
+
+
+ Toxin accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a monosaccharide (catalogued in a database).
+
+
+
+ Monosaccharide accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Common name of a drug.
+
+
+
+ Drug name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of carbohydrates.
+
+
+
+ Carbohydrate accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a specific molecule (catalogued in a database).
+
+
+
+ Molecule accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a data definition (catalogued in a database).
+
+
+
+ Data resource definition accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of a particular genome (in a database).
+
+
+
+ Genome accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of a map of a molecular sequence (deposited in a database).
+
+
+
+ Map accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of lipids.
+
+
+
+ Lipid accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a peptide deposited in a database.
+
+
+
+ Peptide ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a protein deposited in a database.
+ Protein accessions
+
+
+
+ Protein accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of annotation on a (group of) organisms (catalogued in a database).
+
+
+
+ Organism accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:BriefOccurrenceRecord
+ Moby:FirstEpithet
+ Moby:InfraspecificEpithet
+ Moby:OccurrenceRecord
+ Moby:Organism_Name
+ Moby:OrganismsLongName
+ Moby:OrganismsShortName
+ The name of an organism (or group of organisms).
+
+
+
+ Organism name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a protein family (that is deposited in a database).
+
+
+
+ Protein family accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of transcription factors or binding sites.
+
+
+
+ Transcription factor accession
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a strain of an organism variant, typically a plant, virus or bacterium.
+
+
+
+ Strain accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ 1.26
+
+ An accession of annotation on a (group of) viruses (catalogued in a database).
+
+
+ Virus identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Metadata on sequence features.
+
+
+ Sequence features metadata
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a Gramene database entry.
+
+
+
+ Gramene identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of an entry from the DDBJ sequence database.
+ DDBJ ID
+ DDBJ accession number
+ DDBJ identifier
+
+
+
+ DDBJ accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of an entity from the ConsensusPathDB database.
+
+
+
+ ConsensusPathDB identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Data concerning, extracted from, or derived from the analysis of molecular sequence(s).
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Sequence data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning codon usage.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Codon usage
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+
+ Data derived from the analysis of a scientific text such as a full text article from a scientific journal.
+
+ Article report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report of information about molecular sequence(s), including basic information (metadata), and reports generated from molecular sequence analysis, including positional features and non-positional properties.
+ Sequence-derived report
+
+
+ Sequence report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the properties or features of one or more protein secondary structures.
+
+
+ Protein secondary structure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A Hopp and Woods plot of predicted antigenicity of a peptide or protein.
+
+
+ Hopp and Woods plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+
+ A melting curve of a double-stranded nucleic acid molecule (DNA or DNA/RNA).
+
+
+ Nucleic acid melting curve
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ A probability profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA).
+
+
+ Nucleic acid probability profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ A temperature profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA).
+
+
+ Nucleic acid temperature profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report typically including a map (diagram) of a gene regulatory network.
+
+
+ Gene regulatory network report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ An informative report on a two-dimensional (2D PAGE) gel.
+
+
+ 2D PAGE gel report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.14
+
+ General annotation on a set of oligonucleotide probes, such as the gene name with which the probe set is associated and which probes belong to the set.
+
+
+ Oligonucleotide probe sets annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An image from a microarray experiment which (typically) allows a visualisation of probe hybridisation and gene-expression data.
+
+ Microarray image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data (typically biological or biomedical) that has been rendered into an image, typically for display on screen.
+ Image data
+
+
+ Image
+ http://semanticscience.org/resource/SIO_000079
+ http://semanticscience.org/resource/SIO_000081
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of a molecular sequence, possibly with sequence features or properties shown.
+
+
+ Sequence image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on protein properties concerning hydropathy.
+ Protein hydropathy report
+
+
+ Protein hydropathy data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning a computational workflow.
+
+ Workflow data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A computational workflow.
+
+ Workflow
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning molecular secondary structure data.
+
+ Secondary structure data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possibly in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw protein sequence (string of characters).
+
+
+ Protein sequence (raw)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possibly in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw nucleic acid sequence.
+
+
+ Nucleic acid sequence (raw)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ One or more protein sequences, possibly with associated annotation.
+ Amino acid sequence
+ Amino acid sequences
+ Protein sequences
+
+
+ Protein sequence
+ http://purl.org/biotop/biotop.owl#AminoAcidSequenceInformation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more nucleic acid sequences, possibly with associated annotation.
+ Nucleic acid sequences
+ Nucleotide sequence
+ Nucleotide sequences
+ DNA sequence
+
+
+ Nucleic acid sequence
+ http://purl.org/biotop/biotop.owl#NucleotideSequenceInformation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning a biochemical reaction, typically data and more general annotation on the kinetics of enzyme-catalysed reaction.
+ Enzyme kinetics annotation
+ Reaction annotation
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Reaction data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning small peptides.
+ Peptide data
+
+
+ Peptide property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Was deprecated since 1.5, but not correctly (fully) obsoleted until 1.19.
+ 1.5
+
+
+ An informative report concerning the classification of protein sequences or structures.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Protein classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data concerning specific or conserved pattern in molecular sequences.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Sequence motif data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning models representing a (typically multiple) sequence alignment.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Sequence profile data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning a specific biological pathway or network.
+
+ Pathway or network data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report concerning or derived from the analysis of a biological pathway or network, such as a map (diagram) or annotation.
+
+
+ Pathway or network report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A thermodynamic or kinetic property of a nucleic acid molecule.
+ Nucleic acid property (thermodynamic or kinetic)
+ Nucleic acid thermodynamic property
+
+
+ Nucleic acid thermodynamic data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Was deprecated since 1.5, but not correctly (fully) obsoleted until 1.19.
+ 1.5
+
+
+ Data concerning the classification of nucleic acid sequences or structures.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Nucleic acid classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on a classification of molecular sequences, structures or other entities.
+
+ This can include an entire classification, components such as classifiers, assignments of entities to a classification and so on.
+ Classification report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ key residues involved in protein folding.
+
+
+ Protein features report (key folding sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Geometry data for a protein structure, for example bond lengths, bond angles, torsion angles, chiralities, planarities etc.
+ Torsion angle data
+
+
+ Protein geometry data
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of protein structure.
+ Structure image (protein)
+
+
+ Protein structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Weights for sequence positions or characters in phylogenetic analysis where zero is defined as unweighted.
+
+
+ Phylogenetic character weights
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotation of one particular positional feature on a biomolecular (typically genome) sequence, suitable for import and display in a genome browser.
+ Genome annotation track
+ Genome track
+ Genome-browser track
+ Genomic track
+ Sequence annotation track
+
+
+ Annotation track
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ P43353|Q7M1G0|Q9C199|A5A6J6
+ [OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}
+ Accession number of a UniProt (protein sequence) database entry.
+ UniProt accession number
+ UniProt entry accession
+ UniProtKB accession
+ UniProtKB accession number
+ Swiss-Prot entry accession
+ TrEMBL entry accession
+
+
+
+ UniProt accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 16
+ [1-9][0-9]?
+ Identifier of a genetic code in the NCBI list of genetic codes.
+
+
+
+ NCBI genetic code ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a concept in an ontology of biological or bioinformatics concepts and relations.
+
+
+
+ Ontology concept identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept for a biological process from the GO ontology.
+
+ GO concept name (biological process)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept for a molecular function from the GO ontology.
+
+ GO concept name (molecular function)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the classification, identification and naming of organisms.
+ Taxonomic data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Taxonomy
+
+
+
+
+
+
+
+
+
+ beta13
+ EMBL/GENBANK/DDBJ coding feature protein identifier, issued by International collaborators.
+
+
+
+ This qualifier consists of a stable ID portion (3+5 format with 3 position letters and 5 numbers) plus a version number after the decimal point. When the protein sequence encoded by the CDS changes, only the version number of the /protein_id value is incremented; the stable part of the /protein_id remains unchanged and as a result will permanently be associated with a given protein; this qualifier is valid only on CDS features which translate into a valid protein.
+ Protein ID (EMBL/GenBank/DDBJ)
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+ A type of data that (typically) corresponds to entries from the primary biological databases and which is (typically) the primary input or output of a tool, i.e. the data the tool processes or generates, as distinct from metadata and identifiers which describe and identify such core data, parameters that control the behaviour of tools, reports of derivative data generated by tools and annotation.
+
+
+ Core data entities typically have a format and may be identified by an accession number.
+ Core data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Name or other identifier of molecular sequence feature(s).
+
+
+
+ Sequence feature identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ An identifier of a molecular tertiary structure, typically an entry from a structure database.
+
+
+
+ Structure identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ An identifier of an array of numerical values, such as a comparison matrix.
+
+
+
+ Matrix identifier
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ A report (typically a table) on character or word composition / frequency of protein sequence(s).
+
+
+ Protein sequence composition
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ A report (typically a table) on character or word composition / frequency of nucleic acid sequence(s).
+
+
+ Nucleic acid sequence composition (report)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A node from a classification of protein structural domain(s).
+
+ Protein domain classification node
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Duplicates http://edamontology.org/data_1002, hence deprecated.
+ 1.23
+
+ Unique numerical identifier of chemicals in the scientific literature, as assigned by the Chemical Abstracts Service.
+
+
+ CAS number
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+ Unique identifier of a drug conforming to the Anatomical Therapeutic Chemical (ATC) Classification System, a drug classification system controlled by the WHO Collaborating Centre for Drug Statistics Methodology (WHOCC).
+
+
+
+ ATC code
+
+
+
+
+
+
+
+
+ beta13
+ A unique, unambiguous, alphanumeric identifier of a chemical substance as catalogued by the Substance Registration System of the Food and Drug Administration (FDA).
+ Unique Ingredient Identifier
+
+
+
+ UNII
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Basic information concerning geographical location or time.
+
+ Geotemporal metadata
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Metadata concerning the software, hardware or other aspects of a computer system.
+
+
+ System metadata
+
+
+
+
+
+
+
+
+ beta13
+ 1.15
+
+ A name of a sequence feature, e.g. the name of a feature to be displayed to an end-user.
+
+
+ Sequence feature name
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Raw data such as measurements or other results from laboratory experiments, as generated from laboratory hardware.
+ Experimental measurement data
+ Experimentally measured data
+ Measured data
+ Measurement
+ Measurement data
+ Measurement metadata
+ Raw experimental data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Experimental measurement
+
+
+
+
+
+
+
+
+
+ beta13
+ Raw data (typically MIAME-compliant) for hybridisations from a microarray experiment.
+
+
+ Such data as found in Affymetrix CEL or GPR files.
+ Raw microarray data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ Data generated from processing and analysis of probe set data from a microarray experiment.
+ Gene annotation (expression)
+ Gene expression report
+ Microarray probe set data
+
+
+ Such data as found in Affymetrix .CHP files or data from other software such as RMA or dChip.
+ Processed microarray data
+
+
+
+
+
+
+
+
+
+ beta13
+ The final processed (normalised) data for a set of hybridisations in a microarray experiment.
+ Gene expression data matrix
+ Normalised microarray data
+
+
+ This combines data from all hybridisations.
+ Gene expression matrix
+
+
+
+
+
+
+
+
+ beta13
+ Annotation on a biological sample, for example experimental factors and their values.
+
+
+ This might include compound and dose in a dose response experiment.
+ Sample annotation
+
+
+
+
+
+
+
+
+ beta13
+ Annotation on the array itself used in a microarray experiment.
+
+
+ This might include gene identifiers, genomic coordinates, probe oligonucleotide sequences etc.
+ Microarray metadata
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ Annotation on laboratory and/or data processing protocols used in a microarray experiment.
+
+
+ This might describe e.g. the normalisation methods used to process the raw data.
+ Microarray protocol annotation
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Data concerning the hybridisations measured during a microarray experiment.
+
+
+ Microarray hybridisation data
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report of regions in a molecular sequence that are biased to certain characters.
+
+ Sequence features (compositionally-biased regions)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+ A report on features in a nucleic acid sequence that indicate changes to or differences between sequences.
+
+
+ Nucleic acid features (difference and change)
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+ A human-readable collection of information about regions within a nucleic acid sequence which form secondary or tertiary (3D) structures.
+ Nucleic acid features (structure)
+ Quadruplexes (report)
+ Stem loop (report)
+ d-loop (report)
+
+
+ The report may be based on analysis of nucleic acid sequence or structural data, or any annotation or information about specific nucleic acid 3D structure(s) or such structures in general.
+ Nucleic acid structure report
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ short repetitive subsequences (repeat sequences) in a protein sequence.
+
+
+ Protein features report (repeats)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ Report on the location of matches to profiles, motifs (conserved or functional patterns) or other signatures in one or more protein sequences.
+
+
+ Sequence motif matches (protein)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ Report on the location of matches to profiles, motifs (conserved or functional patterns) or other signatures in one or more nucleic acid sequences.
+
+
+ Sequence motif matches (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report on displacement loops in a mitochondrial DNA sequence.
+
+ A displacement loop is a region of mitochondrial DNA in which one of the strands is displaced by an RNA molecule.
+ Nucleic acid features (d-loop)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report on stem loops in a DNA sequence.
+
+ A stem loop is a hairpin structure; a double-helical structure formed when two complementary regions of a single strand of RNA or DNA molecule form base-pairs.
+ Nucleic acid features (stem loop)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ An informative report on features of messenger RNA (mRNA) molecules including precursor RNA, primary (unprocessed) transcript and fully processed molecules. This includes reports on a specific gene transcript, clone or EST.
+ Clone or EST (report)
+ Gene transcript annotation
+ Nucleic acid features (mRNA features)
+ Transcript (report)
+ mRNA (report)
+ mRNA features
+
+
+ This includes 5'untranslated region (5'UTR), coding sequences (CDS), exons, intervening sequences (intron) and 3'untranslated regions (3'UTR).
+ Gene transcript report
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ features of non-coding or functional RNA molecules, including tRNA and rRNA.
+
+
+ Non-coding RNA
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Features concerning transcription of DNA into RNA including the regulation of transcription.
+
+ This includes promoters, CAAT signals, TATA signals, -35 signals, -10 signals, GC signals, primer binding sites for initiation of transcription or reverse transcription, enhancer, attenuator, terminators and ribosome binding sites.
+ Transcriptional features (report)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report on predicted or actual immunoglobulin gene structure including constant, switch and variable regions and diversity, joining and variable segments.
+
+ Nucleic acid features (immunoglobulin gene structure)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'class' node from the SCOP database.
+
+ SCOP class
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'fold' node from the SCOP database.
+
+ SCOP fold
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'superfamily' node from the SCOP database.
+
+ SCOP superfamily
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'family' node from the SCOP database.
+
+ SCOP family
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'protein' node from the SCOP database.
+
+ SCOP protein
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'species' node from the SCOP database.
+
+ SCOP species
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ mass spectrometry experiments.
+
+
+ Mass spectrometry experiment
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Nucleic acid classification
+ A human-readable collection of information about a particular family of genes, typically a set of genes with similar sequence that originate from duplication of a common ancestor gene, or any other classification of nucleic acid sequences or structures that reflects gene structure.
+ Gene annotation (homology information)
+ Gene annotation (homology)
+ Gene family annotation
+ Gene homology (report)
+ Homology information
+
+
+ This includes reports on gene homologues between species.
+ Gene family report
+
+
+
+
+
+
+
+
+ beta13
+ An image of a protein.
+
+
+ Protein image
+
+
+
+
+
+
+
+
+ beta13
+ 1.24
+
+
+
+
+ An alignment of protein sequences and/or structures.
+
+ Protein alignment
+ true
+
+
+
+
+
+
+
+
+ 1.0
+ 1.8
+
+ sequencing experiment, including samples, sampling, preparation, sequencing, and analysis.
+
+
+ NGS experiment
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ An informative report about a DNA sequence assembly.
+ Assembly report
+
+
+ This might include an overall quality assessment of the assembly and summary statistics including counts, average length and number of bases for reads, matches and non-matches, contigs, reads in pairs etc.
+ Sequence assembly report
+
+
+
+
+
+
+
+
+ 1.1
+ An index of a genome sequence.
+
+
+ Many sequence alignment tasks involving many or very large sequences rely on a precomputed index of the sequence to accelerate the alignment.
+ Genome index
+
+
+
+
+
+
+
+
+ 1.1
+ 1.8
+
+ Report concerning genome-wide association study experiments.
+
+
+ GWAS report
+ true
+
+
+
+
+
+
+
+
+ 1.2
+ The position of a cytogenetic band in a genome.
+
+
+ Information might include start and end position in a chromosome sequence, chromosome identifier, name of band and so on.
+ Cytoband position
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ CL_[0-9]{7}
+ Cell type ontology concept ID.
+ CL ID
+
+
+
+ Cell type ontology ID
+
+
+
+
+
+
+
+
+ 1.2
+ Mathematical model of a network, that contains biochemical kinetics.
+
+
+ Kinetic model
+
+
+
+
+
+
+
+
+
+ 1.3
+ Identifier of a COSMIC database entry.
+ COSMIC identifier
+
+
+
+ COSMIC ID
+
+
+
+
+
+
+
+
+
+ 1.3
+ Identifier of a HGMD database entry.
+ HGMD identifier
+
+
+
+ HGMD ID
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Unique identifier of sequence assembly.
+ Sequence assembly version
+
+
+
+ Sequence assembly ID
+
+
+
+
+
+
+
+
+ 1.3
+ 1.5
+
+
+ A label (text token) describing a type of sequence feature such as gene, transcript, cds, exon, repeat, simple, misc, variation, somatic variation, structural variation, somatic structural variation, constrained or regulatory.
+
+ Sequence feature type
+ true
+
+
+
+
+
+
+
+
+ 1.3
+ 1.5
+
+
+ An informative report on gene homologues between species.
+
+ Gene homology (report)
+ true
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ ENSGT00390000003602
+ Unique identifier for a gene tree from the Ensembl database.
+ Ensembl ID (gene tree)
+
+
+
+ Ensembl gene tree ID
+
+
+
+
+
+
+
+
+ 1.3
+ A phylogenetic tree that is an estimate of the character's phylogeny.
+
+
+ Gene tree
+
+
+
+
+
+
+
+
+ 1.3
+ A phylogenetic tree that reflects phylogeny of the taxa from which the characters (used in calculating the tree) were sampled.
+
+
+ Species tree
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Name or other identifier of an entry from a biosample database.
+ Sample accession
+
+
+
+ Sample ID
+
+
+
+
+
+
+
+
+
+ 1.3
+ Identifier of an object from the MGI database.
+
+
+
+ MGI accession
+
+
+
+
+
+
+
+
+ 1.3
+ Name of a phenotype.
+ Phenotype
+ Phenotypes
+
+
+
+ Phenotype name
+
+
+
+
+
+
+
+
+ 1.4
+ A HMM transition matrix contains the probabilities of switching from one HMM state to another.
+ HMM transition matrix
+
+
+ Consider for example an HMM with two states (AT-rich and GC-rich). The transition matrix will hold the probabilities of switching from the AT-rich to the GC-rich state, and vice versa.
+ Transition matrix
+
+
+
+
+
+
+
+
+ 1.4
+ A HMM emission matrix holds the probabilities of choosing the four nucleotides (A, C, G and T) in each of the states of a HMM.
+ HMM emission matrix
+
+
+ Consider for example an HMM with two states (AT-rich and GC-rich). The emission matrix holds the probabilities of choosing each of the four nucleotides (A, C, G and T) in the AT-rich state and in the GC-rich state.
+ Emission matrix
+
+
+
+
+
+
+
+
+ 1.4
+ 1.15
+
+ A statistical Markov model of a system which is assumed to be a Markov process with unobserved (hidden) states.
+
+
+ Hidden Markov model
+ true
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ An identifier of a data format.
+
+
+ Format identifier
+
+
+
+
+
+
+
+
+ 1.5
+ Raw biological or biomedical image generated by some experimental technique.
+
+
+ Raw image
+ http://semanticscience.org/resource/SIO_000081
+
+
+
+
+
+
+
+
+ 1.5
+ Data concerning the intrinsic physical (e.g. structural) or chemical properties of one, more or all carbohydrates.
+ Carbohydrate data
+
+
+ Carbohydrate property
+
+
+
+
+
+
+
+
+ 1.5
+ 1.8
+
+ Report concerning proteomics experiments.
+
+
+ Proteomics experiment report
+ true
+
+
+
+
+
+
+
+
+ 1.5
+ 1.8
+
+ RNAi experiments.
+
+
+ RNAi report
+ true
+
+
+
+
+
+
+
+
+ 1.5
+ 1.8
+
+ biological computational model experiments (simulation), for example the minimum information required in order to permit its correct interpretation and reproduction.
+
+
+ Simulation experiment report
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ An imaging technique that uses magnetic fields and radiowaves to form images, typically to investigate the anatomy and physiology of the human body.
+ MRT image
+ Magnetic resonance imaging image
+ Magnetic resonance tomography image
+ NMRI image
+ Nuclear magnetic resonance imaging image
+
+
+ MRI image
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ An image from a cell migration track assay.
+
+
+ Cell migration track image
+
+
+
+
+
+
+
+
+ 1.7
+ Rate of association of a protein with another protein or some other molecule.
+ kon
+
+
+ Rate of association
+
+
+
+
+
+
+
+
+ 1.7
+ Multiple gene identifiers in a specific order.
+
+
+ Such data are often used for genome rearrangement tools and phylogenetic tree labeling.
+ Gene order
+
+
+
+
+
+
+
+
+ 1.7
+ The spectrum of frequencies of electromagnetic radiation emitted from a molecule as a result of some spectroscopy experiment.
+ Spectra
+
+
+ Spectrum
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Spectral information for a molecule from a nuclear magnetic resonance experiment.
+ NMR spectra
+
+
+ NMR spectrum
+
+
+
+
+
+
+
+
+ 1.8
+ 1.21
+
+ A sketch of a small molecule made with some specialised drawing package.
+
+
+ Chemical structure sketches are used for presentational purposes but also as inputs to various analysis software.
+ Chemical structure sketch
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ An informative report about a specific or conserved nucleic acid sequence pattern.
+
+
+ Nucleic acid signature
+
+
+
+
+
+
+
+
+ 1.8
+ A DNA sequence.
+ DNA sequences
+
+
+ DNA sequence
+
+
+
+
+
+
+
+
+ 1.8
+ An RNA sequence.
+ RNA sequences
+
+
+ RNA sequence
+
+
+
+
+
+
+
+
+ 1.8
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possible in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw RNA sequence.
+
+
+ RNA sequence (raw)
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possible in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw DNA sequence.
+
+
+ DNA sequence (raw)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ Data on gene sequence variations resulting from large-scale genotyping and DNA sequencing projects.
+ Gene sequence variations
+
+
+ Variations are stored along with a reference genome.
+ Sequence variations
+
+
+
+
+
+
+
+
+ 1.8
+ A list of publications such as scientific papers or books.
+
+
+ Bibliography
+
+
+
+
+
+
+
+
+ 1.8
+ A mapping of supplied textual terms or phrases to ontology concepts (URIs).
+
+
+ Ontology mapping
+
+
+
+
+
+
+
+
+ 1.9
+ Any data concerning a specific biological or biomedical image.
+ Image-associated data
+ Image-related data
+
+
+ This can include basic provenance and technical information about the image, scientific annotation and so on.
+ Image metadata
+
+
+
+
+
+
+
+
+ 1.9
+ A human-readable collection of information concerning a clinical trial.
+ Clinical trial information
+
+
+ Clinical trial report
+
+
+
+
+
+
+
+
+ 1.10
+ A report about a biosample.
+ Biosample report
+
+
+ Reference sample report
+
+
+
+
+
+
+
+
+ 1.10
+ Accession number of an entry from the Gene Expression Atlas.
+
+
+
+ Gene Expression Atlas Experiment ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ true
+ Identifier of an entry from a database of disease.
+
+
+
+ Disease identifier
+
+
+
+
+
+
+
+
+
+ 1.12
+ The name of some disease.
+
+
+
+ Disease name
+
+
+
+
+
+
+
+
+ 1.12
+ Some material that is used for educational (training) purposes.
+ OER
+ Open educational resource
+
+
+ Training material
+
+
+
+
+
+
+
+
+ 1.12
+ A training course available for use on the Web.
+ On-line course
+ MOOC
+ Massive open online course
+
+
+ Online course
+
+
+
+
+
+
+
+
+ 1.12
+ Any free or plain text, typically for human consumption and in English. Can instantiate also as a textual search query.
+ Free text
+ Plain text
+ Textual search query
+
+
+ Text
+
+
+
+
+
+
+
+
+
+ 1.14
+ Machine-readable biodiversity data.
+ Biodiversity information
+ OTU table
+
+
+ Biodiversity data
+
+
+
+
+
+
+
+
+ 1.14
+ A human-readable collection of information concerning biosafety data.
+ Biosafety information
+
+
+ Biosafety report
+
+
+
+
+
+
+
+
+ 1.14
+ A report about any kind of isolation of biological material.
+ Geographic location
+ Isolation source
+
+
+ Isolation report
+
+
+
+
+
+
+
+
+ 1.14
+ Information about the ability of an organism to cause disease in a corresponding host.
+ Pathogenicity
+
+
+ Pathogenicity report
+
+
+
+
+
+
+
+
+ 1.14
+ Information about the biosafety classification of an organism according to corresponding law.
+ Biosafety level
+
+
+ Biosafety classification
+
+
+
+
+
+
+
+
+ 1.14
+ A report about localisation of the isolation of biological material e.g. country or coordinates.
+
+
+ Geographic location
+
+
+
+
+
+
+
+
+ 1.14
+ A report about any kind of isolation source of biological material e.g. blood, water, soil.
+
+
+ Isolation source
+
+
+
+
+
+
+
+
+ 1.14
+ Experimentally determined parameter of the physiology of an organism, e.g. substrate spectrum.
+
+
+ Physiology parameter
+
+
+
+
+
+
+
+
+ 1.14
+ Experimentally determined parameter of the morphology of an organism, e.g. size & shape.
+
+
+ Morphology parameter
+
+
+
+
+
+
+
+
+ 1.14
+ Experimentally determined parameter for the cultivation of an organism.
+ Cultivation conditions
+ Carbon source
+ Culture media composition
+ Nitrogen source
+ Salinity
+ Temperature
+ pH value
+
+
+ Cultivation parameter
+
+
+
+
+
+
+
+
+ 1.15
+ Data concerning a sequencing experiment, that may be specified as an input to some tool.
+
+
+ Sequencing metadata name
+
+
+
+
+
+
+
+
+ 1.15
+ An identifier of a flow cell of a sequencing machine.
+
+
+ A flow cell is used to immobilise, amplify and sequence millions of molecules at once. In Illumina machines, a flowcell is composed of 8 "lanes" which allows 8 experiments in a single analysis.
+ Flow cell identifier
+
+
+
+
+
+
+
+
+ 1.15
+ An identifier of a lane within a flow cell of a sequencing machine, within which millions of sequences are immobilised, amplified and sequenced.
+
+
+ Lane identifier
+
+
+
+
+
+
+
+
+ 1.15
+ A number corresponding to the number of an analysis performed by a sequencing machine. For example, if it's the 13th analysis, the run is 13.
+
+
+ Run number
+
+
+
+
+
+
+
+
+ 1.15
+ Data concerning ecology; for example measurements and reports from the study of interactions among organisms and their environment.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Ecological data
+
+
+
+
+
+
+
+
+ 1.15
+ The mean species diversity in sites or habitats at a local scale.
+ α-diversity
+
+
+ Alpha diversity data
+
+
+
+
+
+
+
+
+ 1.15
+ The ratio between regional and local species diversity.
+ True beta diversity
+ β-diversity
+
+
+ Beta diversity data
+
+
+
+
+
+
+
+
+ 1.15
+ The total species diversity in a landscape.
+ ɣ-diversity
+
+
+ Gamma diversity data
+
+
+
+
+
+
+
+
+
+ 1.15
+ A plot in which community data (e.g. species abundance data) is summarised. Similar species and samples are plotted close together, and dissimilar species and samples are plotted placed far apart.
+
+
+ Ordination plot
+
+
+
+
+
+
+
+
+ 1.16
+ A ranked list of categories (usually ontology concepts), each associated with a statistical metric of over-/under-representation within the studied data.
+ Enrichment report
+ Over-representation report
+ Functional enrichment report
+
+
+ Over-representation data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ GO-term report
+ A ranked list of Gene Ontology concepts, each associated with a p-value, concerning or derived from the analysis of e.g. a set of genes or proteins.
+ GO-term enrichment report
+ Gene ontology concept over-representation report
+ Gene ontology enrichment report
+ Gene ontology term enrichment report
+
+
+ GO-term enrichment data
+
+
+
+
+
+
+
+
+ 1.16
+ Score for localization of one or more post-translational modifications in peptide sequence measured by mass spectrometry.
+ False localisation rate
+ PTM localisation
+ PTM score
+
+
+ Localisation score
+
+
+
+
+
+
+
+
+
+ 1.16
+ Identifier of a protein modification catalogued in the Unimod database.
+
+
+
+ Unimod ID
+
+
+
+
+
+
+
+
+ 1.16
+ Identifier for mass spectrometry proteomics data in the proteomexchange.org repository.
+
+
+
+ ProteomeXchange ID
+
+
+
+
+
+
+
+
+ 1.16
+ Groupings of expression profiles according to a clustering algorithm.
+ Clustered gene expression profiles
+
+
+ Clustered expression profiles
+
+
+
+
+
+
+
+
+
+ 1.16
+ An identifier of a concept from the BRENDA ontology.
+
+
+
+ BRENDA ontology concept ID
+
+
+
+
+
+
+
+
+
+ 1.16
+ A text (such as a scientific article), annotated with notes, data and metadata, such as recognised entities, concepts, and their relations.
+
+
+ Annotated text
+
+
+
+
+
+
+
+
+ 1.16
+ A structured query, in form of a script, that defines a database search task.
+
+
+ Query script
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Structural 3D model (volume map) from electron microscopy.
+
+
+ 3D EM Map
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Annotation on a structural 3D EM Map from electron microscopy. This might include one or several locations in the map of the known features of a particular macromolecule.
+
+
+ 3D EM Mask
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Raw DDD movie acquisition from electron microscopy.
+
+
+ EM Movie
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Raw acquisition from electron microscopy or average of an aligned DDD movie.
+
+
+ EM Micrograph
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.21
+ Data coming from molecular simulations, computer "experiments" on model molecules.
+
+
+ Typically formed by two separated but indivisible pieces of information: topology data (static) and trajectory data (dynamic).
+ Molecular simulation data
+
+
+
+
+
+
+
+
+
+ 1.21
+ Identifier of an entry from the RNA central database of annotated human miRNAs.
+
+
+
+ There are canonical and taxon-specific forms of RNAcentral ID. Canonical form e.g. urs_9or10digits identifies an RNA sequence (within the RNA central database) which may appear in multiple sequences. Taxon-specific form identifies a sequence in the specific taxon (e.g. urs_9or10digits_taxonID).
+ RNA central ID
+
+
+
+
+
+
+
+
+ 1.21
+ A human-readable systematic collection of patient (or population) health information in a digital format.
+ EHR
+ EMR
+ Electronic medical record
+
+
+ Electronic health record
+
+
+
+
+
+
+
+
+ 1.22
+ Data coming from molecular simulations, computer "experiments" on model molecules. Typically formed by two separated but indivisible pieces of information: topology data (static) and trajectory data (dynamic).
+
+
+ Simulation
+
+
+
+
+
+
+
+
+ 1.22
+ Dynamic information of a structure molecular system coming from a molecular simulation: XYZ 3D coordinates (sometimes with their associated velocities) for every atom along time.
+
+
+ Trajectory data
+
+
+
+
+
+
+
+
+ 1.22
+ Force field parameters: charges, masses, radii, bond lengths, bond dihedrals, etc. define the structural molecular system, and are essential for the proper description and simulation of a molecular system.
+
+
+ Forcefield parameters
+
+
+
+
+
+
+
+
+ 1.22
+ Static information of a structure molecular system that is needed for a molecular simulation: the list of atoms, their non-bonded parameters for Van der Waals and electrostatic interactions, and the complete connectivity in terms of bonds, angles and dihedrals.
+
+
+ Topology data
+
+
+
+
+
+
+
+
+ 1.22
+ Visualization of distribution of quantitative data, e.g. expression data, by histograms, violin plots and density plots.
+ Density plot
+
+
+ Histogram
+
+
+
+
+
+
+
+
+ 1.23
+ Report of the quality control review that was made of factors involved in a procedure.
+ QC metrics
+ QC report
+ Quality control metrics
+ Quality control report
+
+
+
+
+
+
+
+
+ 1.23
+ A table of unnormalized values representing summarised read counts per genomic region (e.g. gene, transcript, peak).
+ Read count matrix
+
+
+ Count matrix
+
+
+
+
+
+
+
+
+ 1.24
+ Alignment (superimposition) of DNA tertiary (3D) structures.
+ Structure alignment (DNA)
+
+
+ DNA structure alignment
+
+
+
+
+
+
+
+
+ 1.24
+ A score derived from the P-value to ensure correction for multiple tests. The Q-value provides an estimate of the positive False Discovery Rate (pFDR), i.e. the rate of false positives among all the cases reported positive: pFDR = FP / (FP + TP).
+ Adjusted P-value
+ FDR
+ Padj
+ pFDR
+
+
+ Q-values are widely used in high-throughput data analysis (e.g. detection of differentially expressed genes from transcriptome data).
+ Q-value
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A profile HMM is a variant of a Hidden Markov model that is derived specifically from a set of (aligned) biological sequences. Profile HMMs provide the basis for a position-specific scoring system, which can be used to align sequences and search databases for related sequences.
+
+
+ Profile HMM
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+
+ WP[0-9]+
+ Identifier of a pathway from the WikiPathways pathway database.
+ WikiPathways ID
+ WikiPathways pathway ID
+
+
+
+ Pathway ID (WikiPathways)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A ranked list of pathways, each associated with z-score, p-value or similar, concerning or derived from the analysis of e.g. a set of genes or proteins.
+ Pathway analysis results
+ Pathway enrichment report
+ Pathway over-representation report
+ Pathway report
+ Pathway term enrichment report
+
+
+ Pathway overrepresentation data
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ \d{4}-\d{4}-\d{4}-\d{3}(\d|X)
+ Identifier of a researcher registered with the ORCID database. Used to identify author IDs.
+
+
+
+ ORCID Identifier
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ Chemical structure specified in Simplified Molecular Input Line Entry System (SMILES) line notation.
+
+
+ SMILES
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Chemical structure specified in IUPAC International Chemical Identifier (InChI) line notation.
+
+
+ InChI
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Chemical structure specified by Molecular Formula (MF), including a count of each element in a compound.
+
+
+ The general MF query format consists of a series of valid atomic symbols, with an optional number or range.
+ mf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The InChIKey (hashed InChI) is a fixed length (25 character) condensed digital representation of an InChI chemical structure specification. It uniquely identifies a chemical compound.
+
+
+ An InChIKey identifier is not human- nor machine-readable but is more suitable for web searches than an InChI chemical structure specification.
+ InChIKey
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SMILES ARbitrary Target Specification (SMARTS) format for chemical structure specification, which is a subset of the SMILES line notation.
+
+
+ smarts
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ Non-sequence characters may be used for example for gaps.
+ nucleotide
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#Nucleotide_sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a protein sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ Non-sequence characters may be used for gaps and translation stop.
+ protein
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#Amino_acid_sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for the consensus of two or more molecular sequences.
+
+
+ consensus
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure nucleotide
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence (characters ACGTU only) with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure nucleotide
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ dna
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#DNA_sequence
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ rna
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#RNA_sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence (characters ACGT only) with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure dna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure dna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence (characters ACGU only) with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure rna sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure rna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for any protein sequence with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure protein
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for any protein sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure protein
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from UniGene.
+
+ A UniGene entry includes a set of transcript sequences assigned to the same transcription locus (gene or expressed pseudogene), with information on protein similarities, gene expression, cDNA clone reagents, and genomic location.
+ UniGene entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the COG database of clusters of (related) protein sequences.
+
+ COG sequence cluster format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for sequence positions (feature location) as used in DDBJ/EMBL/GenBank database.
+ Feature location
+
+
+ EMBL feature location
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for tandem repeats in a nucleotide sequence (format generated by the Sanger Centre quicktandem program).
+
+
+ quicktandem
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for inverted repeats in a nucleotide sequence (format generated by the Sanger Centre inverted program).
+
+
+ Sanger inverted repeats
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for tandem repeats in a sequence (an EMBOSS report format).
+
+
+ EMBOSS repeat
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a report on exon-intron structure generated by EMBOSS est2genome.
+
+
+ est2genome format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for restriction enzyme recognition sites used by EMBOSS restrict program.
+
+
+ restrict format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for restriction enzyme recognition sites used by EMBOSS restover program.
+
+
+ restover format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for restriction enzyme recognition sites used by REBASE database.
+
+
+ REBASE restriction sites
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using FASTA.
+
+
+ This includes (typically) score data, alignment data and a histogram (of observed and expected distribution of E values.)
+ FASTA search results format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using some variant of BLAST.
+
+
+ This includes score data, alignment data and summary table.
+ BLAST results
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using some variant of MSPCrunch.
+
+
+ mspcrunch
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using some variant of Smith Waterman.
+
+
+ Smith-Waterman format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of EMBASSY domain hits file (DHF) of hits (sequences) with domain classification information.
+
+
+ The hits are relatives to a SCOP or CATH family and are found from a search of a sequence database.
+ dhf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of EMBASSY ligand hits file (LHF) of database hits (sequences) with ligand classification information.
+
+
+ The hits are putative ligand-binding sequences and are found from a search of a sequence database.
+ lhf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Results format for searches of the InterPro database.
+
+
+ InterPro hits format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a search of the InterPro database showing matches of query protein sequence(s) to InterPro entries.
+
+
+ The report includes a classification of regions in a query protein sequence which are assigned to a known InterPro protein family or group.
+ InterPro protein view report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a search of the InterPro database showing matches between protein sequence(s) and signatures for an InterPro entry.
+
+
+ The table presents matches between query proteins (rows) and signature methods (columns) for this entry. Alternatively the sequence(s) might be from the InterPro entry itself. The match position in the protein sequence and match status (true positive, false positive etc) are indicated.
+ InterPro match table format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dirichlet distribution HMMER format.
+
+
+ HMMER Dirichlet prior
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dirichlet distribution MEME format.
+
+
+ MEME Dirichlet prior
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a report from the HMMER package on the emission and transition counts of a hidden Markov model.
+
+
+ HMMER emission and transition
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a regular expression pattern from the Prosite database.
+
+
+ prosite-pattern
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of an EMBOSS sequence pattern.
+
+
+ EMBOSS sequence pattern
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A motif in the format generated by the MEME program.
+
+
+ meme-motif
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence profile (sequence classifier) format used in the PROSITE database.
+
+
+ prosite-profile
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (sequence classifier) in the format used in the JASPAR database.
+
+
+ JASPAR format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of the model of random sequences used by MEME.
+
+
+ MEME background Markov model
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a hidden Markov model representation used by the HMMER package.
+
+
+ HMMER format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA-style format for multiple sequences aligned by HMMER package to an HMM.
+
+
+ HMMER-aln
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of multiple sequences aligned by DIALIGN package.
+
+
+ DIALIGN format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBASSY 'domain alignment file' (DAF) format, containing a sequence alignment of protein domains belonging to the same SCOP or CATH family.
+
+
+ The format is clustal-like and includes annotation of domain family classification information.
+ daf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for alignment of molecular sequences to MEME profiles (position-dependent scoring matrices) as generated by the MAST tool from the MEME package.
+
+
+ Sequence-MEME profile alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used by the HMMER package for an alignment of a sequence against a hidden Markov model database.
+
+
+ HMMER profile alignment (sequences versus HMMs)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used by the HMMER package for an alignment of a hidden Markov model against a sequence database.
+
+
+ HMMER profile alignment (HMM versus sequences)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of PHYLIP phylogenetic distance matrix data.
+
+
+ Data Type must include the distance matrix, probably as pairs of sequence identifiers with a distance (integer or float).
+ Phylip distance matrix
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dendrogram (tree file) format generated by ClustalW.
+
+
+ ClustalW dendrogram
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw data file format used by Phylip from which a phylogenetic tree is directly generated or plotted.
+
+
+ Phylip tree raw
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PHYLIP file format for continuous quantitative character data.
+
+
+ Phylip continuous quantitative characters
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of phylogenetic property data.
+
+ Phylogenetic property values format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PHYLIP file format for phylogenetics character frequency data.
+
+
+ Phylip character frequencies format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of PHYLIP discrete states data.
+
+
+ Phylip discrete states format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of PHYLIP cliques data.
+
+
+ Phylip cliques format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree data format used by the PHYLIP program.
+
+
+ Phylip tree format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of an entry from the TreeBASE database of phylogenetic data.
+
+
+ TreeBASE format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of an entry from the TreeFam database of phylogenetic data.
+
+
+ TreeFam format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for distances, such as Branch Score distance, between two or more phylogenetic trees as used by the Phylip package.
+
+
+ Phylip tree distance format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of an entry from the DSSP database (Dictionary of Secondary Structure in Proteins).
+
+
+ The DSSP database is built using the DSSP application which defines secondary structure, geometrical features and solvent exposure of proteins, given atomic coordinates in PDB format.
+ dssp
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of the HSSP database (Homology-derived Secondary Structure in Proteins).
+
+
+ hssp
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of RNA secondary structure in dot-bracket notation, originally generated by the Vienna RNA package/server.
+ Vienna RNA format
+ Vienna RNA secondary structure format
+
+
+ Dot-bracket format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of local RNA secondary structure components with free energy values, generated by the Vienna RNA package/server.
+
+
+ Vienna local RNA secondary structure format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of an entry (or part of an entry) from the PDB database.
+ PDB entry format
+
+
+ PDB database entry format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of PDB database in PDB format.
+ PDB format
+
+
+ PDB
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of PDB database in mmCIF format.
+
+
+ mmCIF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of PDB database in PDBML (XML) format.
+
+
+ PDBML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Format of a matrix of 3D-1D scores used by the EMBOSS Domainatrix applications.
+
+
+ Domainatrix 3D-1D scoring matrix format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Amino acid index format used by the AAindex database.
+
+
+ aaindex
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from IntEnz (The Integrated Relational Enzyme Database).
+
+ IntEnz is the master copy of the Enzyme Nomenclature, the recommendations of the NC-IUBMB on the Nomenclature and Classification of Enzyme-Catalysed Reactions.
+ IntEnz enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the BRENDA enzyme database.
+
+ BRENDA enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the KEGG REACTION database of biochemical reactions.
+
+ KEGG REACTION enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the KEGG ENZYME database.
+
+ KEGG ENZYME enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the proto section of the REBASE enzyme database.
+
+ REBASE proto enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the withrefm section of the REBASE enzyme database.
+
+ REBASE withrefm enzyme report format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of output of the Pcons Model Quality Assessment Program (MQAP).
+
+
+ Pcons ranks protein models by assessing their quality based on the occurrence of recurring common three-dimensional structural patterns. Pcons returns a score reflecting the overall global quality and a score for each individual residue in the protein reflecting the local residue quality.
+ Pcons report format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of output of the ProQ protein model quality predictor.
+
+
+ ProQ is a neural network-based predictor that predicts the quality of a protein model based on the number of structural features.
+ ProQ report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of SMART domain assignment data.
+
+ The SMART output file includes data on genetically mobile domains / analysis of domain architectures, including phyletic distributions, functional class, tertiary structures and functionally important residues.
+ SMART domain assignment report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the BIND database of protein interaction.
+
+ BIND entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the IntAct database of protein interaction.
+
+ IntAct entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the InterPro database of protein signatures (sequence classifiers) and classified sequences.
+
+ This includes signature metadata, sequence references and a reference to the signature itself. There is normally a header (entry accession numbers and name), abstract, taxonomy information, example proteins etc. Each entry also includes a match list which gives a number of different views of the signature matches for the sequences in each InterPro entry.
+ InterPro entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the textual abstract of signatures in an InterPro entry and its protein matches.
+
+ References are included and a functional inference is made where possible.
+ InterPro entry abstract format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Gene3D protein secondary database.
+
+ Gene3D entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the PIRSF protein secondary database.
+
+ PIRSF entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the PRINTS protein secondary database.
+
+ PRINTS entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Panther library of protein families and subfamilies.
+
+ Panther Families and HMMs entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Pfam protein secondary database.
+
+ Pfam entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the SMART protein secondary database.
+
+ SMART entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Superfamily protein secondary database.
+
+ Superfamily entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the TIGRFam protein secondary database.
+
+ TIGRFam entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the ProDom protein domain classification database.
+
+ ProDom entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the FSSP database.
+
+ FSSP entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report format for the kinetics of enzyme-catalysed reaction(s) in a format generated by EMBOSS findkm. This includes Michaelis Menten plot, Hanes Woolf plot, Michaelis Menten constant (Km) and maximum velocity (Vmax).
+
+
+ findkm
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of Ensembl genome database.
+
+ Ensembl gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of DictyBase genome database.
+
+ DictyBase gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of Candida Genome database.
+
+ CGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of DragonDB genome database.
+
+ DragonDB gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of EcoCyc genome database.
+
+ EcoCyc gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of FlyBase genome database.
+
+ FlyBase gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of Gramene genome database.
+
+ Gramene gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of KEGG GENES genome database.
+
+ KEGG GENES gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Maize genetics and genomics database (MaizeGDB).
+
+ MaizeGDB gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Mouse Genome Database (MGD).
+
+ MGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Rat Genome Database (RGD).
+
+ RGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Saccharomyces Genome Database (SGD).
+
+ SGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Sanger GeneDB genome database.
+
+ GeneDB gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of The Arabidopsis Information Resource (TAIR) genome database.
+
+ TAIR gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the WormBase genomes database.
+
+ WormBase gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Zebrafish Information Network (ZFIN) genome database.
+
+ ZFIN gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the TIGR genome database.
+
+ TIGR gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the dbSNP database.
+
+ dbSNP polymorphism report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the OMIM database of genotypes and phenotypes.
+
+ OMIM entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a record from the HGVbase database of genotypes and phenotypes.
+
+ HGVbase entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a record from the HIVDB database of genotypes and phenotypes.
+
+ HIVDB entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the KEGG DISEASE database.
+
+ KEGG DISEASE entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format on PCR primers and hybridisation oligos as generated by Whitehead primer3 program.
+
+
+ Primer3 primer
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format of raw sequence read data from an Applied Biosystems sequencing machine.
+
+
+ ABI
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of MIRA sequence trace information file.
+
+
+ mira
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ caf
+
+ Common Assembly Format (CAF). A sequence assembly format including contigs, base-call qualities, and other metadata.
+
+
+ CAF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ Sequence assembly project file EXP format.
+ Affymetrix EXP format
+
+
+ EXP
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Staden Chromatogram Files format (SCF) of base-called sequence reads, qualities, and other metadata.
+
+
+ SCF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ PHD sequence trace format to store serialised chromatogram data (reads).
+
+
+ PHD
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of Affymetrix data file of raw image data.
+ Affymetrix image data file format
+
+
+ dat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of Affymetrix data file of information about (raw) expression levels of the individual probes.
+ Affymetrix probe raw data format
+
+
+ cel
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of affymetrix gene cluster files (hc-genes.txt, hc-chips.txt) from hierarchical clustering.
+
+
+ affymetrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the ArrayExpress microarrays database.
+
+ ArrayExpress entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Affymetrix data file format for information about experimental conditions and protocols.
+ Affymetrix experimental conditions data file format
+
+
+ affymetrix-exp
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ chp
+ Format of Affymetrix data file of information about (normalised) expression levels of the individual probes.
+ Affymetrix probe normalised data format
+
+
+ CHP
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the Electron Microscopy DataBase (EMDB).
+
+ EMDB entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG PATHWAY database of pathway maps for molecular interactions and reaction networks.
+
+ KEGG PATHWAY entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the MetaCyc metabolic pathways database.
+
+ MetaCyc entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of a report from the HumanCyc metabolic pathways database.
+
+ HumanCyc entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the INOH signal transduction pathways database.
+
+ INOH entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the PATIKA biological pathways database.
+
+ PATIKA entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the reactome biological pathways database.
+
+ Reactome entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the aMAZE biological pathways and molecular interactions database.
+
+ aMAZE entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the CPDB database.
+
+ CPDB entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the Panther Pathways database.
+
+ Panther Pathways entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of Taverna workflows.
+
+
+ Taverna workflow format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of mathematical models from the BioModel database.
+
+ Models are annotated and linked to relevant data resources, such as publications, databases of compounds and pathways, controlled vocabularies, etc.
+ BioModel mathematical model format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG LIGAND chemical database.
+
+ KEGG LIGAND entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG COMPOUND database.
+
+ KEGG COMPOUND entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG PLANT database.
+
+ KEGG PLANT entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG GLYCAN database.
+
+ KEGG GLYCAN entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from PubChem.
+
+ PubChem entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from a database of chemical structures and property predictions.
+
+ ChemSpider entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from Chemical Entities of Biological Interest (ChEBI).
+
+ ChEBI includes an ontological classification defining relations between entities or classes of entities.
+ ChEBI entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the MSDchem ligand dictionary.
+
+ MSDchem ligand dictionary entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of an entry from the HET group dictionary (HET groups from PDB files).
+
+
+ HET group dictionary entry format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG DRUG database.
+
+ KEGG DRUG entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of bibliographic reference as used by the PubMed database.
+
+
+ PubMed citation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for abstracts of scientific articles from the Medline database.
+
+
+ Bibliographic reference information, including citation information, is included.
+ Medline Display Format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CiteXplore 'core' citation format including title, journal, authors and abstract.
+
+
+ CiteXplore-core
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CiteXplore 'all' citation format includes all known details such as Mesh terms and cross-references.
+
+
+ CiteXplore-all
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Article format of the PubMed Central database.
+
+
+ pmc
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of iHOP (Information Hyperlinked over Proteins) text-mining result.
+
+
+ iHOP format
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ OSCAR format of annotated chemical text.
+
+
+ OSCAR (Open-Source Chemistry Analysis Routines) software performs chemistry-specific parsing of chemical documents. It attempts to identify chemical names, ontology concepts, and chemical data from a document.
+ OSCAR format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Format of an ATOM record (describing data for an individual atom) from a PDB file.
+
+ PDB atom record format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of CATH domain classification information for a polypeptide chain.
+
+ The report (for example http://www.cathdb.info/chain/1cukA) includes chain identifiers, domain identifiers and CATH codes for domains in a given protein chain.
+ CATH chain report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of CATH domain classification information for a protein PDB file.
+
+ The report (for example http://www.cathdb.info/pdb/1cuk) includes chain identifiers, domain identifiers and CATH codes for domains in a given PDB file.
+ CATH PDB report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry (gene) format of the NCBI database.
+
+ NCBI gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Moby:GI_Gene
+ Report format for biological functions associated with a gene name and its alternative names (synonyms, homonyms), as generated by the GeneIlluminator service.
+
+ This includes a gene name and abbreviation of the name which may be in a name space indicating the gene status and relevant organisation.
+ GeneIlluminator gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Moby:BacMapGeneCard
+ Format of a report on the DNA and protein sequences for a given gene label from a bacterial chromosome map from the BacMap database.
+
+ BacMap gene card format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on Escherichia coli genes, proteins and molecules from the CyberCell Database (CCDB).
+
+ ColiCard report format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Map of a plasmid (circular DNA) in PlasMapper TextMap format.
+
+
+ PlasMapper TextMap
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree Newick (text) format.
+ nh
+
+
+ newick
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree TreeCon (text) format.
+
+
+ TreeCon format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree Nexus (text) format.
+
+
+ Nexus format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A defined way or layout of representing and structuring data in a computer file, blob, string, message, or elsewhere.
+ Data format
+ Data model
+ Exchange format
+ File format
+
+
+ The main focus in EDAM lies on formats as means of structuring data exchanged between different tools or resources. The serialisation, compression, or encoding of concrete data formats/models is not in scope of EDAM. Format 'is format of' Data.
+ Format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Data model
+ A defined data format has its implicit or explicit data model, and EDAM does not distinguish the two. Some data models, however, do not have any standard way of serialisation into an exchange format, and those are thus not considered formats in EDAM. (Remark: an even broader - or closely related - term than 'Data model' would be 'Information model'.)
+
+
+
+
+ File format
+ File format denotes only formats of a computer file, but the same formats apply also to data blobs or exchanged messages.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data format for an individual atom.
+
+ Atomic data format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a molecular sequence record.
+
+
+ Sequence record format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for molecular sequence feature information.
+
+
+ Sequence feature annotation format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for molecular sequence alignment information.
+
+
+ Alignment format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ACEDB sequence format.
+
+
+ acedb
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Clustalw output format.
+
+ clustal sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Codata entry format.
+
+
+ codata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fasta format variant with database name before ID.
+
+
+ dbid
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBL entry format.
+ EMBL
+ EMBL sequence format
+
+
+ EMBL format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Staden experiment file format.
+
+
+ Staden experiment format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA format including NCBI-style IDs.
+ FASTA format
+ FASTA sequence format
+
+
+ FASTA
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ fastq
+ fq
+ FASTQ short read format ignoring quality scores.
+ FASTAQ
+ fq
+
+
+ FASTQ
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTQ Illumina 1.3 short read format.
+
+
+ FASTQ-illumina
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTQ short read format with phred quality.
+
+
+ FASTQ-sanger
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTQ Solexa/Illumina 1.0 short read format.
+
+
+ FASTQ-solexa
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fitch program format.
+
+
+ fitch program
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GCG sequence file format.
+ GCG SSF
+
+
+ GCG SSF (single sequence file) file format.
+ GCG
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Genbank entry format.
+ GenBank
+
+
+ GenBank format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Genpept protein entry format.
+
+
+ Currently identical to refseqp format
+ genpept
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GFF feature file format with sequence in the header.
+
+
+ GFF2-seq
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GFF3 feature file format with sequence.
+
+
+ GFF3-seq
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA sequence format including NCBI-style GIs.
+
+
+ giFASTA format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Hennig86 output sequence format.
+
+
+ hennig86
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Intelligenetics sequence format.
+
+
+ ig
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Intelligenetics sequence format (strict version).
+
+
+ igstrict
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Jackknifer interleaved and non-interleaved sequence format.
+
+
+ jackknifer
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mase program sequence format.
+
+
+ mase format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mega interleaved and non-interleaved sequence format.
+
+
+ mega-seq
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GCG MSF (multiple sequence file) file format.
+
+
+ GCG MSF
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ pir
+ NBRF/PIR entry sequence format.
+ nbrf
+ pir
+
+
+ nbrf/pir
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nexus/paup interleaved sequence format.
+
+
+ nexus-seq
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB sequence format (ATOM lines).
+
+
+ pdb format in EMBOSS.
+ pdbatom
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB nucleotide sequence format (ATOM lines).
+
+
+ pdbnuc format in EMBOSS.
+ pdbatomnuc
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB nucleotide sequence format (SEQRES lines).
+
+
+ pdbnucseq format in EMBOSS.
+ pdbseqresnuc
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB sequence format (SEQRES lines).
+
+
+ pdbseq format in EMBOSS.
+ pdbseqres
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Plain old FASTA sequence format (unspecified format for IDs).
+
+
+ Pearson format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Phylip interleaved sequence format.
+
+ phylip sequence format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ PHYLIP non-interleaved sequence format.
+
+ phylipnon sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw sequence format with no non-sequence characters.
+
+
+ raw
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Refseq protein entry sequence format.
+
+
+ Currently identical to genpept format
+ refseqp
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Selex sequence format.
+
+ selex sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+
+ Staden suite sequence format.
+
+
+ Staden format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ Stockholm multiple sequence alignment format (used by Pfam and Rfam).
+
+
+ Stockholm format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA strider output sequence format.
+
+
+ strider format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UniProtKB entry sequence format.
+ SwissProt format
+ UniProt format
+
+
+ UniProtKB format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ txt
+ Plain text sequence format (essentially unformatted).
+
+
+ plain text format (unformatted)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Treecon output sequence format.
+
+ treecon sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ NCBI ASN.1-based sequence format.
+
+
+ ASN.1 sequence format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAS sequence (XML) format (any type).
+ das sequence format
+
+
+ DAS format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAS sequence (XML) format (nucleotide-only).
+
+
+ The use of this format is deprecated.
+ dasdna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS debugging trace sequence format of full internal data content.
+
+
+ debug-seq
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Jackknifer output sequence non-interleaved format.
+
+
+ jackknifernon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Mega non-interleaved output sequence format.
+
+ meganon sequence format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ NCBI FASTA sequence format with NCBI-style IDs.
+
+
+ There are several variants of this.
+ NCBI format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nexus/paup non-interleaved sequence format.
+
+
+ nexusnon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ General Feature Format (GFF) of sequence features.
+
+
+ GFF2
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ Generic Feature Format version 3 (GFF3) of sequence features.
+
+
+ GFF3
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ PIR feature format.
+
+
+ pir
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Swiss-Prot feature format.
+
+ swiss feature
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAS GFF (XML) feature format.
+ DASGFF feature
+ das feature
+
+
+ DASGFF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS debugging trace feature format of full internal data content.
+
+
+ debug-feat
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBL feature format.
+
+ EMBL feature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Genbank feature format.
+
+ GenBank feature
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ClustalW format for (aligned) sequences.
+ clustal
+
+
+ ClustalW format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS alignment format for debugging trace of full internal data content.
+
+
+ debug
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fasta format for (aligned) sequences.
+
+
+ FASTA-aln
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX0 alignment format.
+
+
+ markx0
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX1 alignment format.
+
+
+ markx1
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX10 alignment format.
+
+
+ markx10
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX2 alignment format.
+
+
+ markx2
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX3 alignment format.
+
+
+ markx3
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment format for start and end of matches between sequence pairs.
+
+
+ match
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mega format for (typically aligned) sequences.
+
+
+ mega
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mega non-interleaved format for (typically aligned) sequences.
+
+
+ meganon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ MSF format for (aligned) sequences.
+
+ msf alignment format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Nexus/paup format for (aligned) sequences.
+
+ nexus alignment format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Nexus/paup non-interleaved format for (aligned) sequences.
+
+ nexusnon alignment format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS simple sequence pairwise alignment format.
+
+
+ pair
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.bioperl.org/wiki/PHYLIP_multiple_alignment_format
+ Phylip format for (aligned) sequences.
+ PHYLIP
+ PHYLIP interleaved format
+ ph
+ phy
+
+
+ PHYLIP format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.bioperl.org/wiki/PHYLIP_multiple_alignment_format
+ Phylip non-interleaved format for (aligned) sequences.
+ PHYLIP sequential format
+ phylipnon
+
+
+ PHYLIP sequential
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment format for score values for pairs of sequences.
+
+
+ scores format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SELEX format for (aligned) sequences.
+
+
+ selex
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS simple multiple alignment format.
+
+
+ EMBOSS simple format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Simple multiple sequence (alignment) format for SRS.
+
+
+ srs format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Simple sequence pair (alignment) format for SRS.
+
+
+ srspair
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ T-Coffee program alignment format.
+
+
+ T-Coffee format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Treecon format for (aligned) sequences.
+
+
+ TreeCon-seq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a phylogenetic tree.
+
+
+ Phylogenetic tree format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a biological pathway or network.
+
+
+ Biological pathway or network format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a sequence-profile alignment.
+
+
+ Sequence-profile alignment format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data format for a sequence-HMM profile alignment.
+
+ Sequence-profile alignment (HMM) format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for an amino acid index.
+
+
+ Amino acid index format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a full-text scientific article.
+ Literature format
+
+
+ Article format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format of a report from text mining.
+
+
+ Text mining report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for reports on enzyme kinetics.
+
+
+ Enzyme kinetics report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on a chemical compound.
+ Chemical compound annotation format
+ Chemical structure format
+ Small molecule report format
+ Small molecule structure format
+
+
+ Chemical data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on a particular locus, gene, gene system or groups of genes.
+ Gene features format
+
+
+ Gene annotation format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a workflow.
+ Programming language
+ Script format
+
+
+ Workflow format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a molecular tertiary structure.
+
+
+ Tertiary structure format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.2
+
+
+ Data format for a biological model.
+
+ Biological model format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Text format of a chemical formula.
+
+
+ Chemical formula format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of raw (unplotted) phylogenetic data.
+
+
+ Phylogenetic character data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic continuous quantitative character data.
+
+
+ Phylogenetic continuous quantitative character format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic discrete states data.
+
+
+ Phylogenetic discrete states format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic cliques data.
+
+
+ Phylogenetic tree report (cliques) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic invariants data.
+
+
+ Phylogenetic tree report (invariants) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation format for electron microscopy models.
+
+ Electron microscopy model format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for phylogenetic tree distance data.
+
+
+ Phylogenetic tree report (tree distances) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Format for sequence polymorphism data.
+
+ Polymorphism report format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for reports on a protein family.
+
+
+ Protein family report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for molecular interaction data.
+ Molecular interaction format
+
+
+ Protein interaction format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for sequence assembly data.
+
+
+ Sequence assembly format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for information about a microarray experiment per se (not the data generated from that experiment).
+
+
+ Microarray experiment data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for sequence trace data (i.e. including base call information).
+
+
+ Sequence trace format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a file of gene expression data, e.g. a gene expression matrix or profile.
+ Gene expression data format
+
+
+ Gene expression report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on genotype / phenotype information.
+
+ Genotype and phenotype annotation format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a map of (typically one) molecular sequence annotated with features.
+
+
+ Map format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on PCR primers or hybridisation oligos in a nucleic acid sequence.
+
+
+ Nucleic acid features (primers) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report of general information about a specific protein.
+
+
+ Protein report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report of general information about a specific enzyme.
+
+ Protein report (enzyme) format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a matrix of 3D-1D scores (amino acid environment probabilities).
+
+
+ 3D-1D scoring matrix format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on the quality of a protein three-dimensional model.
+
+
+ Protein structure report (quality evaluation) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on sequence hits and associated data from searching a sequence database.
+
+
+ Database hits (sequence) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a matrix of genetic distances between molecular sequences.
+
+
+ Sequence distance matrix format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a sequence motif.
+
+
+ Sequence motif format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a sequence profile.
+
+
+ Sequence profile format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a hidden Markov model.
+
+
+ Hidden Markov model format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format of a dirichlet distribution.
+
+
+ Dirichlet distribution format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for the emission and transition counts of a hidden Markov model.
+
+
+ HMM emission and transition counts format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for secondary structure (predicted or real) of an RNA molecule.
+
+
+ RNA secondary structure format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for secondary structure (predicted or real) of a protein molecule.
+
+
+ Protein secondary structure format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used to specify range(s) of sequence positions.
+
+
+ Sequence range format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for molecular sequence with possible unknown positions but without non-sequence characters.
+
+
+ pure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions but possibly with non-sequence characters.
+
+
+ unpure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions but without ambiguity characters.
+
+
+ unambiguous sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions and possible ambiguity characters.
+
+
+ ambiguous
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for map of repeats in molecular (typically nucleotide) sequences.
+
+
+ Sequence features (repeats) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for report on restriction enzyme recognition sites in nucleotide sequences.
+
+
+ Nucleic acid features (restriction sites) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.10
+
+ Format used for report on coding regions in nucleotide sequences.
+
+
+ Gene features (coding region) format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for clusters of molecular sequences.
+
+
+ Sequence cluster format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used for clusters of protein sequences.
+
+
+ Sequence cluster format (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used for clusters of nucleotide sequences.
+
+
+ Sequence cluster format (nucleic acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Format used for clusters of genes.
+
+ Gene cluster format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling EMBL entry format.
+
+
+ This concept may be used for the many non-standard EMBL-like text formats.
+ EMBL-like (text)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling FASTQ short read format.
+
+
+ This concept may be used for non-standard FASTQ short read-like formats.
+ FASTQ-like format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ true
+ XML format for EMBL entries.
+
+
+ EMBLXML
+ https://fairsharing.org/bsg-s001452/
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ true
+ Specific XML format for EMBL entries (only uses certain sections).
+
+
+ cdsxml
+ https://fairsharing.org/bsg-s001452/
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ INSDSeq provides the elements of a sequence as presented in the GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of additional structure.
+ INSD XML
+ INSDC XML
+
+
+ INSDSeq
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Geneseq sequence format.
+
+
+ geneseq
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text sequence format resembling uniprotkb entry format.
+
+
+ UniProt-like (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ UniProt entry sequence format.
+
+
+ UniProt format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ ipi sequence format.
+
+ ipi
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Abstract format used by MedLine database.
+
+
+ medline
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for ontologies.
+
+
+ Ontology format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A serialisation format conforming to the Open Biomedical Ontologies (OBO) model.
+
+
+ OBO format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling FASTA format.
+
+
+ This concept may also be used for the many non-standard FASTA-like formats.
+ FASTA-like (text)
+ http://filext.com/file-extension/FASTA
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data format for a molecular sequence record, typically corresponding to a full entry from a molecular sequence database.
+
+
+ Sequence record full format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data format for a molecular sequence record 'lite', typically molecular sequence and minimal metadata, such as an identifier of the sequence and/or a comment.
+
+
+ Sequence record lite format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An XML format for EMBL entries.
+
+
+ This is a placeholder for other more specific concepts. It should not normally be used for annotation.
+ EMBL format (XML)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling GenBank entry (plain text) format.
+
+
+ This concept may be used for the non-standard GenBank-like text formats.
+ GenBank-like format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for a sequence feature table.
+
+
+ Sequence feature table format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Format of a report on organism strain data / cell line.
+
+ Strain data format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format for a report of strain data as used for CIP database entries.
+
+ CIP strain data format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ PHYLIP file format for phylogenetic property data.
+
+ phylip property values
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format (HTML) for the STRING database of protein interaction.
+
+ STRING entry format (HTML)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format (XML) for the STRING database of protein interaction.
+
+
+ STRING entry format (XML)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GFF feature format (of indeterminate version).
+
+
+ GFF
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ Gene Transfer Format (GTF), a restricted version of GFF.
+
+
+ GTF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA format wrapped in HTML elements.
+
+
+ FASTA-HTML
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBL entry format wrapped in HTML elements.
+
+
+ EMBL-HTML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the BioCyc enzyme database.
+
+ BioCyc enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the Enzyme nomenclature database (ENZYME).
+
+ ENZYME enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on a gene from the PseudoCAP database.
+
+ PseudoCAP gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on a gene from the GeneCards database.
+
+ GeneCards gene report format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Textual format.
+ Plain text format
+ txt
+
+
+ Data in text format can be compressed into binary format, or can be a value of an XML element or attribute. Markup formats are not considered textual (or more precisely, not plain-textual).
+ Textual format
+ http://filext.com/file-extension/TXT
+ http://www.iana.org/assignments/media-types/media-types.xhtml#text
+ http://www.iana.org/assignments/media-types/text/plain
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ HTML format.
+ Hypertext Markup Language
+
+
+ HTML
+ http://filext.com/file-extension/HTML
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ xml
+
+
+
+ eXtensible Markup Language (XML) format.
+ eXtensible Markup Language
+
+
+ Data in XML format can be serialised into text, or binary format.
+ XML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Binary format.
+
+
+ Only specific native binary formats are listed under 'Binary format' in EDAM. Generic binary formats - such as any data being zipped, or any XML data being serialised into the Efficient XML Interchange (EXI) format - are not modelled in EDAM. Refer to http://wsio.org/compression_004.
+ Binary format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Typical textual representation of a URI.
+
+ URI format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the NCI-Nature pathways database.
+
+ NCI-Nature pathway entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A placeholder concept for visual navigation by dividing data formats by the content of the data that is represented.
+ Format (typed)
+
+
+ This concept exists only to assist EDAM maintenance and navigation in graphical browsers. It does not add semantic information. The concept branch under 'Format (typed)' provides an alternative organisation of the concepts nested under the other top-level branches ('Binary', 'HTML', 'RDF', 'Text' and 'XML'). All concepts under here are already included under those branches.
+ Format (by type of data)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+
+
+
+ Any ontology allowed, none mandatory. Preferably with URIs but URIs are not mandatory. Non-ontology terms are also allowed as the last resort in case of a lack of suitable ontology.
+
+
+
+ BioXSD-schema-based XML format of sequence-based data and some other common data - sequence records, alignments, feature records, references to resources, and more - optimised for integrative bioinformatics, Web services, and object-oriented programming.
+ BioJSON
+ BioXSD
+ BioXSD XML
+ BioXSD XML format
+ BioXSD data model
+ BioXSD format
+ BioXSD in XML
+ BioXSD in XML format
+ BioXSD+XML
+ BioXSD/GTrack
+ BioXSD|GTrack
+ BioYAML
+
+
+ 'BioXSD' belongs to the 'BioXSD|GTrack' ecosystem of generic formats. 'BioXSD in XML' is the XML format based on the common, unified 'BioXSD data model', a.k.a. 'BioXSD|BioJSON|BioYAML'.
+ BioXSD (XML)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A serialisation format conforming to the Resource Description Framework (RDF) model.
+ Resource Description Framework format
+ RDF
+ Resource Description Framework
+
+
+ RDF format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Genbank entry format wrapped in HTML elements.
+
+
+ GenBank-HTML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on protein features (domain composition).
+
+ Protein features (domains) format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling EMBL entry (plain text) format.
+
+
+ This concept may be used for the many non-standard EMBL-like formats.
+ EMBL-like format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling FASTQ short read format.
+
+
+ This concept may be used for non-standard FASTQ short read-like formats.
+ FASTQ-like format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling FASTA format.
+
+
+ This concept may be used for the many non-standard FASTA-like formats.
+ FASTA-like
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A sequence format resembling uniprotkb entry format.
+
+
+ uniprotkb-like format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for a sequence feature table.
+
+
+ Sequence feature table format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ OBO ontology text format.
+
+
+ OBO
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ OBO ontology XML format.
+
+
+ OBO-XML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for a molecular sequence record (text).
+
+
+ Sequence record format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for a molecular sequence record (XML).
+
+
+ Sequence record format (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ XML format for a sequence feature table.
+
+
+ Sequence feature table format (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for molecular sequence alignment information.
+
+
+ Alignment format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ XML format for molecular sequence alignment information.
+
+
+ Alignment format (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for a phylogenetic tree.
+
+
+ Phylogenetic tree format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ XML format for a phylogenetic tree.
+
+
+ Phylogenetic tree format (XML)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An XML format resembling EMBL entry format.
+
+
+ This concept may be used for any non-standard EMBL-like XML formats.
+ EMBL-like (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling GenBank entry (plain text) format.
+
+
+ This concept may be used for the non-standard GenBank-like formats.
+ GenBank-like format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the STRING database of protein interaction.
+
+ STRING entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for sequence assembly data.
+
+
+ Sequence assembly format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Text format (representation) of amino acid residues.
+
+ Amino acid identifier format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence without any unknown positions or ambiguity characters.
+
+
+ completely unambiguous
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence (characters ACGTU only) without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure nucleotide
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence (characters ACGT only) without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure dna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence (characters ACGU only) without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure rna sequence
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a raw molecular sequence (i.e. the alphabet used).
+
+
+ Raw sequence format
+ http://www.onto-med.de/ontologies/gfo.owl#Symbol_sequence
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ BAM format, the binary, BGZF-formatted compressed version of SAM format for alignment of nucleotide sequences (e.g. sequencing reads) to (a) reference sequence(s). May contain base-call and alignment qualities and other data.
+
+
+ BAM
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Sequence Alignment/Map (SAM) format for alignment of nucleotide sequences (e.g. sequencing reads) to (a) reference sequence(s). May contain base-call and alignment qualities and other data.
+
+
+ The format supports short and long reads (up to 128Mbp) produced by different sequencing platforms and is used to hold mapped data within the GATK and across the Broad Institute, the Sanger Centre, and throughout the 1000 Genomes project.
+ SAM
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Systems Biology Markup Language (SBML), the standard XML format for models of biological processes such as for example metabolism, cell signaling, and gene regulation.
+
+
+ SBML
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for any protein sequence without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure protein
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a bibliographic reference.
+
+
+ Bibliographic reference format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a sequence annotation track.
+
+
+ Sequence annotation track format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for molecular sequence alignment information that can hold sequence alignment(s) of only 2 sequences.
+
+
+ Alignment format (pair only)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of sequence variation annotation.
+
+
+ Sequence variation annotation format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some variant of Pearson MARKX alignment format.
+
+
+ markx0 variant
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some variant of Mega format for (typically aligned) sequences.
+
+
+ mega variant
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some variant of Phylip format for (aligned) sequences.
+
+
+ Phylip format variant
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AB1 binary format of raw DNA sequence reads (output of Applied Biosystems' sequencing analysis software). Contains an electropherogram and the DNA base sequence.
+
+
+ AB1 uses the generic binary Applied Biosystems, Inc. Format (ABIF).
+ AB1
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ ACE sequence assembly format including contigs, base-call qualities, and other metadata (version Aug 1998 and onwards).
+
+
+ ACE
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Browser Extensible Data (BED) format of sequence annotation track, typically to be displayed in a genome browser.
+
+
+ BED detail format includes 2 additional columns (http://genome.ucsc.edu/FAQ/FAQformat#format1.7) and BED 15 includes 3 additional columns for experiment scores (http://genomewiki.ucsc.edu/index.php/Microarray_track).
+ BED
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ bigBed format for large sequence annotation tracks, similar to textual BED format.
+
+
+ bigBed
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ wig
+
+ Wiggle format (WIG) of a sequence annotation track that consists of a value for each sequence position. Typically to be displayed in a genome browser.
+
+
+ WIG
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ bigWig format for large sequence annotation tracks that consist of a value for each sequence position. Similar to textual WIG format.
+
+
+ bigWig
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ PSL format of alignments, typically generated by BLAT or psLayout. Can be displayed in a genome browser like a sequence annotation track.
+
+
+ PSL
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Multiple Alignment Format (MAF) supporting alignments of whole genomes with rearrangements, directions, multiple pieces to the alignment, and so forth.
+
+
+ Typically generated by Multiz and TBA aligners; can be displayed in a genome browser like a sequence annotation track. This should not be confused with MIRA Assembly Format or Mutation Annotation Format.
+ MAF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ 2bit binary format of nucleotide sequences using 2 bits per nucleotide. In addition encodes unknown nucleotides and lower-case 'masking'.
+
+
+ 2bit
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ .nib (nibble) binary format of a nucleotide sequence using 4 bits per nucleotide (including unknown) and its lower-case 'masking'.
+
+
+ .nib
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ gp
+
+ genePred table format for gene prediction tracks.
+
+
+ genePred format has 3 main variations (http://genome.ucsc.edu/FAQ/FAQformat#format9 http://www.broadinstitute.org/software/igv/genePred). They reflect UCSC Browser DB tables.
+ genePred
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Personal Genome SNP (pgSnp) format for sequence variation tracks (indels and polymorphisms), supported by the UCSC Genome Browser.
+
+
+ pgSnp
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ axt format of alignments, typically produced from BLASTZ.
+
+
+ axt
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ lav
+
+ LAV format of alignments generated by BLASTZ and LASTZ.
+
+
+ LAV
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Pileup format of alignment of sequences (e.g. sequencing reads) to (a) reference sequence(s). Contains aligned bases per base of the reference sequence(s).
+
+
+ Pileup
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ vcf
+ vcf.gz
+ Variant Call Format (VCF) is a tabular format for storing genomic sequence variations.
+
+
+ 1000 Genomes Project has its own specification for encoding structural variations in VCF (https://www.internationalgenome.org/wiki/Analysis/Variant%20Call%20Format/VCF%20(Variant%20Call%20Format)%20version%204.0/encoding-structural-variants). This is based on VCF version 4.0 and not directly compatible with VCF version 4.3.
+ VCF
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Sequence Read Format (SRF) of sequence trace data. Supports submission to the NCBI Short Read Archive.
+
+
+ SRF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ ZTR format for storing chromatogram data from DNA sequencing instruments.
+
+
+ ZTR
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Genome Variation Format (GVF). A GFF3-compatible format with defined header and attribute tags for sequence variation.
+
+
+ GVF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ bcf
+ bcf.gz
+
+ BCF is the binary version of Variant Call Format (VCF) for sequence variation (indels, polymorphisms, structural variation).
+
+
+ BCF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Format of a matrix (array) of numerical values.
+
+
+ Matrix format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Format of data concerning the classification of the sequences and/or structures of protein structural domain(s).
+
+
+ Protein domain classification format
+
+
+
+
+
+
+
+
+ beta13
+ Format of raw SCOP domain classification data files.
+
+
+ These are the parsable data files provided by SCOP.
+ Raw SCOP domain classification format
+
+
+
+
+
+
+
+
+ beta13
+ Format of raw CATH domain classification data files.
+
+
+ These are the parsable data files provided by CATH.
+ Raw CATH domain classification format
+
+
+
+
+
+
+
+
+ beta13
+ Format of summary of domain classification information for a CATH domain.
+
+
+ The report (for example http://www.cathdb.info/domain/1cukA01) includes CATH codes for levels in the hierarchy for the domain, level descriptions and relevant data and links.
+ CATH domain report format
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ Systems Biology Result Markup Language (SBRML), the standard XML format for simulated or calculated results (e.g. trajectories) of systems biology models.
+
+
+ SBRML
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ BioPAX is an exchange format for pathway data, with its data model defined in OWL.
+
+
+ BioPAX
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ EBI Application Result XML is a format returned by sequence similarity search Web services at EBI.
+
+
+ EBI Application Result XML
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ XML Molecular Interaction Format (MIF), standardised by HUPO PSI MI.
+ MIF
+
+
+ PSI MI XML (MIF)
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ phyloXML is a standardised XML format for phylogenetic trees, networks, and associated data.
+
+
+ phyloXML
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ NeXML is a standardised XML format for rich phyloinformatic data.
+
+
+ NeXML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ MAGE-ML XML format for microarray expression data, standardised by MGED (now FGED).
+
+
+ MAGE-ML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ MAGE-TAB textual format for microarray expression data, standardised by MGED (now FGED).
+
+
+ MAGE-TAB
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ GCDML XML format for genome and metagenome metadata according to MIGS/MIMS/MIMARKS information standards, standardised by the Genomic Standards Consortium (GSC).
+
+
+ GCDML
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+
+
+
+
+
+
+
+ GTrack is a generic and optimised tabular format for genome or sequence feature tracks. GTrack unifies the power of other track formats (e.g. GFF3, BED, WIG), and while optimised in size, adds more flexibility, customisation, and automation ("machine understandability").
+ BioXSD/GTrack GTrack
+ BioXSD|GTrack GTrack
+ GTrack ecosystem of formats
+ GTrack format
+ GTrack|BTrack|GSuite GTrack
+ GTrack|GSuite|BTrack GTrack
+
+
+ 'GTrack' belongs to the 'BioXSD|GTrack' ecosystem of generic formats, and in particular to its subset, the 'GTrack ecosystem' (GTrack, GSuite, BTrack). 'GTrack' is the tabular format for representing features of sequences and genomes.
+ GTrack
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+ true
+ Data format for a report of information derived from a biological pathway or network.
+
+
+ Biological pathway or network report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+ true
+ Data format for annotation on a laboratory experiment.
+
+
+ Experiment annotation format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ Cytoband format for chromosome cytobands.
+
+
+ Reflects a UCSC Browser DB table.
+ Cytoband format
+
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ CopasiML, the native format of COPASI.
+
+
+ CopasiML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+
+
+ CellML, the format for mathematical models of biological and other networks.
+
+
+ CellML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+
+
+
+ Tabular Molecular Interaction format (MITAB), standardised by HUPO PSI MI.
+
+
+ PSI MI TAB (MITAB)
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ Protein affinity format (PSI-PAR), standardised by HUPO PSI MI. It is compatible with PSI MI XML (MIF) and uses the same XML Schema.
+
+
+ PSI-PAR
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ mzML format for raw spectrometer output data, standardised by HUPO PSI MSS.
+
+
+ mzML is the successor and unifier of the mzData format developed by PSI and mzXML developed at the Seattle Proteome Center.
+ mzML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ true
+ Format for mass spectra and derived data, including peptide sequences etc.
+
+
+ Mass spectrometry data format
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ TraML (Transition Markup Language) is the format for mass spectrometry transitions, standardised by HUPO PSI MSS.
+
+
+ TraML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ mzIdentML is the exchange format for peptides and proteins identified from mass spectra, standardised by HUPO PSI PI. It can be used for outputs of proteomics search engines.
+
+
+ mzIdentML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ mzQuantML is the format for quantitation values associated with peptides, proteins and small molecules from mass spectra, standardised by HUPO PSI PI. It can be used for outputs of quantitation software for proteomics.
+
+
+ mzQuantML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ GelML is the format for describing the process of gel electrophoresis, standardised by HUPO PSI PS.
+
+
+ GelML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ spML is the format for describing proteomics sample processing, other than using gels, prior to mass spectrometric protein identification, standardised by HUPO PSI PS. It may also be applicable for metabolomics.
+
+
+ spML
+
+
+
+
+
+
+
+
+
+ 1.2
+ A human-readable encoding for the Web Ontology Language (OWL).
+
+
+ OWL Functional Syntax
+
+
+
+
+
+
+
+
+
+ 1.2
+ A syntax for writing OWL class expressions.
+
+
+ This format was influenced by the OWL Abstract Syntax and the DL style syntax.
+ Manchester OWL Syntax
+
+
+
+
+
+
+
+
+
+ 1.2
+ A superset of the "Description-Logic Knowledge Representation System Specification from the KRSS Group of the ARPA Knowledge Sharing Effort".
+
+
+ This format is used in Protege 4.
+ KRSS2 Syntax
+
+
+
+
+
+
+
+
+
+ 1.2
+ The Terse RDF Triple Language (Turtle) is a human-friendly serialisation format for RDF (Resource Description Framework) graphs.
+
+
+ The SPARQL Query Language incorporates a very similar syntax.
+ Turtle
+
+
+
+
+
+
+
+
+
+ 1.2
+ nt
+ A plain text serialisation format for RDF (Resource Description Framework) graphs, and a subset of the Turtle (Terse RDF Triple Language) format.
+
+
+ N-Triples should not be confused with Notation 3 which is a superset of Turtle.
+ N-Triples
+
+
+
+
+
+
+
+
+
+ 1.2
+ n3
+ A shorthand non-XML serialisation of Resource Description Framework model, designed with human-readability in mind.
+ N3
+
+
+ Notation3
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ OWL ontology XML serialisation format.
+ OWL
+
+
+ OWL/XML
+
+
+
+
+
+
+
+
+
+ 1.3
+
+
+ The A2M format is used as the primary format for multiple alignments of protein or nucleic-acid sequences in the SAM suite of tools. It is a small modification of FASTA format for sequences and is compatible with most tools that read FASTA.
+
+
+ A2M
+
+
+
+
+
+
+
+
+
+ 1.3
+
+
+ Standard flowgram format (SFF) is a binary file format used to encode results of pyrosequencing from the 454 Life Sciences platform for high-throughput sequencing.
+ Standard flowgram format
+
+
+ SFF
+
+
+
+
+
+
+
+
+ 1.3
+
+ The MAP file describes SNPs and is used by the Plink package.
+ Plink MAP
+
+
+ MAP
+
+
+
+
+
+
+
+
+ 1.3
+
+ The PED file describes individuals and genetic data and is used by the Plink package.
+ Plink PED
+
+
+ PED
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Data format for metadata on an individual and their genetic data.
+
+
+ Individual genetic data format
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ The PED/MAP file describes data used by the Plink package.
+ Plink PED/MAP
+
+
+ PED/MAP
+
+
+
+
+
+
+
+
+
+ 1.3
+
+
+ File format of a CT (Connectivity Table) file from the RNAstructure package.
+ Connect format
+ Connectivity Table file format
+
+
+ CT
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ XRNA old input style format.
+
+
+ SS
+
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ RNA Markup Language.
+
+
+ RNAML
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ Format for the Genetic Data Environment (GDE).
+
+
+ GDE
+
+
+
+
+
+
+
+
+ 1.3
+
+ A multiple alignment in vertical format, as used in the AMPS (Alignment of Multiple Protein Sequences) package.
+ Block file format
+
+
+ BLC
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Format of a data index of some type.
+
+
+ Data index format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ BAM indexing format.
+
+
+ BAI
+
+
+
+
+
+
+
+
+ 1.3
+
+ HMMER profile HMM file for HMMER versions 2.x.
+
+
+ HMMER2
+
+
+
+
+
+
+
+
+ 1.3
+
+ HMMER profile HMM file for HMMER versions 3.x.
+
+
+ HMMER3
+
+
+
+
+
+
+
+
+ 1.3
+
+ PO is the output format of Partial Order Alignment program (POA) performing Multiple Sequence Alignment (MSA).
+
+
+ PO
+
+
+
+
+
+
+
+
+
+ 1.3
+ XML format as produced by the NCBI Blast package.
+
+
+ BLAST XML results format
+
+
+
+
+
+
+
+
+
+ 1.7
+ http://www.ebi.ac.uk/ena/software/cram-usage#format_specification http://samtools.github.io/hts-specs/CRAMv2.1.pdf
+ http://www.ebi.ac.uk/ena/software/cram-usage#format_specification http://samtools.github.io/hts-specs/CRAMv2.1.pdf
+ Reference-based compression of alignment format.
+
+
+ CRAM
+
+
+
+
+
+
+
+
+
+ 1.7
+ json
+
+
+
+ JavaScript Object Notation format; a lightweight, text-based format to represent tree-structured data using key-value pairs.
+ JavaScript Object Notation
+
+
+ JSON
+
+
+
+
+
+
+
+
+
+ 1.7
+ Encapsulated PostScript format.
+
+
+ EPS
+
+
+
+
+
+
+
+
+ 1.7
+ Graphics Interchange Format.
+
+
+ GIF
+
+
+
+
+
+
+
+
+
+ 1.7
+ Microsoft Excel spreadsheet format.
+ Microsoft Excel format
+
+
+ xls
+
+
+
+
+
+
+
+
+ 1.7
+ tab
+ tsv
+
+
+
+ Tabular data represented as tab-separated values in a text file.
+ Tab-delimited
+ Tab-separated values
+ tab
+
+
+ TSV
+
+
+
+
+
+
+
+
+ 1.7
+ 1.10
+
+ Format of a file of gene expression data, e.g. a gene expression matrix or profile.
+
+
+ Gene expression data format
+ true
+
+
+
+
+
+
+
+
+
+ 1.7
+ Format of the Cytoscape input file, in which gene expression ratios or values are specified over one or more experiments.
+
+
+ Cytoscape input file format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ https://github.com/BenLangmead/bowtie/blob/master/MANUAL
+ Bowtie format for indexed reference genome for "small" genomes.
+ Bowtie index format
+
+
+ ebwt
+
+
+
+
+
+
+
+
+ 1.7
+ http://www.molbiol.ox.ac.uk/tutorials/Seqlab_GCG.pdf
+ Rich sequence format.
+ GCG RSF
+
+
+ RSF-format files contain one or more sequences that may or may not be related. In addition to the sequence data, each sequence can be annotated with descriptive sequence information (from the GCG manual).
+ RSF
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Some format based on the GCG format.
+
+
+ GCG format variant
+
+
+
+
+
+
+
+
+
+ 1.7
+ http://rothlab.ucdavis.edu/genhelp/chapter_2_using_sequences.html#_Creating_and_Editing_Single_Sequenc
+ Bioinformatics Sequence Markup Language format.
+
+
+ BSML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ https://github.com/BenLangmead/bowtie/blob/master/MANUAL
+ Bowtie format for indexed reference genome for "large" genomes.
+ Bowtie long index format
+
+
+ ebwtl
+
+
+
+
+
+
+
+
+
+ 1.8
+
+ Ensembl standard format for variation data.
+
+
+ Ensembl variation file format
+
+
+
+
+
+
+
+
+
+ 1.8
+ Microsoft Word format.
+ Microsoft Word format
+ doc
+
+
+ docx
+
+
+
+
+
+
+
+
+ 1.8
+ true
+ Format of documents including word processor, spreadsheet and presentation.
+
+
+ Document format
+
+
+
+
+
+
+
+
+
+ 1.8
+ Portable Document Format.
+
+
+ PDF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.9
+ true
+ Format used for images and image metadata.
+
+
+ Image format
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ Medical image format corresponding to the Digital Imaging and Communications in Medicine (DICOM) standard.
+
+
+ DICOM format
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ nii
+ An open file format from the Neuroimaging Informatics Technology Initiative (NIfTI) commonly used to store brain imaging data obtained using Magnetic Resonance Imaging (MRI) methods.
+ NIFTI format
+ NIfTI-1 format
+
+
+ nii
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ Text-based tagged file format for medical images generated using the MetaImage software package.
+ MetaImage format
+
+
+ mhd
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ Nearly Raw Raster Data format designed to support scientific visualisation and image processing involving N-dimensional raster data.
+
+
+ nrrd
+
+
+
+
+
+
+
+
+ 1.9
+ File format used for scripts written in the R programming language for execution within the R software environment, typically for statistical computation and graphics.
+
+
+ R file format
+
+
+
+
+
+
+
+
+ 1.9
+ File format used for scripts for the Statistical Package for the Social Sciences.
+
+
+ SPSS
+
+
+
+
+
+
+
+
+ 1.9
+
+ eml
+ mht
+ mhtml
+
+
+
+ MIME HTML format for Web pages, which can include external resources, including images, Flash animations and so on.
+ HTML email format
+ HTML email message format
+ MHT
+ MHT format
+ MHTML format
+ MIME HTML
+ MIME HTML format
+ eml
+ MIME multipart
+ MIME multipart format
+ MIME multipart message
+ MIME multipart message format
+
+
+ MHTML is not strictly an HTML format, it is encoded as an HTML email message (although with multipart/related instead of multipart/alternative). It, however, contains the main HTML block as its core, and thus it is for practical reasons included in EDAM as a specialisation of 'HTML'.
+ MHTML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.10
+ Proprietary file format for (raw) BeadArray data used by genomewide profiling platforms from Illumina Inc. This format is output directly from the scanner and stores summary intensities for each probe-type on an array.
+
+
+ IDAT
+
+
+
+
+
+
+
+
+
+ 1.10
+
+ Joint Photographic Experts Group file format for lossy graphics files.
+ JPEG
+ jpeg
+
+
+ Sequence of segments with markers. Each marker begins with a 0xFF byte, followed by a byte indicating the marker type.
+ JPG
+
+
+
+
+
+
+
+
+
+ 1.10
+ Reporter Code Count-A data file (.csv) output by the Nanostring nCounter Digital Analyzer, which contains gene sample information, probe information and probe counts.
+
+
+ rcc
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ ARFF (Attribute-Relation File Format) is an ASCII text file format that describes a list of instances sharing a set of attributes.
+
+
+ This file format is for machine learning.
+ arff
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ AFG is a single text-based file assembly format that holds read and consensus information together.
+
+
+ afg
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ The bedGraph format allows display of continuous-valued data in track format. This display type is useful for probability scores and transcriptome data.
+
+
+ Holds a tab-delimited chromosome /start /end / datavalue dataset.
+ bedgraph
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Browser Extensible Data (BED) format of sequence annotation track that strictly does not contain non-standard fields beyond the first 3 columns.
+
+
+ Galaxy allows BED files to contain non-standard fields beyond the first 3 columns, some other implementations do not.
+ bedstrict
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ BED file format where each feature is described by chromosome, start, end, name, score, and strand.
+
+
+ Tab delimited data in strict BED format - no non-standard columns allowed; column count forced to 6
+ bed6
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ A BED file where each feature is described by all twelve columns.
+
+
+ Tab delimited data in strict BED format - no non-standard columns allowed; column count forced to 12
+ bed12
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Tabular format of chromosome names and sizes used by Galaxy.
+
+
+ Galaxy allows BED files to contain non-standard fields beyond the first 3 columns, some other implementations do not.
+ chrominfo
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Custom Sequence annotation track format used by Galaxy.
+
+
+ Used for tracks/track views within galaxy.
+ customtrack
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Color space FASTA format sequence variant.
+
+
+ FASTA format extended for color space information.
+ csfasta
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ HDF5 is a data model, library, and file format for storing and managing data, based on Hierarchical Data Format (HDF).
+ h5
+
+
+ An HDF5 file appears to the user as a directed graph. The nodes of this graph are the higher-level HDF5 objects that are exposed by the HDF5 APIs: Groups, Datasets, Named datatypes. Currently supported by the Python MDTraj package.
+ HDF5 is the new version, according to the HDF group, a completely different technology (https://support.hdfgroup.org/products/hdf4/) compared to HDF.
+ HDF5
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ A versatile bitmap format.
+
+
+ The TIFF format is perhaps the most versatile and diverse bitmap format in existence. Its extensible nature and support for numerous data compression schemes allow developers to customize the TIFF format to fit any peculiar data storage needs.
+ TIFF
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Standard bitmap storage format in the Microsoft Windows environment.
+
+
+ Although it is based on Windows internal bitmap data structures, it is supported by many non-Windows and non-PC applications.
+ BMP
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ IM is a format used by LabEye and other applications based on the IFUNC image processing library.
+
+
+ IFUNC library reads and writes most uncompressed interchange versions of this format.
+ im
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ pcd
+ Photo CD format, which is the highest resolution format for images on a CD.
+
+
+ PCD was developed by Kodak. A PCD file contains five different resolutions (ranging from low to high) of a slide or film negative. Because of this, PCD is often used by many photographers and graphics professionals for high-end printed applications.
+ pcd
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ PCX is an image file format that uses a simple form of run-length encoding. It is lossless.
+
+
+ pcx
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ The PPM format is a lowest common denominator color image file format.
+
+
+ ppm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ PSD (Photoshop Document) is a proprietary file that allows the user to work with the images' individual layers even after the file has been saved.
+
+
+ psd
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ X BitMap is a plain-text binary image format used by the X Window System for storing cursor and icon bitmaps used in the X GUI.
+
+
+ The XBM format was replaced by XPM for X11 in 1989.
+ xbm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ X PixMap (XPM) is an image file format used by the X Window System, it is intended primarily for creating icon pixmaps, and supports transparent pixels.
+
+
+ Sequence of segments with markers. Begins with byte of 0xFF and follows by marker type.
+ xpm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ RGB file format is the native raster graphics file format for Silicon Graphics workstations.
+
+
+ rgb
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ The PBM format is a lowest common denominator monochrome file format. It serves as the common language of a large family of bitmap image conversion filters.
+
+
+ pbm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ The PGM format is a lowest common denominator grayscale file format.
+
+
+ It is designed to be extremely easy to learn and write programs for.
+ pgm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ png
+ PNG is a file format for image compression.
+
+
+ It is expected to replace the Graphics Interchange Format (GIF).
+ PNG
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Scalable Vector Graphics (SVG) is an XML-based vector image format for two-dimensional graphics with support for interactivity and animation.
+ Scalable Vector Graphics
+
+
+ The SVG specification is an open standard developed by the World Wide Web Consortium (W3C) since 1999.
+ SVG
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Sun Raster is a raster graphics file format used on SunOS by Sun Microsystems.
+
+
+ The SVG specification is an open standard developed by the World Wide Web Consortium (W3C) since 1999.
+ rast
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.11
+ true
+ Textual report format for sequence quality for reports from sequencing machines.
+
+
+ Sequence quality report format (text)
+
+
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences).
+
+
+ Phred quality scores are defined as a property which is logarithmically related to the base-calling error probabilities.
+ qual
+
+
+
+
+
+
+
+
+
+ 1.11
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) for Solexa/Illumina 1.0 format.
+
+
+ Solexa/Illumina 1.0 format can encode a Solexa/Illumina quality score from -5 to 62 using ASCII 59 to 126 (although in raw read data Solexa scores from -5 to 40 only are expected)
+ qualsolexa
+
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) from Illumina 1.5 and before Illumina 1.8.
+
+
+ Starting in Illumina 1.5 and before Illumina 1.8, the Phred scores 0 to 2 have a slightly different meaning. The values 0 and 1 are no longer used and the value 2, encoded by ASCII 66 "B", is used also at the end of reads as a Read Segment Quality Control Indicator.
+ qualillumina
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) for SOLiD data.
+
+
+ For SOLiD data, the sequence is in color space, except the first position. The quality values are those of the Sanger format.
+ qualsolid
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) from 454 sequencers.
+
+
+ qual454
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Human ENCODE peak format.
+
+
+ Format that covers both the broad peak format and narrow peak format from ENCODE.
+ ENCODE peak format
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Human ENCODE narrow peak format.
+
+
+ Format that covers both the broad peak format and narrow peak format from ENCODE.
+ ENCODE narrow peak format
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Human ENCODE broad peak format.
+
+
+ ENCODE broad peak format
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ bgz
+ Blocked GNU Zip format.
+
+
+ BAM files are compressed using a variant of GZIP (GNU ZIP), into a format called BGZF (Blocked GNU Zip Format).
+ bgzip
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ TAB-delimited genome position file index format.
+
+
+ tabix
+
+
+
+
+
+
+
+
+ 1.11
+ true
+ Data format for graph data.
+
+
+ Graph format
+
+
+
+
+
+
+
+
+ 1.11
+
+ XML-based format used to store graph descriptions within Galaxy.
+
+
+ xgmml
+
+
+
+
+
+
+
+
+ 1.11
+
+ SIF (simple interaction file) Format - a network/pathway format used for instance in cytoscape.
+
+
+ sif
+
+
+
+
+
+
+
+
+
+ 1.11
+ MS Excel spreadsheet format consisting of a set of XML documents stored in a ZIP-compressed file.
+
+
+ xlsx
+
+
+
+
+
+
+
+
+ 1.11
+
+ Data format used by the SQLite database.
+
+
+ SQLite format
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Data format used by the SQLite database conformant to the Gemini schema.
+
+
+ Gemini SQLite format
+
+
+
+
+
+
+
+
+ 1.11
+ Duplicate of http://edamontology.org/format_3326
+ 1.20
+
+
+ Format of a data index of some type.
+
+
+ Index format
+ true
+
+
+
+
+
+
+
+
+
+ 1.11
+ An index of a genome database, indexed for use by the snpeff tool.
+
+
+ snpeffdb
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Binary format used by MATLAB files to store workspace variables.
+ .mat file format
+ MAT file format
+ MATLAB file format
+
+
+ MAT
+
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Format used by netCDF software library for writing and reading chromatography-MS data files. Also used to store trajectory atom coordinates information, such as the ones obtained by Molecular Dynamics simulations.
+ ANDI-MS
+
+
+ Network Common Data Form (NetCDF) library is supported by AMBER MD package from version 9.
+ netCDF
+
+
+
+
+
+
+
+
+ 1.12
+ mgf
+ Mascot Generic Format. Encodes multiple MS/MS spectra in a single file.
+
+
+ Files include *m*/*z*, intensity pairs separated by headers; headers can contain a bit more information, including search engine instructions.
+ MGF
+
+
+
+
+
+
+
+
+ 1.12
+ Spectral data format file where each spectrum is written to a separate file.
+
+
+ Each file contains one header line for the known or assumed charge and the mass of the precursor peptide ion, calculated from the measured *m*/*z* and the charge. This one line was then followed by all the *m*/*z*, intensity pairs that represent the spectrum.
+ dta
+
+
+
+
+
+
+
+
+ 1.12
+ Spectral data file similar to dta.
+
+
+ Differ from .dta only in subtleties of the header line format and content and support the added feature of being able to store multiple spectra in a single file.
+ pkl
+
+
+
+
+
+
+
+
+ 1.12
+ https://dx.doi.org/10.1038%2Fnbt1031
+ Common file format for proteomics mass spectrometric data developed at the Seattle Proteome Center/Institute for Systems Biology.
+
+
+ mzXML
+
+
+
+
+
+
+
+
+
+ 1.12
+ http://sashimi.sourceforge.net/schema_revision/pepXML/pepXML_v118.xsd
+ Open data format for the storage, exchange, and processing of peptide sequence assignments of MS/MS scans, intended to provide a common data output format for many different MS/MS search engines and subsequent peptide-level analyses.
+
+
+ pepXML
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Graphical Pathway Markup Language (GPML) is an XML format used for exchanging biological pathways.
+
+
+ GPML
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ oxlicg
+
+
+
+ A list of k-mers and their occurrences in a dataset. Can also be used as an implicit De Bruijn graph.
+ K-mer countgraph
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ mzTab is a tab-delimited format for mass spectrometry-based proteomics and metabolomics results.
+
+
+ mzTab
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+ imzml
+
+ imzML metadata is a data format for mass spectrometry imaging metadata.
+
+
+ imzML data are recorded in 2 files: '.imzML' is a metadata XML file based on mzML by HUPO-PSI, and '.ibd' is a binary file containing the mass spectra. This entry is for the metadata XML file.
+ imzML metadata file
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ qcML is an XML format for quality-related data of mass spectrometry and other high-throughput measurements.
+
+
+ The focus of qcML is towards mass spectrometry based proteomics, but the format is suitable for metabolomics and sequencing as well.
+ qcML
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ PRIDE XML is an XML format for mass spectra, peptide and protein identifications, and metadata about a corresponding measurement, sample, experiment.
+
+
+ PRIDE XML
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Simulation Experiment Description Markup Language (SED-ML) is an XML format for encoding simulation setups, according to the MIASE (Minimum Information About a Simulation Experiment) requirements.
+
+
+ SED-ML
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Open Modeling EXchange format (OMEX) is a ZIPped format for encapsulating all information necessary for a modeling and simulation project in systems biology.
+
+
+ An OMEX file is a ZIP container that includes a manifest file, listing the content of the archive, an optional metadata file adding information about the archive and its content, and the files describing the model. OMEX is one of the standardised formats within COMBINE (Computational Modeling in Biology Network).
+ COMBINE OMEX
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ The Investigation / Study / Assay (ISA) tab-delimited (TAB) format incorporates metadata from experiments employing a combination of technologies.
+
+
+ ISA-TAB is based on MAGE-TAB. Other than tabular, the ISA model can also be represented in RDF, and in JSON (compliant with a set of defined JSON Schemata).
+ ISA-TAB
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ SBtab is a tabular format for biochemical network models.
+
+
+ SBtab
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Biological Connection Markup Language (BCML) is an XML format for biological pathways.
+
+
+ BCML
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Biological Dynamics Markup Language (BDML) is an XML format for quantitative data describing biological dynamics.
+
+
+ BDML
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Biological Expression Language (BEL) is a textual format for representing scientific findings in life sciences in a computable form.
+
+
+ BEL
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ SBGN-ML is an XML format for Systems Biology Graphical Notation (SBGN) diagrams of biological pathways or networks.
+
+
+ SBGN-ML
+
+
+
+
+
+
+
+
+
+ 1.13
+
+ agp
+
+ AGP is a tabular format for a sequence assembly (a contig, a scaffold/supercontig, or a chromosome).
+
+
+ AGP
+
+
+
+
+
+
+
+
+ 1.13
+ PostScript format.
+ PostScript
+
+
+ PS
+
+
+
+
+
+
+
+
+ 1.13
+
+ sra
+ SRA archive format (SRA) is the archive format used for input to the NCBI Sequence Read Archive.
+ SRA
+ SRA archive format
+
+
+ SRA format
+
+
+
+
+
+
+
+
+ 1.13
+
+ VDB ('vertical database') is the native format used for export from the NCBI Sequence Read Archive.
+ SRA native format
+
+
+ VDB
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+ Index file format used by the samtools package to index TAB-delimited genome position files.
+
+
+ Tabix index file format
+
+
+
+
+
+
+
+
+ 1.13
+ A five-column, tab-delimited table of feature locations and qualifiers for importing annotation into an existing Sequin submission (an NCBI tool for submitting and updating GenBank entries).
+
+
+ Sequin format
+
+
+
+
+
+
+
+
+ 1.14
+ Proprietary mass-spectrometry format of Thermo Scientific's ProteomeDiscoverer software.
+ Magellan storage file format
+
+
+ This format corresponds to an SQLite database, and you can look into the files with e.g. SQLiteStudio3. There are also some readers (http://doi.org/10.1021/pr2005154) and converters (http://doi.org/10.1016/j.jprot.2015.06.015) for this format available, which re-engineered the database schema, but there is no official DB schema specification of Thermo Scientific for the format.
+ MSF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.14
+ true
+ Data format for biodiversity data.
+
+
+ Biodiversity data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Exchange format of the Access to Biological Collections Data (ABCD) Schema; a standard for the access to and exchange of data about specimens and observations (primary biodiversity data).
+ ABCD
+
+
+ ABCD format
+
+
+
+
+
+
+
+
+
+ 1.14
+ Tab-delimited text files of GenePattern that contain a column for each sample, a row for each gene, and an expression value for each gene in each sample.
+ GCT format
+ Res format
+
+
+ GCT/Res format
+
+
+
+
+
+
+
+
+
+ 1.14
+ wiff
+ Mass spectrum file format from QSTAR and QTRAP instruments (ABI/Sciex).
+ wiff
+
+
+ WIFF format
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Output format used by X! series search engines that is based on the XML language BIOML.
+
+
+ X!Tandem XML
+
+
+
+
+
+
+
+
+
+ 1.14
+ Proprietary file format for mass spectrometry data from Thermo Scientific.
+
+
+ Proprietary format for which documentation is not available.
+ Thermo RAW
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ "Raw" result file from Mascot database search.
+
+
+ Mascot .dat file
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Format of peak list files from Andromeda search engine (MaxQuant) that consist of arbitrarily many spectra.
+ MaxQuant APL
+
+
+ MaxQuant APL peaklist format
+
+
+
+
+
+
+
+
+ 1.14
+
+ Synthetic Biology Open Language (SBOL) is an XML format for the specification and exchange of biological design information in synthetic biology.
+
+
+ SBOL introduces a standardised format for the electronic exchange of information on the structural and functional aspects of biological designs.
+ SBOL
+
+
+
+
+
+
+
+
+ 1.14
+
+ PMML uses XML to represent mining models. The structure of the models is described by an XML Schema.
+
+
+ One or more mining models can be contained in a PMML document.
+ PMML
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Image file format used by the Open Microscopy Environment (OME).
+
+
+ An OME-TIFF dataset consists of one or more files in standard TIFF or BigTIFF format, with the file extension .ome.tif or .ome.tiff, and an identical (or in the case of multiple files, nearly identical) string of OME-XML metadata embedded in the ImageDescription tag of each file's first IFD (Image File Directory). BigTIFF file extensions are also permitted, with the file extension .ome.tf2, .ome.tf8 or .ome.btf, but note these file extensions are an addition to the original specification, and software using an older version of the specification may not be able to handle these file extensions.
+ OME develops open-source software and data format standards for the storage and manipulation of biological microscopy data. It is a joint project between universities, research establishments, industry and the software development community.
+ OME-TIFF
+
+
+
+
+
+
+
+
+ 1.14
+
+ The LocARNA PP format combines sequence or alignment information and (respectively, single or consensus) ensemble probabilities into a PP 2.0 record.
+
+
+ Format for multiple aligned or single sequences together with the probabilistic description of the (consensus) RNA secondary structure ensemble by probabilities of base pairs, base pair stackings, and base pairs and unpaired bases in the loop of base pairs.
+ LocARNA PP
+
+
+
+
+
+
+
+
+ 1.14
+
+ Input format used by the Database of Genotypes and Phenotypes (dbGaP).
+
+
+ The Database of Genotypes and Phenotypes (dbGaP) is a National Institutes of Health (NIH) sponsored repository charged to archive, curate and distribute information produced by studies investigating the interaction of genotype and phenotype.
+ dbGaP format
+
+
+
+
+
+
+
+
+
+
+ 1.15
+
+ biom
+ The BIological Observation Matrix (BIOM) is a format for representing biological sample by observation contingency tables in broad areas of comparative omics. The primary use of this format is to represent OTU tables and metagenome tables.
+ BIological Observation Matrix format
+ biom
+
+
+ BIOM is a recognised standard for the Earth Microbiome Project, and is a project supported by Genomics Standards Consortium. Supported in QIIME, Mothur, MEGAN, etc.
+ BIOM format
+
+
+
+
+
+
+
+
+
+ 1.15
+
+
+ A format for storage, exchange, and processing of protein identifications created from ms/ms-derived peptide sequence data.
+
+
+ No human-consumable information about this format is available (see http://tools.proteomecenter.org/wiki/index.php?title=Formats:protXML).
+ protXML
+ http://doi.org/10.1038/msb4100024
+ http://sashimi.sourceforge.net/schema_revision/protXML/protXML_v3.xsd
+
+
+
+
+
+
+
+
+
+
+ 1.15
+ true
+ A linked data format enables publishing structured data as linked data (Linked Data), so that the data can be interlinked and become more useful through semantic queries.
+ Semantic Web format
+
+
+ Linked data format
+
+
+
+
+
+
+
+
+
+
+
+ 1.15
+
+ jsonld
+
+
+ JSON-LD, or JavaScript Object Notation for Linked Data, is a method of encoding Linked Data using JSON.
+ JavaScript Object Notation for Linked Data
+ jsonld
+
+
+ JSON-LD
+
+
+
+
+
+
+
+
+
+ 1.15
+
+ yaml
+ yml
+
+ YAML (YAML Ain't Markup Language) is a human-readable tree-structured data serialisation language.
+ YAML Ain't Markup Language
+ yml
+
+
+ Data in YAML format can be serialised into text, or binary format.
+ YAML version 1.2 is a superset of JSON; prior versions were "not strictly compatible".
+ YAML
+
+
+
+
+
+
+
+
+
+ 1.16
+ Tabular data represented as values in a text file delimited by some character.
+ Delimiter-separated values
+ Tabular format
+
+
+ DSV
+
+
+
+
+
+
+
+
+
+ 1.16
+ csv
+
+
+
+ Tabular data represented as comma-separated values in a text file.
+ Comma-separated values
+
+
+ CSV
+
+
+
+
+
+
+
+
+
+ 1.16
+ out
+ "Raw" result file from SEQUEST database search.
+
+
+ SEQUEST .out file
+
+
+
+
+
+
+
+
+
+ 1.16
+ http://ftp.mi.fu-berlin.de/pub/OpenMS/release1.9-documentation/html/classOpenMS_1_1IdXMLFile.html
+ http://open-ms.sourceforge.net/schemas/
+ XML file format for files containing information about peptide identifications from mass spectrometry data analysis carried out with OpenMS.
+
+
+ idXML
+
+
+
+
+
+
+
+
+ 1.16
+ Data table formatted such that it can be passed/streamed within the KNIME platform.
+
+
+ KNIME datatable format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ UniProtKB XML sequence features format is an XML format available for downloading UniProt entries.
+ UniProt XML
+ UniProt XML format
+ UniProtKB XML format
+
+
+ UniProtKB XML
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ UniProtKB RDF sequence features format is an RDF format available for downloading UniProt entries (in RDF/XML).
+ UniProt RDF
+ UniProt RDF format
+ UniProt RDF/XML
+ UniProt RDF/XML format
+ UniProtKB RDF format
+ UniProtKB RDF/XML
+ UniProtKB RDF/XML format
+
+
+ UniProtKB RDF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+ BioJSON is a BioXSD-schema-based JSON format of sequence-based data and some other common data - sequence records, alignments, feature records, references to resources, and more - optimised for integrative bioinformatics, web applications and APIs, and object-oriented programming.
+ BioJSON (BioXSD data model)
+ BioJSON format (BioXSD)
+ BioXSD BioJSON
+ BioXSD BioJSON format
+ BioXSD JSON
+ BioXSD JSON format
+ BioXSD in JSON
+ BioXSD in JSON format
+ BioXSD+JSON
+ BioXSD/GTrack BioJSON
+ BioXSD|BioJSON|BioYAML BioJSON
+ BioXSD|GTrack BioJSON
+
+
+ Work in progress. 'BioXSD' belongs to the 'BioXSD|GTrack' ecosystem of generic formats. 'BioJSON' is the JSON format based on the common, unified 'BioXSD data model', a.k.a. 'BioXSD|BioJSON|BioYAML'.
+ BioJSON (BioXSD)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+ BioYAML is a BioXSD-schema-based YAML format of sequence-based data and some other common data - sequence records, alignments, feature records, references to resources, and more - optimised for integrative bioinformatics, web APIs, human readability and editing, and object-oriented programming.
+ BioXSD BioYAML
+ BioXSD BioYAML format
+ BioXSD YAML
+ BioXSD YAML format
+ BioXSD in YAML
+ BioXSD in YAML format
+ BioXSD+YAML
+ BioXSD/GTrack BioYAML
+ BioXSD|BioJSON|BioYAML BioYAML
+ BioXSD|GTrack BioYAML
+ BioYAML (BioXSD data model)
+ BioYAML (BioXSD)
+ BioYAML format
+ BioYAML format (BioXSD)
+
+
+ Work in progress. 'BioXSD' belongs to the 'BioXSD|GTrack' ecosystem of generic formats. 'BioYAML' is the YAML format based on the common, unified 'BioXSD data model', a.k.a. 'BioXSD|BioJSON|BioYAML'.
+ BioYAML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ BioJSON is a JSON format of single multiple sequence alignments, with their annotations, features, and custom visualisation and application settings for the Jalview workbench.
+ BioJSON format (Jalview)
+ JSON (Jalview)
+ JSON format (Jalview)
+ Jalview BioJSON
+ Jalview BioJSON format
+ Jalview JSON
+ Jalview JSON format
+
+
+ BioJSON (Jalview)
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+
+ GSuite is a tabular format for collections of genome or sequence feature tracks, suitable for integrative multi-track analysis. GSuite contains links to genome/sequence tracks, with additional metadata.
+ BioXSD/GTrack GSuite
+ BioXSD|GTrack GSuite
+ GSuite (GTrack ecosystem of formats)
+ GSuite format
+ GTrack|BTrack|GSuite GSuite
+ GTrack|GSuite|BTrack GSuite
+
+
+ 'GSuite' belongs to the 'BioXSD|GTrack' ecosystem of generic formats, and in particular to its subset, the 'GTrack ecosystem' (GTrack, GSuite, BTrack). 'GSuite' is the tabular format for an annotated collection of individual GTrack files.
+ GSuite
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ BTrack is an HDF5-based binary format for genome or sequence feature tracks and their collections, suitable for integrative multi-track analysis. BTrack is a binary, compressed alternative to the GTrack and GSuite formats.
+ BTrack (GTrack ecosystem of formats)
+ BTrack format
+ BioXSD/GTrack BTrack
+ BioXSD|GTrack BTrack
+ GTrack|BTrack|GSuite BTrack
+ GTrack|GSuite|BTrack BTrack
+
+
+ 'BTrack' belongs to the 'BioXSD|GTrack' ecosystem of generic formats, and in particular to its subset, the 'GTrack ecosystem' (GTrack, GSuite, BTrack). 'BTrack' is the binary, optionally compressed HDF5-based version of the GTrack and GSuite formats.
+ BTrack
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+
+
+
+
+
+ The FAO/Bioversity/IPGRI Multi-Crop Passport Descriptors (MCPD) is an international standard format for exchange of germplasm information.
+ Bioversity MCPD
+ FAO MCPD
+ IPGRI MCPD
+ MCPD V.1
+ MCPD V.2
+ MCPD format
+ Multi-Crop Passport Descriptors
+ Multi-Crop Passport Descriptors format
+
+
+ Multi-Crop Passport Descriptors is a format available in 2 successive versions, V.1 (FAO/IPGRI 2001) and V.2 (FAO/Bioversity 2012).
+ MCPD
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ true
+ Data format of an annotated text, e.g. with recognised entities, concepts, and relations.
+
+
+ Annotated text format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ JSON format of annotated scientific text used by PubAnnotations and other tools.
+
+
+ PubAnnotation format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ BioC is a standardised XML format for sharing and integrating text data and annotations.
+
+
+ BioC
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ Native textual export format of annotated scientific text from PubTator.
+
+
+ PubTator format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ A format of text annotation using the linked-data Open Annotation Data Model, serialised typically in RDF or JSON-LD.
+
+
+ Open Annotation format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+
+
+
+
+
+
+
+ A family of similar formats of text annotation, used by BRAT and other tools, known as BioNLP Shared Task format (BioNLP 2009 Shared Task on Event Extraction, BioNLP Shared Task 2011, BioNLP Shared Task 2013), BRAT format, BRAT standoff format, and similar.
+ BRAT format
+ BRAT standoff format
+
+
+ BioNLP Shared Task format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ true
+ A query language (format) for structured database queries.
+ Query format
+
+
+ Query language
+
+
+
+
+
+
+
+
+ 1.16
+ sql
+
+
+
+ SQL (Structured Query Language) is the de-facto standard query language (format of queries) for querying and manipulating data in relational databases.
+ Structured Query Language
+
+
+ SQL
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ xq
+ xquery
+ xqy
+
+ XQuery (XML Query) is a query language (format of queries) for querying and manipulating structured and unstructured data, usually in the form of XML, text, and with vendor-specific extensions for other data formats (JSON, binary, etc.).
+ XML Query
+ xq
+ xqy
+
+
+ XQuery
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ SPARQL (SPARQL Protocol and RDF Query Language) is a semantic query language for querying and manipulating data stored in Resource Description Framework (RDF) format.
+ SPARQL Protocol and RDF Query Language
+
+
+ SPARQL
+
+
+
+
+
+
+
+
+
+ 1.17
+ XML format for XML Schema.
+
+
+ xsd
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ XMFA format stands for eXtended Multi-FastA format and is used to store collinear sub-alignments that constitute a single genome alignment.
+ eXtended Multi-FastA format
+
+
+ XMFA
+
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ The GEN file format contains genetic data and describes SNPs.
+ Genotype file format
+
+
+ GEN
+
+
+
+
+
+
+
+
+ 1.20
+
+ The SAMPLE file format contains information about each individual i.e. individual IDs, covariates, phenotypes and missing data proportions, from a GWAS study.
+
+
+ SAMPLE file format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ SDF is one of a family of chemical-data file formats developed by MDL Information Systems; it is intended especially for structural information.
+
+
+ SDF
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ An MDL Molfile is a file format for holding information about the atoms, bonds, connectivity and coordinates of a molecule.
+
+
+ Molfile
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Complete, portable representation of a SYBYL molecule. ASCII file which contains all the information needed to reconstruct a SYBYL molecule.
+
+
+ Mol2
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ format for the LaTeX document preparation system.
+ LaTeX format
+
+
+ uses the TeX typesetting program format
+ latex
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Tab-delimited text file format used by Eland - the read-mapping program distributed by Illumina with its sequencing analysis pipeline - which maps short Solexa sequence reads to the human reference genome.
+ ELAND
+ eland
+
+
+ ELAND format
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ Phylip multiple alignment sequence format, less stringent than PHYLIP format.
+ PHYLIP Interleaved format
+
+
+ It differs from Phylip Format (format_1997) on length of the ID sequence. There are no length restrictions on the ID, but whitespaces aren't allowed in the sequence ID/Name because one space separates the longest ID and the beginning of the sequence. Sequence IDs must be padded to the longest ID length.
+ Relaxed PHYLIP Interleaved
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ Phylip multiple alignment sequence format, less stringent than PHYLIP sequential format (format_1998).
+ Relaxed PHYLIP non-interleaved
+ Relaxed PHYLIP non-interleaved format
+ Relaxed PHYLIP sequential format
+
+
+ It differs from Phylip sequential format (format_1997) on length of the ID sequence. There are no length restrictions on the ID, but whitespaces aren't allowed in the sequence ID/Name because one space separates the longest ID and the beginning of the sequence. Sequence IDs must be padded to the longest ID length.
+ Relaxed PHYLIP Sequential
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Default XML format of VisANT, containing all the network information.
+ VisANT xml
+ VisANT xml format
+
+
+ VisML
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ GML (Graph Modeling Language) is a text file format supporting network data with a very easy syntax. It is used by Graphlet, Pajek, yEd, LEDA and NetworkX.
+ GML format
+
+
+ GML
+
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ FASTG is a format for faithfully representing genome assemblies in the face of allelic polymorphism and assembly uncertainty.
+ FASTG assembly graph format
+
+
+ It is called FASTG, like FASTA, but the G stands for "graph".
+ FASTG
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.20
+ true
+ Data format for raw data from a nuclear magnetic resonance (NMR) spectroscopy experiment.
+ NMR peak assignment data format
+ NMR processed data format
+ NMR raw data format
+ Nuclear magnetic resonance spectroscopy data format
+ Processed NMR data format
+ Raw NMR data format
+
+
+ NMR data format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ nmrML is an MSI supported XML-based open access format for metabolomics NMR raw and processed spectral data. It is accompanied by an nmrCV (controlled vocabulary) to allow ontology-based annotations.
+
+
+ nmrML
+
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ proBAM is an adaptation of BAM (format_2572), which was extended to meet specific requirements entailed by proteomics data.
+
+
+ proBAM
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ proBED is an adaptation of BED (format_3003), which was extended to meet specific requirements entailed by proteomics data.
+
+
+ proBED
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.20
+ true
+ Data format for raw microarray data.
+ Microarray data format
+
+
+ Raw microarray data format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ GenePix Results (GPR) text file format developed by Axon Instruments that is used to save GenePix Results data.
+
+
+ GPR
+
+
+
+
+
+
+
+
+
+ 1.20
+ Binary format used by the ARB software suite.
+ ARB binary format
+
+
+ ARB
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://ftp.mi.fu-berlin.de/pub/OpenMS/release1.9-documentation/html/classOpenMS_1_1ConsensusXMLFile.html
+ OpenMS format for grouping features in one map or across several maps.
+
+
+ consensusXML
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://ftp.mi.fu-berlin.de/pub/OpenMS/release1.9-documentation/html/classOpenMS_1_1FeatureXMLFile.html
+ OpenMS format for quantitation results (LC/MS features).
+
+
+ featureXML
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://www.psidev.info/mzdata-1_0_5-docs
+ Now deprecated data format of the HUPO Proteomics Standards Initiative. Replaced by mzML (format_3244).
+
+
+ mzData
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://cruxtoolkit.sourceforge.net/tide-search.html
+ Format supported by the Tide tool for identifying peptides from tandem mass spectra.
+
+
+ TIDE TXT
+
+
+
+
+
+
+
+
+
+ 1.20
+ ftp://ftp.ncbi.nlm.nih.gov/blast/documents/NEWXML/ProposedBLASTXMLChanges.pdf
+ ftp://ftp.ncbi.nlm.nih.gov/blast/documents/NEWXML/xml2.pdf
+ http://www.ncbi.nlm.nih.gov/data_specs/schema/NCBI_BlastOutput2.mod.xsd
+ XML format as produced by the NCBI Blast package v2.
+
+
+ BLAST XML v2 results format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ Microsoft Powerpoint format.
+
+
+ pptx
+
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ ibd
+
+ ibd is a data format for mass spectrometry imaging data.
+
+
+ imzML data is recorded in 2 files: '.imzXML' is a metadata XML file based on mzML by HUPO-PSI, and '.ibd' is a binary file containing the mass spectra.
+ ibd
+
+
+
+
+
+
+
+
+ 1.21
+ Data format used in Natural Language Processing.
+ Natural Language Processing format
+
+
+ NLP format
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ XML input file format for BEAST Software (Bayesian Evolutionary Analysis Sampling Trees).
+
+
+ BEAST
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ Chado-XML format is a direct mapping of the Chado relational schema into XML.
+
+
+ Chado-XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ An alignment format generated by PRANK/PRANKSTER consisting of four elements: newick, nodes, selection and model.
+
+
+ HSAML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ Output xml file from the InterProScan sequence analysis application.
+
+
+ InterProScan XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ The KEGG Markup Language (KGML) is an exchange format of the KEGG pathway maps, which is converted from internally used KGML+ (KGML+SVG) format.
+ KEGG Markup Language
+
+
+ KGML
+
+
+
+
+
+
+
+
+
+ 1.21
+ XML format for collected entries from bibliographic databases MEDLINE and PubMed.
+ MEDLINE XML
+
+
+ PubMed XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ A set of XML compliant markup components for describing multiple sequence alignments.
+
+
+ MSAML
+
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ OrthoXML is designed broadly to allow the storage and comparison of orthology data from any ortholog database. It establishes a structure for describing orthology relationships while still allowing flexibility for database-specific information to be encapsulated in the same format.
+
+
+ OrthoXML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ Tree structure of Protein Sequence Database Markup Language generated using Matra software.
+
+
+ PSDML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ SeqXML is an XML Schema to describe biological sequences, developed by the Stockholm Bioinformatics Centre.
+
+
+ SeqXML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ XML format for the UniParc database.
+
+
+ UniParc XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ XML format for the UniRef reference clusters.
+
+
+ UniRef XML
+
+
+
+
+
+
+
+
+
+
+ 1.21
+
+
+
+
+ cwl
+
+
+
+ Common Workflow Language (CWL) format for description of command-line tools and workflows.
+ Common Workflow Language
+ CommonWL
+
+
+ CWL
+
+
+
+
+
+
+
+
+
+ 1.21
+ Proprietary file format for mass spectrometry data from Waters.
+
+
+ Proprietary format for which documentation is not available, but used by multiple tools.
+ Waters RAW
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ A standardized file format for data exchange in mass spectrometry, initially developed for infrared spectrometry.
+
+
+ JCAMP-DX is an ASCII based format and therefore not very compact even though it includes standards for file compression.
+ JCAMP-DX
+
+
+
+
+
+
+
+
+
+ 1.21
+ An NLP format used for annotated textual documents.
+
+
+ NLP annotation format
+
+
+
+
+
+
+
+
+ 1.21
+ NLP format used by a specific type of corpus (collection of texts).
+
+
+ NLP corpus format
+
+
+
+
+
+
+
+
+
+
+ 1.21
+
+
+
+ mirGFF3 is a common format for microRNA data resulting from small-RNA RNA-Seq workflows.
+ miRTop format
+
+
+ mirGFF3 is a specialisation of GFF3; produced by small-RNA-Seq analysis workflows, usable and convertible with the miRTop API (https://mirtop.readthedocs.io/en/latest/), and consumable by tools for downstream analysis.
+ mirGFF3
+
+
+
+
+
+
+
+
+ 1.21
+ A "placeholder" concept for formats of annotated RNA data, including e.g. microRNA and RNA-Seq data.
+ RNA data format
+ miRNA data format
+ microRNA data format
+
+
+ RNA annotation format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ File format to store trajectory information for a 3D structure.
+ CG trajectory formats
+ MD trajectory formats
+ NA trajectory formats
+ Protein trajectory formats
+
+
+ Formats differ on what they are able to store (coordinates, velocities, topologies) and how they are storing it (raw, compressed, textual, binary).
+ Trajectory format
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ Binary file format to store trajectory information for a 3D structure.
+
+
+ Trajectory format (binary)
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ Textual file format to store trajectory information for a 3D structure.
+
+
+ Trajectory format (text)
+
+
+
+
+
+
+
+
+
+ 1.22
+ HDF is the name of a set of file formats and libraries designed to store and organize large amounts of numerical data, originally developed at the National Center for Supercomputing Applications at the University of Illinois.
+
+
+ HDF is currently supported by many commercial and non-commercial software platforms such as Java, MATLAB/Scilab, Octave, Python and R.
+ HDF
+
+
+
+
+
+
+
+
+
+ 1.22
+ PCAZip format is a binary compressed file to store atom coordinates based on Essential Dynamics (ED) and Principal Component Analysis (PCA).
+
+
+ The compression is made projecting the Cartesian snapshots collected along the trajectory into an orthogonal space defined by the most relevant eigenvectors obtained by diagonalization of the covariance matrix (PCA). In the compression/decompression process, part of the original information is lost, depending on the final number of eigenvectors chosen. However, with a reasonable choice of the set of eigenvectors the compression typically reduces the trajectory file to less than one tenth of their original size with very acceptable loss of information. Compression with PCAZip can only be applied to unsolvated structures.
+ PCAzip
+
+
+
+
+
+
+
+
+
+ 1.22
+ Portable binary format for trajectories produced by GROMACS package.
+
+
+ XTC uses the External Data Representation (xdr) routines for writing and reading data which were created for the Unix Network File System (NFS). XTC files use a reduced precision (lossy) algorithm which works multiplying the coordinates by a scaling factor (typically 1000), so converting them to pm (GROMACS standard distance unit is nm). This allows an integer rounding of the values. Several other tricks are performed, such as making use of atom proximity information: atoms close in sequence are usually close in space (e.g. water molecules). That makes XTC format the most efficient in terms of disk usage, in most cases reducing by a factor of 2 the size of any other binary trajectory format.
+ XTC
+
+
+
+
+
+
+
+
+
+ 1.22
+ Trajectory Next Generation (TNG) is a format for storage of molecular simulation data. It is designed and implemented by the GROMACS development group, and it is called to be the substitute of the XTC format.
+ Trajectory Next Generation format
+
+
+ Fully architecture-independent format, regarding both endianness and the ability to mix single/double precision trajectories and I/O libraries. Self-sufficient, it should not require any other files for reading, and all the data should be contained in a single file for easy transport. Temporal compression of data, improving the compression rate of the previous XTC format. Possibility to store meta-data with information about the simulation. Direct access to a particular frame. Efficient parallel I/O.
+ TNG
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ The XYZ chemical file format is widely supported by many programs, although many slightly different XYZ file formats coexist (Tinker XYZ, UniChem XYZ, etc.). Basic information stored for each atom in the system are x, y and z coordinates and atom element/atomic number.
+
+
+ XYZ files are structured in this way: First line contains the number of atoms in the file. Second line contains a title, comment, or filename. Remaining lines contain atom information. Each line starts with the element symbol, followed by x, y and z coordinates in angstroms separated by whitespace. Multiple molecules or frames can be contained within one file, so it supports trajectory storage. XYZ files can be directly represented by a molecular viewer, as they contain all the basic information needed to build the 3D model.
+ XYZ
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER trajectory (also called mdcrd), with 10 coordinates per line and format F8.3 (fixed point notation with field width 8 and 3 decimal places).
+ AMBER trajectory format
+ inpcrd
+
+
+ mdcrd
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ Format of topology files; containing the static information of a structure molecular system that is needed for a molecular simulation.
+ CG topology format
+ MD topology format
+ NA topology format
+ Protein topology format
+
+
+ Many different file formats exist describing structural molecular topology. Typically, each MD package or simulation software works with their own implementation (e.g. GROMACS top, CHARMM psf, AMBER prmtop).
+ Topology format
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ GROMACS MD package top textual files define an entire structure system topology, either directly, or by including itp files.
+
+
+ There is currently no tool available for conversion between GROMACS topology format and other formats, due to the internal differences in both approaches. There is, however, a method to convert small molecules parameterized with AMBER force-field into GROMACS format, allowing simulations of these systems with GROMACS MD package.
+ GROMACS top
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER Prmtop file (version 7) is a structure topology text file divided in several sections designed to be parsed easily using simple Fortran code. Each section contains particular topology information, such as atom name, charge, mass, angles, dihedrals, etc.
+ AMBER Parm
+ AMBER Parm7
+ Parm7
+ Prmtop
+ Prmtop7
+
+
+ It can be modified manually, but as the size of the system increases, the hand-editing becomes increasingly complex. AMBER Parameter-Topology file format is used extensively by the AMBER software suite and is referred to as the Prmtop file for short.
+ version 7 is written to distinguish it from old versions of AMBER Prmtop. Similarly to HDF5, it is a completely different format, according to AMBER group: a drastic change to the file format occurred with the 2004 release of Amber 7 (http://ambermd.org/prmtop.pdf)
+ AMBER top
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ X-Plor Protein Structure Files (PSF) are structure topology files used by NAMD and CHARMM molecular simulations programs. PSF files contain six main sections of interest: atoms, bonds, angles, dihedrals, improper dihedrals (force terms used to maintain planarity) and cross-terms.
+
+
+ The high similarity in the functional form of the two potential energy functions used by AMBER and CHARMM force-fields gives rise to the possible use of one force-field within the other MD engine. Therefore, the conversion of PSF files to AMBER Prmtop format is possible with the use of AMBER chamber (CHARMM - AMBER) program.
+ PSF
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ GROMACS itp files (include topology) contain structure topology information, and are typically included in GROMACS topology files (GROMACS top). Itp files are used to define individual (or multiple) components of a topology as a separate file. This is particularly useful if there is a molecule that is used frequently, and also reduces the size of the system topology file, splitting it in different parts.
+
+
+ GROMACS itp files are used also to define position restrictions on the molecule, or to define the force field parameters for a particular ligand.
+ GROMACS itp
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Format of force field parameter files, which store the set of parameters (charges, masses, radii, bond lengths, bond dihedrals, etc.) that are essential for the proper description and simulation of a molecular system.
+ Many different file formats exist describing force field parameters. Typically, each MD package or simulation software works with their own implementation (e.g. GROMACS itp, CHARMM rtf, AMBER off / frcmod).
+ FF parameter format
+
+
+
+
+
+
+
+
+
+ 1.22
+ Scripps Research Institute BinPos format is a binary formatted file to store atom coordinates.
+ Scripps Research Institute BinPos
+
+
+ It is basically a translation of the ASCII atom coordinate format to binary code. The only additional information stored is a magic number that identifies the BinPos format and the number of atoms per snapshot. The remainder is the chain of coordinates binary encoded. A drawback of this format is its architecture dependency. Integers and floats codification depends on the architecture, thus it needs to be converted if working in different platforms (little endian, big endian).
+ BinPos
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER coordinate/restart file with 6 coordinates per line and decimal format F12.7 (fixed point notation with field width 12 and 7 decimal places).
+ restrt
+ rst7
+
+
+ RST
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Format of CHARMM Residue Topology Files (RTF), which define groups by including the atoms, the properties of the group, and bond and charge information.
+
+
+ There is currently no tool available for conversion between GROMACS topology format and other formats, due to the internal differences in both approaches. There is, however, a method to convert small molecules parameterized with AMBER force-field into GROMACS format, allowing simulations of these systems with GROMACS MD package.
+ CHARMM rtf
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER frcmod (Force field Modification) is a file format to store any modification to the standard force field needed for a particular molecule to be properly represented in the simulation.
+
+
+ AMBER frcmod
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER Object File Format library files (OFF library files) store residue libraries (forcefield residue parameters).
+ AMBER Object File Format
+ AMBER lib
+ AMBER off
+
+
+
+
+
+
+
+
+
+ 1.22
+ NMReData is a text based data standard for processed NMR data. It is relying on SDF molecule data and allows to store assignments of NMR peaks to molecule features. The NMR-extracted data (or "NMReDATA") includes: Chemical shift, scalar coupling, 2D correlation, assignment, etc.
+
+
+ NMReData is a text based data standard for processed NMR data. It is relying on SDF molecule data and allows to store assignments of NMR peaks to molecule features. The NMR-extracted data (or "NMReDATA") includes: Chemical shift, scalar coupling, 2D correlation, assignment, etc. Find more in the paper at https://doi.org/10.1002/mrc.4527.
+ NMReDATA
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+
+
+
+ BpForms is a string format for concretely representing the primary structures of biopolymers, including DNA, RNA, and proteins that include non-canonical nucleic and amino acids. See https://www.bpforms.org for more information.
+
+
+ BpForms
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ Format of trr files that contain the trajectory of a simulation experiment used by GROMACS.
+ The first 4 bytes of any trr file containing 1993. See https://github.com/galaxyproject/galaxy/pull/6597/files#diff-409951594551183dbf886e24de6cb129R760
+ trr
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+
+
+
+
+ msh
+
+
+
+ Mash sketch is a format for sequence / sequence checksum information. To make a sketch, each k-mer in a sequence is hashed, which creates a pseudo-random identifier. By sorting these hashes, a small subset from the top of the sorted list can represent the entire sequence.
+ Mash sketch
+ min-hash sketch
+
+
+ msh
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.23
+
+
+
+ loom
+ The Loom file format is based on HDF5, a standard for storing large numerical datasets. The Loom format is designed to efficiently hold large omics datasets. Typically, such data takes the form of a large matrix of numbers, along with metadata for the rows and columns.
+ Loom
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.23
+
+
+
+ zarray
+ zgroup
+ The Zarr format is an implementation of chunked, compressed, N-dimensional arrays for storing data.
+ Zarr
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.23
+
+
+ mtx
+
+ The Matrix Market matrix (MTX) format stores numerical or pattern matrices in a dense (array format) or sparse (coordinate format) representation.
+ MTX
+
+
+
+
+
+
+
+
+
+
+ 1.24
+
+
+
+
+
+ text/plain
+
+
+ BcForms is a format for abstractly describing the molecular structure (atoms and bonds) of macromolecular complexes as a collection of subunits and crosslinks. Each subunit can be described with BpForms (http://edamontology.org/format_3909) or SMILES (http://edamontology.org/data_2301). BcForms uses an ontology of crosslinks to abstract the chemical details of crosslinks from the descriptions of complexes (see https://bpforms.org/crosslink.html).
+ BcForms is related to http://edamontology.org/format_3909. (BcForms uses BpForms to describe subunits which are DNA, RNA, or protein polymers.) However, that format isn't the parent of BcForms. BcForms is similarly related to SMILES (http://edamontology.org/data_2301).
+ BcForms
+
+
+
+
+
+
+
+
+
+ 1.24
+
+ nq
+ N-Quads is a line-based, plain text format for encoding an RDF dataset. It includes information about the graph each triple belongs to.
+
+
+ N-Quads should not be confused with N-Triples which does not contain graph information.
+ N-Quads
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ json
+ application/json
+
+ Vega is a visualization grammar, a declarative language for creating, saving, and sharing interactive visualization designs. With Vega, you can describe the visual appearance and interactive behavior of a visualization in a JSON format, and generate web-based views using Canvas or SVG.
+
+
+ Vega
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ json
+ application/json
+
+ Vega-Lite is a high-level grammar of interactive graphics. It provides a concise JSON syntax for rapidly generating visualizations to support analysis. Vega-Lite specifications can be compiled to Vega specifications.
+
+
+ Vega-lite
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ application/xml
+
+ A model description language for computational neuroscience.
+
+
+ NeuroML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ bngl
+ application/xml
+ plain/text
+
+ BioNetGen is a format for the specification and simulation of rule-based models of biochemical systems, including signal transduction, metabolic, and genetic regulatory networks.
+ BioNetGen Language
+
+
+ BNGL
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+ A Docker image is a file, comprised of multiple layers, that is used to execute code in a Docker container. An image is essentially built from the instructions for a complete and executable version of an application, which relies on the host OS kernel.
+
+
+ Docker image
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+ gfa
+
+ Graphical Fragment Assembly captures sequence graphs as the product of an assembly, a representation of variation in genomes, splice graphs in genes, or even overlap between reads from long-read sequencing technology.
+ Graphical Fragment Assembly (GFA) 1.0
+
+
+ GFA 1
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+ gfa
+
+ Graphical Fragment Assembly captures sequence graphs as the product of an assembly, a representation of variation in genomes, splice graphs in genes, or even overlap between reads from long-read sequencing technology. GFA2 is an update of GFA1 which is not compatible with GFA1.
+ Graphical Fragment Assembly (GFA) 2.0
+
+
+ GFA 2
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+ xlsx
+ application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
+
+ ObjTables is a toolkit for creating re-usable datasets that are both human and machine-readable, combining the ease of spreadsheets (e.g., Excel workbooks) with the rigor of schemas (classes, their attributes, the type of each attribute, and the possible relationships between instances of classes). ObjTables consists of a format for describing schemas for spreadsheets, numerous data types for science, a syntax for indicating the class and attribute represented by each table and column in a workbook, and software for using schemas to rigorously validate, merge, split, compare, and revision datasets.
+
+
+ ObjTables
+
+
+
+
+
+
+
+
+
+ 1.25
+ contig
+ The CONTIG format used for output of the SOAPdenovo alignment program. It contains contig sequences generated without using mate pair information.
+
+
+ CONTIG
+
+
+
+
+
+
+
+
+
+ 1.25
+ wego
+ WEGO native format used by the Web Gene Ontology Annotation Plot application. Tab-delimited format with gene names and others GO IDs (columns) with one annotation record per line.
+
+
+ WEGO
+
+
+
+
+
+
+
+
+
+ 1.25
+ rpkm
+ Tab-delimited format for gene expression levels table, calculated as Reads Per Kilobase per Million (RPKM) mapped reads.
+ Gene expression levels table format
+
+
+ For example a 1kb transcript with 1000 alignments in a sample of 10 million reads (out of which 8 million reads can be mapped) will have RPKM = 1000/(1 * 8) = 125
+ RPKM
+
+
+
+
+
+
+
+
+ 1.25
+ tar
+ TAR archive file format generated by the Unix-based utility tar.
+ TAR
+ Tarball
+ tar
+
+
+ For example a 1kb transcript with 1000 alignments in a sample of 10 million reads (out of which 8 million reads can be mapped) will have RPKM = 1000/(1 * 8) = 125
+ TAR format
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ chain
+ The CHAIN format describes a pairwise alignment that allows gaps in both sequences simultaneously and is used by the UCSC Genome Browser.
+
+
+ CHAIN
+ https://genome.ucsc.edu/goldenPath/help/chain.html
+
+
+
+
+
+
+
+
+
+ 1.25
+ net
+ The NET file format is used to describe the data that underlie the net alignment annotations in the UCSC Genome Browser.
+
+
+ NET
+ https://genome.ucsc.edu/goldenPath/help/net.html
+
+
+
+
+
+
+
+
+
+ 1.25
+ qmap
+ Format of QMAP files generated for methylation data from an internal BGI pipeline.
+
+
+ QMAP
+
+
+
+
+
+
+
+
+
+ 1.25
+ ga
+ An emerging format for high-level Galaxy workflow description.
+ Galaxy workflow format
+ GalaxyWF
+ ga
+
+
+ gxformat2
+ https://github.com/galaxyproject/gxformat2
+
+
+
+
+
+
+
+
+
+ 1.25
+ wmv
+ The proprietary native video format of various Microsoft programs such as Windows Media Player.
+ Windows Media Video format
+ Windows movie file format
+
+
+ WMV
+
+
+
+
+
+
+
+
+
+ 1.25
+ zip
+ ZIP is an archive file format that supports lossless data compression.
+ ZIP
+
+
+ A ZIP file may contain one or more files or directories that may have been compressed.
+ ZIP format
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ lsm
+ Zeiss' proprietary image format based on TIFF.
+
+
+ LSM files are the default data export for the Zeiss LSM series confocal microscopes (e.g. LSM 510, LSM 710). In addition to the image data, LSM files contain most imaging settings.
+ LSM
+
+
+
+
+
+
+
+
+ 1.25
+ gz
+ gzip
+ GNU zip compressed file format common to Unix-based operating systems.
+ GNU Zip
+ gz
+ gzip
+
+
+ GZIP format
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ avi
+ Audio Video Interleaved (AVI) format is a multimedia container format for AVI files, that allows synchronous audio-with-video playback.
+ Audio Video Interleaved
+
+
+ AVI
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ trackdb
+ A declaration file format for UCSC browsers track dataset display characteristics.
+
+
+ TrackDB
+
+
+
+
+
+
+
+
+
+ 1.25
+ cigar
+ Compact Idiosyncratic Gapped Alignment Report format is a compressed (run-length encoded) pairwise alignment format. It is useful for representing long (e.g. genomic) pairwise alignments.
+ CIGAR
+
+
+ CIGAR format
+ http://wiki.bits.vib.be/index.php/CIGAR/
+
+
+
+
+
+
+
+
+
+ 1.25
+ stl
+ STL is a file format native to the stereolithography CAD software created by 3D Systems. The format is used to save and share surface-rendered 3D images and also for 3D printing.
+ stl
+
+
+ Stereolithography format
+
+
+
+
+
+
+
+
+
+ 1.25
+ u3d
+ U3D (Universal 3D) is a compressed file format and data structure for 3D computer graphics. It contains 3D model information such as triangle meshes, lighting, shading, motion data, lines and points with color and structure.
+ Universal 3D
+ Universal 3D format
+
+
+ U3D
+
+
+
+
+
+
+
+
+
+ 1.25
+ tex
+ Bitmap image format used for storing textures.
+
+
+ Texture files can create the appearance of different surfaces and can be applied to both 2D and 3D objects. Note the file extension .tex is also used for LaTeX documents which are a completely different format and they are NOT interchangeable.
+ Texture file format
+
+
+
+
+
+
+
+
+
+ 1.25
+ py
+ Format for scripts written in Python - a widely used high-level programming language for general-purpose programming.
+ Python
+ Python program
+ py
+
+
+ Python script
+
+
+
+
+
+
+
+
+
+ 1.25
+ mp4
+ A digital multimedia container format most commonly used to store video and audio.
+ MP4
+
+
+ MPEG-4
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ pl
+ Format for scripts written in Perl - a family of high-level, general-purpose, interpreted, dynamic programming languages.
+ Perl
+ Perl program
+ pl
+
+
+ Perl script
+
+
+
+
+
+
+
+
+
+ 1.25
+ r
+ Format for scripts written in the R language - an open source programming language and software environment for statistical computing and graphics that is supported by the R Foundation for Statistical Computing.
+ R
+ R program
+
+
+ R script
+
+
+
+
+
+
+
+
+
+ 1.25
+ rmd
+ A file format for making dynamic documents (R Markdown scripts) with the R language.
+
+
+ R markdown
+ https://rmarkdown.rstudio.com/articles_intro.html
+
+
+
+
+
+
+
+
+ 1.25
+ This duplicates an existing concept (http://edamontology.org/format_3549).
+ 1.26
+
+ An open file format from the Neuroimaging Informatics Technology Initiative (NIfTI) commonly used to store brain imaging data obtained using Magnetic Resonance Imaging (MRI) methods.
+
+
+ NIFTI format
+ true
+
+
+
+
+
+
+
+
+ 1.25
+ pickle
+ Format used by Python pickle module for serializing and de-serializing a Python object structure.
+
+
+ pickle
+ https://docs.python.org/2/library/pickle.html
+
+
+
+
+
+
+
+
+ 1.25
+ npy
+ The standard binary file format used by NumPy - a fundamental package for scientific computing with Python - for persisting a single arbitrary NumPy array on disk. The format stores all of the shape and dtype information necessary to reconstruct the array correctly.
+ NumPy
+ npy
+
+
+ NumPy format
+
+
+
+
+
+
+
+
+ 1.25
+ repz
+ Format of repertoire (archive) files that can be read by SimToolbox (a MATLAB toolbox for structured illumination fluorescence microscopy) or alternatively extracted with zip file archiver software.
+
+
+ SimTools repertoire file format
+ https://pdfs.semanticscholar.org/5f25/f1cc6cdf2225fe22dc6fd4fc0296d486a85c.pdf
+
+
+
+
+
+
+
+
+ 1.25
+ cfg
+ A configuration file used by various programs to store settings that are specific to their respective software.
+
+
+ Configuration file format
+
+
+
+
+
+
+
+
+ 1.25
+ zst
+ Format used by the Zstandard real-time compression algorithm.
+ Zstandard compression format
+ Zstandard-compressed file format
+ zst
+
+
+ Zstandard format
+ https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md
+
+
+
+
+
+
+
+
+
+ 1.25
+ m
+ The file format for MATLAB scripts or functions.
+ MATLAB
+ m
+
+
+ MATLAB script
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+
+ A data format for specifying parameter estimation problems in systems biology.
+
+
+ PEtab
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ g.vcf
+ g.vcf.gz
+ Genomic Variant Call Format (gVCF) is a version of VCF that includes not only the positions that are variant when compared to a reference genome, but also the non-variant positions as ranges, including metrics of confidence that the positions in the range are actually non-variant e.g. minimum read-depth and genotype quality.
+ g.vcf
+ g.vcf.gz
+
+
+ gVCF
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ cml
+
+ Chemical Markup Language (CML) is an XML-based format for encoding detailed information about a wide range of chemical concepts.
+ ChemML
+
+
+ cml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ cif
+
+ Crystallographic Information File (CIF) is a data exchange standard file format for Crystallographic Information and related Structural Science data.
+
+
+ cif
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ json
+
+
+
+
+
+
+
+
+
+ Format for describing the capabilities of a biosimulation tool including the modeling frameworks, simulation algorithms, and modeling formats that it supports, as well as metadata such as a list of the interfaces, programming languages, and operating systems supported by the tool; a link to download the tool; a list of the authors of the tool; and the license to the tool.
+
+
+ BioSimulators format for the specifications of biosimulation tools
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+
+ Outlines the syntax and semantics of the input and output arguments for command-line interfaces for biosimulation tools.
+
+
+ BioSimulators standard for command-line interfaces for biosimulation tools
+
+
+
+
+
+
+
+
+
+ 1.26
+ Data format derived from the standard PDB format, which enables user to incorporate parameters for charge and radius to the existing PDB data file.
+
+
+ PQR
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Data format used in AutoDock 4 for storing atomic coordinates, partial atomic charges and AutoDock atom types for both receptors and ligands.
+
+
+ PDBQT
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ msp
+ MSP is a data format for mass spectrometry data.
+
+
+ NIST Text file format for storing MS∕MS spectra (m∕z and intensity of mass peaks) along with additional annotations for each spectrum. A single MSP file can thus contain single or multiple spectra. This format is frequently used to share spectra libraries.
+ MSP
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Function
+ A function that processes a set of inputs and results in a set of outputs, or associates arguments (inputs) with values (outputs).
+ Computational method
+ Computational operation
+ Computational procedure
+ Computational subroutine
+ Function (programming)
+ Lambda abstraction
+ Mathematical function
+ Mathematical operation
+ Computational tool
+ Process
+ sumo:Function
+
+
+ Special cases are: a) An operation that consumes no input (has no input arguments). Such operation is either a constant function, or an operation depending only on the underlying state. b) An operation that may modify the underlying state but has no output. c) The singular-case operation with no input or output, that still may modify the underlying state.
+ Operation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Function
+ Operation is a function that is computational. It typically has input(s) and output(s), which are always data.
+
+
+
+
+ Computational tool
+ Computational tool provides one or more operations.
+
+
+
+
+ Process
+ Process can have a function (as its quality/attribute), and can also perform an operation with inputs and outputs.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search or query a data resource and retrieve entries and / or annotation.
+ Database retrieval
+ Query
+
+
+ Query and retrieval
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Search database to retrieve all relevant references to a particular entity or entry.
+
+ Data retrieval (database cross-reference)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Annotate an entity (typically a biological or biomedical database entity) with terms from a controlled vocabulary.
+
+
+ This is a broad concept and is used as a placeholder for other, more specific concepts.
+ Annotation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Generate an index of (typically a file of) biological data.
+ Data indexing
+ Database indexing
+
+
+ Indexing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Analyse an index of biological data.
+
+ Data index analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Retrieve basic information about a molecular sequence.
+
+ Annotation retrieval (sequence)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a molecular sequence by some means.
+ Sequence generation (nucleic acid)
+ Sequence generation (protein)
+
+
+ Sequence generation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Edit or change a molecular sequence, either randomly or specifically.
+
+
+ Sequence editing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Merge two or more (typically overlapping) molecular sequences.
+ Sequence splicing
+ Paired-end merging
+ Paired-end stitching
+ Read merging
+ Read stitching
+
+
+ Sequence merging
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Convert a molecular sequence from one type to another.
+
+
+ Sequence conversion
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate sequence complexity, for example to find low-complexity regions in sequences.
+
+
+ Sequence complexity calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate sequence ambiguity, for example identify regions in protein or nucleotide sequences with many ambiguity codes.
+
+
+ Sequence ambiguity calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate character or word composition or frequency of a molecular sequence.
+
+
+ Sequence composition calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find and/or analyse repeat sequences in (typically nucleotide) sequences.
+
+
+ Repeat sequences include tandem repeats, inverted or palindromic repeats, DNA microsatellites (Simple Sequence Repeats or SSRs), interspersed repeats, maximal duplications and reverse, complemented and reverse complemented repeats etc. Repeat units can be exact or imperfect, in tandem or dispersed, of specified or unspecified length.
+ Repeat sequence analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Discover new motifs or conserved patterns in sequences or sequence alignments (de-novo discovery).
+ Motif discovery
+
+
+ Motifs and patterns might be conserved or over-represented (occur with improbable frequency).
+ Sequence motif discovery
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find (scan for) known motifs, patterns and regular expressions in molecular sequence(s).
+ Motif scanning
+ Sequence signature detection
+ Sequence signature recognition
+ Motif detection
+ Motif recognition
+ Motif search
+ Sequence motif detection
+ Sequence motif search
+ Sequence profile search
+
+
+ Sequence motif recognition
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find motifs shared by molecular sequences.
+
+
+ Sequence motif comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Analyse the sequence, conformational or physicochemical properties of transcription regulatory elements in DNA sequences.
+
+ For example transcription factor binding sites (TFBS) analysis to predict accessibility of DNA to binding factors.
+ Transcription regulatory sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Identify common, conserved (homologous) or synonymous transcriptional regulatory motifs (transcription factor binding sites).
+
+
+ Conserved transcription regulatory sequence identification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+ Extract, calculate or predict non-positional (physical or chemical) properties of a protein from processing a protein (3D) structure.
+
+
+ Protein property calculation (from structure)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse flexibility and motion in protein structure.
+ CG analysis
+ MD analysis
+ Protein Dynamics Analysis
+ Trajectory analysis
+ Nucleic Acid Dynamics Analysis
+ Protein flexibility and motion analysis
+ Protein flexibility prediction
+ Protein motion prediction
+
+
+ Use this concept for analysis of flexible and rigid residues, local chain deformability, regions undergoing conformational change, molecular vibrations or fluctuational dynamics, domain motions or other large-scale structural transitions in a protein structure.
+ Simulation analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or screen for 3D structural motifs in protein structure(s).
+ Protein structural feature identification
+ Protein structural motif recognition
+
+
+ This includes conserved substructures and conserved geometry, such as spatial arrangement of secondary structure or protein backbone. Methods might use structure alignment, structural templates, searches for similar electrostatic potential and molecular surface shape, surface-mapping of phylogenetic information etc.
+ Structural motif discovery
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify structural domains in a protein structure from first principles (for example calculations on structural compactness).
+
+
+ Protein domain recognition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the architecture (spatial arrangement of secondary structure) of protein structure(s).
+
+
+ Protein architecture analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: SymShellFiveXML
+ WHATIF: SymShellOneXML
+ WHATIF: SymShellTenXML
+ WHATIF: SymShellTwoXML
+ WHATIF:ListContactsNormal
+ WHATIF:ListContactsRelaxed
+ WHATIF:ListSideChainContactsNormal
+ WHATIF:ListSideChainContactsRelaxed
+ Calculate or extract inter-atomic, inter-residue or residue-atom contacts, distances and interactions in protein structure(s).
+
+
+ Residue interaction calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:CysteineTorsions
+ WHATIF:ResidueTorsions
+ WHATIF:ResidueTorsionsBB
+ WHATIF:ShowTauAngle
+ Calculate, visualise or analyse phi/psi angles of a protein structure.
+ Backbone torsion angle calculation
+ Cysteine torsion angle calculation
+ Tau angle calculation
+ Torsion angle calculation
+
+
+ Protein geometry calculation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Extract, calculate or predict non-positional (physical or chemical) properties of a protein, including any non-positional properties of the molecular sequence, from processing a protein sequence or 3D structure.
+ Protein property rendering
+ Protein property calculation (from sequence)
+ Protein property calculation (from structure)
+ Protein structural property calculation
+ Structural property calculation
+
+
+ This includes methods to render and visualise the properties of a protein sequence, and a residue-level search for properties such as solvent accessibility, hydropathy, secondary structure, ligand-binding etc.
+ Protein property calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Immunogen design
+ Predict antigenicity, allergenicity / immunogenicity, allergic cross-reactivity etc of peptides and proteins.
+ Antigenicity prediction
+ Immunogenicity prediction
+ B cell peptide immunogenicity prediction
+ Hopp and Woods plotting
+ MHC peptide immunogenicity prediction
+
+
+ Immunological systems are cellular or humoral. In vaccine design to induce a cellular immune response, methods must search for antigens that can be recognized by the major histocompatibility complex (MHC) molecules present in T lymphocytes. If a humoral response is required, antigens for B cells must be identified.
+ This includes methods that generate a graphical rendering of antigenicity of a protein, such as a Hopp and Woods plot.
+ This is usually done in the development of peptide-specific antibodies or multi-epitope vaccines. Methods might use sequence data (for example motifs) and / or structural data.
+ Peptide immunogenicity prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict, recognise and identify positional features in molecular sequences such as key functional sites or regions.
+ Sequence feature prediction
+ Sequence feature recognition
+ Motif database search
+ SO:0000110
+
+
+ Look at "Protein feature detection" (http://edamontology.org/operation_3092) and "Nucleic acid feature detection" (http://edamontology.org/operation_0415) in case more specific terms are needed.
+ Sequence feature detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Extract a sequence feature table from a sequence database entry.
+
+ Data retrieval (feature table)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query the features (in a feature table) of molecular sequence(s).
+
+ Feature table query
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the feature tables of two or more molecular sequences.
+ Feature comparison
+ Feature table comparison
+
+
+ Sequence feature comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Display basic information about a sequence alignment.
+
+ Data retrieval (sequence alignment)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a molecular sequence alignment.
+
+
+ Sequence alignment analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare (typically by aligning) two molecular sequence alignments.
+
+
+ See also 'Sequence profile alignment'.
+ Sequence alignment comparison
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Convert a molecular sequence alignment from one type to another (for example amino acid to coding nucleotide sequence).
+
+
+ Sequence alignment conversion
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) physicochemical property data of nucleic acids.
+
+ Nucleic acid property processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate or predict physical or chemical properties of nucleic acid molecules, including any non-positional properties of the molecular sequence.
+
+
+ Nucleic acid property calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict splicing alternatives or transcript isoforms from analysis of sequence data.
+ Alternative splicing analysis
+ Alternative splicing detection
+ Differential splicing analysis
+ Splice transcript prediction
+
+
+ Alternative splicing prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect frameshifts in DNA sequences, including frameshift sites and signals, and frameshift errors from sequencing projects.
+ Frameshift error detection
+
+
+ Methods include sequence alignment (if related sequences are available) and word-based sequence comparison.
+ Frameshift detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect vector sequences in nucleotide sequence, typically by comparison to a set of known vector sequences.
+
+
+ Vector sequence detection
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict secondary structure of protein sequences.
+ Secondary structure prediction (protein)
+
+
+ Methods might use amino acid composition, local sequence information, multiple sequence alignments, physicochemical features, estimated energy content, statistical algorithms, hidden Markov models, support vector machines, kernel machines, neural networks etc.
+ Protein secondary structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict super-secondary structure of protein sequence(s).
+
+
+ Super-secondary structures include leucine zippers, coiled coils, Helix-Turn-Helix etc.
+ Protein super-secondary structure prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict and/or classify transmembrane proteins or transmembrane (helical) domains or regions in protein sequences.
+
+
+ Transmembrane protein prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse transmembrane protein(s), typically by processing sequence and / or structural data, and write an informative report for example about the protein and its transmembrane domains / regions.
+
+
+ Use this (or child) concept for analysis of transmembrane domains (buried and exposed faces), transmembrane helices, helix topology, orientation, inter-helical contacts, membrane dipping (re-entrant) loops and other secondary structure etc. Methods might use pattern discovery, hidden Markov models, sequence alignment, structural profiles, amino acid property analysis, comparison to known domains or some combination (hybrid methods).
+ Transmembrane protein analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ This is an "organisational class" not very useful for annotation per se.
+ 1.19
+
+
+
+
+ Predict tertiary structure of a molecular (biopolymer) sequence.
+
+ Structure prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict contacts, non-covalent interactions and distance (constraints) between amino acids in protein sequences.
+ Residue interaction prediction
+ Contact map prediction
+ Protein contact map prediction
+
+
+ Methods usually involve multiple sequence alignment analysis.
+ Residue contact prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Analyse experimental protein-protein interaction data from for example yeast two-hybrid analysis, protein microarrays, immunoaffinity chromatography followed by mass spectrometry, phage display etc.
+
+
+ Protein interaction raw data analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify or predict protein-protein interactions, interfaces, binding sites etc in protein sequences.
+
+
+ Protein-protein interaction prediction (from protein sequence)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify or predict protein-protein interactions, interfaces, binding sites etc in protein structures.
+
+
+ Protein-protein interaction prediction (from protein structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a network of protein interactions.
+ Protein interaction network comparison
+
+
+ Protein interaction network analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+ Compare two or more biological pathways or networks.
+
+ Pathway or network comparison
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict RNA secondary structure (for example knots, pseudoknots, alternative structures etc).
+ RNA shape prediction
+
+
+ Methods might use RNA motifs, predicted intermolecular contacts, or RNA sequence-structure compatibility (inverse RNA folding).
+ RNA secondary structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse some aspect of RNA/DNA folding, typically by processing sequence and/or structural data. For example, compute folding energies such as minimum folding energies for DNA or RNA sequences or energy landscape of RNA mutants.
+ Nucleic acid folding
+ Nucleic acid folding modelling
+ Nucleic acid folding prediction
+ Nucleic acid folding energy calculation
+
+
+ Nucleic acid folding analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on restriction enzymes or restriction enzyme sites.
+
+ Data retrieval (restriction enzyme annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Identify genetic markers in DNA sequences.
+
+ A genetic marker is any DNA sequence of known chromosomal location that is associated with and specific to a particular gene or trait. This includes short sequences surrounding a SNP, Sequence-Tagged Sites (STS) which are well suited for PCR amplification, a longer minisatellites sequence etc.
+ Genetic marker identification
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a genetic (linkage) map of a DNA sequence (typically a chromosome) showing the relative positions of genetic markers based on estimation of non-physical distances.
+ Functional mapping
+ Genetic cartography
+ Genetic map construction
+ Genetic map generation
+ Linkage mapping
+ QTL mapping
+
+
+ Mapping involves ordering genetic loci along a chromosome and estimating the physical distance between loci. A genetic map shows the relative (not physical) position of known genes and genetic markers.
+ This includes mapping of the genetic architecture of dynamic complex traits (functional mapping), e.g. by characterisation of the underlying quantitative trait loci (QTLs) or nucleotides (QTNs).
+ Genetic mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse genetic linkage.
+
+
+ For example, estimate how close two genes are on a chromosome by calculating how often they are transmitted together to an offspring, ascertain whether two genes are linked and parental linkage, calculate linkage map distance etc.
+ Linkage analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate codon usage statistics and create a codon usage table.
+ Codon usage table construction
+
+
+ Codon usage table generation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more codon usage tables.
+
+
+ Codon usage table comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse codon usage in molecular sequences or process codon usage data (e.g. a codon usage table).
+ Codon usage data analysis
+ Codon usage table analysis
+
+
+ Codon usage analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify and plot third base position variability in a nucleotide sequence.
+
+
+ Base position variability plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find exact character or word matches between molecular sequences without full sequence alignment.
+
+
+ Sequence word comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a sequence distance matrix or otherwise estimate genetic distances between molecular sequences.
+ Phylogenetic distance matrix generation
+ Sequence distance calculation
+ Sequence distance matrix construction
+
+
+ Sequence distance matrix generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more molecular sequences, identify and remove redundant sequences based on some criteria.
+
+
+ Sequence redundancy removal
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Build clusters of similar sequences, typically using scores from pair-wise alignment or other comparison of the sequences.
+ Sequence cluster construction
+ Sequence cluster generation
+
+
+ The clusters may be output or used internally for some other purpose.
+ Sequence clustering
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (identify equivalent sites within) molecular sequences.
+ Sequence alignment construction
+ Sequence alignment generation
+ Consensus-based sequence alignment
+ Constrained sequence alignment
+ Multiple sequence alignment (constrained)
+ Sequence alignment (constrained)
+
+
+ Includes methods that align sequence profiles (representing sequence alignments): methods might perform one-to-one, one-to-many or many-to-many comparisons. See also 'Sequence alignment comparison'.
+ See also "Read mapping"
+ Sequence alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Align two or more molecular sequences of different types (for example genomic DNA to EST, cDNA or mRNA).
+
+ Hybrid sequence alignment construction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align molecular sequences using sequence and structural information.
+ Sequence alignment (structure-based)
+
+
+ Structure-based sequence alignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (superimpose) molecular tertiary structures.
+ Structural alignment
+ 3D profile alignment
+ 3D profile-to-3D profile alignment
+ Structural profile alignment
+
+
+ Includes methods that align structural (3D) profiles or templates (representing structures or structure alignments) - including methods that perform one-to-one, one-to-many or many-to-many comparisons.
+ Structure alignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate some type of sequence profile (for example a hidden Markov model) from a sequence alignment.
+ Sequence profile construction
+
+
+ Sequence profile generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate some type of structural (3D) profile or template from a structure or structure alignment.
+ Structural profile construction
+ Structural profile generation
+
+
+ 3D profile generation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align sequence profiles (representing sequence alignments).
+
+
+ Profile-profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align structural (3D) profiles or templates (representing structures or structure alignments).
+
+
+ 3D profile-to-3D profile alignment
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align molecular sequence(s) to sequence profile(s), or profiles to other profiles. A profile typically represents a sequence alignment.
+ Profile-profile alignment
+ Profile-to-profile alignment
+ Sequence-profile alignment
+ Sequence-to-profile alignment
+
+
+ A sequence profile typically represents a sequence alignment. Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Sequence profile alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align molecular sequence(s) to structural (3D) profile(s) or template(s) (representing a structure or structure alignment).
+
+
+ Sequence-to-3D-profile alignment
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align molecular sequence to structure in 3D space (threading).
+ Sequence-structure alignment
+ Sequence-3D profile alignment
+ Sequence-to-3D-profile alignment
+
+
+ This includes sequence-to-3D-profile alignment methods, which align molecular sequence(s) to structural (3D) profile(s) or template(s) (representing a structure or structure alignment) - methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Use this concept for methods that evaluate sequence-structure compatibility by assessing residue interactions in 3D. Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Protein threading
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Recognize (predict and identify) known protein structural domains or folds in protein sequence(s) which (typically) are not accompanied by any significant sequence similarity to known structures.
+ Domain prediction
+ Fold prediction
+ Protein domain prediction
+ Protein fold prediction
+ Protein fold recognition
+
+
+ Methods use some type of mapping between sequence and fold, for example secondary structure prediction and alignment, profile comparison, sequence properties, homologous sequence search, kernel machines etc. Domains and folds might be taken from SCOP or CATH.
+ Fold recognition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Too fine-grained, the operation (Data retrieval) hasn't changed, just what is retrieved.
+ 1.17
+
+ Search for and retrieve data concerning or describing some core data, as distinct from the primary data that is being described.
+
+
+ This includes documentation, general information and other metadata on entities such as databases, database entries and tools.
+ Metadata retrieval
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Query scientific literature, in search for articles, article data, concepts, named entities, or for statistics.
+
+
+ Literature search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text analysis
+ Process and analyse text (typically scientific literature) to extract information from it.
+ Literature mining
+ Text analytics
+ Text data mining
+ Article analysis
+ Literature analysis
+
+
+ Text mining
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform in-silico (virtual) PCR.
+
+
+ Virtual PCR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Design or predict oligonucleotide primers for PCR and DNA amplification etc.
+ PCR primer prediction
+ Primer design
+ PCR primer design (based on gene structure)
+ PCR primer design (for conserved primers)
+ PCR primer design (for gene transcription profiling)
+ PCR primer design (for genotyping polymorphisms)
+ PCR primer design (for large scale sequencing)
+ PCR primer design (for methylation PCRs)
+ Primer quality estimation
+
+
+ Primer design involves predicting or selecting primers that are specific to a provided PCR template. Primers can be designed with certain properties such as size of product desired, primer size etc. The output might be a minimal or overlapping primer set.
+ This includes predicting primers based on gene structure, promoters, exon-exon junctions, predicting primers that are conserved across multiple genomes or species, primers for gene transcription profiling, for genotyping polymorphisms, for example single nucleotide polymorphisms (SNPs), for large scale sequencing, or for methylation PCRs.
+ PCR primer design
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict and/or optimize oligonucleotide probes for DNA microarrays, for example for transcription profiling of genes, or for genomes and gene families.
+ Microarray probe prediction
+
+
+ Microarray probe design
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Combine (align and merge) overlapping fragments of a DNA sequence to reconstruct the original sequence.
+ Metagenomic assembly
+ Sequence assembly editing
+
+
+ For example, assemble overlapping reads from paired-end sequencers into contigs (a contiguous sequence corresponding to read overlaps). Or assemble contigs, for example ESTs and genomic DNA fragments, depending on the detected fragment overlaps.
+ Sequence assembly
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+ Standardize or normalize microarray data.
+
+
+ Microarray data standardisation and normalisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) SAGE, MPSS or SBS experimental data.
+
+ Sequencing-based expression profile data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform cluster analysis of expression data to identify groups with similar expression profiles, for example by clustering.
+ Gene expression clustering
+ Gene expression profile clustering
+
+
+ Expression profile clustering
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The measurement of the activity (expression) of multiple genes in a cell, tissue, sample etc., in order to get an impression of biological function.
+ Feature expression analysis
+ Functional profiling
+ Gene expression profile construction
+ Gene expression profile generation
+ Gene expression quantification
+ Gene transcription profiling
+ Non-coding RNA profiling
+ Protein profiling
+ RNA profiling
+ mRNA profiling
+
+
+ Gene expression profiling generates some sort of gene expression profile, for example from microarray data.
+ Gene expression profiling
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Comparison of expression profiles.
+ Gene expression comparison
+ Gene expression profile comparison
+
+
+ Expression profile comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Interpret (in functional terms) and annotate gene expression data.
+
+
+ Functional profiling
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse EST or cDNA sequences.
+
+ For example, identify full-length cDNAs from EST sequences or detect potential EST antisense transcripts.
+ EST and cDNA sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identify and select targets for protein structural determination.
+
+ Methods will typically navigate a graph of protein families of known structure.
+ Structural genomics target selection
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign secondary structure from protein coordinate or experimental data.
+
+
+ Includes secondary structure assignment from circular dichroism (CD) spectroscopic data, and from protein coordinate data.
+ Protein secondary structure assignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign a protein tertiary structure (3D coordinates), or other aspects of protein structure, from raw experimental data.
+ NOE assignment
+ Structure calculation
+
+
+ Protein structure assignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: CorrectedPDBasXML
+ WHATIF: UseFileDB
+ WHATIF: UseResidueDB
+ Evaluate the quality or correctness of a protein three-dimensional model.
+ Protein model validation
+ Residue validation
+
+
+ Model validation might involve checks for atomic packing, steric clashes (bumps), volume irregularities, agreement with electron density maps, number of amino acid residues, percentage of residues with missing or bad atoms, irregular Ramachandran Z-scores, irregular Chi-1 / Chi-2 normality scores, RMS-Z score on bonds and angles etc.
+ The PDB file format has had difficulties, inconsistencies and errors. Corrections can include identifying a meaningful sequence, removal of alternate atoms, correction of nomenclature problems, removal of incomplete residues and spurious waters, addition or removal of water, modelling of missing side chains, optimisation of cysteine bonds, regularisation of bond lengths, bond angles and planarities etc.
+ This includes methods that calculate poor quality residues. The scoring function to identify poor quality residues may consider residues with bad atoms or atoms with high B-factor, residues in the N- or C-terminal position, adjacent to an unstructured residue, non-canonical residues, glycine and proline (or adjacent to these such residues).
+ Protein structure validation
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: CorrectedPDBasXML
+ Refine (after evaluation) a model of a molecular structure (typically a protein structure) to reduce steric clashes, volume irregularities etc.
+ Protein model refinement
+
+
+ Molecular model refinement
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree.
+ Phylogenetic tree construction
+ Phylogenetic reconstruction
+ Phylogenetic tree generation
+
+
+ Phylogenetic trees are usually constructed from a set of sequences from which an alignment (or data matrix) is calculated.
+ Phylogenetic inference
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse an existing phylogenetic tree or trees, typically to detect features or make predictions.
+ Phylogenetic tree analysis
+ Phylogenetic modelling
+
+
+ Phylogenetic modelling is the modelling of trait evolution and prediction of trait values using phylogeny as a basis.
+ Phylogenetic analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees.
+
+
+ For example, to produce a consensus tree, subtrees, supertrees, calculate distances between trees or test topological similarity between trees (e.g. a congruence index) etc.
+ Phylogenetic tree comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Edit a phylogenetic tree.
+
+
+ Phylogenetic tree editing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Comparison of a DNA sequence to orthologous sequences in different species and inference of a phylogenetic tree, in order to identify regulatory elements such as transcription factor binding sites (TFBS).
+ Phylogenetic shadowing
+
+
+ Phylogenetic shadowing is a type of footprinting where many closely related species are used. A phylogenetic 'shadow' represents the additive differences between individual sequences. By masking or 'shadowing' variable positions a conserved sequence is produced with few or none of the variations, which is then compared to the sequences of interest to identify significant regions of conservation.
+ Phylogenetic footprinting
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+ Simulate the folding of a protein.
+
+
+ Protein folding simulation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict the folding pathway(s) or non-native structural intermediates of a protein.
+
+
+ Protein folding pathway prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Map and model the effects of single nucleotide polymorphisms (SNPs) on protein structure(s).
+
+
+ Protein SNP mapping
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the effect of point mutation on a protein structure, in terms of structural effects and protein folding, stability and function.
+ Variant functional prediction
+ Protein SNP mapping
+ Protein mutation modelling
+ Protein stability change prediction
+
+
+ Protein SNP mapping maps and models the effects of single nucleotide polymorphisms (SNPs) on protein structure(s). Methods might predict silent or pathological mutations.
+ Variant effect prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Design molecules that elicit an immune response (immunogens).
+
+
+ Immunogen design
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Predict and optimise zinc finger protein domains for DNA/RNA binding (for example for transcription factors and nucleases).
+
+
+ Zinc finger prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate Km, Vmax and derived data for an enzyme reaction.
+
+
+ Enzyme kinetics calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Reformat a file of data (or equivalent entity in memory).
+ File format conversion
+ File formatting
+ File reformatting
+ Format conversion
+ Reformatting
+
+
+ Formatting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Test and validate the format and content of a data file.
+ File format validation
+
+
+ Format validation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Visualise, plot or render (graphically) biomolecular data such as molecular sequences or structures.
+ Data visualisation
+ Rendering
+ Molecular visualisation
+ Plotting
+
+
+ This includes methods to render and visualise molecules.
+ Visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a sequence database by sequence comparison and retrieve similar sequences, or sequences matching a given sequence motif or pattern, such as a Prosite pattern or regular expression.
+
+
+ This excludes direct retrieval methods (e.g. the dbfetch program).
+ Sequence database search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a tertiary structure database, typically by sequence and/or structure comparison, or some other means, and retrieve structures and associated data.
+
+
+ Structure database search
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Search a secondary protein database (of classification information) to assign a protein sequence(s) to a known protein family or group.
+
+
+ Protein secondary database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Screen a sequence against a motif or pattern database.
+
+ Motif database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Search a database of sequence profiles with a query sequence.
+
+ Sequence profile database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Search a database of transmembrane proteins, for example for sequence or structural similarities.
+
+ Transmembrane protein database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a database and retrieve sequences with a given entry code or accession number.
+
+ Sequence retrieval (by code)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a database and retrieve sequences containing a given keyword.
+
+ Sequence retrieval (by keyword)
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a sequence database and retrieve sequences that are similar to a query sequence.
+ Sequence database search (by sequence)
+ Structure database search (by sequence)
+
+
+ Sequence similarity search
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Search a sequence database and retrieve sequences matching a given sequence motif or pattern, such as a Prosite pattern or regular expression.
+
+
+ Sequence database search (by motif or pattern)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database and retrieve sequences of a given amino acid composition.
+
+ Sequence database search (by amino acid composition)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a sequence database and retrieve sequences with a specified property, typically a physicochemical or compositional property.
+
+
+ Sequence database search (by property)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database and retrieve sequences that are similar to a query sequence using a word-based method.
+
+ Word-based methods (for example BLAST, gapped BLAST, MEGABLAST, WU-BLAST etc.) are usually quicker than alignment-based methods. They may or may not handle gaps.
+ Sequence database search (by sequence using word-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database and retrieve sequences that are similar to a query sequence using a sequence profile-based method, or with a supplied profile as query.
+
+ This includes tools based on PSI-BLAST.
+ Sequence database search (by sequence using profile-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database for sequences that are similar to a query sequence using a local alignment-based method.
+
+ This includes tools based on the Smith-Waterman algorithm or FASTA.
+ Sequence database search (by sequence using local alignment-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search sequence(s) or a sequence database for sequences that are similar to a query sequence using a global alignment-based method.
+
+ This includes tools based on the Needleman and Wunsch algorithm.
+ Sequence database search (by sequence using global alignment-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a DNA database (for example a database of conserved sequence tags) for matches to Sequence-Tagged Site (STS) primer sequences.
+
+ STSs are genetic markers that are easily detected by the polymerase chain reaction (PCR) using specific primers.
+ Sequence database search (by sequence for primer sequences)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Search sequence(s) or a sequence database for sequences which match a set of peptide masses, for example a peptide mass fingerprint from mass spectrometry.
+
+
+ Sequence database search (by molecular weight)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search sequence(s) or a sequence database for sequences of a given isoelectric point.
+
+ Sequence database search (by isoelectric point)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a tertiary structure database and retrieve entries with a given entry code or accession number.
+
+ Structure retrieval (by code)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a tertiary structure database and retrieve entries containing a given keyword.
+
+ Structure retrieval (by keyword)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Search a tertiary structure database and retrieve structures with a sequence similar to a query sequence.
+
+
+ Structure database search (by sequence)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a database of molecular structure and retrieve structures that are similar to a query structure.
+ Structure database search (by structure)
+ Structure retrieval by structure
+
+
+ Structural similarity search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotate a molecular sequence record with terms from a controlled vocabulary.
+
+
+ Sequence annotation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotate a genome sequence with terms from a controlled vocabulary.
+ Functional genome annotation
+ Metagenome annotation
+ Structural genome annotation
+
+
+ Genome annotation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate the reverse and / or complement of a nucleotide sequence.
+ Nucleic acid sequence reverse and complement
+ Reverse / complement
+ Reverse and complement
+
+
+ Reverse complement
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a random sequence, for example, with a specific character composition.
+
+
+ Random sequence generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate digest fragments for a nucleotide sequence containing restriction sites.
+ Nucleic acid restriction digest
+
+
+ Restriction digest
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Cleave a protein sequence into peptide fragments (corresponding to enzymatic or chemical cleavage).
+
+
+ This is often followed by calculation of protein fragment masses (http://edamontology.org/operation_0398).
+ Protein sequence cleavage
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mutate a molecular sequence a specified amount or shuffle it to produce a randomised sequence with the same overall composition.
+
+
+ Sequence mutation and randomisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mask characters in a molecular sequence (replacing those characters with a mask character).
+
+
+ For example, SNPs or repeats in a DNA sequence might be masked.
+ Sequence masking
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Cut (remove) characters or a region from a molecular sequence.
+
+
+ Sequence cutting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Create (or remove) restriction sites in sequences, for example using silent mutations.
+
+
+ Restriction site creation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Translate a DNA sequence into protein.
+
+
+ DNA translation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Transcribe a nucleotide sequence into mRNA sequence(s).
+
+
+ DNA transcription
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Calculate base frequency or word composition of a nucleotide sequence.
+
+
+ Sequence composition calculation (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Calculate amino acid frequency or word composition of a protein sequence.
+
+
+ Sequence composition calculation (protein)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find (and possibly render) short repetitive subsequences (repeat sequences) in (typically nucleotide) sequences.
+
+
+ Repeat sequence detection
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse repeat sequence organisation such as periodicity.
+
+
+ Repeat sequence organisation analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Analyse the hydrophobic, hydrophilic or charge properties of a protein structure.
+
+
+ Protein hydropathy calculation (from structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:AtomAccessibilitySolvent
+ WHATIF:AtomAccessibilitySolventPlus
+ Calculate solvent accessible or buried surface areas in protein or other molecular structures.
+ Protein solvent accessibility calculation
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Accessible surface calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify clusters of hydrophobic or charged residues in a protein structure.
+
+
+ Protein hydropathy cluster calculation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate whether a protein structure has an unusually large net charge (dipole moment).
+
+
+ Protein dipole moment calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:AtomAccessibilityMolecular
+ WHATIF:AtomAccessibilityMolecularPlus
+ WHATIF:ResidueAccessibilityMolecular
+ WHATIF:ResidueAccessibilitySolvent
+ WHATIF:ResidueAccessibilityVacuum
+ WHATIF:ResidueAccessibilityVacuumMolecular
+ WHATIF:TotAccessibilityMolecular
+ WHATIF:TotAccessibilitySolvent
+ Calculate the molecular surface area in proteins and other macromolecules.
+ Protein atom surface calculation
+ Protein residue surface calculation
+ Protein surface and interior calculation
+ Protein surface calculation
+
+
+ Molecular surface calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify or predict catalytic residues, active sites or other ligand-binding sites in protein structures.
+
+
+ Protein binding site prediction (from structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the interaction of protein with nucleic acids, e.g. RNA or DNA-binding sites, interfaces etc.
+ Protein-nucleic acid binding site analysis
+ Protein-DNA interaction analysis
+ Protein-RNA interaction analysis
+
+
+ Protein-nucleic acid interaction analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Decompose a structure into compact or globular fragments (protein peeling).
+
+
+ Protein peeling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a matrix of distance between residues (for example the C-alpha atoms) in a protein structure.
+
+
+ Protein distance matrix calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a residue contact map (typically all-versus-all inter-residue contacts) for a protein structure.
+ Protein contact map calculation
+
+
+ Contact map calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate clusters of contacting residues in protein structures.
+
+
+ This includes for example clusters of hydrophobic or charged residues, or clusters of contacting residues which have a key structural or functional role.
+ Residue cluster calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:HasHydrogenBonds
+ WHATIF:ShowHydrogenBonds
+ WHATIF:ShowHydrogenBondsM
+ Identify potential hydrogen bonds between amino acids and other groups.
+
+
+ The output might include the atoms involved in the bond, bond geometric parameters and bond enthalpy.
+ Hydrogen bond calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+ Calculate non-canonical atomic interactions in protein structures.
+
+ Residue non-canonical interaction detection
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a Ramachandran plot of a protein structure.
+
+
+ Ramachandran plot calculation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Validate a Ramachandran plot of a protein structure.
+
+
+ Ramachandran plot validation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the molecular weight of a protein sequence or fragments.
+ Peptide mass calculation
+
+
+ Protein molecular weight calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict extinction coefficients or optical density of a protein sequence.
+
+
+ Protein extinction coefficient calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate pH-dependent properties from pKa calculations of a protein sequence.
+ Protein pH-dependent property calculation
+
+
+ Protein pKa calculation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Hydropathy calculation on a protein sequence.
+
+
+ Protein hydropathy calculation (from sequence)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Plot a protein titration curve.
+
+
+ Protein titration curve plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate isoelectric point of a protein sequence.
+
+
+ Protein isoelectric point calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Estimate hydrogen exchange rate of a protein sequence.
+
+
+ Protein hydrogen exchange rate calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate hydrophobic or hydrophilic / charged regions of a protein sequence.
+
+
+ Protein hydrophobic region calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate aliphatic index (relative volume occupied by aliphatic side chains) of a protein.
+
+
+ Protein aliphatic index calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the hydrophobic moment of a peptide sequence and recognize amphiphilicity.
+
+
+ Hydrophobic moment is a peptide's hydrophobicity measured for different angles of rotation.
+ Protein hydrophobic moment plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the stability or globularity of a protein sequence, whether it is intrinsically unfolded etc.
+
+
+ Protein globularity prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the solubility or atomic solvation energy of a protein sequence.
+
+
+ Protein solubility prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict crystallizability of a protein sequence.
+
+
+ Protein crystallizability prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Too fine-grained.
+ 1.17
+
+ Detect or predict signal peptides (and typically predict subcellular localisation) of eukaryotic proteins.
+
+
+ Protein signal peptide detection (eukaryotes)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Too fine-grained.
+ 1.17
+
+ Detect or predict signal peptides (and typically predict subcellular localisation) of bacterial proteins.
+
+
+ Protein signal peptide detection (bacteria)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Predict MHC class I or class II binding peptides, promiscuous binding peptides, immunogenicity etc.
+
+
+ MHC peptide immunogenicity prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Predict, recognise and identify positional features in protein sequences such as functional sites or regions and secondary structure.
+
+ Methods typically involve scanning for known motifs, patterns and regular expressions.
+ Protein feature prediction (from sequence)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict, recognise and identify features in nucleotide sequences such as functional sites or regions, typically by scanning for known motifs, patterns and regular expressions.
+ Sequence feature detection (nucleic acid)
+ Nucleic acid feature prediction
+ Nucleic acid feature recognition
+ Nucleic acid site detection
+ Nucleic acid site prediction
+ Nucleic acid site recognition
+
+
+ Methods typically involve scanning for known motifs, patterns and regular expressions.
+ This is a placeholder but does not comprehensively include all child concepts - please inspect other concepts under "Nucleic acid sequence analysis" for example "Gene prediction", for other feature detection operations.
+ Nucleic acid feature detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict antigenic determinant sites (epitopes) in protein sequences.
+ Antibody epitope prediction
+ Epitope prediction
+ B cell epitope mapping
+ B cell epitope prediction
+ Epitope mapping (MHC Class I)
+ Epitope mapping (MHC Class II)
+ T cell epitope mapping
+ T cell epitope prediction
+
+
+ Epitope mapping is commonly done during vaccine design.
+ Epitope mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict post-translation modification sites in protein sequences.
+ PTM analysis
+ PTM prediction
+ PTM site analysis
+ PTM site prediction
+ Post-translation modification site prediction
+ Post-translational modification analysis
+ Protein post-translation modification site prediction
+ Acetylation prediction
+ Acetylation site prediction
+ Dephosphorylation prediction
+ Dephosphorylation site prediction
+ GPI anchor prediction
+ GPI anchor site prediction
+ GPI modification prediction
+ GPI modification site prediction
+ Glycosylation prediction
+ Glycosylation site prediction
+ Hydroxylation prediction
+ Hydroxylation site prediction
+ Methylation prediction
+ Methylation site prediction
+ N-myristoylation prediction
+ N-myristoylation site prediction
+ N-terminal acetylation prediction
+ N-terminal acetylation site prediction
+ N-terminal myristoylation prediction
+ N-terminal myristoylation site prediction
+ Palmitoylation prediction
+ Palmitoylation site prediction
+ Phosphoglycerylation prediction
+ Phosphoglycerylation site prediction
+ Phosphorylation prediction
+ Phosphorylation site prediction
+ Phosphosite localization
+ Prenylation prediction
+ Prenylation site prediction
+ Pupylation prediction
+ Pupylation site prediction
+ S-nitrosylation prediction
+ S-nitrosylation site prediction
+ S-sulfenylation prediction
+ S-sulfenylation site prediction
+ Succinylation prediction
+ Succinylation site prediction
+ Sulfation prediction
+ Sulfation site prediction
+ Sumoylation prediction
+ Sumoylation site prediction
+ Tyrosine nitration prediction
+ Tyrosine nitration site prediction
+ Ubiquitination prediction
+ Ubiquitination site prediction
+
+
+ Methods might predict sites of methylation, N-terminal myristoylation, N-terminal acetylation, sumoylation, palmitoylation, phosphorylation, sulfation, glycosylation, glycosylphosphatidylinositol (GPI) modification sites (GPI lipid anchor signals) etc.
+ Post-translational modification site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect or predict signal peptides and signal peptide cleavage sites in protein sequences.
+
+
+ Methods might use sequence motifs and features, amino acid composition, profiles, machine-learned classifiers, etc.
+ Protein signal peptide detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Predict catalytic residues, active sites or other ligand-binding sites in protein sequences.
+
+
+ Protein binding site prediction (from sequence)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict or detect RNA and DNA-binding sites in protein sequences.
+ Protein-nucleic acid binding detection
+ Protein-nucleic acid binding prediction
+ Protein-nucleic acid binding site detection
+ Protein-nucleic acid binding site prediction
+ Zinc finger prediction
+
+
+ This includes methods that predict and optimise zinc finger protein domains for DNA/RNA binding (for example for transcription factors and nucleases).
+ Nucleic acids-binding site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+ Predict protein sites that are key to protein folding, such as possible sites of nucleation or stabilisation.
+
+
+ Protein folding site prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect or predict cleavage sites (enzymatic or chemical) in protein sequences.
+
+
+ Protein cleavage site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Predict epitopes that bind to MHC class I molecules.
+
+
+ Epitope mapping (MHC Class I)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Predict epitopes that bind to MHC class II molecules.
+
+
+ Epitope mapping (MHC Class II)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Detect, predict and identify whole gene structure in DNA sequences. This includes protein coding regions, exon-intron structure, regulatory regions etc.
+
+
+ Whole gene prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Detect, predict and identify genetic elements such as promoters, coding regions, splice sites, etc in DNA sequences.
+
+
+ Methods for gene prediction might be ab initio, based on phylogenetic comparisons, use motifs, sequence features, support vector machine, alignment etc.
+ Gene component prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect or predict transposons, retrotransposons / retrotransposition signatures etc.
+
+
+ Transposon prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect polyA signals in nucleotide sequences.
+ PolyA detection
+ PolyA prediction
+ PolyA signal prediction
+ Polyadenylation signal detection
+ Polyadenylation signal prediction
+
+
+ PolyA signal detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect quadruplex-forming motifs in nucleotide sequences.
+ Quadruplex structure prediction
+
+
+ Quadruplex (4-stranded) structures are formed by guanine-rich regions and are implicated in various important biological processes and as therapeutic targets.
+ Quadruplex formation site detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find CpG rich regions in a nucleotide sequence or isochores in genome sequences.
+ CpG island and isochores detection
+ CpG island and isochores rendering
+
+
+ An isochore is a long region (> 3 KB) of DNA with very uniform GC content, in contrast to the rest of the genome. Isochores tend to have more genes, higher local melting or denaturation temperatures, and different flexibility. Methods might calculate fractional GC content or variation of GC content, predict methylation status of CpG islands etc. This includes methods that visualise CpG rich regions in a nucleotide sequence, for example plot isochores in a genome sequence.
+ CpG island and isochore detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find and identify restriction enzyme cleavage sites (restriction sites) in (typically) DNA sequences, for example to generate a restriction map.
+
+
+ Restriction site recognition
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict nucleosome exclusion sequences (nucleosome free regions) in DNA.
+ Nucleosome exclusion sequence prediction
+ Nucleosome formation sequence prediction
+
+
+ Nucleosome position prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify, predict or analyse splice sites in nucleotide sequences.
+ Splice prediction
+
+
+ Methods might require a pre-mRNA or genomic DNA sequence.
+ Splice site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict whole gene structure using a combination of multiple methods to achieve better predictions.
+
+
+ Integrated gene prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find operons (operators, promoters and genes) in bacterial genes.
+
+
+ Operon prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict protein-coding regions (CDS or exon) or open reading frames in nucleotide sequences.
+ ORF finding
+ ORF prediction
+
+
+ Coding region prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict selenocysteine insertion sequence (SECIS) in a DNA sequence.
+ Selenocysteine insertion sequence (SECIS) prediction
+
+
+ SECIS elements are around 60 nucleotides in length with a stem-loop structure that directs the cell to translate UGA codons as selenocysteines.
+ SECIS element prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict transcriptional regulatory motifs, patterns, elements or regions in DNA sequences.
+ Regulatory element prediction
+ Transcription regulatory element prediction
+ Conserved transcription regulatory sequence identification
+ Translational regulatory element prediction
+
+
+ This includes comparative genomics approaches that identify common, conserved (homologous) or synonymous transcriptional regulatory elements. For example cross-species comparison of transcription factor binding sites (TFBS). Methods might analyse co-regulated or co-expressed genes, or sets of oppositely expressed genes.
+ This includes promoters, enhancers, silencers and boundary elements / insulators, regulatory protein or transcription factor binding sites etc. Methods might be specific to a particular genome and use motifs, word-based / grammatical methods, position-specific frequency matrices, discriminative pattern analysis etc.
+ Transcriptional regulatory element prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict translation initiation sites, possibly by searching a database of sites.
+
+
+ Translation initiation site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict whole promoters or promoter elements (transcription start sites, RNA polymerase binding site, transcription factor binding sites, promoter enhancers etc) in DNA sequences.
+
+
+ Methods might recognize CG content, CpG islands, splice sites, polyA signals etc.
+ Promoter prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify, predict or analyse cis-regulatory elements in DNA sequences (TATA box, Pribnow box, SOS box, CAAT box, CCAAT box, operator etc.) or in RNA sequences (e.g. riboswitches).
+ Transcriptional regulatory element prediction (DNA-cis)
+ Transcriptional regulatory element prediction (RNA-cis)
+
+
+ Cis-regulatory elements (cis-elements) regulate the expression of genes located on the same strand from which the element was transcribed. Cis-elements are found in the 5' promoter region of the gene, in an intron, or in the 3' untranslated region. Cis-elements are often binding sites of one or more trans-acting factors. They also occur in RNA sequences, e.g. a riboswitch is a region of an mRNA molecule that binds a small target molecule that regulates the gene's activity.
+ cis-regulatory element prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Identify, predict or analyse cis-regulatory elements (for example riboswitches) in RNA sequences.
+
+
+ Transcriptional regulatory element prediction (RNA-cis)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict functional RNA sequences with a gene regulatory role (trans-regulatory elements) or targets.
+ Functional RNA identification
+ Transcriptional regulatory element prediction (trans)
+
+
+ Trans-regulatory elements regulate genes distant from the gene from which they were transcribed.
+ trans-regulatory element prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify matrix/scaffold attachment regions (MARs/SARs) in DNA sequences.
+ MAR/SAR prediction
+ Matrix/scaffold attachment site prediction
+
+
+ MAR/SAR sites often flank a gene or gene cluster and are found nearby cis-regulatory sequences. They might contribute to transcription regulation.
+ S/MAR prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict transcription factor binding sites in DNA sequences.
+
+
+ Transcription factor binding site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict exonic splicing enhancers (ESE) in exons.
+
+
+ An exonic splicing enhancer (ESE) is a 6-base DNA sequence motif in an exon that enhances or directs splicing of pre-mRNA or hetero-nuclear RNA (hnRNA) into mRNA.
+ Exonic splicing enhancer prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Evaluate molecular sequence alignment accuracy.
+ Sequence alignment quality evaluation
+
+
+ Evaluation might be purely sequence-based or use structural information.
+ Sequence alignment validation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse character conservation in a molecular sequence alignment, for example to derive a consensus sequence.
+ Residue conservation analysis
+
+
+ Use this concept for methods that calculate substitution rates, estimate relative site variability, identify sites with biased properties, derive a consensus sequence, or identify highly conserved or very poorly conserved sites, regions, blocks etc.
+ Sequence alignment analysis (conservation)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse correlations between sites in a molecular sequence alignment.
+
+
+ This is typically done to identify possible covarying positions and predict contacts or structural constraints in protein structures.
+ Sequence alignment analysis (site correlation)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect chimeric sequences (chimeras) from a sequence alignment.
+ Chimeric sequence detection
+
+
+ A chimera includes regions from two or more phylogenetically distinct sequences. They are usually artifacts of PCR and are thought to occur when a prematurely terminated amplicon reanneals to another DNA strand and is subsequently copied to completion in later PCR cycles.
+ Chimera detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect recombination (hotspots and coldspots) and identify recombination breakpoints in a sequence alignment.
+ Sequence alignment analysis (recombination detection)
+
+
+ Tools might use a genetic algorithm, quartet-mapping, bootscanning, graphical methods, random forest model and so on.
+ Recombination detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify insertion, deletion and duplication events from a sequence alignment.
+ Indel discovery
+ Sequence alignment analysis (indel detection)
+
+
+ Tools might use a genetic algorithm, quartet-mapping, bootscanning, graphical methods, random forest model and so on.
+ Indel detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Predict nucleosome formation potential of DNA sequences.
+
+ Nucleosome formation potential prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a thermodynamic property of DNA or DNA/RNA, such as melting temperature, enthalpy and entropy.
+
+
+ Nucleic acid thermodynamic property calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA melting profile.
+
+
+ A melting profile is used to visualise and analyse partly melted DNA conformations.
+ Nucleic acid melting profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA stitch profile.
+
+
+ A stitch profile represents the alternative conformations that partly melted DNA can adopt in a temperature range.
+ Nucleic acid stitch profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA melting curve.
+
+
+ Nucleic acid melting curve plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA probability profile.
+
+
+ Nucleic acid probability profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA temperature profile.
+
+
+ Nucleic acid temperature profile plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate curvature and flexibility / stiffness of a nucleotide sequence.
+
+
+ This includes properties such as.
+ Nucleic acid curvature calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict microRNA sequences (miRNA) and precursors or microRNA targets / binding sites in a DNA sequence.
+ miRNA prediction
+ microRNA detection
+ microRNA target detection
+
+
+ miRNA target prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict tRNA genes in genomic sequences (tRNA).
+
+
+ tRNA gene prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assess binding specificity of putative siRNA sequence(s), for example for a functional assay, typically with respect to designing specific siRNA sequences.
+
+
+ siRNA binding specificity prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Predict secondary structure of protein sequence(s) using multiple methods to achieve better predictions.
+
+
+ Protein secondary structure prediction (integrated)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict helical secondary structure of protein sequences.
+
+
+ Protein secondary structure prediction (helices)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict turn structure (for example beta hairpin turns) of protein sequences.
+
+
+ Protein secondary structure prediction (turns)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict open coils, non-regular secondary structure and intrinsically disordered / unstructured regions of protein sequences.
+
+
+ Protein secondary structure prediction (coils)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict cysteine bonding state and disulfide bond partners in protein sequences.
+
+
+ Disulfide bond prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Not sustainable to have protein type-specific concepts.
+ 1.19
+
+ Predict G protein-coupled receptors (GPCR).
+
+
+ GPCR prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Not sustainable to have protein type-specific concepts.
+ 1.19
+
+ Analyse G-protein coupled receptor proteins (GPCRs).
+
+
+ GPCR analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict tertiary structure (backbone and side-chain conformation) of protein sequences.
+ Protein folding pathway prediction
+
+
+ This includes methods that predict the folding pathway(s) or non-native structural intermediates of a protein.
+ Protein structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict structure of DNA or RNA.
+
+
+ Methods might identify thermodynamically stable or evolutionarily conserved structures.
+ Nucleic acid structure prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict tertiary structure of protein sequence(s) without homologs of known structure.
+ de novo structure prediction
+
+
+ Ab initio structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Build a three-dimensional protein model based on known (for example homologs) structures.
+ Comparative modelling
+ Homology modelling
+ Homology structure modelling
+ Protein structure comparative modelling
+
+
+ The model might be of a whole, part or aspect of protein structure. Molecular modelling methods might use sequence-structure alignment, structural templates, molecular dynamics, energy minimisation etc.
+ Protein modelling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model the structure of a protein in complex with a small molecule or another macromolecule.
+ Docking simulation
+ Macromolecular docking
+
+
+ This includes protein-protein interactions, protein-nucleic acid, protein-ligand binding etc. Methods might predict whether the molecules are likely to bind in vivo, their conformation when bound, the strength of the interaction, possible mutations to achieve bonding and so on.
+ Molecular docking
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model protein backbone conformation.
+ Protein modelling (backbone)
+ Design optimization
+ Epitope grafting
+ Scaffold search
+ Scaffold selection
+
+
+ Methods might require a preliminary C(alpha) trace.
+ Scaffold selection, scaffold search, epitope grafting and design optimization are stages of backbone modelling done during rational vaccine design.
+ Backbone modelling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model, analyse or edit amino acid side chain conformation in protein structure, optimize side-chain packing, hydrogen bonding etc.
+ Protein modelling (side chains)
+ Antibody optimisation
+ Antigen optimisation
+ Antigen resurfacing
+ Rotamer likelihood prediction
+
+
+ Antibody optimisation is to optimize the antibody-interacting surface of the antigen (epitope). Antigen optimisation is to optimize the antigen-interacting surface of the antibody (paratope). Antigen resurfacing is to resurface the antigen by varying the sequence of non-epitope regions.
+ Methods might use a residue rotamer library.
+ This includes rotamer likelihood prediction: the prediction of rotamer likelihoods for all 20 amino acid types at each position in a protein structure, where output typically includes, for each residue position, the likelihoods for the 20 amino acid types with estimated reliability of the 20 likelihoods.
+ Side chain modelling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model loop conformation in protein structures.
+ Protein loop modelling
+ Protein modelling (loops)
+
+
+ Loop modelling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model protein-ligand (for example protein-peptide) binding using comparative modelling or other techniques.
+ Ligand-binding simulation
+ Protein-peptide docking
+
+
+ Methods aim to predict the position and orientation of a ligand bound to a protein receptor or enzyme.
+ Virtual screening is used in drug discovery to search libraries of small molecules in order to identify those molecules which are most likely to bind to a drug target (typically a protein receptor or enzyme).
+ Protein-ligand docking
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict or optimise RNA sequences (sequence pools) with likely secondary and tertiary structure for in vitro selection.
+ Nucleic acid folding family identification
+ Structured RNA prediction and optimisation
+
+
+ RNA inverse folding
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find single nucleotide polymorphisms (SNPs) - single nucleotide change in base positions - between sequences. Typically done for sequences from a high-throughput sequencing experiment that differ from a reference genome and which might, especially by reference to population frequency or functional data, indicate a polymorphism.
+ SNP calling
+ SNP discovery
+ Single nucleotide polymorphism detection
+
+
+ This includes functional SNPs for large-scale genotyping purposes, disease-associated non-synonymous SNPs etc.
+ SNP detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a physical (radiation hybrid) map of genetic markers in a DNA sequence using provided radiation hybrid (RH) scores for one or more markers.
+
+
+ Radiation Hybrid Mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Map the genetic architecture of dynamic complex traits.
+
+ This can involve characterisation of the underlying quantitative trait loci (QTLs) or nucleotides (QTNs).
+ Functional mapping
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Infer haplotypes, either alleles at multiple loci that are transmitted together on the same chromosome, or a set of single nucleotide polymorphisms (SNPs) on a single chromatid that are statistically associated.
+ Haplotype inference
+ Haplotype map generation
+ Haplotype reconstruction
+
+
+ Haplotype inference can help in population genetic studies and the identification of complex disease genes, and is typically based on aligned single nucleotide polymorphism (SNP) fragments. Haplotype comparison is a useful way to characterize the genetic variation between individuals. An individual's haplotype describes which nucleotide base occurs at each position for a set of common SNPs. Tools might use combinatorial functions (for example parsimony) or a likelihood function or model with optimisation such as minimum error correction (MEC) model, expectation-maximisation algorithm (EM), genetic algorithm or Markov chain Monte Carlo (MCMC).
+ Haplotype mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate linkage disequilibrium; the non-random association of alleles or polymorphisms at two or more loci (not necessarily on the same chromosome).
+
+
+ Linkage disequilibrium is identified where a combination of alleles (or genetic markers) occurs more or less frequently in a population than expected by chance formation of haplotypes.
+ Linkage disequilibrium calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict genetic code from analysis of codon usage data.
+
+
+ Genetic code prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a representation of a distribution that consists of a group of data points plotted on a simple scale.
+ Categorical plot plotting
+ Dotplot plotting
+
+
+ Dot plots are useful when having not too many (e.g. 20) data points for each category. Example: draw a dotplot of sequence similarities identified from word-matching or character comparison.
+ Dot plot plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align exactly two molecular sequences.
+ Pairwise alignment
+
+
+ Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Pairwise sequence alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align more than two molecular sequences.
+ Multiple alignment
+
+
+ This includes methods that use an existing alignment, for example to incorporate sequences into an alignment, or combine several multiple alignments into a single, improved alignment.
+ Multiple sequence alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Locally align exactly two molecular sequences.
+
+ Local alignment methods identify regions of local similarity.
+ Pairwise sequence alignment generation (local)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Globally align exactly two molecular sequences.
+
+ Global alignment methods identify similarity across the entire length of the sequences.
+ Pairwise sequence alignment generation (global)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Locally align two or more molecular sequences.
+ Local sequence alignment
+ Sequence alignment (local)
+ Smith-Waterman
+
+
+ Local alignment methods identify regions of local similarity.
+ Local alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Globally align two or more molecular sequences.
+ Global sequence alignment
+ Sequence alignment (global)
+
+
+ Global alignment methods identify similarity across the entire length of the sequences.
+ Global alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align two or more molecular sequences with user-defined constraints.
+
+
+ Constrained sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+ Align two or more molecular sequences using multiple methods to achieve higher quality.
+
+
+ Consensus-based sequence alignment
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align multiple sequences using relative gap costs calculated from neighbors in a supplied phylogenetic tree.
+ Multiple sequence alignment (phylogenetic tree-based)
+ Multiple sequence alignment construction (phylogenetic tree-based)
+ Phylogenetic tree-based multiple sequence alignment construction
+ Sequence alignment (phylogenetic tree-based)
+ Sequence alignment generation (phylogenetic tree-based)
+
+
+ This is supposed to give a more biologically meaningful alignment than standard alignments.
+ Tree-based sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Align molecular secondary structure (represented as a 1D string).
+
+ Secondary structure alignment generation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Align protein secondary structures.
+
+
+ Protein secondary structure alignment generation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align RNA secondary structures.
+ RNA secondary structure alignment construction
+ RNA secondary structure alignment generation
+ Secondary structure alignment construction (RNA)
+
+
+ RNA secondary structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (superimpose) exactly two molecular tertiary structures.
+ Structure alignment (pairwise)
+ Pairwise protein structure alignment
+
+
+ Pairwise structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (superimpose) more than two molecular tertiary structures.
+ Structure alignment (multiple)
+ Multiple protein structure alignment
+
+
+ This includes methods that use an existing alignment.
+ Multiple structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Align protein tertiary structures.
+
+ Structure alignment (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Align RNA tertiary structures.
+
+ Structure alignment (RNA)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Locally align (superimpose) exactly two molecular tertiary structures.
+
+ Local alignment methods identify regions of local similarity, common substructures etc.
+ Pairwise structure alignment generation (local)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Globally align (superimpose) exactly two molecular tertiary structures.
+
+ Global alignment methods identify similarity across the entire structures.
+ Pairwise structure alignment generation (global)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Locally align (superimpose) two or more molecular tertiary structures.
+ Structure alignment (local)
+ Local protein structure alignment
+
+
+ Local alignment methods identify regions of local similarity, common substructures etc.
+ Local structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Globally align (superimpose) two or more molecular tertiary structures.
+ Structure alignment (global)
+ Global protein structure alignment
+
+
+ Global alignment methods identify similarity across the entire structures.
+ Global structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+
+ Align exactly two molecular profiles.
+
+ Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Profile-profile alignment (pairwise)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Align two or more molecular profiles.
+
+ Sequence alignment generation (multiple profile)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+
+
+ Align exactly two molecular structural (3D) profiles.
+
+ 3D profile-to-3D profile alignment (pairwise)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+
+ Align two or more molecular 3D profiles.
+
+ Structural profile alignment generation (multiple)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search and retrieve names of or documentation on bioinformatics tools, for example by keyword or which perform a particular function.
+
+ Data retrieval (tool metadata)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search and retrieve names of or documentation on bioinformatics databases or query terms, for example by keyword.
+
+ Data retrieval (database metadata)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for large scale sequencing.
+
+
+ PCR primer design (for large scale sequencing)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for genotyping polymorphisms, for example single nucleotide polymorphisms (SNPs).
+
+
+ PCR primer design (for genotyping polymorphisms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for gene transcription profiling.
+
+
+ PCR primer design (for gene transcription profiling)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers that are conserved across multiple genomes or species.
+
+
+ PCR primer design (for conserved primers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers based on gene structure.
+
+
+ PCR primer design (based on gene structure)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for methylation PCRs.
+
+
+ PCR primer design (for methylation PCRs)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence assembly by combining fragments using an existing backbone sequence, typically a reference genome.
+ Sequence assembly (mapping assembly)
+
+
+ The final sequence will resemble the backbone sequence. Mapping assemblers are usually much faster and less memory intensive than de-novo assemblers.
+ Mapping assembly
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence assembly by combining fragments without the aid of a reference sequence or genome.
+ De Bruijn graph
+ Sequence assembly (de-novo assembly)
+
+
+ De-novo assemblers are much slower and more memory intensive than mapping assemblers.
+ De-novo assembly
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The process of assembling many short DNA sequences together such that they represent the original chromosomes from which the DNA originated.
+ Genomic assembly
+ Sequence assembly (genome assembly)
+ Breakend assembly
+
+
+ Genome assembly
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence assembly for EST sequences (transcribed mRNA).
+ Sequence assembly (EST assembly)
+
+
+ Assemblers must handle (or be complicated by) alternative splicing, trans-splicing, single-nucleotide polymorphism (SNP), recoding, and post-transcriptional modification.
+ EST assembly
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Make sequence tag to gene assignments (tag mapping) of SAGE, MPSS and SBS data.
+ Tag to gene assignment
+
+
+ Sequence tag mapping assigns experimentally obtained sequence tags to known transcripts or annotates potential virtual sequence tags in a genome.
+ Sequence tag mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) serial analysis of gene expression (SAGE) data.
+
+ SAGE data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) massively parallel signature sequencing (MPSS) data.
+
+ MPSS data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) sequencing by synthesis (SBS) data.
+
+ SBS data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a heat map of expression data from e.g. microarray data.
+ Heat map construction
+ Heatmap generation
+
+
+ The heat map usually uses a coloring scheme to represent expression values. They can show how quantitative measurements were influenced by experimental conditions.
+ Heat map generation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Analyse one or more gene expression profiles, typically to interpret them in functional terms.
+
+ Gene expression profile analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Map an expression profile to known biological pathways, for example, to identify or reconstruct a pathway.
+ Pathway mapping
+ Gene expression profile pathway mapping
+ Gene to pathway mapping
+ Gene-to-pathway mapping
+
+
+ Expression profile pathway mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Assign secondary structure from protein coordinate data.
+
+
+ Protein secondary structure assignment (from coordinate data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Assign secondary structure from circular dichroism (CD) spectroscopic data.
+
+
+ Protein secondary structure assignment (from CD data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ Assign a protein tertiary structure (3D coordinates) from raw X-ray crystallography data.
+
+
+ Protein structure assignment (from X-ray crystallographic data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ Assign a protein tertiary structure (3D coordinates) from raw NMR spectroscopy data.
+
+
+ Protein structure assignment (from NMR data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Construct a phylogenetic tree from a specific type of data.
+ Phylogenetic tree construction (data centric)
+ Phylogenetic tree generation (data centric)
+
+
+ Subconcepts of this concept reflect different types of data used to generate a tree, and provide an alternate axis for curation.
+ Phylogenetic inference (data centric)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Construct a phylogenetic tree using a specific method.
+ Phylogenetic tree construction (method centric)
+ Phylogenetic tree generation (method centric)
+
+
+ Subconcepts of this concept reflect different computational methods used to generate a tree, and provide an alternate axis for curation.
+ Phylogenetic inference (method centric)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from molecular sequences.
+ Phylogenetic tree construction (from molecular sequences)
+ Phylogenetic tree generation (from molecular sequences)
+
+
+ Methods typically compare multiple molecular sequences and estimate evolutionary distances and relationships to infer gene families or make functional predictions.
+ Phylogenetic inference (from molecular sequences)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from continuous quantitative character data.
+ Phylogenetic tree construction (from continuous quantitative characters)
+ Phylogenetic tree generation (from continuous quantitative characters)
+
+
+ Phylogenetic inference (from continuous quantitative characters)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from gene frequency data.
+ Phylogenetic tree construction (from gene frequencies)
+ Phylogenetic tree generation (from gene frequencies)
+
+
+ Phylogenetic inference (from gene frequencies)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from polymorphism data including microsatellites, RFLP (restriction fragment length polymorphisms), RAPD (random-amplified polymorphic DNA) and AFLP (amplified fragment length polymorphisms) data.
+ Phylogenetic tree construction (from polymorphism data)
+ Phylogenetic tree generation (from polymorphism data)
+
+
+ Phylogenetic inference (from polymorphism data)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic species tree, for example, from a genome-wide sequence comparison.
+ Phylogenetic species tree construction
+ Phylogenetic species tree generation
+
+
+ Species tree construction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by computing a sequence alignment and searching for the tree with the fewest number of character-state changes from the alignment.
+ Phylogenetic tree construction (parsimony methods)
+ Phylogenetic tree generation (parsimony methods)
+
+
+ This includes evolutionary parsimony (invariants) methods.
+ Phylogenetic inference (parsimony methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by computing (or using precomputed) distances between sequences and searching for the tree with minimal discrepancies between pairwise distances.
+ Phylogenetic tree construction (minimum distance methods)
+ Phylogenetic tree generation (minimum distance methods)
+
+
+ This includes neighbor joining (NJ) clustering method.
+ Phylogenetic inference (minimum distance methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by relating sequence data to a hypothetical tree topology using a model of sequence evolution.
+ Phylogenetic tree construction (maximum likelihood and Bayesian methods)
+ Phylogenetic tree generation (maximum likelihood and Bayesian methods)
+
+
+ Maximum likelihood methods search for a tree that maximizes a likelihood function, i.e. that is most likely given the data and model. Bayesian analysis estimates the probability of a tree for branch lengths and topology, typically using a Monte Carlo algorithm.
+ Phylogenetic inference (maximum likelihood and Bayesian methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by computing four-taxon trees (4-trees) and searching for the phylogeny that matches most closely.
+ Phylogenetic tree construction (quartet methods)
+ Phylogenetic tree generation (quartet methods)
+
+
+ Phylogenetic inference (quartet methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by using artificial-intelligence methods, for example genetic algorithms.
+ Phylogenetic tree construction (AI methods)
+ Phylogenetic tree generation (AI methods)
+
+
+ Phylogenetic inference (AI methods)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify a plausible model of DNA substitution that explains a molecular (DNA or protein) sequence alignment.
+ Nucleotide substitution modelling
+
+
+ DNA substitution modelling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the shape (topology) of a phylogenetic tree.
+ Phylogenetic tree analysis (shape)
+
+
+ Phylogenetic tree topology analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Apply bootstrapping or other measures to estimate confidence of a phylogenetic tree.
+
+
+ Phylogenetic tree bootstrapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a "gene tree" which represents the evolutionary history of the genes included in the study. This can be used to predict families of genes and gene function based on their position in a phylogenetic tree.
+ Phylogenetic tree analysis (gene family prediction)
+
+
+ Gene trees can provide evidence for gene duplication events, as well as speciation events. Where sequences from different homologs are included in a gene tree, subsequent clustering of the orthologs can demonstrate evolutionary history of the orthologs.
+ Gene tree construction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a phylogenetic tree to identify allele frequency distribution and change that is subject to evolutionary pressures (natural selection, genetic drift, mutation and gene flow). Identify type of natural selection (such as stabilizing, balancing or disruptive).
+ Phylogenetic tree analysis (natural selection)
+
+
+ Stabilizing/purifying (directional) selection favors a single phenotype and tends to decrease genetic diversity as a population stabilizes on a particular trait, selecting out trait extremes or deleterious mutations. In contrast, balancing selection maintains genetic polymorphisms (or multiple alleles), whereas disruptive (or diversifying) selection favors individuals at both extremes of a trait.
+ Allele frequency distribution analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees to produce a consensus tree.
+ Phylogenetic tree construction (consensus)
+ Phylogenetic tree generation (consensus)
+
+
+ Methods typically test for topological similarity between trees using for example a congruence index.
+ Consensus tree construction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees to detect subtrees or supertrees.
+ Phylogenetic sub/super tree detection
+ Subtree construction
+ Supertree construction
+
+
+ Phylogenetic sub/super tree construction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees to calculate distances between trees.
+
+
+ Phylogenetic tree distances calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotate a phylogenetic tree with terms from a controlled vocabulary.
+
+
+ Phylogenetic tree annotation
+ http://www.evolutionaryontology.org/cdao.owl#CDAOAnnotation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Predict and optimise peptide ligands that elicit an immunological response.
+
+
+ Immunogenicity prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict or optimise DNA to elicit (via DNA vaccination) an immunological response.
+
+
+ DNA vaccine design
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Reformat (a file or other report of) molecular sequence(s).
+
+
+ Sequence formatting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Reformat (a file or other report of) molecular sequence alignment(s).
+
+
+ Sequence alignment formatting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Reformat a codon usage table.
+
+
+ Codon usage table formatting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise, format or render a molecular sequence or sequences such as a sequence alignment, possibly with sequence features or properties shown.
+ Sequence rendering
+ Sequence alignment visualisation
+
+
+ Sequence visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.15
+
+ Visualise, format or print a molecular sequence alignment.
+
+
+ Sequence alignment visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise, format or render sequence clusters.
+ Sequence cluster rendering
+
+
+ Sequence cluster visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render or visualise a phylogenetic tree.
+ Phylogenetic tree rendering
+
+
+ Phylogenetic tree visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.15
+
+ Visualise RNA secondary structure, knots, pseudoknots etc.
+
+
+ RNA secondary structure visualisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.15
+
+ Render and visualise protein secondary structure.
+
+
+ Protein secondary structure visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise or render molecular 3D structure, for example a high-quality static picture or animation.
+ Structure rendering
+ Protein secondary structure visualisation
+ RNA secondary structure visualisation
+
+
+ This includes visualisation of protein secondary structure such as knots, pseudoknots etc. as well as tertiary and quaternary structure.
+ Structure visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise microarray or other expression data.
+ Expression data rendering
+ Gene expression data visualisation
+ Microarray data rendering
+
+
+ Expression data visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Identify and analyse networks of protein interactions.
+
+
+ Protein interaction network visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Draw or visualise a DNA map.
+ DNA map drawing
+ Map rendering
+
+
+ Map drawing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Render a sequence with motifs.
+
+ Sequence motif rendering
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Draw or visualise restriction maps in DNA sequences.
+
+
+ Restriction map drawing
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Draw a linear map of DNA.
+
+ DNA linear map rendering
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA circular map rendering
+ Draw a circular map of DNA, for example a plasmid map.
+
+
+ Plasmid map drawing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise operon structure etc.
+ Operon rendering
+
+
+ Operon drawing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identify folding families of related RNAs.
+
+ Nucleic acid folding family identification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+ Compute energies of nucleic acid folding, e.g. minimum folding energies for DNA or RNA sequences or energy landscape of RNA mutants.
+
+
+ Nucleic acid folding energy calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Retrieve existing annotation (or documentation), typically annotation on a database entity.
+
+ Use this concept for tools which retrieve pre-existing annotations, not, for example, prediction methods that might make annotations.
+ Annotation retrieval
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the biological or biochemical role of a protein, or other aspects of a protein function.
+ Protein function analysis
+ Protein functional analysis
+
+
+ For functional properties that can be mapped to a sequence, use 'Sequence feature detection (protein)' instead.
+ Protein function prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the functional properties of two or more proteins.
+
+
+ Protein function comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Submit a molecular sequence to a database.
+
+ Sequence submission
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a known network of gene regulation.
+ Gene regulatory network comparison
+ Gene regulatory network modelling
+ Regulatory network comparison
+ Regulatory network modelling
+
+
+ Gene regulatory network analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:UploadPDB
+ Parse, prepare or load a user-specified data file so that it is available for use.
+ Data loading
+ Loading
+
+
+ Parsing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a sequence data resource (typically a database) and retrieve sequences and / or annotation.
+
+ This includes direct retrieval methods (e.g. the dbfetch program) but not those that perform calculations on the sequence.
+ Sequence retrieval
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ WHATIF:DownloadPDB
+ WHATIF:EchoPDB
+ Query a tertiary structure data resource (typically a database) and retrieve structures, structure-related data and annotation.
+
+ This includes direct retrieval methods but not those that perform calculations on the sequence or structure.
+ Structure retrieval
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:GetSurfaceDots
+ Calculate the positions of dots that are homogeneously distributed over the surface of a molecule.
+
+
+ A dot has three coordinates (x,y,z) and (typically) a color.
+ Surface rendering
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible surface') for each atom in a structure.
+
+
+ Waters are not considered.
+ Protein atom surface calculation (accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible molecular surface') for each atom in a structure.
+
+
+ Waters are not considered.
+ Protein atom surface calculation (accessible molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible surface') for each residue in a structure.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('vacuum accessible surface') for each residue in a structure. This is the accessibility of the residue when taken out of the protein together with the backbone atoms of any residue it is covalently bound to.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (vacuum accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible molecular surface') for each residue in a structure.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (accessible molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('vacuum molecular surface') for each residue in a structure. This is the accessibility of the residue when taken out of the protein together with the backbone atoms of any residue it is covalently bound to.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (vacuum molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible molecular surface') for a structure as a whole.
+
+
+ Protein surface calculation (accessible molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible surface') for a structure as a whole.
+
+
+ Protein surface calculation (accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate for each residue in a protein structure all its backbone torsion angles.
+
+
+ Backbone torsion angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate for each residue in a protein structure all its torsion angles.
+
+
+ Full torsion angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate for each cysteine (bridge) all its torsion angles.
+
+
+ Cysteine torsion angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ For each amino acid in a protein structure calculate the backbone angle tau.
+
+
+ Tau is the backbone angle N-Calpha-C (angle over the C-alpha).
+ Tau angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowCysteineBridge
+ Detect cysteine bridges (from coordinate data) in a protein structure.
+
+
+ Cysteine bridge detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowCysteineFree
+ Detect free cysteines in a protein structure.
+
+
+ A free cysteine is neither involved in a cysteine bridge, nor functions as a ligand to a metal.
+ Free cysteine detection
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowCysteineMetal
+ Detect cysteines that are bound to metal in a protein structure.
+
+
+ Metal-bound cysteine detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate protein residue contacts with nucleic acids in a structure.
+
+
+ Residue contact calculation (residue-nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate protein residue contacts with metal in a structure.
+ Residue-metal contact calculation
+
+
+ Protein-metal contact calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate ion contacts in a structure (all ions for all side chain atoms).
+
+
+ Residue contact calculation (residue-negative ion)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowBumps
+ Detect 'bumps' between residues in a structure, i.e. those with pairs of atoms whose Van der Waals' radii interpenetrate more than a defined distance.
+
+
+ Residue bump detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF:SymmetryContact
+ Calculate the number of symmetry contacts made by residues in a protein structure.
+
+
+ A symmetry contact is a contact between two atoms in different asymmetric units.
+ Residue symmetry contact calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate contacts between residues and ligands in a protein structure.
+
+
+ Residue contact calculation (residue-ligand)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:HasSaltBridge
+ WHATIF:HasSaltBridgePlus
+ WHATIF:ShowSaltBridges
+ WHATIF:ShowSaltBridgesH
+ Calculate (and possibly score) salt bridges in a protein structure.
+
+
+ Salt bridges are interactions between oppositely charged atoms in different residues. The output might include the inter-atomic distance.
+ Salt bridge calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF:ShowLikelyRotamers
+ WHATIF:ShowLikelyRotamers100
+ WHATIF:ShowLikelyRotamers200
+ WHATIF:ShowLikelyRotamers300
+ WHATIF:ShowLikelyRotamers400
+ WHATIF:ShowLikelyRotamers500
+ WHATIF:ShowLikelyRotamers600
+ WHATIF:ShowLikelyRotamers700
+ WHATIF:ShowLikelyRotamers800
+ WHATIF:ShowLikelyRotamers900
+ Predict rotamer likelihoods for all 20 amino acid types at each position in a protein structure.
+
+
+ Output typically includes, for each residue position, the likelihoods for the 20 amino acid types with estimated reliability of the 20 likelihoods.
+ Rotamer likelihood prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF:ProlineMutationValue
+ Calculate for each position in a protein structure the chance that a proline, when introduced at this position, would increase the stability of the whole protein.
+
+
+ Proline mutation value calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: PackingQuality
+ Identify poorly packed residues in protein structures.
+
+
+ Residue packing validation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: ImproperQualityMax
+ WHATIF: ImproperQualitySum
+ Validate protein geometry, for example bond lengths, bond angles, torsion angles, chiralities, planarities etc. An example is validation of a Ramachandran plot of a protein structure.
+ Ramachandran plot validation
+
+
+ Protein geometry validation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ WHATIF: PDB_sequence
+ Extract a molecular sequence from a PDB file.
+
+
+ PDB file sequence retrieval
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify HET groups in PDB files.
+
+
+ A HET group usually corresponds to ligands, lipids, but might also (not consistently) include groups that are attached to amino acids. Each HET group is supposed to have a unique three letter code and a unique name which might be given in the output.
+ HET group detection
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Determine for each residue the DSSP-determined secondary structure in three-state (HSC).
+
+ DSSP secondary structure assignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF: PDBasXML
+ Reformat (a file or other report of) tertiary structure data.
+
+
+ Structure formatting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign cysteine bonding state and disulfide bond partners in protein structures.
+
+
+ Protein cysteine and disulfide bond assignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify poor quality amino acid positions in protein structures.
+
+
+ Residue validation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ WHATIF:MovedWaterPDB
+ Query a tertiary structure database and retrieve water molecules.
+
+ Structure retrieval (water)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict siRNA duplexes in RNA.
+
+
+ siRNA duplex prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Refine an existing sequence alignment.
+
+
+ Sequence alignment refinement
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process an EMBOSS listfile (list of EMBOSS Uniform Sequence Addresses).
+
+ Listfile processing
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform basic (non-analytical) operations on a report or file of sequences (which might include features), such as file concatenation, removal or ordering of sequences, creation of subset or a new file of sequences.
+
+
+ Sequence file editing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Perform basic (non-analytical) operations on a sequence alignment file, such as copying or removal and ordering of sequences.
+
+ Sequence alignment file processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) physicochemical property data for small molecules.
+
+ Small molecule data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Search and retrieve documentation on a bioinformatics ontology.
+
+ Data retrieval (ontology annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query an ontology and retrieve concepts or relations.
+
+ Data retrieval (ontology concept)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify a representative sequence from a set of sequences, typically using scores from pair-wise alignment or other comparison of the sequences.
+
+
+ Representative sequence identification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Perform basic (non-analytical) operations on a file of molecular tertiary structural data.
+
+ Structure file processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query a profile data resource and retrieve one or more profile(s) and / or associated annotation.
+
+ This includes direct retrieval methods that retrieve a profile by, e.g. the profile name.
+ Data retrieval (sequence profile)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform a statistical data operation of some type, e.g. calibration or validation.
+ Significance testing
+ Statistical analysis
+ Statistical test
+ Statistical testing
+ Expectation maximisation
+ Gibbs sampling
+ Hypothesis testing
+ Omnibus test
+
+
+ Statistical calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a 3D-1D scoring matrix from analysis of protein sequence and structural data.
+ 3D-1D scoring matrix construction
+
+
+ A 3D-1D scoring matrix scores the probability of amino acids occurring in different structural environments.
+ 3D-1D scoring matrix generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise transmembrane proteins, typically the transmembrane regions within a sequence.
+ Transmembrane protein rendering
+
+
+ Transmembrane protein visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ An operation performing purely illustrative (pedagogical) purposes.
+
+ Demonstration
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query a biological pathways database and retrieve annotation on one or more pathways.
+
+ Data retrieval (pathway or network)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query a database and retrieve one or more data identifiers.
+
+ Data retrieval (identifier)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a density plot (of base composition) for a nucleotide sequence.
+
+
+ Nucleic acid density plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse one or more known molecular sequences.
+ Sequence analysis (general)
+
+
+ Sequence analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse molecular sequence motifs.
+ Sequence motif processing
+
+
+ Sequence motif analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) protein interaction data.
+
+ Protein interaction data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse protein structural data.
+ Structure analysis (protein)
+
+
+ Protein structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) annotation of some type, typically annotation on an entry from a biological or biomedical database entity.
+
+ Annotation processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse features in molecular sequences.
+
+ Sequence feature analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Basic (non-analytical) operations of some data, either a file or equivalent entity in memory, such that the same basic type of data is consumed as input and generated as output.
+ File handling
+ File processing
+ Report handling
+ Utility operation
+ Processing
+
+
+ Data handling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse gene expression and regulation data.
+
+ Gene expression analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) one or more structural (3D) profile(s) or template(s) of some type.
+
+ Structural profile processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) an index of (typically a file of) biological data.
+
+ Data index processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) some type of sequence profile.
+
+ Sequence profile processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Analyse protein function, typically by processing protein sequence and/or structural data, and generate an informative report.
+
+
+ Protein function analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse, simulate or predict protein folding, typically by processing sequence and / or structural data. For example, predict sites of nucleation or stabilisation key to protein folding.
+ Protein folding modelling
+ Protein folding simulation
+ Protein folding site prediction
+
+
+ Protein folding analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse protein secondary structure data.
+ Secondary structure analysis (protein)
+
+
+ Protein secondary structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) data on the physicochemical property of a molecule.
+
+ Physicochemical property data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict oligonucleotide primers or probes.
+ Primer and probe prediction
+
+
+ Primer and probe design
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Process (read and / or write) data of a specific type, for example applying analytical methods.
+
+
+ Operation (typed)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a database (or other data resource) with a supplied query and retrieve entries (or parts of entries) that are similar to the query.
+ Search
+
+
+ Typically the query is compared to each entry and high scoring matches (hits) are returned. For example, a BLAST search of a sequence database.
+ Database search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Retrieve an entry (or part of an entry) from a data resource that matches a supplied query. This might include some primary data and annotation. The query is a data identifier or other indexed term. For example, retrieve a sequence record with the specified accession number, or matching supplied keywords.
+ Data extraction
+ Retrieval
+ Data retrieval (metadata)
+ Metadata retrieval
+
+
+ Data retrieval
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Predict, recognise, detect or identify some properties of a biomolecule.
+ Detection
+ Prediction
+ Recognition
+
+
+ Prediction and recognition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Compare two or more things to identify similarities.
+
+
+ Comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Refine or optimise some data model.
+
+
+ Optimisation and refinement
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Model or simulate some biological entity or system, typically using mathematical techniques including dynamical systems, statistical models, differential equations, and game theoretic models.
+ Mathematical modelling
+
+
+ Modelling and simulation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Perform basic operations on some data or a database.
+
+
+ Data handling
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Validate some data.
+ Quality control
+
+
+ Validation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Map properties to positions on a biological entity (typically a molecular sequence or structure), or assemble such an entity from constituent parts.
+ Cartography
+
+
+ Mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Design a biological entity (typically a molecular sequence or structure) with specific properties.
+
+
+ Design
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) microarray data.
+
+ Microarray data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Process (read and / or write) a codon usage table.
+
+ Codon usage table processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve a codon usage table and / or associated annotation.
+
+ Data retrieval (codon usage table)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a gene expression profile.
+
+ Gene expression profile processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gene set testing
+ Identify classes of genes or proteins that are over or under-represented in a large set of genes or proteins. For example analysis of a set of genes corresponding to a gene expression profile, annotated with Gene Ontology (GO) concepts, where eventual over-/under-representation of certain GO concepts within the studied set of genes is revealed.
+ Functional enrichment analysis
+ GSEA
+ Gene-set over-representation analysis
+ Gene set analysis
+ GO-term enrichment
+ Gene Ontology concept enrichment
+ Gene Ontology term enrichment
+
+
+ "Gene set analysis" (often used interchangeably or in an overlapping sense with "gene-set enrichment analysis") refers to the functional analysis (term enrichment) of a differentially expressed set of genes, rather than all genes analysed.
+ Analyse gene expression patterns to identify sets of genes that are associated with a specific trait, condition, clinical outcome etc.
+ Gene sets can be defined beforehand by biological function, chromosome locations and so on.
+ The Gene Ontology (GO) is typically used, the input is a set of Gene IDs, and the output of the analysis is typically a ranked list of GO concepts, each associated with a p-value.
+ Gene-set enrichment analysis
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict a network of gene regulation.
+
+
+ Gene regulatory network prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Generate, analyse or handle a biological pathway or network.
+
+ Pathway or network processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Process (read and / or write) RNA secondary structure data.
+
+
+ RNA secondary structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ Process (read and / or write) RNA tertiary structure data.
+
+
+ Structure processing (RNA)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict RNA tertiary structure.
+
+
+ RNA structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict DNA tertiary structure.
+
+
+ DNA structure prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Generate, process or analyse phylogenetic tree or trees.
+
+
+ Phylogenetic tree processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) protein secondary structure data.
+
+ Protein secondary structure processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a network of protein interactions.
+
+ Protein interaction network processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) one or more molecular sequences and associated annotation.
+
+ Sequence processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Process (read and / or write) a protein sequence and associated annotation.
+
+
+ Sequence processing (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a nucleotide sequence and associated annotation.
+
+ Sequence processing (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more molecular sequences.
+
+
+ Sequence comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a sequence cluster.
+
+ Sequence cluster processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a sequence feature table.
+
+ Feature table processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect, predict and identify genes or components of genes in DNA sequences, including promoters, coding regions, splice sites, etc.
+ Gene calling
+ Gene finding
+ Whole gene prediction
+
+
+ Includes methods that predict whole gene structure using a combination of multiple methods to achieve better predictions.
+ Methods for gene prediction might be ab initio, based on phylogenetic comparisons, use motifs, sequence features, support vector machine, alignment etc.
+ Gene prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+ Classify G-protein coupled receptors (GPCRs) into families and subfamilies.
+
+
+ GPCR classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Not sustainable to have protein type-specific concepts.
+ 1.19
+
+
+ Predict G-protein coupled receptor (GPCR) coupling selectivity.
+
+ GPCR coupling selectivity prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Process (read and / or write) a protein tertiary structure.
+
+
+ Structure processing (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility for each atom in a structure.
+
+
+ Waters are not considered.
+ Protein atom surface calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility for each residue in a structure.
+
+
+ Protein residue surface calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility of a structure as a whole.
+
+
+ Protein surface calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular sequence alignment.
+
+ Sequence alignment processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict protein-protein binding sites.
+ Protein-protein binding site detection
+
+
+ Protein-protein binding site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular tertiary structure.
+
+ Structure processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Annotate a DNA map of some type with terms from a controlled vocabulary.
+
+ Map annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a protein.
+
+ Data retrieval (protein annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve a phylogenetic tree from a data resource.
+
+ Data retrieval (phylogenetic tree)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a protein interaction.
+
+ Data retrieval (protein interaction annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a protein family.
+
+ Data retrieval (protein family annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on an RNA family.
+
+ Data retrieval (RNA family annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a specific gene.
+
+ Data retrieval (gene annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a specific genotype or phenotype.
+
+ Data retrieval (genotype and phenotype annotation)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the architecture of two or more protein structures.
+
+
+ Protein architecture comparison
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify the architecture of a protein structure.
+
+
+ Includes methods that try to suggest the most likely biological unit for a given protein X-ray crystal structure based on crystal symmetry and scoring of putative protein-protein interfaces.
+ Protein architecture recognition
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The simulation of molecular (typically protein) conformation using a computational model of physical forces and computer simulation.
+ Molecular dynamics simulation
+ Protein dynamics
+
+
+ Molecular dynamics
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a nucleic acid sequence (using methods that are only applicable to nucleic acid sequences).
+ Sequence analysis (nucleic acid)
+ Nucleic acid sequence alignment analysis
+ Sequence alignment analysis (nucleic acid)
+
+
+ Nucleic acid sequence analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a protein sequence (using methods that are only applicable to protein sequences).
+ Sequence analysis (protein)
+ Protein sequence alignment analysis
+ Sequence alignment analysis (protein)
+
+
+ Protein sequence analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse known molecular tertiary structures.
+
+
+ Structure analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse nucleic acid tertiary structural data.
+
+
+ Nucleic acid structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular secondary structure.
+
+ Secondary structure processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more molecular tertiary structures.
+
+
+ Structure comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a helical wheel representation of protein secondary structure.
+ Helical wheel rendering
+
+
+ Helical wheel drawing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a topology diagram of protein secondary structure.
+ Topology diagram rendering
+
+
+ Topology diagram drawing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare protein tertiary structures.
+ Structure comparison (protein)
+
+
+ Methods might identify structural neighbors, find structural similarities or define a structural core.
+ Protein structure comparison
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare protein secondary structures.
+ Protein secondary structure
+ Secondary structure comparison (protein)
+ Protein secondary structure alignment
+
+
+ Protein secondary structure comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the subcellular localisation of a protein sequence.
+ Protein cellular localization prediction
+ Protein subcellular localisation prediction
+ Protein targeting prediction
+
+
+ The prediction might include subcellular localisation (nuclear, cytoplasmic, mitochondrial, chloroplast, plastid, membrane etc) or export (extracellular proteins) of a protein.
+ Subcellular localisation prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate contacts between residues in a protein structure.
+
+
+ Residue contact calculation (residue-residue)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify potential hydrogen bonds between amino acid residues.
+
+
+ Hydrogen bond calculation (inter-residue)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the interactions of proteins with other proteins.
+ Protein-protein interaction detection
+ Protein-protein binding prediction
+ Protein-protein interaction prediction
+
+
+ Protein interaction prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) codon usage data.
+
+ Codon usage data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Process (read and/or write) expression data from experiments measuring molecules (e.g. omics data), including analysis of one or more expression profiles, typically to interpret them in functional terms.
+ Expression data analysis
+ Gene expression analysis
+ Gene expression data analysis
+ Gene expression regulation analysis
+ Metagenomic inference
+ Microarray data analysis
+ Protein expression analysis
+
+
+ Metagenomic inference is the profiling of phylogenetic marker genes in order to predict metagenome function.
+ Expression analysis
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Process (read and / or write) a network of gene regulation.
+
+
+ Gene regulatory network processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+ Generate, process or analyse a biological pathway or network.
+
+ Pathway or network analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse SAGE, MPSS or SBS experimental data, typically to identify or quantify mRNA transcripts.
+
+ Sequencing-based expression profile data analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict, analyse, characterize or model splice sites, splicing events and so on, typically by comparing multiple nucleic acid sequences.
+ Splicing model analysis
+
+
+ Splicing analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse raw microarray data.
+
+ Microarray raw data analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+
+
+ Process (read and / or write) nucleic acid sequence or structural data.
+
+ Nucleic acid analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+
+
+ Process (read and / or write) protein sequence or structural data.
+
+ Protein analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ Process (read and / or write) molecular sequence data.
+
+
+ Sequence data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) molecular structural data.
+
+ Structural data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Process (read and / or write) text.
+
+ Text processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Analyse a protein sequence alignment, typically to detect features or make predictions.
+
+
+ Protein sequence alignment analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Analyse a nucleic acid sequence alignment, typically to detect features or make predictions.
+
+
+ Nucleic acid sequence alignment analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Compare two or more nucleic acid sequences.
+
+
+ Nucleic acid sequence comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Compare two or more protein sequences.
+
+
+ Protein sequence comparison
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Back-translate a protein sequence into DNA.
+
+
+ DNA back-translation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Edit or change a nucleic acid sequence, either randomly or specifically.
+
+
+ Sequence editing (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Edit or change a protein sequence, either randomly or specifically.
+
+
+ Sequence editing (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Generate a nucleic acid sequence by some means.
+
+
+ Sequence generation (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Generate a protein sequence by some means.
+
+
+ Sequence generation (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Visualise, format or render a nucleic acid sequence.
+
+
+ Various nucleic acid sequence analysis methods might generate a sequence rendering but are not (for brevity) listed under here.
+ Nucleic acid sequence visualisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Visualise, format or render a protein sequence.
+
+
+ Various protein sequence analysis methods might generate a sequence rendering but are not (for brevity) listed under here.
+ Protein sequence visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare nucleic acid tertiary structures.
+ Structure comparison (nucleic acid)
+
+
+ Nucleic acid structure comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) nucleic acid tertiary structure data.
+
+ Structure processing (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a map of a DNA sequence annotated with positional or non-positional features of some type.
+
+
+ DNA mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a DNA map of some type.
+
+ Map data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the hydrophobic, hydrophilic or charge properties of a protein (from analysis of sequence or structural information).
+
+
+ Protein hydropathy calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict catalytic residues, active sites or other ligand-binding sites in protein sequences or structures.
+ Protein binding site detection
+ Protein binding site prediction
+
+
+ Binding site prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Build clusters of similar structures, typically using scores from structural alignment methods.
+ Structural clustering
+
+
+ Structure clustering
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a physical DNA map (sequence map) from analysis of sequence tagged sites (STS).
+ Sequence mapping
+
+
+ An STS is a short subsequence of known sequence and location that occurs only once in the chromosome or genome that is being mapped. Sources of STSs include expressed sequence tags (ESTs), simple sequence length polymorphisms (SSLPs), and random genomic sequences from cloned genomic DNA or database sequences.
+ Sequence tagged site (STS) mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Compare two or more entities, typically the sequence or structure (or derivatives) of macromolecules, to identify equivalent subunits.
+ Alignment construction
+ Alignment generation
+
+
+ Alignment
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the molecular weight of a protein (or fragments) and compare it to another protein or reference data. Generally used for protein identification.
+ PMF
+ Peptide mass fingerprinting
+ Protein fingerprinting
+
+
+ Protein fragment weight comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the physicochemical properties of two or more proteins (or reference data).
+
+
+ Protein property comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Compare two or more molecular secondary structures.
+
+ Secondary structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Generate a Hopp and Woods plot of antigenicity of a protein.
+
+
+ Hopp and Woods plotting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Generate a view of clustered quantitative data, annotated with textual information.
+
+
+ Cluster textual view generation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise clustered quantitative data as a set of different profiles, where each profile is plotted versus different entities or samples on the X-axis.
+ Clustered quantitative data plotting
+ Clustered quantitative data rendering
+ Wave graph plotting
+ Microarray cluster temporal graph rendering
+ Microarray wave graph plotting
+ Microarray wave graph rendering
+
+
+ In the case of microarray data, visualise clustered gene expression data as a set of profiles, where each profile shows the gene expression values of a cluster across samples on the X-axis.
+ Clustering profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Generate a dendrograph of raw, preprocessed or clustered expression (e.g. microarray) data.
+
+
+ Dendrograph plotting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a plot of distances (distance or correlation matrix) between expression values.
+ Distance map rendering
+ Distance matrix plotting
+ Distance matrix rendering
+ Proximity map rendering
+ Correlation matrix plotting
+ Correlation matrix rendering
+ Microarray distance map rendering
+ Microarray proximity map plotting
+ Microarray proximity map rendering
+
+
+ Proximity map plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise clustered expression data using a tree diagram.
+ Dendrogram plotting
+ Dendrograph plotting
+ Dendrograph visualisation
+ Expression data tree or dendrogram rendering
+ Expression data tree visualisation
+ Microarray 2-way dendrogram rendering
+ Microarray checks view rendering
+ Microarray matrix tree plot rendering
+ Microarray tree or dendrogram rendering
+
+
+ Dendrogram visualisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualize the results of a principal component analysis (orthogonal data transformation). For example, visualization of the principal components (essential subspace) coming from a Principal Component Analysis (PCA) on the trajectory atomistic coordinates of a molecular structure.
+ PCA plotting
+ Principal component plotting
+ ED visualization
+ Essential Dynamics visualization
+ Microarray principal component plotting
+ Microarray principal component rendering
+ PCA visualization
+ Principal modes visualization
+
+
+ Examples for visualization are the distribution of variance over the components, loading and score plots.
+ The use of Principal Component Analysis (PCA), a multivariate statistical analysis to obtain collective variables on the atomic positional fluctuations, helps to separate the configurational space in two subspaces: an essential subspace containing relevant motions, and another one containing irrelevant local fluctuations.
+ Principal component visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a graph in which the values of two variables are plotted along two axes; the pattern of the points reveals any correlation.
+ Scatter chart plotting
+ Microarray scatter plot plotting
+ Microarray scatter plot rendering
+
+
+ Comparison of two sets of quantitative data such as two samples of gene expression values.
+ Scatter plot plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Visualise gene expression data where each band (or line graph) corresponds to a sample.
+
+
+ Whole microarray graph plotting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise gene expression data after hierarchical clustering for representing hierarchical relationships.
+ Expression data tree-map rendering
+ Treemapping
+ Microarray tree-map rendering
+
+
+ Treemap visualisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a box plot, i.e. a depiction of groups of numerical data through their quartiles.
+ Box plot plotting
+ Microarray Box-Whisker plot plotting
+
+
+ In the case of microarray data, visualise raw and pre-processed gene expression data, via a plot showing over- and under-expression along with mean, upper and lower quartiles.
+ Box-Whisker plot plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a physical (sequence) map of a DNA sequence showing the physical distance (base pairs) between features or landmarks such as restriction sites, cloned DNA fragments, genes and other genetic markers.
+ Physical cartography
+
+
+ Physical mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Apply analytical methods to existing data of a specific type.
+
+
+ This excludes non-analytical methods that read and write the same basic type of data (for that, see 'Data handling').
+ Analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Process or analyse an alignment of molecular sequences or structures.
+
+ Alignment analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+
+ Analyse a body of scientific text (typically a full text article from a scientific journal).
+
+ Article analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Analyse the interactions of two or more molecules (or parts of molecules) that are known to interact.
+
+ Molecular interaction analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the interactions of proteins with other proteins.
+ Protein interaction analysis
+ Protein interaction raw data analysis
+ Protein interaction simulation
+
+
+ Includes analysis of raw experimental protein-protein interaction data from for example yeast two-hybrid analysis, protein microarrays, immunoaffinity chromatography followed by mass spectrometry, phage display etc.
+ Protein-protein interaction analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: HETGroupNames
+ WHATIF:HasMetalContacts
+ WHATIF:HasMetalContactsPlus
+ WHATIF:HasNegativeIonContacts
+ WHATIF:HasNegativeIonContactsPlus
+ WHATIF:HasNucleicContacts
+ WHATIF:ShowDrugContacts
+ WHATIF:ShowDrugContactsShort
+ WHATIF:ShowLigandContacts
+ WHATIF:ShowProteiNucleicContacts
+ Calculate contacts between residues, or between residues and other groups, in a protein structure, on the basis of distance calculations.
+ HET group detection
+ Residue contact calculation (residue-ligand)
+ Residue contact calculation (residue-metal)
+ Residue contact calculation (residue-negative ion)
+ Residue contact calculation (residue-nucleic acid)
+ WHATIF:SymmetryContact
+
+
+ This includes identifying HET groups, which usually correspond to ligands, lipids, but might also (not consistently) include groups that are attached to amino acids. Each HET group is supposed to have a unique three letter code and a unique name which might be given in the output. It can also include calculation of symmetry contacts, i.e. a contact between two atoms in different asymmetric units.
+ Residue distance calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Process (read and / or write) an alignment of two or more molecular sequences, structures or derived data.
+
+ Alignment processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular tertiary (3D) structure alignment.
+
+ Structure alignment processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate codon usage bias, e.g. generate a codon usage bias plot.
+ Codon usage bias plotting
+
+
+ Codon usage bias calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Generate a codon usage bias plot.
+
+
+ Codon usage bias plotting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the differences in codon usage fractions between two sequences, sets of sequences, codon usage tables etc.
+
+
+ Codon usage fraction calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Assign molecular sequences, structures or other biological data to a specific group or category according to qualities it shares with that group or category.
+
+
+ Classification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) molecular interaction data.
+
+ Molecular interaction data processing
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign molecular sequence(s) to a group or category.
+
+
+ Sequence classification
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign molecular structure(s) to a group or category.
+
+
+ Structure classification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more proteins (or some aspect) to identify similarities.
+
+
+ Protein comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more nucleic acids to identify similarities.
+
+
+ Nucleic acid comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict, recognise, detect or identify some properties of proteins.
+
+
+ Prediction and recognition (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict, recognise, detect or identify some properties of nucleic acids.
+
+
+ Prediction and recognition (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ Edit, convert or otherwise change a molecular tertiary structure, either randomly or specifically.
+
+
+ Structure editing
+
+
+
+
+
+
+
+
+ beta13
+ Edit, convert or otherwise change a molecular sequence alignment, either randomly or specifically.
+
+
+ Sequence alignment editing
+
+
+
+
+
+
+
+
+ beta13
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+ Render (visualise) a biological pathway or network.
+
+ Pathway or network visualisation
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.6
+
+
+ Predict general (non-positional) functional properties of a protein from analysing its sequence.
+
+ For functional properties that are positional, use 'Protein site detection' instead.
+ Protein function prediction (from sequence)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ (jison) This is a distinction made on the basis of input; all features that exist can be mapped to a sequence so this isn't needed (consolidate with "Protein feature detection").
+ 1.17
+
+
+
+ Predict, recognise and identify functional or other key sites within protein sequences, typically by scanning for known motifs, patterns and regular expressions.
+
+
+ Protein sequence feature detection
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.18
+
+
+ Calculate (or predict) physical or chemical properties of a protein, including any non-positional properties of the molecular sequence, from processing a protein sequence.
+
+
+ Protein property calculation (from sequence)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.6
+
+
+ Predict, recognise and identify positional features in proteins from analysing protein structure.
+
+ Protein feature prediction (from structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ Predict, recognise and identify positional features in proteins from analysing protein sequences or structures.
+ Protein feature prediction
+ Protein feature recognition
+ Protein secondary database search
+ Protein site detection
+ Protein site prediction
+ Protein site recognition
+ Sequence feature detection (protein)
+ Sequence profile database search
+
+
+ Features includes functional sites or regions, secondary structure, structural domains and so on. Methods might use fingerprints, motifs, profiles, hidden Markov models, sequence alignment etc to provide a mapping of a query protein sequence to a discriminatory element. This includes methods that search a secondary protein database (Prosite, Blocks, ProDom, Prints, Pfam etc.) to assign a protein sequence(s) to a known protein family or group.
+ Protein feature detection
+
+
+
+
+
+
+
+
+ beta13
+ 1.6
+
+
+ Screen a molecular sequence(s) against a database (of some type) to identify similarities between the sequence and database entries.
+
+ Database search (by sequence)
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+ Predict a network of protein interactions.
+
+
+ Protein interaction network prediction
+
+
+
+
+
+
+
+
+
+ beta13
+ Design (or predict) nucleic acid sequences with specific chemical or physical properties.
+ Gene design
+
+
+ Nucleic acid design
+
+
+
+
+
+
+
+
+
+ beta13
+ Edit a data entity, either randomly or specifically.
+
+
+ Editing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Evaluate a DNA sequence assembly, typically for purposes of quality control.
+ Assembly QC
+ Assembly quality evaluation
+ Sequence assembly QC
+ Sequence assembly quality evaluation
+
+
+ Sequence assembly validation
+
+
+
+
+
+
+
+
+
+ 1.1
+ Align two or more (typically huge) molecular sequences that represent genomes.
+ Genome alignment construction
+ Whole genome alignment
+
+
+ Genome alignment
+
+
+
+
+
+
+
+
+ 1.1
+ Reconstruction of a sequence assembly in a localised area.
+
+
+ Localised reassembly
+
+
+
+
+
+
+
+
+ 1.1
+ Render and visualise a DNA sequence assembly.
+ Assembly rendering
+ Assembly visualisation
+ Sequence assembly rendering
+
+
+ Sequence assembly visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Identify base (nucleobase) sequence from fluorescence 'trace' data generated by an automated DNA sequencer.
+ Base calling
+ Phred base calling
+ Phred base-calling
+
+
+ Base-calling
+
+
+
+
+
+
+
+
+
+ 1.1
+ The mapping of methylation sites in a DNA (genome) sequence. Typically, the mapping of high-throughput bisulfite reads to the reference genome.
+ Bisulfite read mapping
+ Bisulfite sequence alignment
+ Bisulfite sequence mapping
+
+
+ Bisulfite mapping follows high-throughput sequencing of DNA which has undergone bisulfite treatment followed by PCR amplification; unmethylated cytosines are specifically converted to thymine, allowing the methylation status of cytosine in the DNA to be detected.
+ Bisulfite mapping
+
+
+
+
+
+
+
+
+ 1.1
+ Identify and filter a (typically large) sequence data set to remove sequences from contaminants in the sample that was sequenced.
+
+
+ Sequence contamination filtering
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Trim sequences (typically from an automated DNA sequencer) to remove misleading ends.
+
+
+ For example trim polyA tails, introns and primer sequence flanking the sequence of amplified exons, or other unwanted sequence.
+ Trim ends
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Trim sequences (typically from an automated DNA sequencer) to remove sequence-specific end regions, typically contamination from vector sequences.
+
+
+ Trim vector
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Trim sequences (typically from an automated DNA sequencer) to remove the sequence ends that extend beyond an assembled reference sequence.
+
+
+ Trim to reference
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ Cut (remove) the end from a molecular sequence.
+ Trimming
+ Barcode sequence removal
+ Trim ends
+ Trim to reference
+ Trim vector
+
+
+ This includes end trimming (trim sequences, typically from an automated DNA sequencer, to remove misleading ends; for example trim polyA tails, introns and primer sequence flanking the sequence of amplified exons, or other unwanted sequence), trimming to a reference sequence (trim sequences, typically from an automated DNA sequencer, to remove the sequence ends that extend beyond an assembled reference sequence), and vector trimming (trim sequences, typically from an automated DNA sequencer, to remove sequence-specific end regions, typically contamination from vector sequences).
+ Sequence trimming
+
+
+
+
+
+
+
+
+
+ 1.1
+ Compare the features of two genome sequences.
+
+
+ Genomic elements that might be compared include genes, indels, single nucleotide polymorphisms (SNPs), retrotransposons, tandem repeats and so on.
+ Genome feature comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Detect errors in DNA sequences generated from sequencing projects.
+ Short read error correction
+ Short-read error correction
+
+
+ Sequencing error detection
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse DNA sequence data to identify differences between the genetic composition (genotype) of an individual compared to other individuals or a reference sequence.
+
+
+ Methods might consider cytogenetic analyses, copy number polymorphism (and calculate copy number calls for copy-number variation (CNV) regions), single nucleotide polymorphism (SNP), rare copy number variation (CNV) identification, loss of heterozygosity data and so on.
+ Genotyping
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse a genetic variation, for example to annotate its location, alleles, classification, and effects on individual transcripts predicted for a gene model.
+ Genetic variation annotation
+ Sequence variation analysis
+ Variant analysis
+ Transcript variant analysis
+
+
+ Genetic variation annotation provides contextual interpretation of coding SNP consequences in transcripts. It allows comparisons to be made between variation data in different populations or strains for the same transcript.
+ Genetic variation analysis
+
+
+
+
+
+
+
+
+
+ 1.1
+ Align short oligonucleotide sequences (reads) to a larger (genomic) sequence.
+ Oligonucleotide alignment
+ Oligonucleotide alignment construction
+ Oligonucleotide alignment generation
+ Oligonucleotide mapping
+ Read alignment
+ Short oligonucleotide alignment
+ Short read alignment
+ Short read mapping
+ Short sequence read mapping
+
+
+ The purpose of read mapping is to identify the location of sequenced fragments within a reference genome and assumes that there is, in fact, at least local similarity between the fragment and reference sequences.
+ Read mapping
+
+
+
+
+
+
+
+
+ 1.1
+ A variant of oligonucleotide mapping where a read is mapped to two separate locations because of possible structural variation.
+ Split-read mapping
+
+
+ Split read mapping
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse DNA sequences in order to identify a DNA 'barcode'; marker genes or any short fragment(s) of DNA that are useful to diagnose the taxa of biological organisms.
+ Community profiling
+ Sample barcoding
+
+
+ DNA barcoding
+
+
+
+
+
+
+
+
+
+ 1.1
+ 1.19
+
+ Identify single nucleotide change in base positions in sequencing data that differ from a reference genome and which might, especially by reference to population frequency or functional data, indicate a polymorphism.
+
+
+ SNP calling
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ "Polymorphism detection" and "Variant calling" are essentially the same thing - keeping the latter as a more prevalent term nowadays.
+ 1.24
+
+
+ Detect mutations in multiple DNA sequences, for example, from the alignment and comparison of the fluorescent traces produced by DNA sequencing hardware.
+
+
+ Polymorphism detection
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ Visualise, format or render an image of a Chromatogram.
+ Chromatogram viewing
+
+
+ Chromatogram visualisation
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse cytosine methylation states in nucleic acid sequences.
+ Methylation profile analysis
+
+
+ Methylation analysis
+
+
+
+
+
+
+
+
+ 1.1
+ 1.19
+
+ Determine cytosine methylation status of specific positions in a nucleic acid sequence.
+
+
+ Methylation calling
+ true
+
+
+
+
+
+
+
+
+
+ 1.1
+ Measure the overall level of methyl cytosines in a genome from analysis of experimental data, typically from chromatographic methods and methyl accepting capacity assay.
+ Genome methylation analysis
+ Global methylation analysis
+ Methylation level analysis (global)
+
+
+ Whole genome methylation analysis
+
+
+
+
+
+
+
+
+ 1.1
+ Analysing the DNA methylation of specific genes or regions of interest.
+ Gene-specific methylation analysis
+ Methylation level analysis (gene-specific)
+
+
+ Gene methylation analysis
+
+
+
+
+
+
+
+
+
+ 1.1
+ Visualise, format or render a nucleic acid sequence that is part of (and in context of) a complete genome sequence.
+ Genome browser
+ Genome browsing
+ Genome rendering
+ Genome viewing
+
+
+ Genome visualisation
+
+
+
+
+
+
+
+
+
+ 1.1
+ Compare the sequence or features of two or more genomes, for example, to find matching regions.
+ Genomic region matching
+
+
+ Genome comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Generate an index of a genome sequence.
+ Burrows-Wheeler
+ Genome indexing (Burrows-Wheeler)
+ Genome indexing (suffix arrays)
+ Suffix arrays
+
+
+ Many sequence alignment tasks involving many or very large sequences rely on a precomputed index of the sequence to accelerate the alignment. The Burrows-Wheeler Transform (BWT) is a permutation of the genome based on a suffix array algorithm. A suffix array consists of the lexicographically sorted list of suffixes of a genome.
+ Genome indexing
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Generate an index of a genome sequence using the Burrows-Wheeler algorithm.
+
+
+ The Burrows-Wheeler Transform (BWT) is a permutation of the genome based on a suffix array algorithm.
+ Genome indexing (Burrows-Wheeler)
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Generate an index of a genome sequence using a suffix arrays algorithm.
+
+
+ A suffix array consists of the lexicographically sorted list of suffixes of a genome.
+ Genome indexing (suffix arrays)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse one or more spectra from mass spectrometry (or other) experiments.
+ Mass spectrum analysis
+ Spectrum analysis
+
+
+ Spectral analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Identify peaks in a spectrum from a mass spectrometry, NMR, or some other spectrum-generating experiment.
+ Peak assignment
+ Peak finding
+
+
+ Peak detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Link together a non-contiguous series of genomic sequences into a scaffold, consisting of sequences separated by gaps of known length. The sequences that are linked are typically contigs; contiguous sequences corresponding to read overlaps.
+ Scaffold construction
+ Scaffold generation
+
+
+ Scaffold may be positioned along a chromosome physical map to create a "golden path".
+ Scaffolding
+
+
+
+
+
+
+
+
+ 1.1
+ Fill the gaps in a sequence assembly (scaffold) by merging in additional sequences.
+
+
+ Different techniques are used to generate gap sequences to connect contigs, depending on the size of the gap. For small (5-20kb) gaps, PCR amplification and sequencing is used. For large (>20kb) gaps, fragments are cloned (e.g. in BAC (Bacterial artificial chromosomes) vectors) and then sequenced.
+ Scaffold gap completion
+
+
+
+
+
+
+
+
+
+ 1.1
+ Raw sequence data quality control.
+ Sequencing QC
+ Sequencing quality assessment
+
+
+ Analyse raw sequence data from a sequencing pipeline and identify (and possibly fix) problems.
+ Sequencing quality control
+
+
+
+
+
+
+
+
+
+ 1.1
+ Pre-process sequence reads to ensure (or improve) quality and reliability.
+ Sequence read pre-processing
+
+
+ For example, process paired end reads to trim low quality ends, remove short sequences, identify sequence inserts, detect chimeric reads, or remove low quality sequences including vector, adaptor, low complexity and contaminant sequences. Sequences might come from genomic DNA library, EST libraries, SSH library and so on.
+ Read pre-processing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Estimate the frequencies of different species from analysis of the molecular sequences, typically of DNA recovered from environmental samples.
+
+
+ Species frequency estimation
+
+
+
+
+
+
+
+
+ 1.1
+ Identify putative protein-binding regions in a genome sequence from analysis of Chip-sequencing data or ChIP-on-chip data.
+ Protein binding peak detection
+ Peak-pair calling
+
+
+ Chip-sequencing combines chromatin immunoprecipitation (ChIP) with massively parallel DNA sequencing to generate a set of reads, which are aligned to a genome sequence. The enriched areas contain the binding sites of DNA-associated proteins. For example, a transcription factor binding site. ChIP-on-chip in contrast combines chromatin immunoprecipitation ('ChIP') with microarray ('chip'). "Peak-pair calling" is similar to "Peak calling" in the context of ChIP-exo.
+ Peak calling
+
+
+
+
+
+
+
+
+ 1.1
+ Identify from molecular sequence analysis (typically from analysis of microarray or RNA-seq data) genes whose expression levels are significantly different between two sample groups.
+ Differential expression analysis
+ Differential gene analysis
+ Differential gene expression analysis
+ Differentially expressed gene identification
+
+
+ Differential gene expression analysis is used, for example, to identify which genes are up-regulated (increased expression) or down-regulated (decreased expression) between a group treated with a drug and a control group.
+ Differential gene expression profiling
+
+
+
+
+
+
+
+
+ 1.1
+ 1.21
+
+ Analyse gene expression patterns (typically from DNA microarray datasets) to identify sets of genes that are associated with a specific trait, condition, clinical outcome etc.
+
+
+ Gene set testing
+ true
+
+
+
+
+
+
+
+
+
+ 1.1
+ Classify variants based on their potential effect on genes, especially functional effects on the expressed proteins.
+
+
+ Variants are typically classified by their position (intronic, exonic, etc.) in a gene transcript and (for variants in coding exons) by their effect on the protein sequence (synonymous, non-synonymous, frameshifting, etc.)
+ Variant classification
+
+
+
+
+
+
+
+
+ 1.1
+ Identify biologically interesting variants by prioritizing individual variants, for example, homozygous variants absent in control genomes.
+
+
+ Variant prioritisation can be used for example to produce a list of variants responsible for 'knocking out' genes in specific genomes. Methods include amino acid substitution, aggregative approaches, probabilistic approach, inheritance and unified likelihood-frameworks.
+ Variant prioritisation
+
+
+
+
+
+
+
+
+
+ 1.1
+ Detect, identify and map mutations, such as single nucleotide polymorphisms, short indels and structural variants, in multiple DNA sequences. Typically the alignment and comparison of the fluorescent traces produced by DNA sequencing hardware, to study genomic alterations.
+ Variant mapping
+ Allele calling
+ Exome variant detection
+ Genome variant detection
+ Germ line variant calling
+ Mutation detection
+ Somatic variant calling
+ de novo mutation detection
+
+
+ Methods often utilise a database of aligned reads.
+ Somatic variant calling is the detection of variations established in somatic cells and hence not inherited as a germ line variant.
+ Variant detection
+ Variant calling
+
+
+
+
+
+
+
+
+ 1.1
+ Detect large regions in a genome subject to copy-number variation, or other structural variations in genome(s).
+ Structural variation discovery
+
+
+ Methods might involve analysis of whole-genome array comparative genome hybridisation or single-nucleotide polymorphism arrays, paired-end mapping of sequencing data, or from analysis of short reads from new sequencing technologies.
+ Structural variation detection
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse sequencing data from experiments aiming to selectively sequence the coding regions of the genome.
+ Exome sequence analysis
+
+
+ Exome assembly
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse mapping density (read depth) of (typically) short reads from sequencing platforms, for example, to detect deletions and duplications.
+
+
+ Read depth analysis
+
+
+
+
+
+
+
+
+ 1.1
+ Combine classical quantitative trait loci (QTL) analysis with gene expression profiling, for example, to describe cis- and trans-controlling elements for the expression of phenotype associated genes.
+ Gene expression QTL profiling
+ Gene expression quantitative trait loci profiling
+ eQTL profiling
+
+
+ Gene expression QTL analysis
+
+
+
+
+
+
+
+
+ 1.1
+ Estimate the number of copies of loci of particular gene(s) in DNA sequences typically from gene-expression profiling technology based on microarray hybridisation-based experiments. For example, estimate copy number (or marker dosage) of a dominant marker in samples from polyploid plant cells or tissues, or chromosomal gains and losses in tumors.
+ Transcript copy number estimation
+
+
+ Methods typically implement some statistical model for hypothesis testing, and methods estimate total copy number, i.e. do not distinguish the two inherited chromosomes quantities (specific copy number).
+ Copy number estimation
+
+
+
+
+
+
+
+
+ 1.2
+ Adapter removal
+ Remove forward and/or reverse primers from nucleic acid sequences (typically PCR products).
+
+
+ Primer removal
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ Infer a transcriptome sequence by analysis of short sequence reads.
+
+
+ Transcriptome assembly
+
+
+
+
+
+
+
+
+ 1.2
+ 1.6
+
+
+ Infer a transcriptome sequence without the aid of a reference genome, i.e. by comparing short sequences (reads) to each other.
+
+ Transcriptome assembly (de novo)
+ true
+
+
+
+
+
+
+
+
+ 1.2
+ 1.6
+
+
+ Infer a transcriptome sequence by mapping short reads to a reference genome.
+
+ Transcriptome assembly (mapping)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ Convert one set of sequence coordinates to another, e.g. convert coordinates of one assembly to another, cDNA to genomic, CDS to genomic, protein translation to genomic etc.
+
+
+ Sequence coordinate conversion
+
+
+
+
+
+
+
+
+ 1.3
+ Calculate similarity between 2 or more documents.
+
+
+ Document similarity calculation
+
+
+
+
+
+
+
+
+
+ 1.3
+ Cluster (group) documents on the basis of their calculated similarity.
+
+
+ Document clustering
+
+
+
+
+
+
+
+
+
+ 1.3
+ Recognise named entities, ontology concepts, tags, events, and dictionary terms within documents.
+ Concept mining
+ Entity chunking
+ Entity extraction
+ Entity identification
+ Event extraction
+ NER
+ Named-entity recognition
+
+
+ Named-entity and concept recognition
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ Map data identifiers to one another for example to establish a link between two biological databases for the purposes of data integration.
+ Accession mapping
+ Identifier mapping
+
+
+ The mapping can be achieved by comparing identifier values or some other means, e.g. exact matches to a provided sequence.
+ ID mapping
+
+
+
+
+
+
+
+
+ 1.3
+ Process data in such a way that makes it hard to trace to the person which the data concerns.
+ Data anonymisation
+
+
+ Anonymisation
+
+
+
+
+
+
+
+
+ 1.3
+ (jison)Too fine-grained, the operation (Data retrieval) hasn't changed, just what is retrieved.
+ 1.17
+
+ Search for and retrieve a data identifier of some kind, e.g. a database entry accession.
+
+
+ ID retrieval
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Generate a checksum of a molecular sequence.
+
+
+ Sequence checksum generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Construct a bibliography from the scientific literature.
+ Bibliography construction
+
+
+ Bibliography generation
+
+
+
+
+
+
+
+
+ 1.4
+ Predict the structure of a multi-subunit protein and particularly how the subunits fit together.
+
+
+ Protein quaternary structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Analyse the surface properties of proteins or other macromolecules, including surface accessible pockets, interior inaccessible cavities etc.
+
+
+ Molecular surface analysis
+
+
+
+
+
+
+
+
+ 1.4
+ Compare two or more ontologies, e.g. identify differences.
+
+
+ Ontology comparison
+
+
+
+
+
+
+
+
+ 1.4
+ 1.9
+
+ Compare two or more ontologies, e.g. identify differences.
+
+
+ Ontology comparison
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Recognition of which format the given data is in.
+ Format identification
+ Format inference
+ Format recognition
+
+
+ 'Format recognition' is not a bioinformatics-specific operation, but of great relevance in bioinformatics. Should be removed from EDAM if/when captured satisfactorily in a suitable domain-generic ontology.
+ Format detection
+
+
+
+
+
+ The has_input "Data" (data_0006) may cause visualisation or other problems although ontologically correct. But on the other hand it may be useful to distinguish from nullary operations without inputs.
+
+
+
+
+
+
+
+
+ 1.4
+ Split a file containing multiple data items into many files, each containing one item.
+ File splitting
+
+
+ Splitting
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Construct some data entity.
+ Construction
+
+
+ For non-analytical operations, see the 'Processing' branch.
+ Generation
+
+
+
+
+
+
+
+
+ 1.6
+ (jison)This is a distinction made on basis of input; all features that exist can be mapped to a sequence so this isn't needed.
+ 1.17
+
+
+ Predict, recognise and identify functional or other key sites within nucleic acid sequences, typically by scanning for known motifs, patterns and regular expressions.
+
+
+ Nucleic acid sequence feature detection
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ Deposit some data in a database or some other type of repository or software system.
+ Data deposition
+ Data submission
+ Database deposition
+ Database submission
+ Submission
+
+
+ For non-analytical operations, see the 'Processing' branch.
+ Deposition
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Group together some data entities on the basis of similarities such that entities in the same group (cluster) are more similar to each other than to those in other groups (clusters).
+
+
+ Clustering
+
+
+
+
+
+
+
+
+ 1.6
+ 1.19
+
+ Construct some entity (typically a molecule sequence) from component pieces.
+
+
+ Assembly
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Convert a data set from one form to another.
+
+
+ Conversion
+
+
+
+
+
+
+
+
+ 1.6
+ Standardize or normalize data by some statistical method.
+ Normalisation
+ Standardisation
+
+
+ In the simplest normalisation means adjusting values measured on different scales to a common scale (often between 0.0 and 1.0), but can refer to more sophisticated adjustment whereby entire probability distributions of adjusted values are brought into alignment. Standardisation typically refers to an operation whereby a range of values are standardised to measure how many standard deviations a value is from its mean.
+ Standardisation and normalisation
+
+
+
+
+
+
+
+
+ 1.6
+ Combine multiple files or data items into a single file or object.
+
+
+ Aggregation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.6
+ Compare two or more scientific articles.
+
+
+ Article comparison
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Mathematical determination of the value of something, typically a property of a molecule.
+
+
+ Calculation
+
+
+
+
+
+
+
+
+ 1.6
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+
+ Predict a molecular pathway or network.
+
+ Pathway or network prediction
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ 1.12
+
+ The process of assembling many short DNA sequences together such that they represent the original chromosomes from which the DNA originated.
+
+
+ Genome assembly
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ 1.19
+
+ Generate a graph, or other visual representation, of data, showing the relationship between two or more variables.
+
+
+ Plotting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Image processing
+ The analysis of an image (typically a digital image) of some type in order to extract information from it.
+
+
+ Image analysis
+
+
+
+
+
+
+
+
+
+ 1.7
+ Analysis of data from a diffraction experiment.
+
+
+ Diffraction data analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Analysis of cell migration images in order to study cell migration, typically in order to study the processes that play a role in the disease progression.
+
+
+ Cell migration analysis
+
+
+
+
+
+
+
+
+
+ 1.7
+ Processing of diffraction data into a corrected, ordered, and simplified form.
+
+
+ Diffraction data reduction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Measurement of neurites; projections (axons or dendrites) from the cell body of a neuron, from analysis of neuron images.
+
+
+ Neurite measurement
+
+
+
+
+
+
+
+
+ 1.7
+ The evaluation of diffraction intensities and integration of diffraction maxima from a diffraction experiment.
+ Diffraction profile fitting
+ Diffraction summation integration
+
+
+ Diffraction data integration
+
+
+
+
+
+
+
+
+ 1.7
+ Phase a macromolecular crystal structure, for example by using molecular replacement or experimental phasing methods.
+
+
+ Phasing
+
+
+
+
+
+
+
+
+ 1.7
+ A technique used to construct an atomic model of an unknown structure from diffraction data, based upon an atomic model of a known structure, either a related protein or the same protein from a different crystal form.
+
+
+ The technique solves the phase problem, i.e. retrieves information concerning phases of the structure.
+ Molecular replacement
+
+
+
+
+
+
+
+
+ 1.7
+ A method used to refine a structure by moving the whole molecule or parts of it as a rigid unit, rather than moving individual atoms.
+
+
+ Rigid body refinement usually follows molecular replacement in the assignment of a structure from diffraction data.
+ Rigid body refinement
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ An image processing technique that combines and analyzes multiple images of a particulate sample, in order to produce an image with clearer features that are more easily interpreted.
+
+
+ Single particle analysis is used to improve the information that can be obtained by relatively low resolution techniques, e.g. an image of a protein or virus from transmission electron microscopy (TEM).
+ Single particle analysis
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ true
+ This is two related concepts.
+ Compare (align and classify) multiple particle images from a micrograph in order to produce a representative image of the particle.
+
+
+ A micrograph can include particles in multiple different orientations and/or conformations. Particles are compared and organised into sets based on their similarity. Typically iterations of classification and alignment are performed to optimise the final 3D EM map.
+ Single particle alignment and classification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Clustering of molecular sequences on the basis of their function, typically using information from an ontology of gene function, or some other measure of functional phenotype.
+ Functional sequence clustering
+
+
+ Functional clustering
+
+
+
+
+
+
+
+
+ 1.7
+ Classification (typically of molecular sequences) by assignment to some taxonomic hierarchy.
+ Taxonomy assignment
+ Taxonomic profiling
+
+
+ Taxonomic classification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ The prediction of the degree of pathogenicity of a microorganism from analysis of molecular sequences.
+ Pathogenicity prediction
+
+
+ Virulence prediction
+
+
+
+
+
+
+
+
+
+ 1.7
+ Analyse the correlation patterns among features/molecules across a variety of experiments, samples etc.
+ Co-expression analysis
+ Gene co-expression network analysis
+ Gene expression correlation
+ Gene expression correlation analysis
+
+
+ Expression correlation analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ true
+ Identify a correlation, i.e. a statistical relationship between two random variables or two sets of data.
+
+
+ Correlation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Compute the covariance model for (a family of) RNA secondary structures.
+
+
+ RNA structure covariance model generation
+
+
+
+
+
+
+
+
+ 1.7
+ 1.18
+
+ Predict RNA secondary structure by analysis, e.g. probabilistic analysis, of the shape of RNA folds.
+
+
+ RNA secondary structure prediction (shape-based)
+ true
+
+
+
+
+
+
+
+
+ 1.7
+ 1.18
+
+ Prediction of nucleic-acid folding using sequence alignments as a source of data.
+
+
+ Nucleic acid folding prediction (alignment-based)
+ true
+
+
+
+
+
+
+
+
+ 1.7
+ Count k-mers (substrings of length k) in DNA sequence data.
+
+
+ k-mer counting is used in genome and transcriptome assembly, metagenomic sequencing, and for error correction of sequence reads.
+ k-mer counting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Reconstructing the inner node labels of a phylogenetic tree from its leaves.
+ Phylogenetic tree reconstruction
+ Gene tree reconstruction
+ Species tree reconstruction
+
+
+ Note that this is somewhat different from simply analysing an existing tree or constructing a completely new one.
+ Phylogenetic reconstruction
+
+
+
+
+
+
+
+
+ 1.7
+ Generate some data from a chosen probabilistic model, possibly to evaluate algorithms.
+
+
+ Probabilistic data generation
+
+
+
+
+
+
+
+
+
+ 1.7
+ Generate sequences from some probabilistic model, e.g. a model that simulates evolution.
+
+
+ Probabilistic sequence generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Identify or predict causes for antibiotic resistance from molecular sequence analysis.
+
+
+ Antimicrobial resistance prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ Analysis of a set of objects, such as genes, annotated with given categories, where eventual over-/under-representation of certain categories within the studied set of objects is revealed.
+ Enrichment
+ Over-representation analysis
+ Functional enrichment
+
+
+ Categories from a relevant ontology can be used. The input is typically a set of genes or other biological objects, possibly represented by their identifiers, and the output of the analysis is typically a ranked list of categories, each associated with a statistical metric of over-/under-representation within the studied data.
+ Enrichment analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ Analyse a dataset with respect to concepts from an ontology of chemical structure, leveraging chemical similarity information.
+ Chemical class enrichment
+
+
+ Chemical similarity enrichment
+
+
+
+
+
+
+
+
+ 1.8
+ Plot an incident curve such as a survival curve, death curve, mortality curve.
+
+
+ Incident curve plotting
+
+
+
+
+
+
+
+
+ 1.8
+ Identify and map patterns of genomic variations.
+
+
+ Methods often utilise a database of aligned reads.
+ Variant pattern analysis
+
+
+
+
+
+
+
+
+ 1.8
+ 1.12
+
+ Model some biological system using mathematical techniques including dynamical systems, statistical models, differential equations, and game theoretic models.
+
+
+ Mathematical modelling
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.9
+ Visualise images resulting from various types of microscopy.
+
+
+ Microscope image visualisation
+
+
+
+
+
+
+
+
+ 1.9
+ Annotate an image of some sort, typically with terms from a controlled vocabulary.
+
+
+ Image annotation
+
+
+
+
+
+
+
+
+ 1.9
+ Replace missing data with substituted values, usually by using some statistical or other mathematical approach.
+ Data imputation
+
+
+ Imputation
+
+
+
+
+
+
+
+
+
+ 1.9
+ Visualise, format or render data from an ontology, typically a tree of terms.
+ Ontology browsing
+
+
+ Ontology visualisation
+
+
+
+
+
+
+
+
+ 1.9
+ A method for making numerical assessments about the maximum percent of time that a conformer of a flexible macromolecule can exist and still be compatible with the experimental data.
+
+
+ Maximum occurrence analysis
+
+
+
+
+
+
+
+
+
+ 1.9
+ Compare the models or schemas used by two or more databases, or any other general comparison of databases rather than a detailed comparison of the entries themselves.
+ Data model comparison
+ Schema comparison
+
+
+ Database comparison
+
+
+
+
+
+
+
+
+ 1.9
+ 1.24
+
+
+
+ Simulate the behaviour of a biological pathway or network.
+
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ Network simulation
+ true
+
+
+
+
+
+
+
+
+ 1.9
+ Analyze read counts from RNA-seq experiments.
+
+
+ RNA-seq read count analysis
+
+
+
+
+
+
+
+
+ 1.9
+ Identify and remove redundancy from a set of small molecule structures.
+
+
+ Chemical redundancy removal
+
+
+
+
+
+
+
+
+ 1.9
+ Analyze time series data from an RNA-seq experiment.
+
+
+ RNA-seq time series data analysis
+
+
+
+
+
+
+
+
+ 1.9
+ Simulate gene expression data, e.g. for purposes of benchmarking.
+
+
+ Simulated gene expression data generation
+
+
+
+
+
+
+
+
+ 1.12
+ Identify semantic relations among entities and concepts within a text, using text mining techniques.
+ Relation discovery
+ Relation inference
+ Relationship discovery
+ Relationship extraction
+ Relationship inference
+
+
+ Relation extraction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Re-adjust the output of mass spectrometry experiments with shifted ppm values.
+
+
+ Mass spectra calibration
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Align multiple data sets using information from chromatography and/or peptide identification, from mass spectrometry experiments.
+
+
+ Chromatographic alignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ The removal of isotope peaks in a spectrum, to represent the fragment ion as one data point.
+ Deconvolution
+
+
+ Deisotoping is commonly done to reduce complexity, and done in conjunction with the charge state deconvolution.
+ Deisotoping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Technique for determining the amount of proteins in a sample.
+ Protein quantitation
+
+
+ Protein quantification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Determination of peptide sequence from mass spectrum.
+ Peptide-spectrum-matching
+
+
+ Peptide identification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Calculate the isotope distribution of a given chemical species.
+
+
+ Isotopic distributions calculation
+
+
+
+
+
+
+
+
+ 1.12
+ Prediction of retention time in a mass spectrometry experiment based on compositional and structural properties of the separated species.
+ Retention time calculation
+
+
+ Retention time prediction
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification without the use of chemical tags.
+
+
+ Label-free quantification
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification based on the use of chemical tags.
+
+
+ Labeled quantification
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification by Selected/multiple Reaction Monitoring workflow (XIC quantitation of precursor / fragment mass pair).
+
+
+ MRM/SRM
+
+
+
+
+
+
+
+
+ 1.12
+ Calculate number of identified MS2 spectra as approximation of peptide / protein quantity.
+
+
+ Spectral counting
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using stable isotope labeling by amino acids in cell culture.
+
+
+ SILAC
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using the AB SCIEX iTRAQ isobaric labelling workflow, wherein 2-8 reporter ions are measured in MS2 spectra near 114 m/z.
+
+
+ iTRAQ
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using labeling based on 18O-enriched H2O.
+
+
+ 18O labeling
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using the Thermo Fisher tandem mass tag labelling workflow.
+
+
+ TMT-tag
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using chemical labeling by stable isotope dimethylation.
+
+
+ Stable isotope dimethyl labelling
+
+
+
+
+
+
+
+
+ 1.12
+ Peptide sequence tags are used as a piece of information about a peptide obtained by tandem mass spectrometry.
+
+
+ Tag-based peptide identification
+
+
+
+
+
+
+
+
+
+ 1.12
+ Analytical process that derives a peptide's amino acid sequence from its tandem mass spectrum (MS/MS) without the assistance of a sequence database.
+
+
+ de Novo sequencing
+
+
+
+
+
+
+
+
+ 1.12
+ Identification of post-translational modifications (PTMs) of peptides/proteins in mass spectrum.
+
+
+ PTM identification
+
+
+
+
+
+
+
+
+
+ 1.12
+ Determination of best matches between MS/MS spectrum and a database of protein or nucleic acid sequences.
+
+
+ Peptide database search
+
+
+
+
+
+
+
+
+ 1.12
+ Peptide database search for identification of known and unknown PTMs looking for mass difference mismatches.
+ Modification-tolerant peptide database search
+ Unrestricted peptide database search
+
+
+ Blind peptide database search
+
+
+
+
+
+
+
+
+ 1.12
+ 1.19
+
+
+ Statistical estimation of false discovery rate from score distribution for peptide-spectrum-matches, following a peptide database search.
+
+
+ Validation of peptide-spectrum matches
+ true
+
+
+
+
+
+
+
+
+
+ 1.12
+ Validation of peptide-spectrum matches
+ Statistical estimation of false discovery rate from score distribution for peptide-spectrum-matches, following a peptide database search, and by comparison to search results with a database containing incorrect information.
+
+
+ Target-Decoy
+
+
+
+
+
+
+
+
+ 1.12
+ Analyse data in order to deduce properties of an underlying distribution or population.
+ Empirical Bayes
+
+
+ Statistical inference
+
+
+
+
+
+
+
+
+
+ 1.12
+ A statistical calculation to estimate the relationships among variables.
+ Regression
+
+
+ Regression analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Model a metabolic network. This can include 1) reconstruction to break down metabolic pathways into reactions, enzymes, and other relevant information, and compilation of this into a mathematical model and 2) simulations of metabolism based on the model.
+
+
+ Metabolic network reconstruction
+ Metabolic network simulation
+ Metabolic pathway simulation
+ Metabolic reconstruction
+
+
+ The terms and synonyms here reflect that for practical intents and purposes, "pathway" and "network" can be treated the same.
+ Metabolic network modelling
+
+
+
+
+
+
+
+
+
+ 1.12
+ Predict the effect or function of an individual single nucleotide polymorphism (SNP).
+
+
+ SNP annotation
+
+
+
+
+
+
+
+
+ 1.12
+ Prediction of genes or gene components from first principles, i.e. without reference to existing genes.
+ Gene prediction (ab-initio)
+
+
+ Ab-initio gene prediction
+
+
+
+
+
+
+
+
+
+ 1.12
+ Prediction of genes or gene components by reference to homologous genes.
+ Empirical gene finding
+ Empirical gene prediction
+ Evidence-based gene prediction
+ Gene prediction (homology-based)
+ Similarity-based gene prediction
+ Homology prediction
+ Orthology prediction
+
+
+ Homology-based gene prediction
+
+
+
+
+
+
+
+
+
+ 1.12
+ Construction of a statistical model, or a set of assumptions around some observed data, usually by describing a set of probability distributions which approximate the distribution of data.
+
+
+ Statistical modelling
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Compare two or more molecular surfaces.
+
+
+ Molecular surface comparison
+
+
+
+
+
+
+
+
+ 1.12
+ Annotate one or more sequences with functional information, such as cellular processes or metabolic pathways, by reference to a controlled vocabulary - invariably the Gene Ontology (GO).
+ Sequence functional annotation
+
+
+ Gene functional annotation
+
+
+
+
+
+
+
+
+ 1.12
+ Variant filtering is used to eliminate false positive variants based for example on base calling quality, strand and position information, and mapping info.
+
+
+ Variant filtering
+
+
+
+
+
+
+
+
+ 1.12
+ Identify binding sites in nucleic acid sequences that are statistically significantly differentially bound between sample groups.
+
+
+ Differential binding analysis
+
+
+
+
+
+
+
+
+
+ 1.13
+ Analyze data from RNA-seq experiments.
+
+
+ RNA-Seq analysis
+
+
+
+
+
+
+
+
+ 1.13
+ Visualise, format or render a mass spectrum.
+
+
+ Mass spectrum visualisation
+
+
+
+
+
+
+
+
+ 1.13
+ Filter a set of files or data items according to some property.
+ Sequence filtering
+ rRNA filtering
+
+
+ Filtering
+
+
+
+
+
+
+
+
+ 1.14
+ Identification of the best reference for mapping for a specific dataset from a list of potential references, when performing genetic variation analysis.
+
+
+ Reference identification
+
+
+
+
+
+
+
+
+ 1.14
+ Label-free quantification by integration of ion current (ion counting).
+ Ion current integration
+
+
+ Ion counting
+
+
+
+
+
+
+
+
+ 1.14
+ Chemical tagging free amino groups of intact proteins with stable isotopes.
+ ICPL
+
+
+ Isotope-coded protein label
+
+
+
+
+
+
+
+
+ 1.14
+ Labeling all proteins and (possibly) all amino acids using C-13 or N-15 enriched grown medium or feed.
+ C-13 metabolic labeling
+ N-15 metabolic labeling
+
+
+ This includes N-15 metabolic labeling (labeling all proteins and (possibly) all amino acids using N-15 enriched grown medium or feed) and C-13 metabolic labeling (labeling all proteins and (possibly) all amino acids using C-13 enriched grown medium or feed).
+ Metabolic labeling
+
+
+
+
+
+
+
+
+ 1.15
+ Construction of a single sequence assembly of all reads from different samples, typically as part of a comparative metagenomic analysis.
+ Sequence assembly (cross-assembly)
+
+
+ Cross-assembly
+
+
+
+
+
+
+
+
+ 1.15
+ The comparison of samples from a metagenomics study, for example, by comparison of metagenome shotgun reads or assembled contig sequences, by comparison of functional profiles, or some other method.
+
+
+ Sample comparison
+
+
+
+
+
+
+
+
+
+ 1.15
+ Differential protein analysis
+ The analysis, using proteomics techniques, to identify proteins whose encoding genes are differentially expressed under a given experimental setup.
+ Differential protein expression analysis
+
+
+ Differential protein expression profiling
+
+
+
+
+
+
+
+
+ 1.15
+ 1.17
+
+ The analysis, using any of diverse techniques, to identify genes that are differentially expressed under a given experimental setup.
+
+
+ Differential gene expression analysis
+ true
+
+
+
+
+
+
+
+
+ 1.15
+ Visualise, format or render data arising from an analysis of multiple samples from a metagenomics/community experiment.
+
+
+ Multiple sample visualisation
+
+
+
+
+
+
+
+
+ 1.15
+ The extrapolation of empirical characteristics of individuals or populations, backwards in time, to their common ancestors.
+ Ancestral sequence reconstruction
+ Character mapping
+ Character optimisation
+
+
+ Ancestral reconstruction is often used to recover possible ancestral character states of ancient, extinct organisms.
+ Ancestral reconstruction
+
+
+
+
+
+
+
+
+ 1.16
+ Site localisation of post-translational modifications in peptide or protein mass spectra.
+ PTM scoring
+ Site localisation
+
+
+ PTM localisation
+
+
+
+
+
+
+
+
+ 1.16
+ Operations concerning the handling and use of other tools.
+ Endpoint management
+
+
+ Service management
+
+
+
+
+
+
+
+
+ 1.16
+ An operation supporting the browsing or discovery of other tools and services.
+
+
+ Service discovery
+
+
+
+
+
+
+
+
+ 1.16
+ An operation supporting the aggregation of other services (at least two) into a functional unit, for the automation of some task.
+
+
+ Service composition
+
+
+
+
+
+
+
+
+ 1.16
+ An operation supporting the calling (invocation) of other tools and services.
+
+
+ Service invocation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ A data mining method typically used for studying biological networks based on pairwise correlations between variables.
+ WGCNA
+ Weighted gene co-expression network analysis
+
+
+ Weighted correlation network analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ Identification of protein, for example from one or more peptide identifications by tandem mass spectrometry.
+ Protein inference
+
+
+ Protein identification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ Text annotation is the operation of adding notes, data and metadata, recognised entities and concepts, and their relations to a text (such as a scientific article).
+ Article annotation
+ Literature annotation
+
+
+ Text annotation
+
+
+
+
+
+
+
+
+
+ 1.17
+ A method whereby data on several variants are "collapsed" into a single covariate based on regions such as genes.
+
+
+ Genome-wide association studies (GWAS) analyse a genome-wide set of genetic variants in different individuals to see if any variant is associated with a trait. Traditional association techniques can lack the power to detect the significance of rare variants individually, or measure their compound effect (rare variant burden). "Collapsing methods" were developed to overcome these problems.
+ Collapsing methods
+
+
+
+
+
+
+
+
+ 1.17
+ miRNA analysis
+ The analysis of microRNAs (miRNAs): short, highly conserved small noncoding RNA molecules that are naturally occurring in plant and animal genomes.
+ miRNA expression profiling
+
+
+ miRNA expression analysis
+
+
+
+
+
+
+
+
+ 1.17
+ Counting and summarising the number of short sequence reads that map to genomic features.
+
+
+ Read summarisation
+
+
+
+
+
+
+
+
+ 1.17
+ A technique whereby molecules with desired properties and function are isolated from libraries of random molecules, through iterative cycles of selection, amplification, and mutagenesis.
+
+
+ In vitro selection
+
+
+
+
+
+
+
+
+ 1.17
+ The calculation of species richness for a number of individual samples, based on plots of the number of species as a function of the number of samples (rarefaction curves).
+ Species richness assessment
+
+
+ Rarefaction
+
+
+
+
+
+
+
+
+
+ 1.17
+ An operation which groups reads or contigs and assigns them to operational taxonomic units.
+ Binning
+ Binning shotgun reads
+
+
+ Binning methods use one or a combination of compositional features or sequence similarity.
+ Read binning
+
+
+
+
+
+
+
+
+
+ 1.17
+ true
+ Counting and measuring experimentally determined observations into quantities.
+ Quantitation
+
+
+ Quantification
+
+
+
+
+
+
+
+
+ 1.17
+ Quantification of data arising from RNA-Seq high-throughput sequencing, typically the quantification of transcript abundances during transcriptome analysis in a gene expression study.
+ RNA-Seq quantitation
+
+
+ RNA-Seq quantification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.17
+ Match experimentally measured mass spectrum to a spectrum in a spectral library or database.
+
+
+ Spectral library search
+
+
+
+
+
+
+
+
+ 1.17
+ Sort a set of files or data items according to some property.
+
+
+ Sorting
+
+
+
+
+
+
+
+
+ 1.17
+ Mass spectra identification of compounds that are produced by living systems. Including polyketides, terpenoids, phenylpropanoids, alkaloids and antibiotics.
+ De novo metabolite identification
+ Fragmentation tree generation
+ Metabolite identification
+
+
+ Natural product identification
+
+
+
+
+
+
+
+
+ 1.19
+ Identify and assess specific genes or regulatory regions of interest that are differentially methylated.
+ Differentially-methylated region identification
+
+
+ DMR identification
+
+
+
+
+
+
+
+
+ 1.21
+
+
+ Genotyping of multiple loci, typically characterizing microbial species isolates using internal fragments of multiple housekeeping genes.
+ MLST
+
+
+ Multilocus sequence typing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.21
+ Calculate theoretical mass spectrometry spectra for given sequences.
+ Spectrum prediction
+
+
+ Spectrum calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ 3D visualization of a molecular trajectory.
+
+
+ Trajectory visualization
+
+
+
+
+
+
+
+
+
+ 1.22
+ Compute Essential Dynamics (ED) on a simulation trajectory: an analysis of molecule dynamics using PCA (Principal Component Analysis) applied to the atomic positional fluctuations.
+ ED
+ PCA
+ Principal modes
+
+
+ Principal Component Analysis (PCA) is a multivariate statistical analysis to obtain collective variables and reduce the dimensionality of the system.
+ Essential dynamics
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Obtain force field parameters (charge, bonds, dihedrals, etc.) from a molecule, to be used in molecular simulations.
+ Ligand parameterization
+ Molecule parameterization
+
+
+ Forcefield parameterisation
+
+
+
+
+
+
+
+
+ 1.22
+ Analyse DNA sequences in order to determine an individual's DNA characteristics, for example in criminal forensics, parentage testing and so on.
+ DNA fingerprinting
+ DNA profiling
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect active sites in proteins; the region of an enzyme which binds a substrate and catalyses a reaction.
+ Active site detection
+
+
+ Active site prediction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect ligand-binding sites in proteins; a region of a protein which reversibly binds a ligand for some biochemical purpose, such as transport or regulation of protein function.
+ Ligand-binding site detection
+ Peptide-protein binding prediction
+
+
+ Ligand-binding site prediction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect metal ion-binding sites in proteins.
+ Metal-binding site detection
+ Protein metal-binding site prediction
+
+
+ Metal-binding site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Model or simulate protein-protein binding using comparative modelling or other techniques.
+ Protein docking
+
+
+ Protein-protein docking
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict DNA-binding proteins.
+ DNA-binding protein detection
+ DNA-protein interaction prediction
+ Protein-DNA interaction prediction
+
+
+ DNA-binding protein prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict RNA-binding proteins.
+ Protein-RNA interaction prediction
+ RNA-binding protein detection
+ RNA-protein interaction prediction
+
+
+ RNA-binding protein prediction
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect RNA-binding sites in protein sequences.
+ Protein-RNA binding site detection
+ Protein-RNA binding site prediction
+ RNA binding site detection
+
+
+ RNA binding site prediction
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect DNA-binding sites in protein sequences.
+ Protein-DNA binding site detection
+ Protein-DNA binding site prediction
+ DNA binding site detection
+
+
+ DNA binding site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Identify or predict intrinsically disordered regions in proteins.
+
+
+ Protein disorder prediction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Extract structured information from unstructured ("free") or semi-structured textual documents.
+ IE
+
+
+ Information extraction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Retrieve resources from information systems matching a specific information need.
+
+
+ Information retrieval
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Study of genomic feature structure, variation, function and evolution at a genomic scale.
+ Genomic analysis
+ Genome analysis
+
+
+
+
+
+
+
+
+ 1.24
+ The determination of cytosine methylation status of specific positions in nucleic acid sequences (usually reads from a bisulfite sequencing experiment).
+
+
+ Methylation calling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The identification of changes in DNA sequence or chromosome structure, usually in the context of diagnostic tests for disease, or to study ancestry or phylogeny.
+ Genetic testing
+
+
+ This can include indirect methods which reveal the results of genetic changes, such as RNA analysis to indicate gene expression, or biochemical analysis to identify expressed proteins.
+ DNA testing
+
+
+
+
+
+
+
+
+
+ 1.24
+ The processing of reads from high-throughput sequencing machines.
+
+
+ Sequence read processing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Render (visualise) a network - typically a biological network of some sort.
+ Network rendering
+ Protein interaction network rendering
+ Protein interaction network visualisation
+ Network visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Render (visualise) a biological pathway.
+ Pathway rendering
+
+
+ Pathway visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Generate, process or analyse a biological network.
+ Biological network analysis
+ Biological network modelling
+ Biological network prediction
+ Network comparison
+ Network modelling
+ Network prediction
+ Network simulation
+ Network topology simulation
+
+
+ Network analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Generate, process or analyse a biological pathway.
+ Biological pathway analysis
+ Biological pathway modelling
+ Biological pathway prediction
+ Functional pathway analysis
+ Pathway comparison
+ Pathway modelling
+ Pathway prediction
+ Pathway simulation
+
+
+ Pathway analysis
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Predict a metabolic pathway.
+
+
+ Metabolic pathway prediction
+
+
+
+
+
+
+
+
+ 1.24
+ Assigning sequence reads to separate groups / files based on their index tag (sample origin).
+ Sequence demultiplexing
+
+
+ NGS sequence runs are often performed with multiple samples pooled together. In such cases, an index tag (or "barcode") - a unique sequence of between 6 and 12bp - is ligated to each sample's genetic material so that the sequence reads from different samples can be identified. The process of demultiplexing (dividing sequence reads into separate files for each index tag/sample) may be performed automatically by the sequencing hardware. Alternatively the reads may be lumped together in one file with barcodes still attached, requiring you to do the splitting using software. In such cases, a "mapping" file is used which indicates which barcodes correspond to which samples.
+ Demultiplexing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A process used in statistics, machine learning, and information theory that reduces the number of random variables by obtaining a set of principal variables.
+ Dimension reduction
+
+
+ Dimensionality reduction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A dimensionality reduction process that selects a subset of relevant features (variables, predictors) for use in model construction.
+ Attribute selection
+ Variable selection
+ Variable subset selection
+
+
+ Feature selection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A dimensionality reduction process which builds (ideally) informative and non-redundant values (features) from an initial set of measured data, to aid subsequent generalization, learning or interpretation.
+ Feature projection
+
+
+ Feature extraction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Virtual screening is used in drug discovery to identify potential drug compounds. It involves searching libraries of small molecules in order to identify those molecules which are most likely to bind to a drug target (typically a protein receptor or enzyme).
+ Ligand-based screening
+ Ligand-based virtual screening
+ Structure-based screening
+ Structure-based virtual screening
+ Virtual ligand screening
+
+
+ Virtual screening is widely used for lead identification, lead optimization, and scaffold hopping during drug design and discovery.
+ Virtual screening
+
+
+
+
+
+
+
+
+ 1.24
+ The application of phylogenetic and other methods to estimate paleogeographical events such as speciation.
+ Biogeographic dating
+ Speciation dating
+ Species tree dating
+ Tree-dating
+
+
+ Tree dating
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The development and use of mathematical models and systems analysis for the description of ecological processes, and applications such as the sustainable management of resources.
+
+
+ Ecological modelling
+
+
+
+
+
+
+
+
+ 1.24
+ Mapping between gene tree nodes and species tree nodes or branches, to analyse and account for possible differences between gene histories and species histories, explaining this in terms of gene-scale events such as duplication, loss, transfer etc.
+ Gene tree / species tree reconciliation
+
+
+ Methods typically test for topological similarity between trees using for example a congruence index.
+ Phylogenetic tree reconciliation
+
+
+
+
+
+
+
+
+ 1.24
+ The detection of genetic selection, or (the end result of) the process by which certain traits become more prevalent in a species than other traits.
+
+
+ Selection detection
+
+
+
+
+
+
+
+
+ 1.25
+ A statistical procedure that uses an orthogonal transformation to convert a set of observations of possibly correlated variables into a set of values of linearly uncorrelated variables called principal components.
+
+
+ Principal component analysis
+
+
+
+
+
+
+
+
+
+ 1.25
+ Identify where sections of the genome are repeated and the number of repeats in the genome varies between individuals.
+ CNV detection
+
+
+ Copy number variation detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify deletion events causing the number of repeats in the genome to vary between individuals.
+
+
+ Deletion detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify duplication events causing the number of repeats in the genome to vary between individuals.
+
+
+ Duplication detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify copy number variations which are complex, e.g. multi-allelic variations that have many structural alleles and have rearranged multiple times in the ancestral genomes.
+
+
+ Complex CNV detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify amplification events causing the number of repeats in the genome to vary between individuals.
+
+
+ Amplification detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ Predict adhesins in protein sequences.
+
+
+ An adhesin is a cell-surface component that facilitates the adherence of a microorganism to a cell or surface. Adhesins are important virulence factors during establishment of infection and thus are targeted during vaccine development approaches that seek to block adhesin function and prevent adherence to host cells.
+ Adhesin prediction
+
+
+
+
+
+
+
+
+ 1.25
+ Design new protein molecules with specific structural or functional properties.
+ Protein redesign
+ Rational protein design
+ de novo protein design
+
+
+ Protein design
+
+
+
+
+
+
+
+
+
+ 1.25
+ The design of small molecules with specific biological activity, such as inhibitors or modulators for proteins that are of therapeutic interest. This can involve the modification of individual atoms, the addition or removal of molecular fragments, and the use of reaction-based design to explore tractable synthesis options for the small molecule.
+ Drug design
+ Ligand-based drug design
+ Structure-based drug design
+ Structure-based small molecule design
+ Small molecule design can involve assessment of target druggability and flexibility, molecular docking, in silico fragment screening, molecular dynamics, and homology modeling.
+ There are two broad categories of small molecule design techniques when applied to the design of drugs: ligand-based drug design (e.g. ligand similarity) and structure-based drug design (ligand docking) methods. Ligand similarity methods exploit structural similarities to known active ligands, whereas ligand docking methods use the 3D structure of a target protein to predict the binding modes and affinities of ligands to it.
+ Small molecule design
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The estimation of the power of a test; that is the probability of correctly rejecting the null hypothesis when it is false.
+ Estimation of statistical power
+ Power analysis
+
+
+ Power test
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The prediction of DNA modifications (e.g. N4-methylcytosine and N6-Methyladenine) using, for example, statistical models.
+
+
+ DNA modification prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The analysis and simulation of disease transmission using, for example, statistical methods such as the SIR-model.
+
+
+ Disease transmission analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The correction of p-values from multiple statistical tests to correct for false positives.
+ FDR estimation
+ False discovery rate estimation
+
+
+ Multiple testing correction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A category denoting a rather broad domain or field of interest, of study, application, work, data, or technology. Topics have no clearly defined borders between each other.
+ sumo:FieldOfStudy
+
+
+ Topic
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The processing and analysis of nucleic acid sequence, structural and other data.
+ Nucleic acid bioinformatics
+ Nucleic acid informatics
+ Nucleic_acids
+ Nucleic acid physicochemistry
+ Nucleic acid properties
+
+
+ Nucleic acids
+
+ http://purl.bioontology.org/ontology/MSH/D017422
+ http://purl.bioontology.org/ontology/MSH/D017423
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Archival, processing and analysis of protein data, typically molecular sequence and structural data.
+ Protein bioinformatics
+ Protein informatics
+ Proteins
+ Protein databases
+
+
+ Proteins
+
+ http://purl.bioontology.org/ontology/MSH/D020539
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The structures of reactants or products of metabolism, for example small molecules such as vitamins, polyols, nucleotides and amino acids.
+
+
+ Metabolites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The archival, processing and analysis of molecular sequences (monomer composition of polymers) including molecular sequence data resources, sequence sites, alignments, motifs and profiles.
+ Sequence_analysis
+ Biological sequences
+ Sequence databases
+
+
+
+ Sequence analysis
+
+ http://purl.bioontology.org/ontology/MSH/D017421
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The curation, processing, analysis and prediction of data about the structure of biological molecules, typically proteins and nucleic acids and other macromolecules.
+ Biomolecular structure
+ Structural bioinformatics
+ Structure_analysis
+ Computational structural biology
+ Molecular structure
+ Structure data resources
+ Structure databases
+ Structures
+
+
+
+ This includes related concepts such as structural properties, alignments and structural motifs.
+ Structure analysis
+
+ http://purl.bioontology.org/ontology/MSH/D015394
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The prediction of molecular structure, including the prediction, modelling, recognition or design of protein secondary or tertiary structure or other structural features, and the folding of nucleic acid molecules and the prediction or design of nucleic acid (typically RNA) sequences with specific conformations.
+ Structure_prediction
+ DNA structure prediction
+ Nucleic acid design
+ Nucleic acid folding
+ Nucleic acid structure prediction
+ Protein fold recognition
+ Protein structure prediction
+ RNA structure prediction
+
+
+ This includes the recognition (prediction and assignment) of known protein structural domains or folds in protein sequence(s), for example by threading, or the alignment of molecular sequences to structures, structural (3D) profiles or templates (representing a structure or structure alignment).
+ Structure prediction
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ The alignment (equivalence between sites) of molecular sequences, structures or profiles (representing a sequence or structure alignment).
+
+ Alignment
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of evolutionary relationships amongst organisms.
+ Phylogeny
+ Phylogenetic clocks
+ Phylogenetic dating
+ Phylogenetic simulation
+ Phylogenetic stratigraphy
+ Phylogeny reconstruction
+
+
+
+ This includes diverse phylogenetic methods, including phylogenetic tree construction, typically from molecular sequence or morphological data, methods that simulate DNA sequence evolution, a phylogenetic tree or the underlying data, or which estimate or use molecular clock and stratigraphic (age) data, methods for studying gene evolution etc.
+ Phylogeny
+
+ http://purl.bioontology.org/ontology/MSH/D010802
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of gene or protein functions and their interactions in totality in a given organism, tissue, cell etc.
+ Functional_genomics
+
+
+
+ Functional genomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The conceptualisation, categorisation and nomenclature (naming) of entities or phenomena within biology or bioinformatics. This includes formal ontologies, controlled vocabularies, structured glossary, symbols and terminology or other related resource.
+ Ontology_and_terminology
+ Applied ontology
+ Ontologies
+ Ontology
+ Ontology relations
+ Terminology
+ Upper ontology
+
+
+
+ Ontology and terminology
+
+ http://purl.bioontology.org/ontology/MSH/D002965
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+
+ The search and query of data sources (typically databases or ontologies) in order to retrieve entries or other information.
+
+ Information retrieval
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.6 Bioinformatics
+ The archival, curation, processing and analysis of complex biological data.
+ Bioinformatics
+
+
+
+ This includes data processing in general, including basic handling of files and databases, datatypes, workflows and annotation.
+ Bioinformatics
+
+ http://purl.bioontology.org/ontology/MSH/D016247
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Computer graphics
+ VT 1.2.5 Computer graphics
+ Rendering (drawing on a computer screen) or visualisation of molecular sequences, structures or other biomolecular data.
+ Data rendering
+ Data_visualisation
+
+
+ Data visualisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The study of the thermodynamic properties of a nucleic acid.
+
+ Nucleic acid thermodynamics
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The archival, curation, processing and analysis of nucleic acid structural information, such as whole structures, structural features and alignments, and associated annotation.
+ Nucleic acid structure
+ Nucleic_acid_structure_analysis
+ DNA melting
+ DNA structure
+ Nucleic acid denaturation
+ Nucleic acid thermodynamics
+ RNA alignment
+ RNA structure
+ RNA structure alignment
+
+
+ Includes secondary and tertiary nucleic acid structural data, nucleic acid thermodynamic, thermal and conformational properties including DNA or DNA/RNA denaturation (melting) etc.
+ Nucleic acid structure analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ RNA sequences and structures.
+ RNA
+ Small RNA
+
+
+ RNA
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Topic for the study of restriction enzymes, their cleavage sites and the restriction of nucleic acids.
+
+ Nucleic acid restriction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The mapping of complete (typically nucleotide) sequences. Mapping (in the sense of short read alignment, or more generally, just alignment) has application in RNA-Seq analysis (mapping of transcriptomics reads), variant discovery (e.g. mapping of exome capture), and re-sequencing (mapping of WGS reads).
+ Mapping
+ Genetic linkage
+ Linkage
+ Linkage mapping
+ Synteny
+
+
+ This includes resources that aim to identify, map or analyse genetic markers in DNA sequences, for example to produce a genetic (linkage) map of a chromosome or genome or to analyse genetic linkage and synteny. It also includes resources for physical (sequence) maps of a DNA sequence showing the physical distance (base pairs) between features or landmarks such as restriction sites, cloned DNA fragments, genes and other genetic markers. It also covers for example the alignment of sequences of (typically millions of) short reads to a reference genome.
+ Mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The study of codon usage in nucleotide sequence(s), genetic codes and so on.
+
+ Genetic codes and codon usage
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The translation of mRNA into protein and subsequent protein processing in the cell.
+ Protein_expression
+ Translation
+
+
+
+ Protein expression
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Methods that aim to identify, predict, model or analyse genes or gene structure in DNA sequences.
+
+ This includes the study of promoters, coding regions, splice sites, etc. Methods for gene prediction might be ab initio, based on phylogenetic comparisons, use motifs, sequence features, support vector machine, alignment etc.
+ Gene finding
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The transcription of DNA into mRNA.
+
+ Transcription
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Promoters in DNA sequences (region of DNA that facilitates the transcription of a particular gene by binding RNA polymerase and transcription factor proteins).
+
+ Promoters
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ The folding (in 3D space) of nucleic acid molecules.
+
+
+ Nucleic acid folding
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Gene structure, regions which make an RNA product and features such as promoters, coding regions, gene fusion, splice sites etc.
+ Gene features
+ Gene_structure
+ Fusion genes
+
+
+ This includes operons (operators, promoters and genes) from a bacterial genome. For example the operon leader and trailer gene, gene composition of the operon and associated information.
+ This includes the study of promoters, coding regions etc.
+ Gene structure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein and peptide identification, especially in the study of whole proteomes of organisms.
+ Proteomics
+ Bottom-up proteomics
+ Discovery proteomics
+ MS-based targeted proteomics
+ MS-based untargeted proteomics
+ Metaproteomics
+ Peptide identification
+ Protein and peptide identification
+ Quantitative proteomics
+ Targeted proteomics
+ Top-down proteomics
+
+
+
+ Includes metaproteomics: proteomics analysis of an environmental sample.
+ Proteomics includes any methods (especially high-throughput) that separate, characterize and identify expressed proteins such as mass spectrometry, two-dimensional gel electrophoresis and protein microarrays, as well as in-silico methods that perform proteolytic or mass calculations on a protein sequence and other analyses of protein production data, for example in different cells or tissues.
+ Proteomics
+
+ http://purl.bioontology.org/ontology/MSH/D040901
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The elucidation of the three dimensional structure for all (available) proteins in a given organism.
+ Structural_genomics
+
+
+
+ Structural genomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of the physical and biochemical properties of peptides and proteins, for example the hydrophobic, hydrophilic and charge properties of a protein.
+ Protein physicochemistry
+ Protein_properties
+ Protein hydropathy
+
+
+ Protein properties
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein-protein, protein-DNA/RNA and protein-ligand interactions, including analysis of known interactions and prediction of putative interactions.
+ Protein_interactions
+ Protein interaction map
+ Protein interaction networks
+ Protein interactome
+ Protein-DNA interaction
+ Protein-DNA interactions
+ Protein-RNA interaction
+ Protein-RNA interactions
+ Protein-ligand interactions
+ Protein-nucleic acid interactions
+ Protein-protein interactions
+
+
+ This includes experimental (e.g. yeast two-hybrid) and computational analysis techniques.
+ Protein interactions
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein stability, folding (in 3D space) and protein sequence-structure-function relationships. This includes for example study of inter-atomic or inter-residue interactions in protein (3D) structures, the effect of mutation, and the design of proteins with specific properties, typically by designing changes (via site-directed mutagenesis) to an existing protein.
+ Protein_folding_stability_and_design
+ Protein design
+ Protein folding
+ Protein residue interactions
+ Protein stability
+ Rational protein design
+
+
+ Protein folding, stability and design
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Two-dimensional gel electrophoresis image and related data.
+
+ Two-dimensional gel electrophoresis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ An analytical chemistry technique that measures the mass-to-charge ratio and abundance of ions in the gas phase.
+
+
+ Mass spectrometry
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Protein microarray data.
+
+ Protein microarrays
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The study of the hydrophobic, hydrophilic and charge properties of a protein.
+
+ Protein hydropathy
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The study of how proteins are transported within and without the cell, including signal peptides, protein subcellular localisation and export.
+ Protein_targeting_and_localisation
+ Protein localisation
+ Protein sorting
+ Protein targeting
+
+
+ Protein targeting and localisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Enzyme or chemical cleavage sites and proteolytic or mass calculations on a protein sequence.
+
+ Protein cleavage sites and proteolysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ The comparison of two or more protein structures.
+
+
+ Use this concept for methods that are exclusively for protein structure.
+ Protein structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The processing and analysis of inter-atomic or inter-residue interactions in protein (3D) structures.
+
+ Protein residue interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein-protein interactions, individual interactions and networks, protein complexes, protein functional coupling etc.
+
+ Protein-protein interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein-ligand (small molecule) interactions.
+
+ Protein-ligand interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein-DNA/RNA interactions.
+
+ Protein-nucleic acid interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The design of proteins with specific properties, typically by designing changes (via site-directed mutagenesis) to an existing protein.
+
+ Protein design
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ G-protein coupled receptors (GPCRs).
+
+ G protein-coupled receptors (GPCR)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Carbohydrates, typically including structural information.
+ Carbohydrates
+
+
+ Carbohydrates
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Lipids and their structures.
+ Lipidomics
+ Lipids
+
+
+ Lipids
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Small molecules of biological significance, typically archival, curation, processing and analysis of structural information.
+ Small_molecules
+ Amino acids
+ Chemical structures
+ Drug structures
+ Drug targets
+ Drugs and target structures
+ Metabolite structures
+ Peptides
+ Peptides and amino acids
+ Target structures
+ Targets
+ Toxins
+ Toxins and targets
+ CHEBI:23367
+
+
+ Small molecules include organic molecules, metal-organic compounds, small polypeptides, small polysaccharides and oligonucleotides. Structural data is usually included.
+ This concept excludes macromolecules such as proteins and nucleic acids.
+ This includes the structures of drugs, drug targets, their interactions and binding affinities. Also the structures of reactants or products of metabolism, for example small molecules including vitamins, polyols, nucleotides and amino acids. Also the physicochemical, biochemical or structural properties of amino acids or peptides. Also structural and associated data for toxic chemical substances.
+ Small molecules
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Edit, convert or otherwise change a molecular sequence, either randomly or specifically.
+
+ Sequence editing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The archival, processing and analysis of the basic character composition of molecular sequences, for example character or word frequency, ambiguity, complexity, particularly regions of low complexity, and repeats or the repetitive nature of molecular sequences.
+ Sequence_composition_complexity_and_repeats
+ Low complexity sequences
+ Nucleic acid repeats
+ Protein repeats
+ Protein sequence repeats
+ Repeat sequences
+ Sequence complexity
+ Sequence composition
+ Sequence repeats
+
+
+ This includes repetitive elements within a nucleic acid sequence, e.g. long terminal repeats (LTRs); sequences (typically retroviral) directly repeated at both ends of a sequence and other types of repeating unit.
+ This includes short repetitive subsequences (repeat sequences) in a protein sequence.
+ Sequence composition, complexity and repeats
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Conserved patterns (motifs) in molecular sequences, that (typically) describe functional or other key sites.
+
+ Sequence motifs
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The comparison of two or more molecular sequences, for example sequence alignment and clustering.
+
+
+ The comparison might be on the basis of sequence, physico-chemical or some other properties of the sequences.
+ Sequence comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The archival, detection, prediction and analysis of positional features such as functional and other key sites, in molecular sequences and the conserved patterns (motifs, profiles etc.) that may be used to describe them.
+ Sequence_sites_features_and_motifs
+ Functional sites
+ HMMs
+ Sequence features
+ Sequence motifs
+ Sequence profiles
+ Sequence sites
+
+
+ Sequence sites, features and motifs
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Search and retrieve molecular sequences that are similar to a sequence-based query (typically a simple sequence).
+
+ The query is a sequence-based entity such as another sequence, a motif or profile.
+ Sequence database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The comparison and grouping together of molecular sequences on the basis of their similarities.
+
+
+ This includes systems that generate, process and analyse sequence clusters.
+ Sequence clustering
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Structural features or common 3D motifs within protein structures, including the surface of a protein structure, such as biological interfaces with other molecules.
+ Protein 3D motifs
+ Protein_structural_motifs_and_surfaces
+ Protein structural features
+ Protein structural motifs
+ Protein surfaces
+ Structural motifs
+
+
+ This includes conformation of conserved substructures, conserved geometry (spatial arrangement) of secondary structure or protein backbone, solvent-exposed surfaces, internal cavities, the analysis of shape, hydropathy, electrostatic patches, role and functions etc.
+ Protein structural motifs and surfaces
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The processing, analysis or use of some type of structural (3D) profile or template; a computational entity (typically a numerical matrix) that is derived from and represents a structure or structure alignment.
+
+ Structural (3D) profiles
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The prediction, modelling, recognition or design of protein secondary or tertiary structure or other structural features.
+
+
+ Protein structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The folding of nucleic acid molecules and the prediction or design of nucleic acid (typically RNA) sequences with specific conformations.
+
+
+ Nucleic acid structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The prediction of three-dimensional structure of a (typically protein) sequence from first principles, using a physics-based or empirical scoring function and without using explicit structural templates.
+
+
+ Ab initio structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ The modelling of the three-dimensional structure of a protein using known sequence and structural data.
+
+ Homology modelling
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Molecular flexibility
+ Molecular motions
+ The study and simulation of molecular (typically protein) conformation using a computational model of physical forces and computer simulation.
+ Molecular_dynamics
+ Protein dynamics
+
+
+ This includes methods such as Molecular Dynamics, Coarse-grained dynamics, metadynamics, Quantum Mechanics, QM/MM, Markov State Models, etc. This includes resources concerning flexibility and motion in protein and other molecular structures.
+ Molecular dynamics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ 1.12
+
+ The modelling of the structure of proteins in complex with small molecules or other macromolecules.
+
+
+ Molecular docking
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The prediction of secondary or supersecondary structure of protein sequences.
+
+
+ Protein secondary structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The prediction of tertiary structure of protein sequences.
+
+
+ Protein tertiary structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The recognition (prediction and assignment) of known protein structural domains or folds in protein sequence(s).
+
+
+ Protein fold recognition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The alignment of molecular sequences or sequence profiles (representing sequence alignments).
+
+
+ This includes the generation of alignments (the identification of equivalent sites), the analysis of alignments, editing, visualisation, alignment databases, the alignment (equivalence between sites) of sequence profiles (representing sequence alignments) and so on.
+ Sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The superimposition of molecular tertiary structures or structural (3D) profiles (representing a structure or structure alignment).
+
+
+ This includes the generation, storage, analysis, rendering etc. of structure alignments.
+ Structure alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The alignment of molecular sequences to structures, structural (3D) profiles or templates (representing a structure or structure alignment).
+
+
+ Threading
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Sequence profiles; typically a positional, numerical matrix representing a sequence alignment.
+
+ Sequence profiles include position-specific scoring matrix (position weight matrix), hidden Markov models etc.
+ Sequence profiles and HMMs
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The reconstruction of a phylogeny (evolutionary relatedness amongst organisms), for example, by building a phylogenetic tree.
+
+ Currently too specific for the topic sub-ontology (but might be unobsoleted).
+ Phylogeny reconstruction
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The integrated study of evolutionary relationships and whole genome data, for example, in the analysis of species trees, horizontal gene transfer and evolutionary reconstruction.
+ Phylogenomics
+
+
+
+ Phylogenomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Simulated polymerase chain reaction (PCR).
+
+ Virtual PCR
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The assembly of fragments of a DNA sequence to reconstruct the original sequence.
+ Sequence_assembly
+ Assembly
+
+
+ Assembly has two broad types, de-novo and re-sequencing. Re-sequencing is a specialised case of assembly, where an assembled (typically de-novo assembled) reference genome is available and is about 95% identical to the re-sequenced genome. All other cases of assembly are 'de-novo'.
+ Sequence assembly
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Stable, naturally occurring mutations in a nucleotide sequence including alleles, naturally occurring mutations such as single base nucleotide substitutions, deletions and insertions, RFLPs and other polymorphisms.
+ DNA variation
+ Genetic_variation
+ Genomic variation
+ Mutation
+ Polymorphism
+ Somatic mutations
+
+
+ Genetic variation
+
+ http://purl.bioontology.org/ontology/MSH/D014644
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Microarrays, for example, to process microarray data or design probes and experiments.
+
+ Microarrays
+ http://purl.bioontology.org/ontology/MSH/D046228
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.7 Pharmacology and pharmacy
+ The study of drugs and their effects or responses in living systems.
+ Pharmacology
+ Computational pharmacology
+ Pharmacoinformatics
+
+
+
+ Pharmacology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ http://edamontology.org/topic_0197
+ The analysis of levels and patterns of synthesis of gene products (proteins and functional RNA) including interpretation in functional terms of gene expression data.
+ Expression
+ Gene_expression
+ Codon usage
+ DNA chips
+ DNA microarrays
+ Gene expression profiling
+ Gene transcription
+ Gene translation
+ Transcription
+
+
+
+ Gene expression levels are analysed by identifying, quantifying or comparing mRNA transcripts, for example using microarrays, RNA-seq, northern blots, gene-indexed expression profiles etc.
+ This includes the study of codon usage in nucleotide sequence(s), genetic codes and so on.
+ Gene expression
+
+ http://purl.bioontology.org/ontology/MSH/D015870
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The regulation of gene expression.
+ Regulatory genomics
+
+
+ Gene regulation
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The influence of genotype on drug response, for example by correlating gene expression or single-nucleotide polymorphisms with drug efficacy or toxicity.
+ Pharmacogenomics
+ Pharmacogenetics
+
+
+
+ Pharmacogenomics
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.4 Medicinal chemistry
+ The design and chemical synthesis of bioactive molecules, for example drugs or potential drug compounds, for medicinal purposes.
+ Drug design
+ Medicinal_chemistry
+
+
+
+ This includes methods that search compound collections, generate or analyse drug 3D conformations, identify drug targets with structural docking etc.
+ Medicinal chemistry
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Information on a specific fish genome including molecular sequences, genes and annotation.
+
+ Fish
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Information on a specific fly genome including molecular sequences, genes and annotation.
+
+ Flies
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Information on a specific mouse or rat genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a group of mice / rats or all mice / rats.
+ Mice or rats
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Information on a specific worm genome including molecular sequences, genes and annotation.
+
+ Worms
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The processing and analysis of the bioinformatics literature and bibliographic data, such as literature search and query.
+
+
+ Literature analysis
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The processing and analysis of natural language, such as scientific literature in English, in order to extract data and information, or to enable human-computer interaction.
+ NLP
+ Natural_language_processing
+ BioNLP
+ Literature mining
+ Text analytics
+ Text data mining
+ Text mining
+
+
+
+ Natural language processing
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deposition and curation of database accessions, including annotation, typically with terms from a controlled vocabulary.
+ Data_submission_annotation_and_curation
+ Data curation
+ Data provenance
+ Database curation
+
+
+
+ Data submission, annotation, and curation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The management and manipulation of digital documents, including database records, files and reports.
+
+
+ Document, record and content management
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation of a molecular sequence.
+
+ Sequence annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+
+ Annotation of a genome.
+
+ Genome annotation
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Spectroscopy
+ An analytical technique that exploits the magnetic properties of certain atomic nuclei to provide information on the structure, dynamics, reaction state and chemical environment of molecules.
+ NMR spectroscopy
+ Nuclear magnetic resonance spectroscopy
+ NMR
+ HOESY
+ Heteronuclear Overhauser Effect Spectroscopy
+ NOESY
+ Nuclear Overhauser Effect Spectroscopy
+ ROESY
+ Rotational Frame Nuclear Overhauser Effect Spectroscopy
+
+
+
+ NMR
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The classification of molecular sequences based on some measure of their similarity.
+
+
+ Methods including sequence motifs, profile and other diagnostic elements which (typically) represent conserved patterns (of residues or properties) in molecular sequences.
+ Sequence classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Primarily the classification of proteins (from sequence or structural data) into clusters, groups, families etc.
+
+ Protein classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Sequence motifs, or sequence profiles derived from an alignment of molecular sequences of a particular type.
+
+ This includes comparison, discovery, recognition etc. of sequence motifs.
+ Sequence motif or profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein chemical modifications, e.g. post-translational modifications.
+ PTMs
+ Post-translational modifications
+ Protein post-translational modification
+ Protein_modifications
+ Post-translation modifications
+ Protein chemical modifications
+ Protein post-translational modifications
+ GO:0006464
+ MOD:00000
+
+
+ EDAM does not describe all possible protein modifications. For fine-grained annotation of protein modification use the Gene Ontology (children of concept GO:0006464) and/or the Protein Modifications ontology (children of concept MOD:00000)
+ Protein modifications
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ http://edamontology.org/topic_3076
+ Molecular interactions, biological pathways, networks and other models.
+ Molecular_interactions_pathways_and_networks
+ Biological models
+ Biological networks
+ Biological pathways
+ Cellular process pathways
+ Disease pathways
+ Environmental information processing pathways
+ Gene regulatory networks
+ Genetic information processing pathways
+ Interactions
+ Interactome
+ Metabolic pathways
+ Molecular interactions
+ Networks
+ Pathways
+ Signal transduction pathways
+ Signaling pathways
+
+
+
+ Molecular interactions, pathways and networks
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.3 Information sciences
+ VT 1.3.3 Information retrieval
+ VT 1.3.4 Information management
+ VT 1.3.5 Knowledge management
+ VT 1.3.99 Other
+ The study and practice of information processing and use of computer information systems.
+ Information management
+ Information science
+ Knowledge management
+ Informatics
+
+
+ Informatics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ Data resources for the biological or biomedical literature, either a primary source of literature or some derivative.
+
+
+ Literature data resources
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Laboratory management and resources, for example, catalogues of biological resources for use in the lab including cell lines, viruses, plasmids, phages, DNA probes and primers and so on.
+ Laboratory_Information_management
+ Laboratory resources
+
+
+
+ Laboratory information management
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ General cell culture or data on specific cell lines.
+
+ Cell and tissue culture
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.15 Ecology
+ The ecological and environmental sciences and especially the application of information technology (ecoinformatics).
+ Ecology
+ Computational ecology
+ Ecoinformatics
+ Ecological informatics
+ Ecosystem science
+
+
+
+ Ecology
+
+ http://purl.bioontology.org/ontology/MSH/D004777
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Electron diffraction experiment
+ The study of matter by studying the interference pattern from firing electrons at a sample, to analyse structures at resolutions higher than can be achieved using light.
+ Electron_microscopy
+ Electron crystallography
+ SEM
+ Scanning electron microscopy
+ Single particle electron microscopy
+ TEM
+ Transmission electron microscopy
+
+
+
+ Electron microscopy
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The cell cycle including key genes and proteins.
+
+ Cell cycle
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The physicochemical, biochemical or structural properties of amino acids or peptides.
+
+
+ Peptides and amino acids
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ A specific organelle, or organelles in general, typically the genes and proteins (or genome and proteome).
+
+ Organelles
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Ribosomes, typically of ribosome-related genes and proteins.
+
+ Ribosomes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A database about scents.
+
+ Scents
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The structures of drugs, drug target, their interactions and binding affinities.
+
+
+ Drugs and target structures
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A specific organism, or group of organisms, used to study a particular aspect of biology.
+ Organisms
+ Model_organisms
+
+
+
+ This may include information on the genome (including molecular sequences and map, genes and annotation), proteome, as well as more general information about an organism.
+ Model organisms
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Whole genomes of one or more organisms, or genomes in general, such as meta-information on genomes, genome projects, gene names etc.
+ Genomics
+ Exomes
+ Genome annotation
+ Genomes
+ Personal genomics
+ Synthetic genomics
+ Viral genomics
+ Whole genomes
+
+
+
+ Genomics
+
+ http://purl.bioontology.org/ontology/MSH/D023281
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Particular gene(s), gene family or other gene group or system and their encoded proteins. Primarily the classification of proteins (from sequence or structural data) into clusters, groups, families etc., curation of a particular protein or protein family, or any other proteins that have been classified as members of a common group.
+ Genes, gene family or system
+ Gene_and protein_families
+ Gene families
+ Gene family
+ Gene system
+ Protein families
+ Protein sequence classification
+
+
+
+ A protein families database might include the classifier (e.g. a sequence profile) used to build the classification.
+ Gene and protein families
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Study of chromosomes.
+
+
+ Chromosomes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of genetic constitution of a living entity, such as an individual, an organism, a cell and so on, typically with respect to particular observable phenotypic traits, or resources concerning such traits, which might be an aspect of biochemistry, physiology, morphology, anatomy, development and so on.
+ Genotype and phenotype resources
+ Genotype-phenotype
+ Genotype-phenotype analysis
+ Genotype_and_phenotype
+ Genotype
+ Genotyping
+ Phenotype
+ Phenotyping
+
+
+
+ Genotype and phenotype
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Gene expression e.g. microarray data, northern blots, gene-indexed expression profiles etc.
+
+ Gene expression and microarray
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Molecular probes (e.g. a peptide probe or DNA microarray probe) or PCR primers and hybridisation oligos in a nucleic acid sequence.
+ Probes_and_primers
+ Primer quality
+ Primers
+ Probes
+
+
+ This includes the design of primers for PCR and DNA amplification or the design of molecular probes.
+ Probes and primers
+ http://purl.bioontology.org/ontology/MSH/D015335
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.6 Pathology
+ Diseases, including diseases in general and the genes, gene variations and proteins involved in one or more specific diseases.
+ Disease
+ Pathology
+
+
+
+ Pathology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ A particular protein, protein family or other group of proteins.
+
+ Specific protein resources
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.25 Taxonomy
+ Organism classification, identification and naming.
+ Taxonomy
+
+
+ Taxonomy
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Archival, processing and analysis of protein sequences and sequence-based entities such as alignments, motifs and profiles.
+
+
+ Protein sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ The archival, processing and analysis of nucleotide sequences and sequence-based entities such as alignments, motifs and profiles.
+
+
+ Nucleic acid sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The repetitive nature of molecular sequences.
+
+ Repeat sequences
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The (character) complexity of molecular sequences, particularly regions of low complexity.
+
+ Low complexity sequences
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A specific proteome including protein sequences and annotation.
+
+ Proteome
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA sequences and structure, including processes such as methylation and replication.
+ DNA analysis
+ DNA
+ Ancient DNA
+ Chromosomes
+
+
+ The DNA sequences might be coding or non-coding sequences.
+ DNA
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Protein-coding regions including coding sequences (CDS), exons, translation initiation sites and open reading frames.
+
+
+ Coding RNA
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Non-coding or functional RNA sequences, including regulatory RNA sequences, ribosomal RNA (rRNA) and transfer RNA (tRNA).
+ Functional_regulatory_and_non-coding_RNA
+ Functional RNA
+ Long ncRNA
+ Long non-coding RNA
+ Non-coding RNA
+ Regulatory RNA
+ Small and long non-coding RNAs
+ Small interfering RNA
+ Small ncRNA
+ Small non-coding RNA
+ Small nuclear RNA
+ Small nucleolar RNA
+ lncRNA
+ miRNA
+ microRNA
+ ncRNA
+ piRNA
+ piwi-interacting RNA
+ siRNA
+ snRNA
+ snoRNA
+
+
+ Non-coding RNA includes piwi-interacting RNA (piRNA), small nuclear RNA (snRNA) and small nucleolar RNA (snoRNA). Regulatory RNA includes microRNA (miRNA) - short single stranded RNA molecules that regulate gene expression, and small interfering RNA (siRNA).
+ Functional, regulatory and non-coding RNA
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ One or more ribosomal RNA (rRNA) sequences.
+
+ rRNA
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ One or more transfer RNA (tRNA) sequences.
+
+ tRNA
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Protein secondary structure or secondary structure alignments.
+
+
+ This includes assignment, analysis, comparison, prediction, rendering etc. of secondary structure data.
+ Protein secondary structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ RNA secondary or tertiary structure and alignments.
+
+ RNA structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Protein tertiary structures.
+
+
+ Protein tertiary structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Classification of nucleic acid sequences and structures.
+
+ Nucleic acid classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.14
+
+ Primarily the classification of proteins (from sequence or structural data) into clusters, groups, families etc., curation of a particular protein or protein family, or any other proteins that have been classified as members of a common group.
+
+
+ Protein families
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein tertiary structural domains and folds in a protein or polypeptide chain.
+ Protein_folds_and_structural_domains
+ Intramembrane regions
+ Protein domains
+ Protein folds
+ Protein membrane regions
+ Protein structural domains
+ Protein topological domains
+ Protein transmembrane regions
+ Transmembrane regions
+
+
+ This includes topological domains such as cytoplasmic regions in a protein.
+ This includes trans- or intra-membrane regions of a protein, typically describing physicochemical properties of the secondary structure elements. For example, the location and size of the membrane spanning segments and intervening loop regions, transmembrane region IN/OUT orientation relative to the membrane, plus the following data for each amino acid: A Z-coordinate (the distance to the membrane center), the free energy of membrane insertion (calculated in a sliding window over the sequence) and a reliability score. The z-coordinate implies information about re-entrant helices, interfacial helices, the tilt of a transmembrane helix and loop lengths.
+ Protein folds and structural domains
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ Nucleotide sequence alignments.
+
+
+ Nucleic acid sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein sequence alignments.
+
+ A sequence profile typically represents a sequence alignment.
+ Protein sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+
+ The archival, detection, prediction and analysis of positional features such as functional sites in nucleotide sequences.
+
+ Nucleic acid sites and features
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+
+ The detection, identification and analysis of positional features in proteins, such as functional sites.
+
+ Protein sites and features
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Proteins that bind to DNA and control transcription of DNA to mRNA (transcription factors) and also transcriptional regulatory sites, elements and regions (such as promoters, enhancers, silencers and boundary elements / insulators) in nucleotide sequences.
+ Transcription_factors_and_regulatory_sites
+ -10 signals
+ -35 signals
+ Attenuators
+ CAAT signals
+ CAT box
+ CCAAT box
+ CpG islands
+ Enhancers
+ GC signals
+ Isochores
+ Promoters
+ TATA signals
+ TFBS
+ Terminators
+ Transcription factor binding sites
+ Transcription factors
+ Transcriptional regulatory sites
+
+
+ This includes CpG rich regions (isochores) in a nucleotide sequence.
+ This includes promoters, CAAT signals, TATA signals, -35 signals, -10 signals, GC signals, primer binding sites for initiation of transcription or reverse transcription, enhancer, attenuator, terminators and ribosome binding sites.
+ Transcription factor proteins either promote (as an activator) or block (as a repressor) the binding to DNA of RNA polymerase. Regulatory sites including transcription factor binding site as well as promoters, enhancers, silencers and boundary elements / insulators.
+ Transcription factors and regulatory sites
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+
+ Protein phosphorylation and phosphorylation sites in protein sequences.
+
+ Phosphorylation sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Metabolic pathways.
+
+
+ Metabolic pathways
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Signaling pathways.
+
+
+ Signaling pathways
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein and peptide identification.
+
+ Protein and peptide identification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Biological or biomedical analytical workflows or pipelines.
+ Pipelines
+ Workflows
+ Software integration
+ Tool integration
+ Tool interoperability
+
+
+ Workflows
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Structuring data into basic types and (computational) objects.
+
+ Data types and objects
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Theoretical biology.
+
+ Theoretical biology
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Mitochondria, typically of mitochondrial genes and proteins.
+
+ Mitochondria
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ VT 1.5.10 Botany
+ VT 1.5.22 Plant science
+ Plants, e.g. information on a specific plant genome including molecular sequences, genes and annotation.
+ Botany
+ Plant
+ Plant science
+ Plants
+ Plant_biology
+ Plant anatomy
+ Plant cell biology
+ Plant ecology
+ Plant genetics
+ Plant physiology
+
+
+ The resource may be specific to a plant, a group of plants or all plants.
+ Plant biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ VT 1.5.28
+ Study of viruses, e.g. sequence and structural data, interactions of viral proteins, or a viral genome including molecular sequences, genes and annotation.
+ Virology
+
+
+ Virology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Fungi and molds, e.g. information on a specific fungal genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a fungus, a group of fungi or all fungi.
+ Fungi
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset). Definition is wrong anyway.
+ 1.17
+
+
+ Pathogens, e.g. information on a specific vertebrate genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a pathogen, a group of pathogens or all pathogens.
+ Pathogens
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Arabidopsis-specific data.
+
+ Arabidopsis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Rice-specific data.
+
+ Rice
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Informatics resources that aim to identify, map or analyse genetic markers in DNA sequences, for example to produce a genetic (linkage) map of a chromosome or genome or to analyse genetic linkage and synteny.
+
+ Genetic mapping and linkage
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study (typically comparison) of the sequence, structure or function of multiple genomes.
+ Comparative_genomics
+
+
+
+ Comparative genomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mobile genetic elements, such as transposons, Plasmids, Bacteriophage elements and Group II introns.
+ Mobile_genetic_elements
+ Transposons
+
+
+ Mobile genetic elements
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Human diseases, typically describing the genes, mutations and proteins implicated in disease.
+
+ Human disease
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.3 Immunology
+ The application of information technology to immunology such as immunological processes, immunological genes, proteins and peptide ligands, antigens and so on.
+ Immunology
+
+
+
+ Immunology
+
+ http://purl.bioontology.org/ontology/MSH/D007120
+ http://purl.bioontology.org/ontology/MSH/D007125
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Lipoproteins (protein-lipid assemblies), and proteins or regions of a protein that span or are associated with a membrane.
+ Membrane_and_lipoproteins
+ Lipoproteins
+ Membrane proteins
+ Transmembrane proteins
+
+
+ Membrane and lipoproteins
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Proteins that catalyze chemical reactions, the kinetics of enzyme-catalysed reactions, enzyme nomenclature etc.
+ Enzymology
+ Enzymes
+
+
+ Enzymes
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ PCR primers and hybridisation oligos in a nucleic acid sequence.
+
+
+ Primers
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Regions or sites in a eukaryotic and eukaryotic viral RNA sequence which directs endonuclease cleavage or polyadenylation of an RNA transcript.
+
+
+ PolyA signal or sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ CpG rich regions (isochores) in a nucleotide sequence.
+
+
+ CpG island and isochores
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Restriction enzyme recognition sites (restriction sites) in a nucleic acid sequence.
+
+
+ Restriction sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+
+ Splice sites in a nucleotide sequence or alternative RNA splicing events.
+
+ Splice sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Matrix/scaffold attachment regions (MARs/SARs) in a DNA sequence.
+
+
+ Matrix/scaffold attachment sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Operons (operators, promoters and genes) from a bacterial genome.
+
+
+ Operon
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Whole promoters or promoter elements (transcription start sites, RNA polymerase binding site, transcription factor binding sites, promoter enhancers etc) in a DNA sequence.
+
+
+ Promoters
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.24 Structural biology
+ The molecular structure of biological molecules, particularly macromolecules such as proteins and nucleic acids.
+ Structural_biology
+ Structural assignment
+ Structural determination
+ Structure determination
+
+
+
+ This includes experimental methods for biomolecular structure determination, such as X-ray crystallography, nuclear magnetic resonance (NMR), circular dichroism (CD) spectroscopy, microscopy etc., including the assignment or modelling of molecular structure from such data.
+ Structural biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Trans- or intra-membrane regions of a protein, typically describing physicochemical properties of the secondary structure elements.
+
+
+ Protein membrane regions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The comparison of two or more molecular structures, for example structure alignment and clustering.
+
+
+ This might involve comparison of secondary or tertiary (3D) structural information.
+ Structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of gene and protein function including the prediction of functional properties of a protein.
+ Functional analysis
+ Function_analysis
+ Protein function analysis
+ Protein function prediction
+
+
+
+ Function analysis
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Specific bacteria or archaea, e.g. information on a specific prokaryote genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a prokaryote, a group of prokaryotes or all prokaryotes.
+ Prokaryotes and Archaea
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein data resources.
+
+ Protein databases
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Experimental methods for biomolecular structure determination, such as X-ray crystallography, nuclear magnetic resonance (NMR), circular dichroism (CD) spectroscopy, microscopy etc., including the assignment or modelling of molecular structure from such data.
+
+ Structure determination
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.11 Cell biology
+ Cells, such as key genes and proteins involved in the cell cycle.
+ Cell_biology
+ Cells
+ Cellular processes
+ Protein subcellular localization
+
+
+ Cell biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Topic focused on identifying, grouping, or naming things in a structured way according to some schema based on observable relationships.
+
+ Classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Lipoproteins (protein-lipid assemblies).
+
+ Lipoproteins
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Visualise a phylogeny, for example, render a phylogenetic tree.
+
+ Phylogeny visualisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The application of information technology to chemistry in biological research environment.
+ Chemical informatics
+ Chemoinformatics
+ Cheminformatics
+
+
+
+ Cheminformatics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The holistic modelling and analysis of complex biological systems and the interactions therein.
+ Systems_biology
+ Biological modelling
+ Biological system modelling
+ Systems modelling
+
+
+
+ This includes databases of models and methods to construct or analyse a model.
+ Systems biology
+
+ http://purl.bioontology.org/ontology/MSH/D049490
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The application of statistical methods to biological problems.
+ Statistics_and_probability
+ Bayesian methods
+ Biostatistics
+ Descriptive statistics
+ Gaussian processes
+ Inferential statistics
+ Markov processes
+ Multivariate statistics
+ Probabilistic graphical model
+ Probability
+ Statistics
+
+
+
+ Statistics and probability
+
+
+
+ http://purl.bioontology.org/ontology/MSH/D056808
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Search for and retrieve molecular structures that are similar to a structure-based query (typically another structure or part of a structure).
+
+ The query is a structure-based entity such as another structure, a 3D (structural) motif, 3D profile or template.
+ Structure database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The construction, analysis, evaluation, refinement etc. of models of a molecule's properties or behaviour, including modelling the structure of proteins in complex with small molecules or other macromolecules (docking).
+ Molecular_modelling
+ Comparative modelling
+ Docking
+ Homology modeling
+ Homology modelling
+ Molecular docking
+
+
+ Molecular modelling
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.2
+
+
+ The prediction of functional properties of a protein.
+
+ Protein function prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Single nucleotide polymorphisms (SNP) and associated data, for example, the discovery and annotation of SNPs.
+
+
+ SNP
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Predict transmembrane domains and topology in protein sequences.
+
+ Transmembrane protein prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ The comparison of two or more nucleic acid (typically RNA) secondary or tertiary structures.
+
+ Use this concept for methods that are exclusively for nucleic acid structures.
+ Nucleic acid structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Exons in a nucleotide sequence.
+
+
+ Exons
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Transcription of DNA into RNA including the regulation of transcription.
+
+
+ Gene transcription
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA mutation.
+ DNA_mutation
+
+
+ DNA mutation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.2.16 Oncology
+ The study of cancer, for example, genes and proteins implicated in cancer.
+ Cancer biology
+ Oncology
+ Cancer
+ Neoplasm
+ Neoplasms
+
+
+
+ Oncology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Structural and associated data for toxic chemical substances.
+
+
+ Toxins and targets
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Introns in a nucleotide sequence.
+
+
+ Introns
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A topic concerning primarily bioinformatics software tools, typically the broad function or purpose of a tool.
+
+
+ Tool topic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A general area of bioinformatics study, typically the broad scope or category of content of a bioinformatics journal or conference proceeding.
+
+
+ Study topic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Biological nomenclature (naming), symbols and terminology.
+
+ Nomenclature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The genes, gene variations and proteins involved in one or more specific diseases.
+
+ Disease genes and proteins
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ http://edamontology.org/topic_3040
+ Protein secondary or tertiary structural data and/or associated annotation.
+ Protein structure
+ Protein_structure_analysis
+ Protein tertiary structure
+
+
+
+ Protein structure analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The study of human beings in general, including the human genome and proteome.
+ Humans
+ Human_biology
+
+
+ Human biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Informatics resource (typically a database) primarily focused on genes.
+
+ Gene resources
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Yeast, e.g. information on a specific yeast genome including molecular sequences, genes and annotation.
+
+ Yeast
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison) Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Eukaryotes or data concerning eukaryotes, e.g. information on a specific eukaryote genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a eukaryote, a group of eukaryotes or all eukaryotes.
+ Eukaryotes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Invertebrates, e.g. information on a specific invertebrate genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to an invertebrate, a group of invertebrates or all invertebrates.
+ Invertebrates
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Vertebrates, e.g. information on a specific vertebrate genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a vertebrate, a group of vertebrates or all vertebrates.
+ Vertebrates
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Unicellular eukaryotes, e.g. information on a unicellular eukaryote genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a unicellular eukaryote, a group of unicellular eukaryotes or all unicellular eukaryotes.
+ Unicellular eukaryotes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein secondary or tertiary structure alignments.
+
+ Protein structure alignment
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The study of matter and its structure by means of the diffraction of X-rays, typically the diffraction pattern caused by the regularly spaced atoms of a crystalline sample.
+ Crystallography
+ X-ray_diffraction
+ X-ray crystallography
+ X-ray microscopy
+
+
+
+ X-ray diffraction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Conceptualisation, categorisation and naming of entities or phenomena within biology or bioinformatics.
+
+ Ontologies, nomenclature and classification
+ http://purl.bioontology.org/ontology/MSH/D002965
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Immunity-related proteins and their ligands.
+ Immunoproteins_and_antigens
+ Antigens
+ Immunopeptides
+ Immunoproteins
+ Therapeutic antibodies
+
+
+
+ This includes T cell receptors (TR), major histocompatibility complex (MHC), immunoglobulin superfamily (IgSF) / antibodies, major histocompatibility complex superfamily (MhcSF), etc.
+ Immunoproteins and antigens
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Specific molecules, including large molecules built from repeating subunits (macromolecules) and small molecules of biological significance.
+ CHEBI:23367
+
+ Molecules
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.9 Toxicology
+ Toxins and the adverse effects of these chemical substances on living organisms.
+ Toxicology
+ Computational toxicology
+ Toxicoinformatics
+
+
+
+ Toxicology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Parallelised sequencing processes that are capable of sequencing many thousands of sequences simultaneously.
+
+ High-throughput sequencing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Gene regulatory networks.
+
+
+ Gene regulatory networks
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Informatics resources dedicated to one or more specific diseases (not diseases in general).
+
+ Disease (specific)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Variable number of tandem repeat (VNTR) polymorphism in a DNA sequence.
+
+
+ VNTR
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+ Microsatellite polymorphism in a DNA sequence.
+
+
+ Microsatellites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+ Restriction fragment length polymorphisms (RFLP) in a DNA sequence.
+
+
+ RFLP
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ DNA polymorphism.
+ DNA_polymorphism
+ Microsatellites
+ RFLP
+ SNP
+ Single nucleotide polymorphism
+ VNTR
+ Variable number of tandem repeat polymorphism
+ snps
+
+
+ Includes microsatellite polymorphism in a DNA sequence. A microsatellite polymorphism is a very short subsequence that is repeated a variable number of times between individuals. These repeats consist of the nucleotides cytosine and adenosine.
+ Includes restriction fragment length polymorphisms (RFLP) in a DNA sequence. An RFLP is defined by the presence or absence of a specific restriction site of a bacterial restriction enzyme.
+ Includes single nucleotide polymorphisms (SNP) and associated data, for example, the discovery and annotation of SNPs. A SNP is a DNA sequence variation where a single nucleotide differs between members of a species or paired chromosomes in an individual.
+ Includes variable number of tandem repeat (VNTR) polymorphism in a DNA sequence. VNTRs occur in non-coding regions of DNA and consist of a sub-sequence that is repeated a multiple (and varied) number of times.
+ DNA polymorphism
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Topic for the design of nucleic acid sequences with specific conformations.
+
+ Nucleic acid design
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The design of primers for PCR and DNA amplification or the design of molecular probes.
+
+ Primer or probe design
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.2
+
+
+ Molecular secondary or tertiary (3D) structural data resources, typically of proteins and nucleic acids.
+
+ Structure databases
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.2
+
+
+ Nucleic acid (secondary or tertiary) structure, such as whole structures, structural features and associated annotation.
+
+ Nucleic acid structure
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Molecular sequence data resources, including sequence sites, alignments, motifs and profiles.
+
+ Sequence databases
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Nucleotide sequences and associated concepts such as sequence sites, alignments, motifs and profiles.
+
+ Nucleic acid sequences
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+ Protein sequences and associated concepts such as sequence sites, alignments, motifs and profiles.
+
+
+ Protein sequences
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Protein interaction networks.
+
+ Protein interaction networks
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5.4 Biochemistry and molecular biology
+ The molecular basis of biological activity, particularly the macromolecules (e.g. proteins and nucleic acids) that are essential to life.
+ Molecular_biology
+ Biological processes
+
+
+
+ Molecular biology
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Mammals, e.g. information on a specific mammal genome including molecular sequences, genes and annotation.
+
+ Mammals
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5.5 Biodiversity conservation
+ The degree of variation of life forms within a given ecosystem, biome or an entire planet.
+ Biodiversity
+
+
+
+ Biodiversity
+
+ http://purl.bioontology.org/ontology/MSH/D044822
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The comparison, grouping together and classification of macromolecules on the basis of sequence similarity.
+
+ This includes the results of sequence clustering, ortholog identification, assignment to families, annotation etc.
+ Sequence clusters and classification
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The study of genes, genetic variation and heredity in living organisms.
+ Genetics
+ Genes
+ Heredity
+
+
+
+ Genetics
+
+ http://purl.bioontology.org/ontology/MSH/D005823
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The genes and genetic mechanisms such as Mendelian inheritance that underlie continuous phenotypic traits (such as height or weight).
+ Quantitative_genetics
+
+
+ Quantitative genetics
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The distribution of allele frequencies in a population of organisms and its change subject to evolutionary processes including natural selection, genetic drift, mutation and gene flow.
+ Population_genetics
+
+
+
+ Population genetics
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+ Regulatory RNA sequences including microRNA (miRNA) and small interfering RNA (siRNA).
+
+
+ Regulatory RNA
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ The documentation of resources such as tools, services and databases and how to get help.
+
+
+ Documentation and help
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The structural and functional organisation of genes and other genetic elements.
+
+ Genetic organisation
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The application of information technology to health, disease and biomedicine.
+ Biomedical informatics
+ Clinical informatics
+ Health and disease
+ Health informatics
+ Healthcare informatics
+ Medical_informatics
+
+
+
+ Medical informatics
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5.14 Developmental biology
+ How organisms grow and develop.
+ Developmental_biology
+ Development
+
+
+
+ Developmental biology
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The development of organisms between the one-cell stage (typically the zygote) and the end of the embryonic stage.
+ Embryology
+
+
+
+ Embryology
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 3.1.1 Anatomy and morphology
+ The form and function of the structures of living organisms.
+ Anatomy
+
+
+
+ Anatomy
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The scientific literature, language processing, reference information, and documentation.
+ Language
+ Literature
+ Literature_and_language
+ Bibliography
+ Citations
+ Documentation
+ References
+ Scientific literature
+
+
+
+ This includes the documentation of resources such as tools, services and databases, user support, how to get help etc.
+ Literature and language
+ http://purl.bioontology.org/ontology/MSH/D011642
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5 Biological sciences
+ VT 1.5.1 Aerobiology
+ VT 1.5.13 Cryobiology
+ VT 1.5.23 Reproductive biology
+ VT 1.5.3 Behavioural biology
+ VT 1.5.7 Biological rhythm
+ VT 1.5.8 Biology
+ VT 1.5.99 Other
+ The study of life and living organisms, including their morphology, biochemistry, physiology, development, evolution, and so on.
+ Biological science
+ Biology
+ Aerobiology
+ Behavioural biology
+ Biological rhythms
+ Chronobiology
+ Cryobiology
+ Reproductive biology
+
+
+
+ Biology
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Data stewardship
+ VT 1.3.1 Data management
+ Data management comprises the practices and principles of taking care of data, other than analysing them. This includes for example taking care of the associated metadata, formatting, storage, archiving, or access.
+ Metadata management
+
+
+
+ Data management
+
+
+ http://purl.bioontology.org/ontology/MSH/D000079803
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The detection of the positional features, such as functional and other key sites, in molecular sequences.
+
+ Sequence feature detection
+ http://purl.bioontology.org/ontology/MSH/D058977
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The detection of positional features such as functional sites in nucleotide sequences.
+
+ Nucleic acid feature detection
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The detection, identification and analysis of positional protein sequence features, such as functional sites.
+
+ Protein feature detection
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.2
+
+
+ Topic for modelling biological systems in mathematical terms.
+
+ Biological system modelling
+ true
+
+
+
+
+
+
+
+
+ beta13
+ The acquisition of data, typically measurements of physical systems using any type of sampling system, or by any other means.
+ Data collection
+
+
+ Data acquisition
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Specific genes and/or their encoded proteins or a family or other grouping of related genes and proteins.
+
+ Genes and proteins resources
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Topological domains such as cytoplasmic regions in a protein.
+
+
+ Protein topological domains
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+
+ Protein sequence variants produced e.g. from alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting.
+ Protein_variants
+
+
+ Protein variants
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.12
+
+
+ Regions within a nucleic acid sequence containing a signal that alters a biological function.
+
+ Expression signals
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+
+ Nucleic acids binding to some other molecule.
+ DNA_binding_sites
+ Matrix-attachment region
+ Matrix/scaffold attachment region
+ Nucleosome exclusion sequences
+ Restriction sites
+ Ribosome binding sites
+ Scaffold-attachment region
+
+
+ This includes ribosome binding sites (Shine-Dalgarno sequence in prokaryotes), restriction enzyme recognition sites (restriction sites) etc.
+ This includes sites involved with DNA replication and recombination. This includes binding sites for initiation of replication (origin of replication), regions where transfer is initiated during the conjugation or mobilisation (origin of transfer), starting sites for DNA duplication (origin of replication) and regions which are eliminated through any kind of recombination. Also nucleosome exclusion regions, i.e. specific patterns or regions which exclude nucleosomes (the basic structural units of eukaryotic chromatin which play a significant role in regulating gene expression).
+ DNA binding sites
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Repetitive elements within a nucleic acid sequence.
+
+
+ This includes long terminal repeats (LTRs); sequences (typically retroviral) directly repeated at both ends of a defined sequence and other types of repeating unit.
+ Nucleic acid repeats
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ DNA replication or recombination.
+ DNA_replication_and_recombination
+
+
+ DNA replication and recombination
+
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Coding sequences for a signal or transit peptide.
+
+
+ Signal or transit peptide
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Sequence tagged sites (STS) in nucleic acid sequences.
+
+
+ Sequence tagged sites
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ The determination of complete (typically nucleotide) sequences, including those of genomes (full genome sequencing, de novo sequencing and resequencing), amplicons and transcriptomes.
+ DNA-Seq
+ Sequencing
+ Chromosome walking
+ Clone verification
+ DNase-Seq
+ High throughput sequencing
+ High-throughput sequencing
+ NGS
+ NGS data analysis
+ Next gen sequencing
+ Next generation sequencing
+ Panels
+ Primer walking
+ Sanger sequencing
+ Targeted next-generation sequencing panels
+
+
+
+ Sequencing
+
+ http://purl.bioontology.org/ontology/MSH/D059014
+
+
+
+
+
+
+
+
+
+ 1.1
+ The analysis of protein-DNA interactions where chromatin immunoprecipitation (ChIP) is used in combination with massively parallel DNA sequencing to identify the binding sites of DNA-associated proteins.
+ ChIP-sequencing
+ Chip Seq
+ Chip sequencing
+ Chip-sequencing
+ ChIP-seq
+ ChIP-exo
+
+
+ ChIP-seq
+
+
+
+
+
+
+
+
+
+ 1.1
+ A topic concerning high-throughput sequencing of cDNA to measure the RNA content (transcriptome) of a sample, for example, to investigate how different alleles of a gene are expressed, detect post-transcriptional mutations or identify gene fusions.
+ RNA sequencing
+ RNA-Seq analysis
+ Small RNA sequencing
+ Small RNA-Seq
+ Small-Seq
+ Transcriptome profiling
+ WTSS
+ Whole transcriptome shotgun sequencing
+ RNA-Seq
+ MicroRNA sequencing
+ miRNA-seq
+
+
+ This includes small RNA profiling (small RNA-Seq), for example to find novel small RNAs, characterize mutations and analyze expression of small RNAs.
+ RNA-Seq
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ 1.3
+
+ DNA methylation including bisulfite sequencing, methylation sites and analysis, for example of patterns and profiles of DNA methylation in a population, tissue etc.
+
+
+ DNA methylation
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ The systematic study of metabolites, the chemical processes they are involved in, and the chemical fingerprints of specific cellular processes in a whole cell, tissue, organ or organism.
+ Metabolomics
+ Exometabolomics
+ LC-MS-based metabolomics
+ MS-based metabolomics
+ MS-based targeted metabolomics
+ MS-based untargeted metabolomics
+ Mass spectrometry-based metabolomics
+ Metabolites
+ Metabolome
+ Metabonomics
+ NMR-based metabolomics
+
+
+
+ Metabolomics
+
+ http://purl.bioontology.org/ontology/MSH/D055432
+
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ The study of the epigenetic modifications of a whole cell, tissue, organism etc.
+ Epigenomics
+
+
+
+ Epigenetics concerns the heritable changes in gene expression owing to mechanisms other than DNA sequence variation.
+ Epigenomics
+
+ http://purl.bioontology.org/ontology/MSH/D057890
+
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ Environmental DNA (eDNA)
+ Environmental sequencing
+ Biome sequencing
+ Community genomics
+ Ecogenomics
+ Environmental genomics
+ Environmental omics
+ The study of genetic material recovered from environmental samples, and associated environmental data.
+ Metagenomics
+ Shotgun metagenomics
+
+
+
+ Metagenomics
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Variation in chromosome structure including microscopic and submicroscopic types of variation such as deletions, duplications, copy-number variants, insertions, inversions and translocations.
+ DNA structural variation
+ Genomic structural variation
+ DNA_structural_variation
+ Deletion
+ Duplication
+ Insertion
+ Inversion
+ Translocation
+
+
+ Structural variation
+
+
+
+
+
+
+
+
+
+ 1.1
+ DNA-histone complexes (chromatin), organisation of chromatin into nucleosomes and packaging into higher-order structures.
+ DNA_packaging
+ Nucleosome positioning
+
+
+ DNA packaging
+
+ http://purl.bioontology.org/ontology/MSH/D042003
+
+
+
+
+
+
+
+
+ 1.1
+ 1.3
+
+
+ A topic concerning high-throughput sequencing of randomly fragmented genomic DNA, for example, to investigate whole-genome sequencing and resequencing, SNP discovery, identification of copy number variations and chromosomal rearrangements.
+
+ DNA-Seq
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.3
+
+
+ The alignment of sequences of (typically millions) of short reads to a reference genome. This is a specialised topic within sequence alignment, especially because of complications arising from RNA splicing.
+
+ RNA-Seq alignment
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ Experimental techniques that combine chromatin immunoprecipitation ('ChIP') with microarray ('chip'). ChIP-on-chip is used for high-throughput study of protein-DNA interactions.
+ ChIP-chip
+ ChIP-on-chip
+ ChiP
+
+
+ ChIP-on-chip
+
+
+
+
+
+
+
+
+
+ 1.3
+ The protection of data, such as patient health data, from damage or unwanted access from unauthorised users.
+ Data privacy
+ Data_security
+
+
+ Data security
+
+
+
+
+
+
+
+
+
+ 1.3
+ Biological samples and specimens.
+ Specimen collections
+ Sample_collections
+ biosamples
+ samples
+
+
+
+ Sample collections
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.4 Biochemistry and molecular biology
+ Chemical substances and physico-chemical processes that occur within living organisms.
+ Biological chemistry
+ Biochemistry
+ Glycomics
+ Pathobiochemistry
+ Phytochemistry
+
+
+
+ Biochemistry
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The study of evolutionary relationships amongst organisms from analysis of genetic information (typically gene or protein sequences).
+ Phylogenetics
+
+
+ Phylogenetics
+
+ http://purl.bioontology.org/ontology/MSH/D010802
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Topic concerning the study of heritable changes, for example in gene expression or phenotype, caused by mechanisms other than changes in the DNA sequence.
+ Epigenetics
+ DNA methylation
+ Histone modification
+ Methylation profiles
+
+
+
+ This includes sub-topics such as histone modification and DNA methylation (methylation sites and analysis, for example of patterns and profiles of DNA methylation in a population, tissue etc.)
+ Epigenetics
+
+ http://purl.bioontology.org/ontology/MSH/D019175
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The exploitation of biological process, structure and function for industrial purposes, for example the genetic manipulation of microorganisms for antibody production.
+ Biotechnology
+ Applied microbiology
+
+
+
+ Biotechnology
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Phenomes, or the study of the change in phenotype (the physical and biochemical traits of organisms) in response to genetic and environmental factors.
+ Phenomics
+
+
+
+ Phenomics
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.16 Evolutionary biology
+ The evolutionary processes, from the genetic to environmental scale, that produced life in all its diversity.
+ Evolution
+ Evolutionary_biology
+
+
+
+ Evolutionary biology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.1.8 Physiology
+ The functions of living organisms and their constituent parts.
+ Physiology
+ Electrophysiology
+
+
+
+ Physiology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.20 Microbiology
+ The biology of microorganisms.
+ Microbiology
+ Antimicrobial stewardship
+ Medical microbiology
+ Microbial genetics
+ Microbial physiology
+ Microbial surveillance
+ Microbiological surveillance
+ Molecular infection biology
+ Molecular microbiology
+
+
+
+ Microbiology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The biology of parasites.
+ Parasitology
+
+
+
+ Parasitology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.1 Basic medicine
+ VT 3.2 Clinical medicine
+ VT 3.2.9 General and internal medicine
+ Research in support of healing by diagnosis, treatment, and prevention of disease.
+ Biomedical research
+ Clinical medicine
+ Experimental medicine
+ Medicine
+ General medicine
+ Internal medicine
+
+
+
+ Medicine
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Neuroscience
+ VT 3.1.5 Neuroscience
+ The study of the nervous system and brain; its anatomy, physiology and function.
+ Neurobiology
+ Molecular neuroscience
+ Neurophysiology
+ Systemetic neuroscience
+
+
+
+ Neurobiology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.3.1 Epidemiology
+ Topic concerning the patterns, cause, and effect of disease within populations.
+ Public_health_and_epidemiology
+ Epidemiology
+ Public health
+
+
+
+ Public health and epidemiology
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.9 Biophysics
+ The use of physics to study biological systems.
+ Biophysics
+ Medical physics
+
+
+
+ Biophysics
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.12 Computational biology
+ VT 1.5.19 Mathematical biology
+ VT 1.5.26 Theoretical biology
+ The development and application of theory, analytical methods, mathematical models and computational simulation of biological systems.
+ Computational_biology
+ Biomathematics
+ Mathematical biology
+ Theoretical biology
+
+
+
+ This includes the modeling and treatment of biological processes and systems in mathematical terms (theoretical biology).
+ Computational biology
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The analysis of transcriptomes, or a set of all the RNA molecules in a specific cell, tissue etc.
+ Transcriptomics
+ Comparative transcriptomics
+ Transcriptome
+
+
+
+ Transcriptomics
+
+
+
+
+
+
+
+
+
+ 1.3
+ Chemical science
+ Polymer science
+ VT 1.7.10 Polymer science
+ VT 1.7 Chemical sciences
+ VT 1.7.2 Chemistry
+ VT 1.7.3 Colloid chemistry
+ VT 1.7.5 Electrochemistry
+ VT 1.7.6 Inorganic and nuclear chemistry
+ VT 1.7.7 Mathematical chemistry
+ VT 1.7.8 Organic chemistry
+ VT 1.7.9 Physical chemistry
+ The composition and properties of matter, reactions, and the use of reactions to create new substances.
+ Chemistry
+ Inorganic chemistry
+ Mathematical chemistry
+ Nuclear chemistry
+ Organic chemistry
+ Physical chemistry
+
+
+
+ Chemistry
+
+
+
+
+
+
+
+
+
+ 1.3
+ VT 1.1.99 Other
+ VT:1.1 Mathematics
+ The study of numbers (quantity) and other topics including structure, space, and change.
+ Maths
+ Mathematics
+ Dynamic systems
+ Dynamical systems
+ Dynymical systems theory
+ Graph analytics
+ Monte Carlo methods
+ Multivariate analysis
+
+
+
+ Mathematics
+
+
+
+
+
+
+
+
+
+ 1.3
+ VT 1.2 Computer sciences
+ VT 1.2.99 Other
+ The theory and practical use of computer systems.
+ Computer_science
+ Cloud computing
+ HPC
+ High performance computing
+ High-performance computing
+
+
+
+ Computer science
+
+
+
+
+
+
+
+
+
+ 1.3
+ The study of matter, space and time, and related concepts such as energy and force.
+ Physics
+
+
+
+ Physics
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ RNA splicing; post-transcription RNA modification involving the removal of introns and joining of exons.
+ Alternative splicing
+ RNA_splicing
+ Splice sites
+
+
+ This includes the study of splice sites, splicing patterns, alternative splicing events and variants, isoforms, etc.
+ RNA splicing
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The structure and function of genes at a molecular level.
+ Molecular_genetics
+
+
+
+ Molecular genetics
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.2.25 Respiratory systems
+ The study of the respiratory system.
+ Pulmonary medicine
+ Pulmonology
+ Respiratory_medicine
+ Pulmonary disorders
+ Respiratory disease
+
+
+
+ Respiratory medicine
+
+
+
+
+
+
+
+
+
+ 1.3
+ 1.4
+
+
+ The study of metabolic diseases.
+
+ Metabolic disease
+ true
+
+
+
+
+
+
+
+
+ 1.3
+ VT 3.3.4 Infectious diseases
+ The branch of medicine that deals with the prevention, diagnosis and management of transmissible disease with clinically evident illness resulting from infection with pathogenic biological agents (viruses, bacteria, fungi, protozoa, parasites and prions).
+ Communicable disease
+ Transmissible disease
+ Infectious_disease
+
+
+
+ Infectious disease
+
+
+
+
+
+
+
+
+
+ 1.3
+ The study of rare diseases.
+ Rare_diseases
+
+
+
+ Rare diseases
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.7.4 Computational chemistry
+ Topic concerning the development and application of theory, analytical methods, mathematical models and computational simulation of chemical systems.
+ Computational_chemistry
+
+
+
+ Computational chemistry
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The branch of medicine that deals with the anatomy, functions and disorders of the nervous system.
+ Neurology
+ Neurological disorders
+
+
+
+ Neurology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.2.22 Peripheral vascular disease
+ VT 3.2.4 Cardiac and Cardiovascular systems
+ The diseases and abnormalities of the heart and circulatory system.
+ Cardiovascular medicine
+ Cardiology
+ Cardiovascular disease
+ Heart disease
+
+
+
+ Cardiology
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The discovery and design of drugs or potential drug compounds.
+ Drug_discovery
+
+
+
+ This includes methods that search compound collections, generate or analyse drug 3D conformations, identify drug targets with structural docking etc.
+ Drug discovery
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Repositories of biological samples, typically human, for basic biological and clinical research.
+ Tissue collection
+ biobanking
+ Biobank
+
+
+
+ Biobank
+
+
+
+
+
+
+
+
+
+ 1.3
+ Laboratory study of mice, for example, phenotyping, and mutagenesis of mouse cell lines.
+ Laboratory mouse
+ Mouse_clinic
+
+
+
+ Mouse clinic
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of microbial cells including bacteria, yeasts and moulds.
+ Microbial_collection
+
+
+
+ Microbial collection
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of cells grown under laboratory conditions, specifically, cells from multi-cellular eukaryotes and especially animal cells.
+ Cell_culture_collection
+
+
+
+ Cell culture collection
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of DNA, including both collections of cloned molecules, and populations of micro-organisms that store and propagate cloned DNA.
+ Clone_library
+
+
+
+ Clone library
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ 'translating' the output of basic and biomedical research into better diagnostic tools, medicines, medical procedures, policies and advice.
+ Translational_medicine
+
+
+
+ Translational medicine
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of chemicals, typically for use in high-throughput screening experiments.
+ Compound_libraries_and_screening
+ Chemical library
+ Chemical screening
+ Compound library
+ Small chemical compounds libraries
+ Small compounds libraries
+ Target identification and validation
+
+
+
+ Compound libraries and screening
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.3 Health sciences
+ Topic concerning biological science that is (typically) performed in the context of medicine.
+ Biomedical sciences
+ Health science
+ Biomedical_science
+
+
+
+ Biomedical science
+
+
+
+
+
+
+
+
+
+ 1.3
+ Topic concerning the identity of biological entities, or reports on such entities, and the mapping of entities and records in different databases.
+ Data_identity_and_mapping
+
+
+
+ Data identity and mapping
+
+
+
+
+
+
+
+
+ 1.3
+ 1.12
+
+ The search and retrieval from a database on the basis of molecular sequence similarity.
+
+
+ Sequence search
+ true
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ Objective indicators of biological state often used to assess health, and determine treatment.
+ Diagnostic markers
+ Biomarkers
+
+
+ Biomarkers
+
+
+
+
+
+
+
+
+
+ 1.4
+ The procedures used to conduct an experiment.
+ Experimental techniques
+ Lab method
+ Lab techniques
+ Laboratory method
+ Laboratory_techniques
+ Experiments
+ Laboratory experiments
+
+
+
+ Laboratory techniques
+
+
+
+
+
+
+
+
+
+ 1.4
+ The development of policies, models and standards that cover data acquisition, storage and integration, such that it can be put to use, typically through a process of systematically applying statistical and / or logical techniques to describe, illustrate, summarise or evaluate data.
+ Data_architecture_analysis_and_design
+ Data analysis
+ Data architecture
+ Data design
+
+
+
+ Data architecture, analysis and design
+
+
+
+
+
+
+
+
+
+ 1.4
+ The combination and integration of data from different sources, for example into a central repository or warehouse, to provide users with a unified view of these data.
+ Data_integration_and_warehousing
+ Data integration
+ Data warehousing
+
+
+
+ Data integration and warehousing
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Any matter, surface or construct that interacts with a biological system.
+ Biomaterials
+
+
+
+ Biomaterials
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The use of synthetic chemistry to study and manipulate biological systems.
+ Chemical_biology
+
+
+
+ Chemical biology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 1.7.1 Analytical chemistry
+ The study of the separation, identification, and quantification of the chemical components of natural and artificial materials.
+ Analytical_chemistry
+
+
+
+ Analytical chemistry
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of chemistry to create new compounds.
+ Synthetic_chemistry
+ Synthetic organic chemistry
+
+
+
+ Synthetic chemistry
+
+
+
+
+
+
+
+
+
+ 1.4
+ 1.2.12 Programming languages
+ Software engineering
+ VT 1.2.1 Algorithms
+ VT 1.2.14 Software engineering
+ VT 1.2.7 Data structures
+ The process that leads from an original formulation of a computing problem to executable programs.
+ Computer programming
+ Software development
+ Software_engineering
+ Algorithms
+ Data structures
+ Programming languages
+
+
+
+ Software engineering
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The process of bringing a new drug to market once a lead compound has been identified through drug discovery.
+ Drug development science
+ Medicine development
+ Medicines development
+ Drug_development
+
+
+
+ Drug development
+
+
+
+
+
+
+
+
+
+ 1.4
+ Drug delivery
+ Drug formulation
+ Drug formulation and delivery
+ The process of formulating and administering a pharmaceutical compound to achieve a therapeutic effect.
+ Biotherapeutics
+
+
+
+ Biotherapeutics
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The study of how a drug interacts with the body.
+ Drug_metabolism
+ ADME
+ Drug absorption
+ Drug distribution
+ Drug excretion
+ Pharmacodynamics
+ Pharmacokinetics
+ Pharmacokinetics and pharmacodynamics
+
+
+
+ Drug metabolism
+
+
+
+
+
+
+
+
+
+ 1.4
+ Health care research
+ Health care science
+ The discovery, development and approval of medicines.
+ Drug discovery and development
+ Medicines_research_and_development
+
+
+
+ Medicines research and development
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ The safety (or lack) of drugs and other medical interventions.
+ Patient safety
+ Safety_sciences
+ Drug safety
+
+
+
+ Safety sciences
+
+
+
+
+
+
+
+
+
+ 1.4
+ The detection, assessment, understanding and prevention of adverse effects of medicines.
+ Pharmacovigilence
+
+
+
+ Pharmacovigilance concerns safety once a drug has gone to market.
+ Pharmacovigilance
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ The testing of new medicines, vaccines or procedures on animals (preclinical) and humans (clinical) prior to their approval by regulatory authorities.
+ Preclinical_and_clinical_studies
+ Clinical studies
+ Clinical study
+ Clinical trial
+ Drug trials
+ Preclinical studies
+ Preclinical study
+
+
+
+ Preclinical and clinical studies
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The visual representation of an object.
+ Imaging
+ Diffraction experiment
+ Microscopy
+ Microscopy imaging
+ Optical super resolution microscopy
+ Photonic force microscopy
+ Photonic microscopy
+
+
+
+ This includes diffraction experiments that are based upon the interference of waves, typically electromagnetic waves such as X-rays or visible light, by some object being studied, typically in order to produce an image of the object or determine its structure.
+ Imaging
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of imaging techniques to understand biology.
+ Biological imaging
+ Biological_imaging
+
+
+
+ Bioimaging
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.13 Medical imaging
+ VT 3.2.14 Nuclear medicine
+ VT 3.2.24 Radiology
+ The use of imaging techniques for clinical purposes for medical research.
+ Medical_imaging
+ Neuroimaging
+ Nuclear medicine
+ Radiology
+
+
+
+ Medical imaging
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of optical instruments to magnify the image of an object.
+ Light_microscopy
+
+
+
+ Light microscopy
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of animals and alternatives in experimental research.
+ Animal experimentation
+ Animal research
+ Animal testing
+ In vivo testing
+ Laboratory_animal_science
+
+
+
+ Laboratory animal science
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 1.5.18 Marine and Freshwater biology
+ The study of organisms in the ocean or brackish waters.
+ Marine_biology
+
+
+
+ Marine biology
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The identification of molecular and genetic causes of disease and the development of interventions to correct them.
+ Molecular_medicine
+
+
+
+ Molecular medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.3.7 Nutrition and Dietetics
+ The study of the effects of food components on the metabolism, health, performance and disease resistance of humans and animals. It also includes the study of human behaviours related to food choices.
+ Nutrition
+ Nutrition science
+ Nutritional_science
+ Dietetics
+
+
+
+ Nutritional science
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The collective characterisation and quantification of pools of biological molecules that translate into the structure, function, and dynamics of an organism or organisms.
+ Omics
+
+
+
+ Omics
+
+
+
+
+
+
+
+
+
+ 1.4
+ The processes that need to be in place to ensure the quality of products for human or animal use.
+ Quality assurance
+ Quality_affairs
+ Good clinical practice
+ Good laboratory practice
+ Good manufacturing practice
+
+
+
+ Quality affairs
+
+
+
+
+
+
+
+
+ 1.4
+ The protection of public health by controlling the safety and efficacy of products in areas including pharmaceuticals, veterinary medicine, medical devices, pesticides, agrochemicals, cosmetics, and complementary medicines.
+ Healthcare RA
+ Regulatory_affairs
+
+
+
+ Regulatory affairs
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ Biomedical approaches to clinical interventions that involve the use of stem cells.
+ Stem cell research
+ Regenerative_medicine
+
+
+
+ Regenerative medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ An interdisciplinary field of study that looks at the dynamic systems of the human body as part of an integrated whole, incorporating biochemical, physiological, and environmental interactions that sustain life.
+ Systems_medicine
+
+
+
+ Systems medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ Topic concerning the branch of medicine that deals with the prevention, diagnosis, and treatment of disease, disorder and injury in animals.
+ Veterinary_medicine
+ Clinical veterinary medicine
+
+
+
+ Veterinary medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ The application of biological concepts and methods to the analytical and synthetic methodologies of engineering.
+ Biological engineering
+ Bioengineering
+
+
+
+ Bioengineering
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ Ageing
+ Aging
+ Gerontology
+ VT 3.2.10 Geriatrics and gerontology
+ The branch of medicine dealing with the diagnosis, treatment and prevention of disease in older people, and the problems specific to aging.
+ Geriatrics
+ Geriatric_medicine
+
+
+
+ Geriatric medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.1 Allergy
+ Health issues related to the immune system and their prevention, diagnosis and management.
+ Allergy_clinical_immunology_and_immunotherapeutics
+ Allergy
+ Clinical immunology
+ Immune disorders
+ Immunomodulators
+ Immunotherapeutics
+
+
+
+ Allergy, clinical immunology and immunotherapeutics
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The prevention of pain and the evaluation, treatment and rehabilitation of persons in pain.
+ Algiatry
+ Pain management
+ Pain_medicine
+
+
+
+ Pain medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.2 Anaesthesiology
+ Anaesthesia and anaesthetics.
+ Anaesthetics
+ Anaesthesiology
+
+
+
+ Anaesthesiology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.5 Critical care/Emergency medicine
+ The multidisciplinary specialty that cares for patients with acute, life-threatening illness or injury.
+ Acute medicine
+ Emergency medicine
+ Intensive care medicine
+ Critical_care_medicine
+
+
+
+ Critical care medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.7 Dermatology and venereal diseases
+ The branch of medicine that deals with prevention, diagnosis and treatment of disorders of the skin, scalp, hair and nails.
+ Dermatology
+ Dermatological disorders
+
+
+
+ Dermatology
+
+
+
+
+
+
+
+
+
+ 1.4
+ The study, diagnosis, prevention and treatments of disorders of the oral cavity, maxillofacial area and adjacent structures.
+ Dentistry
+
+
+
+ Dentistry
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.20 Otorhinolaryngology
+ The branch of medicine that deals with the prevention, diagnosis, and treatment of disorders of the ear, nose and throat.
+ Audiovestibular medicine
+ Otolaryngology
+ Otorhinolaryngology
+ Ear_nose_and_throat_medicine
+ Head and neck disorders
+
+
+
+ Ear, nose and throat medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The branch of medicine dealing with diseases of endocrine organs, hormone systems, their target organs, and disorders of the pathways of glucose and lipid metabolism.
+ Endocrinology_and_metabolism
+ Endocrine disorders
+ Endocrinology
+ Metabolic disorders
+ Metabolism
+
+
+
+ Endocrinology and metabolism
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.11 Hematology
+ The branch of medicine that deals with the blood, blood-forming organs and blood diseases.
+ Haematology
+ Blood disorders
+ Haematological disorders
+
+
+
+ Haematology
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.8 Gastroenterology and hepatology
+ The branch of medicine that deals with disorders of the oesophagus, stomach, duodenum, jejunum, ileum, large intestine, sigmoid colon and rectum.
+ Gastroenterology
+ Gastrointestinal disorders
+
+
+
+ Gastroenterology
+
+
+
+
+
+
+
+
+
+ 1.4
+ The study of the biological and physiological differences between males and females and how they effect differences in disease presentation and management.
+ Gender_medicine
+
+
+
+ Gender medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.15 Obstetrics and gynaecology
+ The branch of medicine that deals with the health of the female reproductive system, pregnancy and birth.
+ Gynaecology_and_obstetrics
+ Gynaecological disorders
+ Gynaecology
+ Obstetrics
+
+
+
+ Gynaecology and obstetrics
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The branch of medicine that deals with the liver, gallbladder, bile ducts and bile.
+ Hepatology
+ Hepatic_and_biliary_medicine
+ Liver disorders
+
+
+
+ Hepatic and biliary medicine
+
+ Hepatobiliary medicine
+
+
+
+
+
+
+
+
+ 1.4
+ 1.13
+
+ The branch of medicine that deals with the infectious diseases of the tropics.
+
+
+ Infectious tropical disease
+ true
+
+
+
+
+
+
+
+
+ 1.4
+ The branch of medicine that treats body wounds or shock produced by sudden physical injury, as from violence or accident.
+ Traumatology
+ Trauma_medicine
+
+
+
+ Trauma medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The branch of medicine that deals with the diagnosis, management and prevention of poisoning and other adverse health effects caused by medications, occupational and environmental toxins, and biological agents.
+ Medical_toxicology
+
+
+
+ Medical toxicology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.19 Orthopaedics
+ VT 3.2.26 Rheumatology
+ The branch of medicine that deals with the prevention, diagnosis, and treatment of disorders of the muscle, bone and connective tissue. It incorporates aspects of orthopaedics, rheumatology, rehabilitation medicine and pain medicine.
+ Musculoskeletal_medicine
+ Musculoskeletal disorders
+ Orthopaedics
+ Rheumatology
+
+
+
+ Musculoskeletal medicine
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Optometry
+ VT 3.2.17 Ophthalmology
+ VT 3.2.18 Optometry
+ The branch of medicine that deals with disorders of the eye, including eyelid, optic nerve/visual pathways and ocular muscles.
+ Ophthalmology
+ Eye disoders
+
+
+
+ Ophthalmology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.21 Paediatrics
+ The branch of medicine that deals with the medical care of infants, children and adolescents.
+ Child health
+ Paediatrics
+
+
+
+ Paediatrics
+
+
+
+
+
+
+
+
+
+ 1.4
+ Mental health
+ VT 3.2.23 Psychiatry
+ The branch of medicine that deals with the management of mental illness, emotional disturbance and abnormal behaviour.
+ Psychiatry
+ Psychiatric disorders
+
+
+
+ Psychiatry
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.3 Andrology
+ The health of the reproductive processes, functions and systems at all stages of life.
+ Reproductive_health
+ Andrology
+ Family planning
+ Fertility medicine
+ Reproductive disorders
+
+
+
+ Reproductive health
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.28 Transplantation
+ The use of operative, manual and instrumental techniques on a patient to investigate and/or treat a pathological condition or help improve bodily function or appearance.
+ Surgery
+ Transplantation
+
+
+
+ Surgery
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.29 Urology and nephrology
+ The branches of medicine and physiology focussing on the function and disorders of the urinary system in males and females, the reproductive system in males, and the kidney.
+ Urology_and_nephrology
+ Kidney disease
+ Nephrology
+ Urological disorders
+ Urology
+
+
+
+ Urology and nephrology
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Alternative medicine
+ Holistic medicine
+ Integrative medicine
+ VT 3.2.12 Integrative and Complementary medicine
+ Medical therapies that fall beyond the scope of conventional medicine but may be used alongside it in the treatment of disease and ill health.
+ Complementary_medicine
+
+
+
+ Complementary medicine
+
+
+
+
+
+
+
+
+
+ 1.7
+ Techniques that use magnetic fields and radiowaves to form images, typically to investigate the anatomy and physiology of the human body.
+ MRT
+ Magnetic resonance imaging
+ Magnetic resonance tomography
+ NMRI
+ Nuclear magnetic resonance imaging
+ MRI
+
+
+ MRI
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ The study of matter by studying the diffraction pattern from firing neutrons at a sample, typically to determine atomic and/or magnetic structure.
+ Neutron diffraction experiment
+ Neutron_diffraction
+ Elastic neutron scattering
+ Neutron microscopy
+
+
+ Neutron diffraction
+
+
+
+
+
+
+
+
+
+ 1.7
+ Imaging in sections (sectioning), through the use of a wave-generating device (tomograph) that generates an image (a tomogram).
+ CT
+ Computed tomography
+ TDM
+ Tomography
+ Electron tomography
+ PET
+ Positron emission tomography
+ X-ray tomography
+
+
+ Tomography
+
+
+
+
+
+
+
+
+
+ 1.7
+ true
+ KDD
+ Knowledge discovery in databases
+ VT 1.3.2 Data mining
+ The discovery of patterns in large data sets and the extraction and transformation of those patterns into a useful format.
+ Data_mining
+ Pattern recognition
+
+
+ Data mining
+
+
+
+
+
+
+
+
+
+ 1.7
+ Artificial Intelligence
+ VT 1.2.2 Artificial Intelligence (expert systems, machine learning, robotics)
+ A topic concerning the application of artificial intelligence methods to algorithms, in order to create methods that can learn from data in order to generate an output, rather than relying on explicitly encoded information only.
+ Machine_learning
+ Active learning
+ Ensembl learning
+ Kernel methods
+ Knowledge representation
+ Neural networks
+ Recommender system
+ Reinforcement learning
+ Supervised learning
+ Unsupervised learning
+
+
+ Machine learning
+
+
+
+
+
+
+
+
+
+ 1.8
+ Database administration
+ Information systems
+ Databases
+ The general handling of data stored in digital archives such as databases, databanks, web portals, and other data resources.
+ Database_management
+ Content management
+ Document management
+ File management
+ Record management
+
+
+ This includes databases for the results of scientific experiments, the application of high-throughput technology, computational analysis and the scientific literature. It covers the management and manipulation of digital documents, including database records, files, and reports.
+ Database management
+
+
+
+
+
+
+
+
+
+ 1.8
+ VT 1.5.29 Zoology
+ Animals, e.g. information on a specific animal genome including molecular sequences, genes and annotation.
+ Animal
+ Animal biology
+ Animals
+ Metazoa
+ Zoology
+ Animal genetics
+ Animal physiology
+ Entomology
+
+
+ The study of the animal kingdom.
+ Zoology
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ The biology, archival, detection, prediction and analysis of positional features such as functional and other key sites, in protein sequences and the conserved patterns (motifs, profiles etc.) that may be used to describe them.
+ Protein_sites_features_and_motifs
+ Protein sequence features
+ Signal peptide cleavage sites
+
+
+ A signal peptide coding sequence encodes an N-terminal domain of a secreted protein, which is involved in attaching the polypeptide to a membrane leader sequence. A transit peptide coding sequence encodes an N-terminal domain of a nuclear-encoded organellar protein; which is involved in import of the protein into the organelle.
+ Protein sites, features and motifs
+
+
+
+
+
+
+
+
+
+ 1.8
+ The biology, archival, detection, prediction and analysis of positional features such as functional and other key sites, in nucleic acid sequences and the conserved patterns (motifs, profiles etc.) that may be used to describe them.
+ Nucleic_acid_sites_features_and_motifs
+ Nucleic acid functional sites
+ Nucleic acid sequence features
+ Primer binding sites
+ Sequence tagged sites
+
+
+ Sequence tagged sites are short DNA sequences that are unique within a genome and serve as a mapping landmark; detectable by PCR, they allow a genome to be mapped via an ordering of STSs.
+ Nucleic acid sites, features and motifs
+
+
+
+
+
+
+
+
+
+ 1.8
+ Transcription of DNA into RNA and features of messenger RNA (mRNA) molecules including precursor RNA, primary (unprocessed) transcript and fully processed molecules.
+ Gene_transcripts
+ Coding RNA
+ EST
+ Exons
+ Fusion transcripts
+ Gene transcript features
+ Introns
+ PolyA signal
+ PolyA site
+ Signal peptide coding sequence
+ Transit peptide coding sequence
+ cDNA
+ mRNA
+ mRNA features
+
+
+ This includes 5'untranslated region (5'UTR), coding sequences (CDS), exons, intervening sequences (intron) and 3'untranslated regions (3'UTR).
+ This includes Introns, and protein-coding regions including coding sequences (CDS), exons, translation initiation sites and open reading frames. Also expressed sequence tag (EST) or complementary DNA (cDNA) sequences.
+ This includes coding sequences for a signal or transit peptide. A signal peptide coding sequence encodes an N-terminal domain of a secreted protein, which is involved in attaching the polypeptide to a membrane leader sequence. A transit peptide coding sequence encodes an N-terminal domain of a nuclear-encoded organellar protein; which is involved in import of the protein into the organelle.
+ This includes regions or sites in a eukaryotic and eukaryotic viral RNA sequence which direct endonuclease cleavage or polyadenylation of an RNA transcript. A polyA signal is required for endonuclease cleavage of an RNA transcript that is followed by polyadenylation. A polyA site is a site on an RNA transcript to which adenine residues will be added during post-transcriptional polyadenylation.
+ Gene transcripts
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-ligand (small molecule) interaction(s).
+
+
+ Protein-ligand interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-drug interaction(s).
+
+
+ Protein-drug interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Genotype experiment including case control, population, and family studies. These might use array based methods and re-sequencing methods.
+ Genotyping_experiment
+
+
+ Genotyping experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ Genome-wide association study experiments.
+ GWAS
+ GWAS analysis
+ Genome-wide association study
+ GWAS_study
+
+
+ GWAS study
+
+
+
+
+
+
+
+
+
+ 1.8
+ Microarray experiments including conditions, protocol, sample:data relationships etc.
+ Microarrays
+ Microarray_experiment
+ Gene expression microarray
+ Genotyping array
+ Methylation array
+ MicroRNA array
+ Multichannel microarray
+ One channel microarray
+ Proprietary platform micoarray
+ RNA chips
+ RNA microarrays
+ Reverse phase protein array
+ SNP array
+ Tiling arrays
+ Tissue microarray
+ Two channel microarray
+ aCGH microarray
+ mRNA microarray
+ miRNA array
+
+
+ This might specify which raw data file relates to which sample and information on hybridisations, e.g. which are technical and which are biological replicates.
+ Microarray experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ PCR experiments, e.g. quantitative real-time PCR.
+ Polymerase chain reaction
+ PCR_experiment
+ Quantitative PCR
+ RT-qPCR
+ Real Time Quantitative PCR
+
+
+ PCR experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ Proteomics experiments.
+ Proteomics_experiment
+ 2D PAGE experiment
+ DIA
+ Data-independent acquisition
+ MS
+ MS experiments
+ Mass spectrometry
+ Mass spectrometry experiments
+ Northern blot experiment
+ Spectrum demultiplexing
+
+
+ This includes two-dimensional gel electrophoresis (2D PAGE) experiments, gels or spots in a gel. Also mass spectrometry - an analytical chemistry technique that measures the mass-to-charge ratio and abundance of ions in the gas phase. Also Northern blot experiments.
+ Proteomics experiment
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Two-dimensional gel electrophoresis experiments, gels or spots in a gel.
+
+
+ 2D PAGE experiment
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Northern Blot experiments.
+
+
+ Northern blot experiment
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ RNAi experiments.
+ RNAi_experiment
+
+
+ RNAi experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ Biological computational model experiments (simulation), for example the minimum information required in order to permit its correct interpretation and reproduction.
+ Simulation_experiment
+
+
+ Simulation experiment
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-DNA/RNA interaction(s).
+
+
+ Protein-nucleic acid interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-protein interaction(s), including interactions between protein domains.
+
+
+ Protein-protein interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Cellular process pathways.
+
+
+ Cellular process pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Disease pathways, typically of human disease.
+
+
+ Disease pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Environmental information processing pathways.
+
+
+ Environmental information processing pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Genetic information processing pathways.
+
+
+ Genetic information processing pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Super-secondary structure of protein sequence(s).
+
+
+ Protein super-secondary structure
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Catalytic residues (active site) of an enzyme.
+
+
+ Protein active sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Binding sites in proteins, including cleavage sites (for a proteolytic enzyme or agent), key residues involved in protein folding, catalytic residues (active site) of an enzyme, ligand-binding (non-catalytic) residues of a protein, such as sites that bind metal, prosthetic groups or lipids, RNA and DNA-binding proteins and binding sites etc.
+ Protein_binding_sites
+ Enzyme active site
+ Protein cleavage sites
+ Protein functional sites
+ Protein key folding sites
+ Protein-nucleic acid binding sites
+
+
+ Protein binding sites
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ RNA and DNA-binding proteins and binding sites in protein sequences.
+
+
+ Protein-nucleic acid binding sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Cleavage sites (for a proteolytic enzyme or agent) in a protein sequence.
+
+
+ Protein cleavage sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Chemical modification of a protein.
+
+
+ Protein chemical modifications
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Disordered structure in a protein.
+ Protein features (disordered structure)
+ Protein_disordered_structure
+
+
+ Protein disordered structure
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Structural domains or 3D folds in a protein or polypeptide chain.
+
+
+ Protein domains
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Key residues involved in protein folding.
+
+
+ Protein key folding sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Post-translation modifications in a protein sequence, typically describing the specific sites involved.
+
+
+ Protein post-translational modifications
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Secondary structure (predicted or real) of a protein, including super-secondary structure.
+ Protein features (secondary structure)
+ Protein_secondary_structure
+ Protein super-secondary structure
+
+
+ Super-secondary structures include leucine zippers, coiled coils, Helix-Turn-Helix etc.
+ The location and size of the secondary structure elements and intervening loop regions is typically given. The report can include disulphide bonds and post-translationally formed peptide bonds (crosslinks).
+ Protein secondary structure
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Short repetitive subsequences (repeat sequences) in a protein sequence.
+
+
+ Protein sequence repeats
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Signal peptides or signal peptide cleavage sites in protein sequences.
+
+
+ Protein signal peptides
+ true
+
+
+
+
+
+
+
+
+ 1.10
+ VT 1.1.1 Applied mathematics
+ The application of mathematics to specific problems in science, typically by the formulation and analysis of mathematical models.
+ Applied_mathematics
+
+
+ Applied mathematics
+
+
+
+
+
+
+
+
+
+ 1.10
+ VT 1.1.1 Pure mathematics
+ The study of abstract mathematical concepts.
+ Pure_mathematics
+ Linear algebra
+
+
+ Pure mathematics
+
+
+
+
+
+
+
+
+
+ 1.10
+ The control of data entry and maintenance to ensure the data meets defined standards, qualities or constraints.
+ Data_governance
+ Data stewardship
+
+
+ Data governance
+
+ http://purl.bioontology.org/ontology/MSH/D030541
+
+
+
+
+
+
+
+
+ 1.10
+ The quality, integrity, and cleaning up of data.
+ Data_quality_management
+ Data clean-up
+ Data cleaning
+ Data integrity
+ Data quality
+
+
+ Data quality management
+
+
+
+
+
+
+
+
+
+ 1.10
+ Freshwater science
+ VT 1.5.18 Marine and Freshwater biology
+ The study of organisms in freshwater ecosystems.
+ Freshwater_biology
+
+
+
+ Freshwater biology
+
+
+
+
+
+
+
+
+
+ 1.10
+ true
+ VT 3.1.2 Human genetics
+ The study of inheritance in human beings.
+ Human_genetics
+
+
+
+ Human genetics
+
+
+
+
+
+
+
+
+
+ 1.10
+ VT 3.3.14 Tropical medicine
+ Health problems that are prevalent in tropical and subtropical regions.
+ Tropical_medicine
+
+
+
+ Tropical medicine
+
+
+
+
+
+
+
+
+
+ 1.10
+ true
+ VT 3.3.14 Tropical medicine
+ VT 3.4 Medical biotechnology
+ VT 3.4.1 Biomedical devices
+ VT 3.4.2 Health-related biotechnology
+ Biotechnology applied to the medical sciences and the development of medicines.
+ Medical_biotechnology
+ Pharmaceutical biotechnology
+
+
+
+ Medical biotechnology
+
+
+
+
+
+
+
+
+
+ 1.10
+ true
+ VT 3.4.5 Molecular diagnostics
+ An approach to medicine whereby decisions and practices are tailored to the individual patient based on their predicted response or risk of disease.
+ Precision medicine
+ Personalised_medicine
+ Molecular diagnostics
+
+
+
+ Personalised medicine
+
+
+
+
+
+
+
+
+
+ 1.12
+ Experimental techniques to purify a protein-DNA crosslinked complex. Usually sequencing follows e.g. in the techniques ChIP-chip, ChIP-seq and MeDIP-seq.
+ Chromatin immunoprecipitation
+ Immunoprecipitation_experiment
+
+
+ Immunoprecipitation experiment
+
+
+
+
+
+
+
+
+
+ 1.12
+ Laboratory technique to sequence the complete DNA sequence of an organism's genome at a single time.
+ Genome sequencing
+ WGS
+ Whole_genome_sequencing
+ De novo genome sequencing
+ Whole genome resequencing
+
+
+ Whole genome sequencing
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Laboratory technique to sequence the methylated regions in DNA.
+ MeDIP-chip
+ MeDIP-seq
+ mDIP
+ Methylated_DNA_immunoprecipitation
+ BS-Seq
+ Bisulfite sequencing
+ MeDIP
+ Methylated DNA immunoprecipitation (MeDIP)
+ Methylation sequencing
+ WGBS
+ Whole-genome bisulfite sequencing
+ methy-seq
+ methyl-seq
+
+
+ Methylated DNA immunoprecipitation
+
+
+
+
+
+
+
+
+
+ 1.12
+ Laboratory technique to sequence all the protein-coding regions in a genome, i.e., the exome.
+ Exome
+ Exome analysis
+ Exome capture
+ Targeted exome capture
+ WES
+ Whole exome sequencing
+ Exome_sequencing
+
+
+ Exome sequencing is considered a cheap alternative to whole genome sequencing.
+ Exome sequencing
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ true
+ The design of an experiment intended to test a hypothesis, and describe or explain empirical data obtained under various experimental conditions.
+ Design of experiments
+ Experimental design
+ Studies
+ Experimental_design_and_studies
+
+
+ Experimental design and studies
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ The design of an experiment involving non-human animals.
+ Animal_study
+ Challenge study
+
+
+ Animal study
+
+
+
+
+
+
+
+
+
+
+ 1.13
+ true
+ The ecology of microorganisms including their relationship with one another and their environment.
+ Environmental microbiology
+ Microbial_ecology
+ Community analysis
+ Microbiome
+ Molecular community analysis
+
+
+ Microbial ecology
+
+
+
+
+
+
+
+
+
+ 1.17
+ An antibody-based technique used to map in vivo RNA-protein interactions.
+ RIP
+ RNA_immunoprecipitation
+ CLIP
+ CLIP-seq
+ HITS-CLIP
+ PAR-CLIP
+ iCLIP
+
+
+ RNA immunoprecipitation
+
+
+
+
+
+
+
+
+
+ 1.17
+ Large-scale study (typically comparison) of DNA sequences of populations.
+ Population_genomics
+
+
+
+ Population genomics
+
+
+
+
+
+
+
+
+
+ 1.20
+ Agriculture
+ Agroecology
+ Agronomy
+ Multidisciplinary study, research and development within the field of agriculture.
+ Agricultural_science
+ Agricultural biotechnology
+ Agricultural economics
+ Animal breeding
+ Animal husbandry
+ Animal nutrition
+ Farming systems research
+ Food process engineering
+ Food security
+ Horticulture
+ Phytomedicine
+ Plant breeding
+ Plant cultivation
+ Plant nutrition
+ Plant pathology
+ Soil science
+
+
+ Agricultural science
+
+
+
+
+
+
+
+
+
+ 1.20
+ Approach which samples, in parallel, all genes in all organisms present in a given sample, e.g. to provide insight into biodiversity and function.
+ Shotgun metagenomic sequencing
+ Metagenomic_sequencing
+
+
+ Metagenomic sequencing
+
+
+
+
+
+
+
+
+
+ 1.21
+ Environment
+ Study of the environment, the interactions between its physical, chemical, and biological components and its effect on life. Also how humans impact upon the environment, and how we can manage and utilise natural resources.
+ Environmental_science
+
+
+ Environmental sciences
+
+
+
+
+
+
+
+
+
+ 1.22
+ The study and simulation of molecular conformations using a computational model and computer simulations.
+
+
+ This includes methods such as Molecular Dynamics, Coarse-grained dynamics, metadynamics, Quantum Mechanics, QM/MM, Markov State Models, etc.
+ Biomolecular simulation
+
+
+
+
+
+
+
+
+
+ 1.22
+ The application of multi-disciplinary science and technology for the construction of artificial biological systems for diverse applications.
+ Biomimeic chemistry
+
+
+ Synthetic biology
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ The application of biotechnology to directly manipulate an organism's genes.
+ Genetic manipulation
+ Genetic modification
+ Genetic_engineering
+ Genome editing
+ Genome engineering
+
+
+ Genetic engineering
+
+
+
+
+
+
+
+
+
+ 1.24
+ A field of biological research focused on the discovery and identification of peptides, typically by comparing mass spectra against a protein database.
+ Proteogenomics
+
+
+ Proteogenomics
+
+
+
+
+
+
+
+
+
+ 1.24
+ Amplicon panels
+ Resequencing
+ Laboratory experiment to identify the differences between a specific genome (of an individual) and a reference genome (developed typically from many thousands of individuals). WGS re-sequencing is used as the gold standard to detect variations compared to a given reference genome, including small variants (SNP and InDels) as well as larger genome re-organisations (CNVs, translocations, etc.).
+ Highly targeted resequencing
+ Whole genome resequencing (WGR)
+ Whole-genome re-sequencing (WGSR)
+ Amplicon sequencing
+ Amplicon-based sequencing
+ Ultra-deep sequencing
+ Amplicon sequencing is the ultra-deep sequencing of PCR products (amplicons), usually for the purpose of efficient genetic variant identification and characterisation in specific genomic regions.
+ Genome resequencing
+
+
+
+
+
+
+
+
+
+ 1.24
+ A biomedical field that bridges immunology and genetics, to study the genetic basis of the immune system.
+ Immune system genetics
+ Immungenetics
+ Immunology and genetics
+ Immunogenetics
+ Immunogenes
+
+
+ This involves the study of often complex genetic traits underlying diseases involving defects in the immune system. For example, identifying target genes for therapeutic approaches, or genetic variations involved in immunological pathology.
+ Immunogenetics
+
+
+
+
+
+
+
+
+
+ 1.24
+ Interdisciplinary science focused on extracting information from chemical systems by data analytical approaches, for example multivariate statistics, applied mathematics, and computer science.
+ Chemometrics
+
+
+ Chemometrics
+
+
+
+
+
+
+
+
+
+ 1.24
+ Cytometry is the measurement of the characteristics of cells.
+ Cytometry
+ Flow cytometry
+ Image cytometry
+ Mass cytometry
+
+
+ Cytometry
+
+
+
+
+
+
+
+
+
+ 1.24
+ Biotechnology approach that seeks to optimize cellular genetic and regulatory processes in order to increase the cells' production of a certain substance.
+
+
+ Metabolic engineering
+
+
+
+
+
+
+
+
+
+ 1.24
+ Molecular biology methods used to analyze the spatial organization of chromatin in a cell.
+ 3C technologies
+ 3C-based methods
+ Chromosome conformation analysis
+ Chromosome_conformation_capture
+ Chromatin accessibility
+ Chromatin accessibility assay
+ Chromosome conformation capture
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The study of microbe gene expression within natural environments (i.e. the metatranscriptome).
+ Metatranscriptomics
+
+
+ Metatranscriptomics methods can be used for whole gene expression profiling of complex microbial communities.
+ Metatranscriptomics
+
+
+
+
+
+
+
+
+
+ 1.24
+ The reconstruction and analysis of genomic information in extinct species.
+ Paleogenomics
+ Ancestral genomes
+ Paleogenetics
+ Paleogenomics
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The biological classification of organisms by categorizing them in groups ("clades") based on their most recent common ancestor.
+ Cladistics
+ Tree of life
+
+
+ Cladistics
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The study of the process and mechanism of change of biomolecules such as DNA, RNA, and proteins across generations.
+ Molecular_evolution
+
+
+ Molecular evolution
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Immunoinformatics is the field of computational biology that deals with the study of immunological questions. Immunoinformatics is at the interface between immunology and computer science. It takes advantage of computational, statistical, mathematical approaches and enhances the understanding of immunological knowledge.
+ Computational immunology
+ Immunoinformatics
+ This involves the study of often complex genetic traits underlying diseases involving defects in the immune system. For example, identifying target genes for therapeutic approaches, or genetic variations involved in immunological pathology.
+ Immunoinformatics
+
+
+
+
+
+
+
+
+
+ 1.24
+ A diagnostic imaging technique based on the application of ultrasound.
+ Standardized echography
+ Ultrasound imaging
+ Echography
+ Diagnostic sonography
+ Medical ultrasound
+ Standard echography
+ Ultrasonography
+
+
+ Echography
+
+
+
+
+
+
+
+
+
+ 1.24
+ Experimental approaches to determine the rates of metabolic reactions - the metabolic fluxes - within a biological entity.
+ Fluxomics
+ The "fluxome" is the complete set of metabolic fluxes in a cell, and is a dynamic aspect of phenotype.
+ Fluxomics
+
+
+
+
+
+
+
+
+
+ 1.12
+ An experiment for studying protein-protein interactions.
+ Protein_interaction_experiment
+ Co-immunoprecipitation
+ Phage display
+ Yeast one-hybrid
+ Yeast two-hybrid
+
+
+ This used to have the ID http://edamontology.org/topic_3557 but the numerical part (owing to an error) duplicated http://edamontology.org/operation_3557 ('Imputation'). ID of this concept set to http://edamontology.org/topic_3957 in EDAM 1.24.
+ Protein interaction experiment
+
+
+
+
+
+
+
+
+
+ 1.25
+ A DNA structural variation, specifically a duplication or deletion event, resulting in sections of the genome to be repeated, or the number of repeats in the genome to vary between individuals.
+ Copy_number_variation
+ CNV deletion
+ CNV duplication
+ CNV insertion / amplification
+ Complex CNV
+ Copy number variant
+ Copy number variation
+
+
+
+
+
+
+
+
+
+ 1.25
+ The branch of genetics concerned with the relationships between chromosomes and cellular behaviour, especially during mitosis and meiosis.
+
+
+ Cytogenetics
+
+
+
+
+
+
+
+
+
+ 1.25
+ The design of vaccines to protect against a particular pathogen, including antigens, delivery systems, and adjuvants to elicit a predictable immune response against specific epitopes.
+ Vaccinology
+ Rational vaccine design
+ Reverse vaccinology
+ Structural vaccinology
+ Structure-based immunogen design
+ Vaccine design
+
+
+ Vaccinology
+
+
+
+
+
+
+
+
+
+ 1.25
+ The study of immune system as a whole, its regulation and response to pathogens using genome-wide approaches.
+
+
+ Immunomics
+
+
+
+
+
+
+
+
+
+ 1.25
+ Epistasis can be defined as the ability of the genotype at one locus to supersede the phenotypic effect of a mutation at another locus. This interaction between genes can occur at different levels: gene expression, protein levels, etc...
+ Epistatic genetic interaction
+ Epistatic interactions
+
+
+ Epistasis
+
+ http://purl.bioontology.org/ontology/MSH/D004843
+
+
+
+
+
+
+
+
+ 1.26
+ Open science encompasses the practices of making scientific research transparent and participatory, and its outputs publicly accessible.
+
+
+ Open science
+
+
+
+
+
+
+
+
+
+ 1.26
+ Data rescue denotes digitalisation, formatting, archival, and publication of data that were not available in accessible or usable form. Examples are data from private archives, data inside publications, or in paper records stored privately or publicly.
+
+
+ Data rescue
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ FAIR data principles
+ FAIRification
+ FAIR data is data that meets the principles of being findable, accessible, interoperable, and reusable.
+ Findable, accessible, interoperable, reusable data
+ Open data
+
+
+ A substantially overlapping term is 'open data', i.e. publicly available data that is free to use, distribute, and create derivative work from, without restrictions. Open data does not automatically have to be FAIR (e.g. findable or interoperable), while FAIR data does in some cases not have to be publicly available without restrictions (especially sensitive personal data).
+ FAIR data
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Microbial mechanisms for protecting microorganisms against antimicrobial agents.
+ AMR
+ Antifungal resistance
+ Antiprotozoal resistance
+ Antiviral resistance
+ Extensive drug resistance (XDR)
+ Multidrug resistance
+ Multiple drug resistance (MDR)
+ Multiresistance
+ Pandrug resistance (PDR)
+ Total drug resistance (TDR)
+
+
+ Antimicrobial Resistance
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The monitoring method for measuring electrical activity in the brain.
+ EEG
+
+
+ Electroencephalography
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The monitoring method for measuring electrical activity in the heart.
+ ECG
+ EKG
+
+
+ Electrocardiography
+
+
+
+
+
+
+
+
+
+ 1.26
+ A method for studying biomolecules and other structures at very low (cryogenic) temperature using electron microscopy.
+ cryo-EM
+
+
+ Cryogenic electron microscopy
+
+
+
+
+
+
+
+
+
+ 1.26
+ Biosciences, or life sciences, include fields of study related to life, living beings, and biomolecules.
+ Life sciences
+
+
+ Biosciences
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Biogeochemical cycle
+ The carbon cycle is the biogeochemical pathway of carbon moving through the different parts of the Earth (such as ocean, atmosphere, soil), or eventually another planet.
+
+
+ Note that the carbon-nitrogen-oxygen (CNO) cycle (https://en.wikipedia.org/wiki/CNO_cycle) is a completely different, thermonuclear reaction in stars.
+ Carbon cycle
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Multiomics concerns integration of data from multiple omics (e.g. transcriptomics, proteomics, epigenomics).
+ Integrative omics
+ Multi-omics
+ Pan-omics
+ Panomics
+
+
+ Multiomics
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ With ribosome profiling, ribosome-protected mRNA fragments are analyzed with RNA-seq techniques leading to a genome-wide measurement of the translation landscape.
+ RIBO-seq
+ Ribo-Seq
+ RiboSeq
+ ribo-seq
+ ribosomal footprinting
+ translation footprinting
+
+
+ Ribosome Profiling
+
+
+
+
+
+
+
+
+
+ 1.26
+ Combined with NGS (Next Generation Sequencing) technologies, single-cell sequencing allows the study of genetic information (DNA, RNA, epigenome...) at a single cell level. It is often used for differential analysis and gene expression profiling.
+ Single Cell Genomics
+
+
+ Single-Cell Sequencing
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The study of mechanical waves in liquids, solids, and gases.
+
+
+ Acoustics
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Interdisciplinary study of behavior, precise control, and manipulation of low (microlitre) volume fluids in constrained space.
+ Fluidics
+
+
+ Microfluidics
+
+
+
+
+
+
+
+
+
+ 1.26
+ Genomic imprinting is a gene regulation mechanism by which a subset of genes are expressed from one of the two parental chromosomes only. Imprinted genes are organized in clusters, their silencing/activation of the imprinted loci involves epigenetic marks (DNA methylation, etc) and so-called imprinting control regions (ICR). It has been described in mammals, but also plants and insects.
+ Gene imprinting
+
+
+ Genomic imprinting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Environmental DNA (eDNA)
+ Environmental RNA (eRNA)
+ Environmental sequencing
+ Taxonomic profiling
+ Metabarcoding is the barcoding of (environmental) DNA or RNA to identify multiple taxa from the same sample.
+ DNA metabarcoding
+ Environmental metabarcoding
+ RNA metabarcoding
+ eDNA metabarcoding
+ eRNA metabarcoding
+
+
+ Typically, high-throughput sequencing is performed and the resulting sequence reads are matched to DNA barcodes in a reference database.
+ Metabarcoding
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+
+ An obsolete concept (redefined in EDAM).
+
+ Needed for conversion to the OBO format.
+ Obsolete concept (EDAM)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ OWL format
+
+ A serialisation format conforming to the Web Ontology Language (OWL) model.
+
+
+ RDF/XML
+
+
+ RDF/XML can be used as a standard serialisation syntax for OWL DL, but not for OWL Full.
+ rdf
+
+ Resource Description Framework (RDF) XML format.
+ http://www.ebi.ac.uk/SWO/data/SWO_3000006
+ 1.2
+
+
+
+
+
+
+
diff --git a/edamfu/tests/edamontology.org.robot.owl b/edamfu/tests/edamontology.org.robot.owl
new file mode 100644
index 0000000..397094e
--- /dev/null
+++ b/edamfu/tests/edamontology.org.robot.owl
@@ -0,0 +1,61094 @@
+
+
+
+
+ 4040
+
+ 03.10.2023 11:14 UTC
+ EDAM http://edamontology.org/ "EDAM relations, concept properties, and subsets"
+ EDAM_data http://edamontology.org/data_ "EDAM types of data"
+ EDAM_format http://edamontology.org/format_ "EDAM data formats"
+ EDAM_operation http://edamontology.org/operation_ "EDAM operations"
+ EDAM_topic http://edamontology.org/topic_ "EDAM topics"
+ EDAM is a community project and its development can be followed and contributed to at https://github.com/edamontology/edamontology.
+ EDAM is particularly suitable for semantic annotations and categorisation of diverse resources related to data analysis and management: e.g. tools, workflows, learning materials, or standards. EDAM is also useful in data management itself, for recording provenance metadata of processed data.
+ https://github.com/edamontology/edamontology/graphs/contributors and many more!
+ Hervé Ménager
+ Jon Ison
+ Matúš Kalaš
+ EDAM is a domain ontology of data analysis and data management in bio- and other sciences, and science-based applications. It comprises concepts related to analysis, modelling, optimisation, and data life-cycle. Targeting usability by diverse users, the structure of EDAM is relatively simple, divided into 4 main sections: Topic, Operation, Data (incl. Identifier), and Format.
+ application/rdf+xml
+ EDAM - The ontology of data analysis and management
+
+
+ 1.26_dev
+
+
+
+
+
+
+
+
+
+ Matúš Kalaš
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+ true
+ Publication reference
+ 'Citation' concept property ('citation' metadata tag) contains a dereferenceable URI, preferably including a DOI, pointing to a citeable publication of the given data format.
+ Publication
+
+ Citation
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ Version in which a concept was created.
+
+ Created in
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ A comment explaining why the concept should be or was deprecated, including name of person commenting (jison, mkalas etc.).
+
+ deprecation_comment
+
+
+
+
+
+
+
+ true
+ 'Documentation' trailing modifier (qualifier, 'documentation') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to a page with explanation, description, documentation, or specification of the given data format.
+ Specification
+
+ Documentation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ 'Example' concept property ('example' metadata tag) lists examples of valid values of types of identifiers (accessions). Applicable to some other types of data, too.
+
+ Separated by bar ('|'). For more complex data and data formats, it can be a link to a website with examples, instead.
+ Example
+
+
+
+
+
+
+
+ true
+ 'File extension' concept property ('file_extension' metadata tag) lists examples of usual file extensions of formats.
+
+ N.B.: File extensions that are not correspondingly defined at http://filext.com are recorded in EDAM only if not in conflict with http://filext.com, and/or unique and usual within life-science computing.
+ Separated by bar ('|'), without a dot ('.') prefix, preferably not all capital characters.
+ File extension
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ 'Information standard' trailing modifier (qualifier, 'information_standard') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to an information standard supported by the given data format.
+ Minimum information checklist
+ Minimum information standard
+
+ "Supported by the given data format" here means, that the given format enables representation of data that satisfies the information standard.
+ Information standard
+
+
+
+
+
+
+
+ true
+ When 'true', the concept has been proposed to be deprecated.
+
+ deprecation_candidate
+
+
+
+
+
+
+
+ true
+ When 'true', the concept has been proposed to be refactored.
+
+ refactor_candidate
+
+
+
+
+
+
+
+ true
+ When 'true', the concept has been proposed or is supported within Debian as a tag.
+
+ isdebtag
+
+
+
+
+
+
+
+ true
+ 'Media type' trailing modifier (qualifier, 'media_type') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to a page specifying a media type of the given data format.
+ MIME type
+
+ Media type
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ Whether terms associated with this concept are recommended for use in annotation.
+
+ notRecommendedForAnnotation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ Version in which a concept was made obsolete.
+
+ Obsolete since
+
+
+
+
+
+
+
+ true
+ EDAM concept URI of the erstwhile "parent" of a now deprecated concept.
+
+ Old parent
+
+
+
+
+
+
+
+ true
+ EDAM concept URI of an erstwhile related concept (by has_input, has_output, has_topic, is_format_of, etc.) of a now deprecated concept.
+
+ Old related
+
+
+
+
+
+
+
+ true
+ 'Ontology used' concept property ('ontology_used' metadata tag) of format concepts links to a domain ontology that is used inside the given data format, or contains a note about ontology use within the format.
+
+ Ontology used
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ 'Organisation' trailing modifier (qualifier, 'organisation') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to an organisation that developed, standardised, and maintains the given data format.
+ Organization
+
+ Organisation
+
+
+
+
+
+
+
+ true
+ A comment explaining the proposed refactoring, including name of person commenting (jison, mkalas etc.).
+
+ refactor_comment
+
+
+
+
+
+
+
+ true
+ 'Regular expression' concept property ('regex' metadata tag) specifies the allowed values of types of identifiers (accessions). Applicable to some other types of data, too.
+
+ Regular expression
+
+
+
+
+
+
+
+ 'Related term' concept property ('related_term'; supposedly a synonym modifier in OBO format) states a related term - not necessarily closely semantically related - that users (also non-specialists) may use when searching.
+
+ Related term
+
+
+
+
+
+
+
+
+ true
+ 'Repository' trailing modifier (qualifier, 'repository') of 'xref' links of 'Format' concepts. When 'true', the link is pointing to the public source-code repository where the given data format is developed or maintained.
+ Public repository
+ Source-code repository
+
+ Repository
+
+
+
+
+
+
+
+ true
+ Name of thematic editor (http://biotools.readthedocs.io/en/latest/governance.html#registry-editors) responsible for this concept and its children.
+
+ thematic_editor
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_format B' defines for the subject A, that it has the object B as its data format.
+
+ false
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is (or is in a role of) 'Data', or an input, output, input or output argument of an 'Operation'. Object B can either be a concept that is a 'Format', or in unexpected cases an entity outside of an ontology that is a 'Format' or is in the role of a 'Format'. In EDAM, 'has_format' is not explicitly defined between EDAM concepts, only the inverse 'is_format_of'.
+ has format
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_function B' defines for the subject A, that it has the object B as its function.
+ OBO_REL:bearer_of
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is (or is in a role of) a function, or an entity outside of an ontology that is (or is in a role of) a function specification. In the scope of EDAM, 'has_function' serves only for relating annotated entities outside of EDAM with 'Operation' concepts.
+ has function
+
+
+
+
+
+
+
+ OBO_REL:bearer_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:bearer_of' is narrower in the sense that it only relates ontological categories (concepts) that are an 'independent_continuant' (snap:IndependentContinuant) with ontological categories that are a 'specifically_dependent_continuant' (snap:SpecificallyDependentContinuant), and broader in the sense that it relates with any borne objects not just functions of the subject.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_identifier B' defines for the subject A, that it has the object B as its identifier.
+
+ false
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is an 'Identifier', or an entity outside of an ontology that is an 'Identifier' or is in the role of an 'Identifier'. In EDAM, 'has_identifier' is not explicitly defined between EDAM concepts, only the inverse 'is_identifier_of'.
+ has identifier
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_input B' defines for the subject A, that it has the object B as a necessary or actual input or input argument.
+ OBO_REL:has_participant
+
+ true
+ Subject A can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that has an 'Operation' function or is an 'Operation'. Object B can be any concept or entity. In EDAM, only 'has_input' is explicitly defined between EDAM concepts ('Operation' 'has_input' 'Data'). The inverse, 'is_input_of', is not explicitly defined.
+ has input
+
+
+
+
+
+
+ OBO_REL:has_participant
+ 'OBO_REL:has_participant' is narrower in the sense that it only relates ontological categories (concepts) that are a 'process' (span:Process) with ontological categories that are a 'continuant' (snap:Continuant), and broader in the sense that it relates with any participating objects not just inputs or input arguments of the subject.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_output B' defines for the subject A, that it has the object B as a necessary or actual output or output argument.
+ OBO_REL:has_participant
+
+ true
+ Subject A can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that has an 'Operation' function or is an 'Operation'. Object B can be any concept or entity. In EDAM, only 'has_output' is explicitly defined between EDAM concepts ('Operation' 'has_output' 'Data'). The inverse, 'is_output_of', is not explicitly defined.
+ has output
+
+
+
+
+
+
+ OBO_REL:has_participant
+ 'OBO_REL:has_participant' is narrower in the sense that it only relates ontological categories (concepts) that are a 'process' (span:Process) with ontological categories that are a 'continuant' (snap:Continuant), and broader in the sense that it relates with any participating objects not just outputs or output arguments of the subject. It is also not clear whether an output (result) actually participates in the process that generates it.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A has_topic B' defines for the subject A, that it has the object B as its topic (A is in the scope of a topic B).
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is a 'Topic', or in unexpected cases an entity outside of an ontology that is a 'Topic' or is in the role of a 'Topic'. In EDAM, only 'has_topic' is explicitly defined between EDAM concepts ('Operation' or 'Data' 'has_topic' 'Topic'). The inverse, 'is_topic_of', is not explicitly defined.
+ has topic
+
+
+
+
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_format_of B' defines for the subject A, that it is a data format of the object B.
+ OBO_REL:quality_of
+
+ false
+ Subject A can either be a concept that is a 'Format', or in unexpected cases an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is a 'Format' or is in the role of a 'Format'. Object B can be any concept or entity outside of an ontology that is (or is in a role of) 'Data', or an input, output, input or output argument of an 'Operation'. In EDAM, only 'is_format_of' is explicitly defined between EDAM concepts ('Format' 'is_format_of' 'Data'). The inverse, 'has_format', is not explicitly defined.
+ is format of
+
+
+
+
+
+ OBO_REL:quality_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:quality_of' might be seen narrower in the sense that it only relates subjects that are a 'quality' (snap:Quality) with objects that are an 'independent_continuant' (snap:IndependentContinuant), and is broader in the sense that it relates any qualities of the object.
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_function_of B' defines for the subject A, that it is a function of the object B.
+ OBO_REL:function_of
+ OBO_REL:inheres_in
+
+ true
+ Subject A can either be a concept that is (or is in a role of) a function, or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is (or is in a role of) a function specification. Object B can be any concept or entity. Within EDAM itself, 'is_function_of' is not used.
+ is function of
+
+
+
+
+
+
+ OBO_REL:function_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:function_of' only relates subjects that are a 'function' (snap:Function) with objects that are an 'independent_continuant' (snap:IndependentContinuant), so for example no processes. It does not define explicitly that the subject is a function of the object.
+
+
+
+
+ OBO_REL:inheres_in
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:inheres_in' is narrower in the sense that it only relates ontological categories (concepts) that are a 'specifically_dependent_continuant' (snap:SpecificallyDependentContinuant) with ontological categories that are an 'independent_continuant' (snap:IndependentContinuant), and broader in the sense that it relates any borne subjects not just functions.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_identifier_of B' defines for the subject A, that it is an identifier of the object B.
+
+ false
+ Subject A can either be a concept that is an 'Identifier', or an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is an 'Identifier' or is in the role of an 'Identifier'. Object B can be any concept or entity outside of an ontology. In EDAM, only 'is_identifier_of' is explicitly defined between EDAM concepts (only 'Identifier' 'is_identifier_of' 'Data'). The inverse, 'has_identifier', is not explicitly defined.
+ is identifier of
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_input_of B' defines for the subject A, that it is a necessary or actual input or input argument of the object B.
+ OBO_REL:participates_in
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology that has an 'Operation' function or is an 'Operation'. In EDAM, 'is_input_of' is not explicitly defined between EDAM concepts, only the inverse 'has_input'.
+ is input of
+
+
+
+
+
+
+ OBO_REL:participates_in
+ 'OBO_REL:participates_in' is narrower in the sense that it only relates ontological categories (concepts) that are a 'continuant' (snap:Continuant) with ontological categories that are a 'process' (span:Process), and broader in the sense that it relates any participating subjects not just inputs or input arguments.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_output_of B' defines for the subject A, that it is a necessary or actual output or output argument of the object B.
+ OBO_REL:participates_in
+
+ true
+ Subject A can be any concept or entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated). Object B can either be a concept that is or has an 'Operation' function, or an entity outside of an ontology that has an 'Operation' function or is an 'Operation'. In EDAM, 'is_output_of' is not explicitly defined between EDAM concepts, only the inverse 'has_output'.
+ is output of
+
+
+
+
+
+
+ OBO_REL:participates_in
+ 'OBO_REL:participates_in' is narrower in the sense that it only relates ontological categories (concepts) that are a 'continuant' (snap:Continuant) with ontological categories that are a 'process' (span:Process), and broader in the sense that it relates any participating subjects not just outputs or output arguments. It is also not clear whether an output (result) actually participates in the process that generates it.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+ false
+ OBO_REL:is_a
+ 'A is_topic_of B' defines for the subject A, that it is a topic of the object B (a topic A is the scope of B).
+ OBO_REL:quality_of
+
+ true
+ Subject A can either be a concept that is a 'Topic', or in unexpected cases an entity outside of an ontology (or an ontology concept in a role of an entity being semantically annotated) that is a 'Topic' or is in the role of a 'Topic'. Object B can be any concept or entity outside of an ontology. In EDAM, 'is_topic_of' is not explicitly defined between EDAM concepts, only the inverse 'has_topic'.
+ is topic of
+
+
+
+
+
+ OBO_REL:quality_of
+ Is defined anywhere? Not in the 'unknown' version of RO. 'OBO_REL:quality_of' might be seen narrower in the sense that it only relates subjects that are a 'quality' (snap:Quality) with objects that are an 'independent_continuant' (snap:IndependentContinuant), and is broader in the sense that it relates any qualities of the object.
+
+
+
+
+ true
+ In very unusual cases.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A type of computational resource used in bioinformatics.
+
+ Resource type
+ true
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Information, represented in an information artefact (data record) that is 'understandable' by dedicated computational tools that can use the data as input or produce it as output.
+ Data record
+ Data set
+ Datum
+
+
+ Data
+
+
+
+
+
+
+
+
+
+
+
+
+ Data record
+ EDAM does not distinguish a data record (a tool-understandable information artefact) from data or datum (its content, the tool-understandable encoding of an information).
+
+
+
+
+ Data set
+ EDAM does not distinguish the multiplicity of data, such as one data item (datum) versus a collection of data (data set).
+
+
+
+
+ Datum
+ EDAM does not distinguish the multiplicity of data, such as one data item (datum) versus a collection of data (data set).
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A bioinformatics package or tool, e.g. a standalone application or web service.
+
+
+ Tool
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A digital data archive typically based around a relational model but sometimes using an object-oriented, tree or graph-based model.
+
+
+ Database
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An ontology of biological or bioinformatics concepts and relations, a controlled vocabulary, structured glossary etc.
+
+
+ Ontology
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A directory on disk from which files are read.
+
+ Directory metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controlled vocabulary from National Library of Medicine. The MeSH thesaurus is used to index articles in biomedical journals for the Medline/PubMED databases.
+
+ MeSH vocabulary
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controlled vocabulary for gene names (symbols) from HUGO Gene Nomenclature Committee.
+
+ HGNC vocabulary
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Compendium of controlled vocabularies for the biomedical domain (Unified Medical Language System).
+
+ UMLS vocabulary
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A text token, number or something else which identifies an entity, but which may not be persistent (stable) or unique (the same identifier may identify multiple things).
+ ID
+
+
+
+ Identifier
+
+
+
+
+
+
+
+
+ Almost exact but limited to identifying resources, and being unambiguous.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An entry (retrievable via URL) from a biological database.
+
+ Database entry
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mass of a molecule.
+
+
+ Molecular mass
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_formal_charge
+ Net charge of a molecule.
+
+
+ Molecular charge
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A specification of a chemical structure.
+ Chemical structure specification
+
+
+ Chemical formula
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR quantitative descriptor (name-value pair) of chemical structure.
+
+
+ QSAR descriptors have numeric values that quantify chemical information encoded in a symbolic representation of a molecule. They are used in quantitative structure activity relationship (QSAR) applications. Many subtypes of individual descriptors (not included in EDAM) cover various types of protein properties.
+ QSAR descriptor
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possible in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw molecular sequence (string of characters) which might include ambiguity, unknown positions and non-sequence characters.
+
+
+ Non-sequence characters may be used for example for gaps and translation stop.
+ Raw sequence
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SO:2000061
+ A molecular sequence and associated metadata.
+
+
+ Sequence record
+ http://purl.bioontology.org/ontology/MSH/D058977
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A collection of one or typically multiple molecular sequences (which can include derived data or metadata) that do not (typically) correspond to molecular sequence database records or entries and which (typically) are derived from some analytical method.
+ Alignment reference
+ SO:0001260
+
+
+ An example is an alignment reference; one or a set of reference molecular sequences, structures, or profiles used for alignment of genomic, transcriptomic, or proteomic experimental data.
+ This concept may be used for arbitrary sequence sets and associated data arising from processing.
+ Sequence set
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A character used to replace (mask) other characters in a molecular sequence.
+
+ Sequence mask character
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of sequence masking to perform.
+
+ Sequence masking is where specific characters or positions in a molecular sequence are masked (replaced) with another (mask character). The mask type indicates what is masked, for example regions that are not of interest or which are information-poor including acidic protein regions, basic protein regions, proline-rich regions, low compositional complexity regions, short-periodicity internal repeats, simple repeats and low complexity regions. Masked sequences are used in database search to eliminate statistically significant but biologically uninteresting hits.
+ Sequence mask type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+
+ The strand of a DNA sequence (forward or reverse).
+
+ The forward or 'top' strand might specify a sequence is to be used as given, the reverse or 'bottom' strand specifying the reverse complement of the sequence is to be used.
+ DNA sense specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A specification of sequence length(s).
+
+ Sequence length specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Basic or general information concerning molecular sequences.
+
+ This is used for such things as a report including the sequence identifier, type and length.
+ Sequence metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ How the annotation of a sequence feature (for example in EMBL or Swiss-Prot) was derived.
+
+
+ This might be the name and version of a software tool, the name of a database, or 'curated' to indicate a manual annotation (made by a human).
+ Sequence feature source
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of sequence hits and associated data from searching a database of sequences (for example a BLAST search). This will typically include a list of scores (often with statistical evaluation) and a set of alignments for the hits.
+ Database hits (sequence)
+ Sequence database hits
+ Sequence database search results
+ Sequence search hits
+
+
+ The score list includes the alignment score, percentage of the query sequence matched, length of the database sequence entry in this alignment, identifier of the database sequence entry, excerpt of the database sequence entry description etc.
+ Sequence search results
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on the location of matches ("hits") between sequences, sequence profiles, motifs (conserved or functional patterns) and other types of sequence signatures.
+ Profile-profile alignment
+ Protein secondary database search results
+ Search results (protein secondary database)
+ Sequence motif hits
+ Sequence motif matches
+ Sequence profile alignment
+ Sequence profile hits
+ Sequence profile matches
+ Sequence-profile alignment
+
+
+ A "profile-profile alignment" is an alignment of two sequence profiles, each profile typically representing a sequence alignment.
+ A "sequence-profile alignment" is an alignment of one or more molecular sequence(s) to one or more sequence profile(s) (each profile typically representing a sequence alignment).
+ This includes reports of hits from a search of a protein secondary or domain database. Data associated with the search or alignment might also be included, e.g. ranked list of best-scoring sequences, a graphical representation of scores etc.
+ Sequence signature matches
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data files used by motif or profile methods.
+
+ Sequence signature model
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning specific or conserved patterns in molecular sequences and the classifiers used for their identification, including sequence motifs, profiles or other diagnostic elements.
+
+
+ This can include metadata about a motif or sequence profile such as its name, length, technical details about the profile construction, and so on.
+ Sequence signature data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment of exact matches between subsequences (words) within two or more molecular sequences.
+
+ Sequence alignment (words)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A dotplot of sequence similarities identified from word-matching or character comparison.
+
+
+ Dotplot
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple molecular sequences.
+ Multiple sequence alignment
+ msa
+
+
+ Sequence alignment
+
+ http://purl.bioontology.org/ontology/MSH/D016415
+ http://semanticscience.org/resource/SIO_010066
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Some simple value controlling a sequence alignment (or similar 'match') operation.
+
+ Sequence alignment parameter
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A value representing molecular sequence similarity.
+
+
+ Sequence similarity score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Report of general information on a sequence alignment, typically including a description, sequence identifiers and alignment score.
+
+ Sequence alignment metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report of molecular sequence alignment-derived data or metadata.
+ Sequence alignment metadata
+
+
+ Use this for any computer-generated reports on sequence alignments, and for general information (metadata) on a sequence alignment, such as a description, sequence identifiers and alignment score.
+ Sequence alignment report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ "Sequence-profile alignment" and "Profile-profile alignment" are synonymous with "Sequence signature matches" which was already stated as including matches (alignment) and other data.
+ 1.25 or earlier
+
+ A profile-profile alignment (each profile typically representing a sequence alignment).
+
+
+ Profile-profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ "Sequence-profile alignment" and "Profile-profile alignment" are synonymous with "Sequence signature matches" which was already stated as including matches (alignment) and other data.
+ 1.24
+
+ Alignment of one or more molecular sequence(s) to one or more sequence profile(s) (each profile typically representing a sequence alignment).
+
+
+ Sequence-profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:phylogenetic_distance_matrix
+ A matrix of estimated evolutionary distance between molecular sequences, such as is suitable for phylogenetic tree calculation.
+ Phylogenetic distance matrix
+
+
+ Methods might perform character compatibility analysis or identify patterns of similarity in an alignment or data matrix.
+ Sequence distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic character data from which a phylogenetic tree may be generated.
+
+
+ As defined, this concept would also include molecular sequences, microsatellites, polymorphisms (RAPDs, RFLPs, or AFLPs), restriction sites and fragments
+ Phylogenetic character data
+ http://www.evolutionaryontology.org/cdao.owl#Character
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Tree
+ Moby:myTree
+ Moby:phylogenetic_tree
+ The raw data (not just an image) from which a phylogenetic tree is directly generated or plotted, such as topology, lengths (in time or in expected amounts of variance) and a confidence interval for each length.
+ Phylogeny
+
+
+ A phylogenetic tree is usually constructed from a set of sequences from which an alignment (or data matrix) is calculated. See also 'Phylogenetic tree image'.
+ Phylogenetic tree
+ http://purl.bioontology.org/ontology/MSH/D010802
+ http://www.evolutionaryontology.org/cdao.owl#Tree
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of integer or floating point numbers for amino acid or nucleotide sequence comparison.
+ Substitution matrix
+
+
+ The comparison matrix might include matrix name, optional comment, height and width (or size) of matrix, an index row/column (of characters) and data rows/columns (of integers or floats).
+ Comparison matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Predicted or actual protein topology represented as a string of protein secondary structure elements.
+
+
+ The location and size of the secondary structure elements and intervening loop regions is usually indicated.
+ Protein topology
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Secondary structure (predicted or real) of a protein.
+
+
+ Protein features report (secondary structure)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Super-secondary structure of protein sequence(s).
+
+
+ Super-secondary structures include leucine zippers, coiled coils, Helix-Turn-Helix etc.
+ Protein features report (super-secondary)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Alignment of the (1D representations of) secondary structure of two or more proteins.
+ Secondary structure alignment (protein)
+
+
+ Protein secondary structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on protein secondary structure alignment-derived data or metadata.
+
+ Secondary structure alignment metadata (protein)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:RNAStructML
+ An informative report of secondary structure (predicted or real) of an RNA molecule.
+ Secondary structure (RNA)
+
+
+ This includes thermodynamically stable or evolutionarily conserved structures such as knots, pseudoknots etc.
+ RNA secondary structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Moby:RNAStructAlignmentML
+ Alignment of the (1D representations of) secondary structure of two or more RNA molecules.
+ Secondary structure alignment (RNA)
+
+
+ RNA secondary structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report of RNA secondary structure alignment-derived data or metadata.
+
+ Secondary structure alignment metadata (RNA)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a macromolecular tertiary (3D) structure or part of a structure.
+ Coordinate model
+ Structure data
+
+
+ The coordinate data may be predicted or real.
+ Structure
+ http://purl.bioontology.org/ontology/MSH/D015394
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An entry from a molecular tertiary (3D) structure database.
+
+ Tertiary structure record
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Results (hits) from searching a database of tertiary structure.
+
+ Structure database search results
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of molecular tertiary (3D) structures.
+
+
+ A tertiary structure alignment will include the untransformed coordinates of one macromolecule, followed by the second (or subsequent) structure(s) with all the coordinates transformed (by rotation / translation) to give a superposition.
+ Structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report of molecular tertiary structure alignment-derived data.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Structure alignment report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A value representing molecular structure similarity, measured from structure alignment or some other type of structure comparison.
+
+
+ Structure similarity score
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some type of structural (3D) profile or template (representing a structure or structure alignment).
+ 3D profile
+ Structural (3D) profile
+
+
+ Structural profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A 3D profile-3D profile alignment (each profile representing structures or a structure alignment).
+ Structural profile alignment
+
+
+ Structural (3D) profile alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An alignment of a sequence to a 3D profile (representing structures or a structure alignment).
+
+ Sequence-3D profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of values used for scoring sequence-structure compatibility.
+
+
+ Protein sequence-structure scoring matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An alignment of molecular sequence to structure (from threading sequence(s) through 3D structure or representation of structure(s)).
+
+
+ Sequence-structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific amino acid.
+
+ Amino acid annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific peptide.
+
+ Peptide annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative human-readable report about one or more specific protein molecules or protein structural domains, derived from analysis of primary (sequence or structural) data.
+ Gene product annotation
+
+
+ Protein report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of primarily non-positional data describing intrinsic physical, chemical or other properties of a protein molecule or model.
+ Protein physicochemical property
+ Protein properties
+ Protein sequence statistics
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. Data may be based on analysis of nucleic acid sequence or structural data, for example reports on the surface properties (shape, hydropathy, electrostatic patches etc) of a protein structure, protein flexibility or motion, and protein architecture (spatial arrangement of secondary structure).
+ Protein property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ 3D structural motifs in a protein.
+
+ Protein structural motifs and surfaces
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Data concerning the classification of the sequences and/or structures of protein structural domain(s).
+
+ Protein domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ structural domains or 3D folds in a protein or polypeptide chain.
+
+
+ Protein features report (domains)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on architecture (spatial arrangement of secondary structure) of a protein structure.
+
+ Protein architecture report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report on an analysis or model of protein folding properties, folding pathways, residues or sites that are key to protein folding, nucleation or stabilisation centers etc.
+
+
+ Protein folding report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data on the effect of (typically point) mutation on protein folding, stability, structure and function.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein features (mutation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein-protein interaction data from for example yeast two-hybrid analysis, protein microarrays, immunoaffinity chromatography followed by mass spectrometry, phage display etc.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein interaction raw data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the interactions (predicted or known) within or between a protein, structural domain or part of a protein. This includes intra- and inter-residue contacts and distances, as well as interactions with other proteins and non-protein entities such as nucleic acid, metal atoms, water, ions etc.
+ Protein interaction record
+ Protein interaction report
+ Protein report (interaction)
+ Protein-protein interaction data
+ Atom interaction data
+ Protein non-covalent interactions report
+ Residue interaction data
+
+
+ Protein interaction data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein classification data
+ An informative report on a specific protein family or other classification or group of protein sequences or structures.
+ Protein family annotation
+
+
+ Protein family report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The maximum initial velocity or rate of a reaction. It is the limiting velocity as substrate concentrations get very large.
+
+
+ Vmax
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Km is the concentration (usually in Molar units) of substrate that leads to half-maximal velocity of an enzyme-catalysed reaction.
+
+
+ Km
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific nucleotide base.
+
+ Nucleotide base annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of primarily non-positional data describing intrinsic physical, chemical or other properties of a nucleic acid molecule.
+ Nucleic acid physicochemical property
+ GC-content
+ Nucleic acid property (structural)
+ Nucleic acid structural property
+
+
+ Nucleic acid structural properties stiffness, curvature, twist/roll data or other conformational parameters or properties.
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Nucleic acid property
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data derived from analysis of codon usage (typically a codon usage table) of DNA sequences.
+ Codon usage report
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Codon usage data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GeneInfo
+ Moby:gene
+ Moby_namespace:Human_Readable_Description
+ A report on predicted or actual gene structure, regions which make an RNA product and features such as promoters, coding regions, splice sites etc.
+ Gene and transcript structure (report)
+ Gene annotation
+ Gene features report
+ Gene function (report)
+ Gene structure (report)
+ Nucleic acid features (gene and transcript structure)
+
+
+ This includes any report on a particular locus or gene. This might include the gene name, description, summary and so on. It can include details about the function of a gene, such as its encoded protein or a functional classification of the gene sequence along according to the encoded protein(s).
+ Gene report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on the classification of nucleic acid / gene sequences according to the functional classification of their gene products.
+
+ Gene classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ stable, naturally occurring mutations in a nucleotide sequence including alleles, naturally occurring mutations such as single base nucleotide substitutions, deletions and insertions, RFLPs and other polymorphisms.
+
+
+ DNA variation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific chromosome.
+
+
+ This includes basic information. e.g. chromosome number, length, karyotype features, chromosome sequence etc.
+ Chromosome report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about the set of genes (or allelic forms) present in an individual, organism or cell and associated with a specific physical characteristic, or a report concerning an organism's traits and phenotypes.
+ Genotype/phenotype annotation
+
+
+ Genotype/phenotype report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ PCR experiments, e.g. quantitative real-time PCR.
+
+
+ PCR experiment report
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fluorescence trace data generated by an automated DNA sequencer, which can be interpreted as a molecular sequence (reads), given associated sequencing metadata such as base-call quality scores.
+
+
+ This is the raw data produced by a DNA sequencing machine.
+ Sequence trace
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An assembly of fragments of a (typically genomic) DNA sequence.
+ Contigs
+ SO:0000353
+ SO:0001248
+
+
+ Typically, an assembly is a collection of contigs (for example ESTs and genomic DNA fragments) that are ordered, aligned and merged. Annotation of the assembled sequence might be included.
+ Sequence assembly
+
+
+
+
+
+ SO:0001248
+ Perhaps surprisingly, the definition of 'SO:assembly' is narrower than the 'SO:sequence_assembly'.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Radiation hybrid (RH) scores for one or more markers.
+ Radiation Hybrid (RH) scores
+
+
+ Radiation Hybrid (RH) scores are used in Radiation Hybrid mapping.
+ RH scores
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about the linkage of alleles.
+ Gene annotation (linkage)
+ Linkage disequilibrium (report)
+
+
+ This includes linkage disequilibrium; the non-random association of alleles or polymorphisms at two or more loci (not necessarily on the same chromosome).
+ Genetic linkage report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data quantifying the level of expression of (typically) multiple genes, derived for example from microarray experiments.
+ Gene expression pattern
+
+
+ Gene expression profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ microarray experiments including conditions, protocol, sample:data relationships etc.
+
+
+ Microarray experiment report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data on oligonucleotide probes (typically for use with DNA microarrays).
+
+ Oligonucleotide probe data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Output from a serial analysis of gene expression (SAGE) experiment.
+
+ SAGE experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Massively parallel signature sequencing (MPSS) data.
+
+ MPSS experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Sequencing by synthesis (SBS) data.
+
+ SBS experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.14
+
+ Tag to gene assignments (tag mapping) of SAGE, MPSS and SBS data. Typically this is the sequencing-based expression profile annotated with gene identifiers.
+
+
+ Sequence tag profile (with gene assignment)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein X-ray crystallographic data
+ X-ray crystallography data.
+
+
+ Electron density map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nuclear magnetic resonance (NMR) raw data, typically for a protein.
+ Protein NMR data
+
+
+ Raw NMR data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein secondary structure from protein coordinate or circular dichroism (CD) spectroscopic data.
+ CD spectrum
+ Protein circular dichroism (CD) spectroscopic data
+
+
+ CD spectra
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Volume map data from electron microscopy.
+ 3D volume map
+ EM volume map
+ Electron microscopy volume map
+
+
+ Volume map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Annotation on a structural 3D model (volume map) from electron microscopy.
+
+
+ Electron microscopy model
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Two-dimensional gel electrophoresis image.
+
+
+ 2D PAGE image
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Spectra from mass spectrometry.
+ Mass spectrometry spectra
+
+
+ Mass spectrum
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A set of peptide masses (peptide mass fingerprint) from mass spectrometry.
+ Peak list
+ Protein fingerprint
+ Molecular weights standard fingerprint
+
+
+ A molecular weight standard fingerprint is standard protonated molecular masses e.g. from trypsin (modified porcine trypsin, Promega) and keratin peptides.
+ Peptide mass fingerprint
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Protein or peptide identifications with evidence supporting the identifications, for example from comparing a peptide mass fingerprint (from mass spectrometry) to a sequence database, or the set of typical spectra one obtains when running a protein through a mass spectrometer.
+ 'Protein identification'
+ Peptide spectrum match
+
+
+ Peptide identification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report about a specific biological pathway or network, typically including a map (diagram) of the pathway.
+
+ Pathway or network annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A map (typically a diagram) of a biological pathway.
+
+ Biological pathway map
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A definition of a data resource serving one or more types of data, including metadata and links to the resource or data proper.
+
+ Data resource definition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information, annotation or documentation concerning a workflow (but not the workflow itself).
+
+
+ Workflow metadata
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A biological model represented in mathematical terms.
+ Biological model
+
+
+ Mathematical model
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A value representing estimated statistical significance of some observed data; typically sequence database hits.
+
+
+ Statistical estimate score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Resource definition for an EMBOSS database.
+
+ EMBOSS database resource definition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a version of software or data, for example name, version number and release date.
+
+ Development status / maturity may be part of the version information, for example in case of tools, standards, or some data records.
+ Version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A mapping of the accession numbers (or other database identifier) of entries between (typically) two biological or biomedical databases.
+
+
+ The cross-mapping is typically a table where each row is an accession number and each column is a database being cross-referenced. The cells give the accession number or identifier of the corresponding entry in a database. If a cell in the table is not filled then no mapping could be found for the database. Additional information might be given on version, date etc.
+ Database cross-mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An index of data of biological relevance.
+
+
+ Data index
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information concerning an analysis of an index of biological data.
+ Database index annotation
+
+
+ Data index report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information on bioinformatics database(s) or other data sources such as name, type, description, URL etc.
+
+
+ Database metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information about one or more bioinformatics applications or packages, such as name, type, description, or other documentation.
+
+
+ Tool metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Textual metadata on a submitted or completed job.
+
+ Job metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Textual metadata on a software author or end-user, for example a person or other software.
+
+
+ User metadata
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific chemical compound.
+ Chemical compound annotation
+ Chemical structure report
+ Small molecule annotation
+
+
+ Small molecule report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a particular strain of organism cell line including plants, virus, fungi and bacteria. The data typically includes strain number, organism type, growth conditions, source and so on.
+ Cell line annotation
+ Organism strain data
+
+
+ Cell line report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report about a specific scent.
+
+ Scent annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A term (name) from an ontology.
+ Ontology class name
+ Ontology terms
+
+
+ Ontology term
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning or derived from a concept from a biological ontology.
+ Ontology class metadata
+ Ontology term metadata
+
+
+ Ontology concept data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:BooleanQueryString
+ Moby:Global_Keyword
+ Moby:QueryString
+ Moby:Wildcard_Query
+ Keyword(s) or phrase(s) used (typically) for text-searching purposes.
+ Phrases
+ Term
+
+
+ Boolean operators (AND, OR and NOT) and wildcard characters may be allowed.
+ Keyword
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GCP_SimpleCitation
+ Moby:Publication
+ Bibliographic data that uniquely identifies a scientific article, book or other published material.
+ Bibliographic reference
+ Reference
+
+
+ A bibliographic reference might include information such as authors, title, journal name, date and (possibly) a link to the abstract or full-text of the article if available.
+ Citation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A scientific text, typically a full text article from a scientific journal.
+ Article text
+ Scientific article
+
+
+ Article
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information resulting from text mining.
+ Text mining output
+
+
+ A text mining abstract will typically include an annotated list of words or sentences extracted from one or more scientific articles.
+ Text mining report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of a biological entity or phenomenon.
+
+ Entity identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of a data resource.
+
+ Data resource identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier that identifies a particular type of data.
+ Identifier (typed)
+
+
+
+ This concept exists only to assist EDAM maintenance and navigation in graphical browsers. It does not add semantic information. This branch provides an alternative organisation of the concepts nested under 'Accession' and 'Name'. All concepts under here are already included under 'Accession' or 'Name'.
+ Identifier (by type of entity)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a bioinformatics tool, e.g. an application or web service.
+
+
+
+ Tool identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a discrete entity (any biological thing with a distinct, discrete physical existence).
+
+ Discrete entity identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of an entity feature (a physical part or region of a discrete biological entity, or a feature that can be mapped to such a thing).
+
+ Entity feature identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a collection of discrete biological entities.
+
+ Entity collection identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a physical, observable biological occurrence or event.
+
+ Phenomenon identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name or other identifier of a molecule.
+
+
+
+ Molecule identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier (e.g. character symbol) of a specific atom.
+ Atom identifier
+
+
+
+ Atom ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name of a specific molecule.
+
+
+
+ Molecule name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a molecule.
+
+ For example, 'Protein', 'DNA', 'RNA' etc.
+ Molecule type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Unique identifier of a chemical compound.
+
+ Chemical identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a chromosome.
+
+
+
+ Chromosome name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a peptide chain.
+
+
+
+ Peptide identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein.
+
+
+
+ Protein identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name of a chemical compound.
+ Chemical name
+
+
+
+ Compound name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique registry number of a chemical compound.
+
+
+
+ Chemical registry number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Code word for a ligand, for example from a PDB file.
+
+ Ligand identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a drug.
+
+
+
+ Drug identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an amino acid.
+ Residue identifier
+
+
+
+ Amino acid identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name or other identifier of a nucleotide.
+
+
+
+ Nucleotide identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a monosaccharide.
+
+
+
+ Monosaccharide identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name from Chemical Entities of Biological Interest (ChEBI) of a chemical compound.
+ ChEBI chemical name
+
+
+
+ This is the recommended chemical name for use for example in database annotation.
+ Chemical name (ChEBI)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ IUPAC recommended name of a chemical compound.
+ IUPAC chemical name
+
+
+
+ Chemical name (IUPAC)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ International Non-proprietary Name (INN or 'generic name') of a chemical compound, assigned by the World Health Organisation (WHO).
+ INN chemical name
+
+
+
+ Chemical name (INN)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Brand name of a chemical compound.
+ Brand chemical name
+
+
+
+ Chemical name (brand)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Synonymous name of a chemical compound.
+ Synonymous chemical name
+
+
+
+ Chemical name (synonymous)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CAS registry number of a chemical compound; a unique numerical identifier of chemicals in the scientific literature, as assigned by the Chemical Abstracts Service.
+ CAS chemical registry number
+ Chemical registry number (CAS)
+
+
+
+ CAS number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Beilstein registry number of a chemical compound.
+ Beilstein chemical registry number
+
+
+
+ Chemical registry number (Beilstein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gmelin registry number of a chemical compound.
+ Gmelin chemical registry number
+
+
+
+ Chemical registry number (Gmelin)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3-letter code word for a ligand (HET group) from a PDB file, for example ATP.
+ Component identifier code
+ Short ligand name
+
+
+
+ HET group name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ String of one or more ASCII characters representing an amino acid.
+
+
+
+ Amino acid name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ String of one or more ASCII characters representing a nucleotide.
+
+
+
+ Nucleotide code
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_strand_id
+ WHATIF: chain
+ Identifier of a polypeptide chain from a protein.
+ Chain identifier
+ PDB chain identifier
+ PDB strand id
+ Polypeptide chain identifier
+ Protein chain identifier
+
+
+
+ This is typically a character (for the chain) appended to a PDB identifier, e.g. 1cukA
+ Polypeptide chain ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a protein.
+
+
+
+ Protein name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name or other identifier of an enzyme or record from a database of enzymes.
+
+
+
+ Enzyme identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+\.-\.-\.-|[0-9]+\.[0-9]+\.-\.-|[0-9]+\.[0-9]+\.[0-9]+\.-|[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+
+ Moby:Annotated_EC_Number
+ Moby:EC_Number
+ An Enzyme Commission (EC) number of an enzyme.
+ EC
+ EC code
+ Enzyme Commission number
+
+
+
+ EC number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an enzyme.
+
+
+
+ Enzyme name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a restriction enzyme.
+
+
+
+ Restriction enzyme name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A specification (partial or complete) of one or more positions or regions of a molecular sequence or map.
+
+ Sequence position specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a molecular sequence feature, for example an ID of a feature that is unique within the scope of the GFF file.
+
+
+
+ Sequence feature ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:_atom_site.id
+ WHATIF: PDBx_atom_site
+ WHATIF: number
+ A position of one or more points (base or residue) in a sequence, or part of such a specification.
+ SO:0000735
+
+
+ Sequence position
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Specification of range(s) of sequence positions.
+
+
+ Sequence range
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a nucleic acid feature.
+
+ Nucleic acid feature identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name or other identifier of a protein feature.
+
+ Protein feature identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The type of a sequence feature, typically a term or accession from the Sequence Ontology, for example an EMBL or Swiss-Prot sequence feature key.
+ Sequence feature method
+ Sequence feature type
+
+
+ A feature key indicates the biological nature of the feature or information about changes to or versions of the sequence.
+ Sequence feature key
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Typically one of the EMBL or Swiss-Prot feature qualifiers.
+
+
+ Feature qualifiers hold information about a feature beyond that provided by the feature key and location.
+ Sequence feature qualifier
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A name of a sequence feature, e.g. the name of a feature to be displayed to an end-user. Typically an EMBL or Swiss-Prot feature label.
+ Sequence feature name
+
+
+ A feature label identifies a feature of a sequence database entry. When used with the database name and the entry's primary accession number, it is a unique identifier of that feature.
+ Sequence feature label
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a sequence feature-containing entity adhering to the standard feature naming scheme used by all EMBOSS applications.
+ UFO
+
+
+ EMBOSS Uniform Feature Object
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ String of one or more ASCII characters representing a codon.
+
+ Codon name
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Moby:GeneAccessionList
+ An identifier of a gene, such as a name/symbol or a unique identifier of a gene in a database.
+
+
+
+ Gene identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:Global_GeneCommonName
+ Moby_namespace:Global_GeneSymbol
+ The short name of a gene; a single word that does not contain white space characters. It is typically derived from the gene name.
+
+
+
+ Gene symbol
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs:LocusID
+ http://www.geneontology.org/doc/GO.xrf_abbs:NCBI_Gene
+ An NCBI unique identifier of a gene.
+ Entrez gene ID
+ Gene identifier (Entrez)
+ Gene identifier (NCBI)
+ NCBI gene ID
+ NCBI geneid
+
+
+
+ Gene ID (NCBI)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An NCBI RefSeq unique identifier of a gene.
+
+ Gene identifier (NCBI RefSeq)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An NCBI UniGene unique identifier of a gene.
+
+ Gene identifier (NCBI UniGene)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An Entrez unique identifier of a gene.
+
+ Gene identifier (Entrez)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a gene or feature from the CGD database.
+ CGD ID
+
+
+
+ Gene ID (CGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a gene from DictyBase.
+
+
+
+ Gene ID (DictyBase)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a gene (or other feature) from the Ensembl database.
+ Gene ID (Ensembl)
+
+
+
+ Ensembl gene ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ S[0-9]+
+ Identifier of an entry from the SGD database.
+ SGD identifier
+
+
+
+ Gene ID (SGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9\.-]*
+ Moby_namespace:GeneDB
+ Identifier of a gene from the GeneDB database.
+ GeneDB identifier
+
+
+
+ Gene ID (GeneDB)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the TIGR database.
+
+
+
+ TIGR identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gene:[0-9]{7}
+ Identifier of a gene from the TAIR database.
+
+
+
+ TAIR accession (gene)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein structural domain.
+
+
+
+ This is typically a character or string concatenated with a PDB identifier and a chain identifier.
+ Protein domain ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a protein domain (or other node) from the SCOP database.
+
+
+
+ SCOP domain identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1nr3A00
+ Identifier of a protein domain from CATH.
+ CATH domain identifier
+
+
+
+ CATH domain ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A SCOP concise classification string (sccs) is a compact representation of a SCOP domain classification.
+
+
+
+ An sccs includes the class (alphabetical), fold, superfamily and family (all numerical) to which a given domain belongs.
+ SCOP concise classification string (sccs)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 33229
+ Unique identifier (number) of an entry in the SCOP hierarchy, for example 33229.
+ SCOP unique identifier
+ sunid
+
+
+
+ A sunid uniquely identifies an entry in the SCOP hierarchy, including leaves (the SCOP domains) and higher level nodes including entries corresponding to the protein level.
+ SCOP sunid
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3.30.1190.10.1.1.1.1.1
+ A code number identifying a node from the CATH database.
+ CATH code
+ CATH node identifier
+
+
+
+ CATH node ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a biological kingdom (Bacteria, Archaea, or Eukaryotes).
+
+
+
+ Kingdom name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a species (typically a taxonomic group) of organism.
+ Organism species
+
+
+
+ Species name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a strain of an organism variant, typically a plant, virus or bacterium.
+
+
+
+ Strain name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A string of characters that name or otherwise identify a resource on the Internet.
+ URIs
+
+
+ URI
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a biological or bioinformatics database.
+ Database identifier
+
+
+
+ Database ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a directory.
+
+
+
+ Directory name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name (or part of a name) of a file (of any type).
+
+
+
+ File name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an ontology of biological or bioinformatics concepts and relations.
+
+
+
+ Ontology name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Link
+ Moby:URL
+ A Uniform Resource Locator (URL).
+
+
+ URL
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A Uniform Resource Name (URN).
+
+
+ URN
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A Life Science Identifier (LSID) - a unique identifier of some data.
+ Life Science Identifier
+
+
+ LSIDs provide a standard way to locate and describe data. An LSID is represented as a Uniform Resource Name (URN) with the following format: URN:LSID:<Authority>:<Namespace>:<ObjectID>[:<Version>]
+ LSID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a biological or bioinformatics database.
+
+
+
+ Database name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a molecular sequence database.
+
+ Sequence database name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a file (of any type) with restricted possible values.
+
+
+
+ Enumerated file name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The extension of a file name.
+
+
+
+ A file extension is the characters appearing after the final '.' in the file name.
+ File name extension
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The base name of a file.
+
+
+
+ A file base name is the file name stripped of its directory specification and extension.
+ File base name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a QSAR descriptor.
+
+
+
+ QSAR descriptor name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of an entry from a database where the same type of identifier is used for objects (data) of different semantic type.
+
+ This concept is required for completeness. It should never have child concepts.
+ Database entry identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of molecular sequence(s) or entries from a molecular sequence database.
+
+
+
+ Sequence identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a set of molecular sequence(s).
+
+
+
+ Sequence set ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Identifier of a sequence signature (motif or profile) for example from a database of sequence patterns.
+
+ Sequence signature identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a molecular sequence alignment, for example a record from an alignment database.
+
+
+
+ Sequence alignment ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of a phylogenetic distance matrix.
+
+ Phylogenetic distance matrix identifier
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a phylogenetic tree for example from a phylogenetic tree database.
+
+
+
+ Phylogenetic tree ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a comparison matrix.
+ Substitution matrix identifier
+
+
+
+ Comparison matrix identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique and persistent identifier of a molecular tertiary structure, typically an entry from a structure database.
+
+
+
+ Structure ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier or name of a structural (3D) profile or template (representing a structure or structure alignment).
+ Structural profile identifier
+
+
+
+ Structural (3D) profile ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of tertiary structure alignments.
+
+
+
+ Structure alignment ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an index of amino acid physicochemical and biochemical property data.
+
+
+
+ Amino acid index ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Molecular interaction ID
+ Identifier of a report of protein interactions from a protein interaction database (typically).
+
+
+
+ Protein interaction ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein family.
+ Protein secondary database record identifier
+
+
+
+ Protein family identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name of a codon usage table.
+
+
+
+ Codon usage table name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a transcription factor (or a TF binding site).
+
+
+
+ Transcription factor identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of microarray data.
+
+
+
+ Experiment annotation ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of electron microscopy data.
+
+
+
+ Electron microscopy model ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a report of gene expression (e.g. a gene expression profile) from a database.
+ Gene expression profile identifier
+
+
+
+ Gene expression report ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of genotypes and phenotypes.
+
+
+
+ Genotype and phenotype annotation ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of biological pathways or networks.
+
+
+
+ Pathway or network identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a biological or biomedical workflow, typically from a database of workflows.
+
+
+
+ Workflow ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a data type definition from some provider.
+ Data resource definition identifier
+
+
+
+ Data resource definition ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a mathematical model, typically an entry from a database.
+ Biological model identifier
+
+
+
+ Biological model ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of chemicals.
+ Chemical compound identifier
+ Compound ID
+ Small molecule identifier
+
+
+
+ Compound identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique (typically numerical) identifier of a concept in an ontology of biological or bioinformatics concepts and relations.
+
+
+
+ Ontology concept ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a scientific article.
+ Article identifier
+
+
+
+ Article ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FB[a-zA-Z_0-9]{2}[0-9]{7}
+ Identifier of an object from the FlyBase database.
+
+
+
+ FlyBase ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an object from the WormBase database, usually a human-readable name.
+
+
+
+ WormBase name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Class of an object from the WormBase database.
+
+
+
+ A WormBase class describes the type of object such as 'sequence' or 'protein'.
+ WormBase class
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A persistent, unique identifier of a molecular sequence database entry.
+ Sequence accession number
+
+
+
+ Sequence accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing a type of molecular sequence.
+
+ Sequence type might reflect the molecule (protein, nucleic acid etc) or the sequence itself (gapped, ambiguous etc).
+ Sequence type
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a sequence-based entity adhering to the standard sequence naming scheme used by all EMBOSS applications.
+ EMBOSS USA
+
+
+
+ EMBOSS Uniform Sequence Address
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a protein sequence database entry.
+ Protein sequence accession number
+
+
+
+ Sequence accession (protein)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a nucleotide sequence database entry.
+ Nucleotide sequence accession number
+
+
+
+ Sequence accession (nucleic acid)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (NC|AC|NG|NT|NW|NZ|NM|NR|XM|XR|NP|AP|XP|YP|ZP)_[0-9]+
+ Accession number of a RefSeq database entry.
+ RefSeq ID
+
+
+
+ RefSeq accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Accession number of a UniProt (protein sequence) database entry. May contain version or isoform number.
+
+ UniProt accession (extended)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a PIR sequence database entry.
+ PIR ID
+ PIR accession number
+
+
+
+ PIR identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.2
+
+ Identifier of a TREMBL sequence database entry.
+
+
+ TREMBL accession
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Primary identifier of a Gramene database entry.
+ Gramene primary ID
+
+
+
+ Gramene primary identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a (nucleic acid) entry from the EMBL/GenBank/DDBJ databases.
+
+
+
+ EMBL/GenBank/DDBJ ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of an entry (gene cluster) from the NCBI UniGene database.
+ UniGene ID
+ UniGene cluster ID
+ UniGene identifier
+
+
+
+ Sequence cluster ID (UniGene)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a dbEST database entry.
+ dbEST ID
+
+
+
+ dbEST accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a dbSNP database entry.
+ dbSNP identifier
+
+
+
+ dbSNP ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The EMBOSS type of a molecular sequence.
+
+ See the EMBOSS documentation (http://emboss.sourceforge.net/) for a definition of what this includes.
+ EMBOSS sequence type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ List of EMBOSS Uniform Sequence Addresses (EMBOSS listfile).
+
+ EMBOSS listfile
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a cluster of molecular sequence(s).
+
+
+
+ Sequence cluster ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the COG database.
+ COG ID
+
+
+
+ Sequence cluster ID (COG)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a sequence motif, for example an entry from a motif database.
+
+
+
+ Sequence motif identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a sequence profile.
+
+
+
+ A sequence profile typically represents a sequence alignment.
+ Sequence profile ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the ELMdb database of protein functional sites.
+
+
+
+ ELM ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PS[0-9]{5}
+ Accession number of an entry from the Prosite database.
+ Prosite ID
+
+
+
+ Prosite accession number
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier or name of a HMMER hidden Markov model.
+
+
+
+ HMMER hidden Markov model ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier or name of a profile from the JASPAR database.
+
+
+
+ JASPAR profile ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a sequence alignment.
+
+ Possible values include for example the EMBOSS alignment types, BLAST alignment types and so on.
+ Sequence alignment type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The type of a BLAST sequence alignment.
+
+ BLAST sequence alignment type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a phylogenetic tree.
+
+ For example 'nj', 'upgma' etc.
+ Phylogenetic tree type
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the TreeBASE database.
+
+
+
+ TreeBASE study accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the TreeFam database.
+
+
+
+ TreeFam accession number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a comparison matrix.
+
+ For example 'blosum', 'pam', 'gonnet', 'id' etc. Comparison matrix type may be required where a series of matrices of a certain type are used.
+ Comparison matrix type
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique name or identifier of a comparison matrix.
+ Substitution matrix name
+
+
+
+ See for example http://www.ebi.ac.uk/Tools/webservices/help/matrix.
+ Comparison matrix name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9][a-zA-Z_0-9]{3}
+ An identifier of an entry from the PDB database.
+ PDB identifier
+ PDBID
+
+
+
+ A PDB identification code which consists of 4 characters, the first of which is a digit in the range 0 - 9; the remaining 3 are alphanumeric, and letters are upper case only. (source: https://cdn.rcsb.org/wwpdb/docs/documentation/file-format/PDB_format_1996.pdf)
+ PDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the AAindex database.
+
+
+
+ AAindex ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the BIND database.
+
+
+
+ BIND accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EBI\-[0-9]+
+ Accession number of an entry from the IntAct database.
+
+
+
+ IntAct accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a protein family.
+
+
+
+ Protein family name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an InterPro entry, usually indicating the type of protein matches for that entry.
+
+
+
+ InterPro entry name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ IPR015590
+ IPR[0-9]{6}
+ Primary accession number of an InterPro entry.
+ InterPro primary accession
+ InterPro primary accession number
+
+
+
+ Every InterPro entry has a unique accession number to provide a persistent citation of database records.
+ InterPro accession
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Secondary accession number of an InterPro entry.
+ InterPro secondary accession number
+
+
+
+ InterPro secondary accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the Gene3D database.
+
+
+
+ Gene3D ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PIRSF[0-9]{6}
+ Unique identifier of an entry from the PIRSF database.
+
+
+
+ PIRSF ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PR[0-9]{5}
+ The unique identifier of an entry in the PRINTS database.
+
+
+
+ PRINTS code
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PF[0-9]{5}
+ Accession number of a Pfam entry.
+
+
+
+ Pfam accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SM[0-9]{5}
+ Accession number of an entry from the SMART database.
+
+
+
+ SMART accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier (number) of a hidden Markov model from the Superfamily database.
+
+
+
+ Superfamily hidden Markov model number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry (family) from the TIGRFam database.
+ TIGRFam accession number
+
+
+
+ TIGRFam ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PD[0-9]+
+ A ProDom domain family accession number.
+
+
+
+ ProDom is a protein domain family database.
+ ProDom accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the TRANSFAC database.
+
+
+
+ TRANSFAC accession number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [AEP]-[a-zA-Z_0-9]{4}-[0-9]+
+ Accession number of an entry from the ArrayExpress database.
+ ArrayExpress experiment ID
+
+
+
+ ArrayExpress accession number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ PRIDE experiment accession number.
+
+
+
+ PRIDE experiment accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the EMDB electron microscopy database.
+
+
+
+ EMDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [GDS|GPL|GSE|GSM][0-9]+
+ Accession number of an entry from the GEO database.
+
+
+
+ GEO accession number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the GermOnline database.
+
+
+
+ GermOnline ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the EMAGE database.
+
+
+
+ EMAGE ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of an entry from a database of disease.
+
+
+
+ Disease ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the HGVbase database.
+
+
+
+ HGVbase ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry from the HIVDB database.
+
+ HIVDB identifier
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [*#+%^]?[0-9]{6}
+ Identifier of an entry from the OMIM database.
+
+
+
+ OMIM ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an object from one of the KEGG databases (excluding the GENES division).
+
+
+
+ KEGG object identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ REACT_[0-9]+(\.[0-9]+)?
+ Identifier of an entry from the Reactome database.
+ Reactome ID
+
+
+
+ Pathway ID (reactome)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry from the aMAZE database.
+
+ Pathway ID (aMAZE)
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a pathway from the BioCyc biological pathways database.
+ BioCyc pathway ID
+
+
+
+ Pathway ID (BioCyc)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the INOH database.
+ INOH identifier
+
+
+
+ Pathway ID (INOH)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the PATIKA database.
+ PATIKA ID
+
+
+
+ Pathway ID (PATIKA)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the CPDB (ConsensusPathDB) biological pathways database, which is an identifier from an external database integrated into CPDB.
+ CPDB ID
+
+
+
+ This concept refers to identifiers used by the databases collated in CPDB; CPDB identifiers are not independently defined.
+ Pathway ID (CPDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PTHR[0-9]{5}
+ Identifier of a biological pathway from the Panther Pathways database.
+ Panther Pathways ID
+
+
+
+ Pathway ID (Panther)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ MIR:00100005
+ MIR:[0-9]{8}
+ Unique identifier of a MIRIAM data resource.
+
+
+
+ This is the identifier used internally by MIRIAM for a data type.
+ MIRIAM identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a data type from the MIRIAM database.
+
+
+
+ MIRIAM data type name
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ urn:miriam:pubmed:16333295|urn:miriam:obo.go:GO%3A0045202
+ The URI (URL or URN) of a data entity from the MIRIAM database.
+ identifiers.org synonym
+
+
+
+ A MIRIAM URI consists of the URI of the MIRIAM data type (PubMed, UniProt etc) followed by the identifier of an element of that data type, for example PMID for a publication or an accession number for a GO term.
+ MIRIAM URI
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UniProt|Enzyme Nomenclature
+ The primary name of a data type from the MIRIAM database.
+
+
+
+ The primary name of a MIRIAM data type is taken from a controlled vocabulary.
+ MIRIAM data type primary name
+
+
+
+
+ UniProt|Enzyme Nomenclature
+ A protein entity has the MIRIAM data type 'UniProt', and an enzyme has the MIRIAM data type 'Enzyme Nomenclature'.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A synonymous name of a data type from the MIRIAM database.
+
+
+
+ A synonymous name for a MIRIAM data type taken from a controlled vocabulary.
+ MIRIAM data type synonymous name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a Taverna workflow.
+
+
+
+ Taverna workflow ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a biological (mathematical) model.
+
+
+
+ Biological model name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (BIOMD|MODEL)[0-9]{10}
+ Unique identifier of an entry from the BioModel database.
+
+
+
+ BioModel ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Chemical structure specified in PubChem Compound Identification (CID), a non-zero integer identifier for a unique chemical structure.
+ PubChem compound accession identifier
+
+
+
+ PubChem CID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the ChemSpider database.
+
+
+
+ ChemSpider ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CHEBI:[0-9]+
+ Identifier of an entry from the ChEBI database.
+ ChEBI IDs
+ ChEBI identifier
+
+
+
+ ChEBI ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the BioPax ontology.
+
+
+
+ BioPax concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a concept from The Gene Ontology.
+ GO concept identifier
+
+
+
+ GO concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the MeSH vocabulary.
+
+
+
+ MeSH concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the HGNC controlled vocabulary.
+
+
+
+ HGNC concept ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 9662|3483|182682
+ [1-9][0-9]{0,8}
+ A stable unique identifier for each taxon (for a species, a family, an order, or any other group) in the NCBI taxonomy database.
+ NCBI tax ID
+ NCBI taxonomy identifier
+
+
+
+ NCBI taxonomy ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the Plant Ontology (PO).
+
+
+
+ Plant Ontology concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the UMLS vocabulary.
+
+
+
+ UMLS concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FMA:[0-9]+
+ An identifier of a concept from Foundational Model of Anatomy.
+
+
+
+ Classifies anatomical entities according to their shared characteristics (genus) and distinguishing characteristics (differentia). Specifies the part-whole and spatial relationships of the entities, morphological transformation of the entities during prenatal development and the postnatal life cycle and principles, rules and definitions according to which classes and relationships in the other three components of FMA are represented.
+ FMA concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the EMAP mouse ontology.
+
+
+
+ EMAP concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the ChEBI ontology.
+
+
+
+ ChEBI concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the MGED ontology.
+
+
+
+ MGED concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a concept from the myGrid ontology.
+
+
+
+ The ontology is provided as two components, the service ontology and the domain ontology. The domain ontology provides concepts for core bioinformatics data types and their relations. The service ontology describes the physical and operational features of web services.
+ myGrid concept ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 4963447
+ [1-9][0-9]{0,8}
+ PubMed unique identifier of an article.
+ PMID
+
+
+
+ PubMed ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (doi\:)?[0-9]{2}\.[0-9]{4}/.*
+ Digital Object Identifier (DOI) of a published article.
+ Digital Object Identifier
+
+
+
+ DOI
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Medline UI (unique identifier) of an article.
+ Medline unique identifier
+
+
+
+ The use of Medline UI has been replaced by the PubMed unique identifier.
+ Medline UI
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a computer package, application, method or function.
+
+
+
+ Tool name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The unique name of a signature (sequence classifier) method.
+
+
+
+ Signature methods from http://www.ebi.ac.uk/Tools/InterProScan/help.html#results include BlastProDom, FPrintScan, HMMPIR, HMMPfam, HMMSmart, HMMTigr, ProfileScan, ScanRegExp, SuperFamily and HAMAP.
+ Tool name (signature)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a BLAST tool.
+ BLAST name
+
+
+
+ This includes 'blastn', 'blastp', 'blastx', 'tblastn' and 'tblastx'.
+ Tool name (BLAST)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a FASTA tool.
+
+
+
+ This includes 'fasta3', 'fastx3', 'fasty3', 'fastf3', 'fasts3' and 'ssearch'.
+ Tool name (FASTA)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of an EMBOSS application.
+
+
+
+ Tool name (EMBOSS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of an EMBASSY package.
+
+
+
+ Tool name (EMBASSY package)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR constitutional descriptor.
+ QSAR constitutional descriptor
+
+
+ QSAR descriptor (constitutional)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR electronic descriptor.
+ QSAR electronic descriptor
+
+
+ QSAR descriptor (electronic)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR geometrical descriptor.
+ QSAR geometrical descriptor
+
+
+ QSAR descriptor (geometrical)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR topological descriptor.
+ QSAR topological descriptor
+
+
+ QSAR descriptor (topological)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A QSAR molecular descriptor.
+ QSAR molecular descriptor
+
+
+ QSAR descriptor (molecular)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any collection of multiple protein sequences and associated metadata that do not (typically) correspond to common sequence database records or database entries.
+
+
+ Sequence set (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any collection of multiple nucleotide sequences and associated metadata that do not (typically) correspond to common sequence database records or database entries.
+
+
+ Sequence set (nucleic acid)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A set of sequences that have been clustered or otherwise classified as belonging to a group including (typically) sequence cluster information.
+
+
+ The cluster might include sequence identifiers, short descriptions, alignment and summary information.
+ Sequence cluster
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A file of intermediate results from a PSIBLAST search that is used for priming the search in the next PSIBLAST iteration.
+
+ A Psiblast checkpoint file uses ASN.1 Binary Format and usually has the extension '.asn'.
+ Psiblast checkpoint file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Sequences generated by HMMER package in FASTA-style format.
+
+ HMMER synthetic sequences set
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A protein sequence cleaved into peptide fragments (by enzymatic or chemical cleavage) with fragment masses.
+
+
+ Proteolytic digest
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SO:0000412
+ Restriction digest fragments from digesting a nucleotide sequence with restriction sites using a restriction endonuclease.
+
+
+ Restriction digest
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Oligonucleotide primer(s) for PCR and DNA amplification, for example a minimal primer set.
+
+
+ PCR primers
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ File of sequence vectors used by EMBOSS vectorstrip application, or any file in same format.
+
+ vectorstrip cloning vector definition file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A library of nucleotide sequences to avoid during hybridisation events. Hybridisation of the internal oligo to sequences in this library is avoided, rather than priming from them. The file is in a restricted FASTA format.
+
+ Primer3 internal oligo mishybridizing library
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A nucleotide sequence library of sequences to avoid during amplification (for example repetitive sequences, or possibly the sequences of genes in a gene family that should not be amplified). The file must be in a restricted FASTA format.
+
+ Primer3 mispriming library file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ File of one or more pairs of primer sequences, as used by EMBOSS primersearch application.
+
+ primersearch primer pairs sequence record
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A cluster of protein sequences.
+ Protein sequence cluster
+
+
+ The sequences are typically related, for example a family of sequences.
+ Sequence cluster (protein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A cluster of nucleotide sequences.
+ Nucleotide sequence cluster
+
+
+ The sequences are typically related, for example a family of sequences.
+ Sequence cluster (nucleic acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The size (length) of a sequence, subsequence or region in a sequence, or range(s) of lengths.
+
+
+ Sequence length
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Size of a sequence word.
+
+ Word size is used for example in word-based sequence database search methods.
+ Word size
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Size of a sequence window.
+
+ A window is a region of fixed size but not fixed position over a molecular sequence. It is typically moved (computationally) over a sequence during scoring.
+ Window size
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Specification of range(s) of length of sequences.
+
+ Sequence length range
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Report on basic information about a molecular sequence such as name, accession number, type (nucleic or protein), length, description etc.
+
+
+ Sequence information report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report about non-positional sequence features, typically a report on general molecular sequence properties derived from sequence analysis.
+ Sequence properties report
+
+
+ Sequence property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotation of positional features of molecular sequence(s), i.e. that can be mapped to position(s) in the sequence.
+ Feature record
+ Features
+ General sequence features
+ Sequence features report
+ SO:0000110
+
+
+ This includes annotation of positional sequence features, organised into a standard feature table, or any other report of sequence features. General feature reports are a source of sequence feature table information although internal conversion would be required.
+ Sequence features
+ http://purl.bioontology.org/ontology/MSH/D058977
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Comparative data on sequence features such as statistics, intersections (and data on intersections), differences etc.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Sequence features (comparative)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report of general sequence properties derived from protein sequence data.
+
+ Sequence property (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report of general sequence properties derived from nucleotide sequence data.
+
+ Sequence property (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on sequence complexity, for example low-complexity or repeat regions in sequences.
+ Sequence property (complexity)
+
+
+ Sequence complexity report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on ambiguity in molecular sequence(s).
+ Sequence property (ambiguity)
+
+
+ Sequence ambiguity report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report (typically a table) on character or word composition / frequency of a molecular sequence(s).
+ Sequence composition
+ Sequence property (composition)
+
+
+ Sequence composition report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on peptide fragments of certain molecular weight(s) in one or more protein sequences.
+
+
+ Peptide molecular weight hits
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of third base position variability in a nucleotide sequence.
+
+
+ Base position variability plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A table of character or word composition / frequency of a molecular sequence.
+
+ Sequence composition table
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of base frequencies of a nucleotide sequence.
+
+
+ Base frequencies table
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of word composition of a nucleotide sequence.
+
+
+ Base word frequencies table
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of amino acid frequencies of a protein sequence.
+ Sequence composition (amino acid frequencies)
+
+
+ Amino acid frequencies table
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of amino acid word composition of a protein sequence.
+ Sequence composition (amino acid words)
+
+
+ Amino acid word frequencies table
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation of a molecular sequence in DAS format.
+
+ DAS sequence feature annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotation of positional sequence features, organised into a standard feature table.
+ Sequence feature table
+
+
+ Feature table
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map of (typically one) DNA sequence annotated with positional or non-positional features.
+ DNA map
+
+
+ Map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report on intrinsic positional features of a nucleotide sequence, formatted to be machine-readable.
+ Feature table (nucleic acid)
+ Nucleic acid feature table
+ Genome features
+ Genomic features
+
+
+ This includes nucleotide sequence feature annotation in any known sequence feature table format and any other report of nucleic acid features.
+ Nucleic acid features
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report on intrinsic positional features of a protein sequence.
+ Feature table (protein)
+ Protein feature table
+
+
+ This includes protein sequence feature annotation in any known sequence feature table format and any other report of protein features.
+ Protein features
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GeneticMap
+ A map showing the relative positions of genetic markers in a nucleic acid sequence, based on estimation of non-physical distance such as recombination frequencies.
+ Linkage map
+
+
+ A genetic (linkage) map indicates the proximity of two genes on a chromosome, whether two genes are linked and the frequency they are transmitted together to an offspring. They are limited to genetic markers of traits observable only in whole organisms.
+ Genetic map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map of genetic markers in a contiguous, assembled genomic sequence, with the sizes and separation of markers measured in base pairs.
+
+
+ A sequence map typically includes annotation on significant subsequences such as contigs, haplotypes and genes. The contigs shown will (typically) be a set of small overlapping clones representing a complete chromosomal segment.
+ Sequence map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map of DNA (linear or circular) annotated with physical features or landmarks such as restriction sites, cloned DNA fragments, genes or genetic markers, along with the physical distances between them.
+
+
+ Distance in a physical map is measured in base pairs. A physical map might be ordered relative to a reference map (typically a genetic map) in the process of genome sequencing.
+ Physical map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image of a sequence with matches to signatures, motifs or profiles.
+
+
+ Sequence signature map
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map showing banding patterns derived from direct observation of a stained chromosome.
+ Chromosome map
+ Cytogenic map
+ Cytologic map
+
+
+ This is the lowest-resolution physical map and can provide only rough estimates of physical (base pair) distances. Like a genetic map, they are limited to genetic markers of traits observable only in whole organisms.
+ Cytogenetic map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A gene map showing distances between loci based on relative cotransduction frequencies.
+
+
+ DNA transduction map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence map of a single gene annotated with genetic features such as introns, exons, untranslated regions, polyA signals, promoters, enhancers and (possibly) mutations defining alleles of a gene.
+
+
+ Gene map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence map of a plasmid (circular DNA).
+
+
+ Plasmid map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence map of a whole genome.
+
+
+ Genome map
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of the restriction enzyme cleavage sites (restriction sites) in a nucleic acid sequence.
+
+
+ Restriction map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image showing matches between protein sequence(s) and InterPro Entries.
+
+
+ The sequence(s) might be screened against InterPro, or be the sequences from the InterPro entry itself. Each protein is represented as a scaled horizontal line with colored bars indicating the position of the matches.
+ InterPro compact match image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image showing detailed information on matches between protein sequence(s) and InterPro Entries.
+
+
+ The sequence(s) might be screened against InterPro, or be the sequences from the InterPro entry itself.
+ InterPro detailed match image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Image showing the architecture of InterPro domains in a protein sequence.
+
+
+ The sequence(s) might be screened against InterPro, or be the sequences from the InterPro entry itself. Domain architecture is shown as a series of non-overlapping domains in the protein.
+ InterPro architecture image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ SMART protein schematic in PNG format.
+
+ SMART protein schematic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Images based on GlobPlot prediction of intrinsic disordered regions and globular domains in protein sequences.
+
+
+ GlobPlot domain image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Report on the location of matches to profiles, motifs (conserved or functional patterns) or other signatures in one or more sequences.
+
+
+ Sequence motif matches
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Location of short repetitive subsequences (repeat sequences) in (typically nucleotide) sequences.
+
+ The report might include derived data map such as classification, annotation, organisation, periodicity etc.
+ Sequence features (repeats)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on predicted or actual gene structure, regions which make an RNA product and features such as promoters, coding regions, splice sites etc.
+
+ Gene and transcript structure (report)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ regions of a nucleic acid sequence containing mobile genetic elements.
+
+
+ Mobile genetic elements
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on quadruplex-forming motifs in a nucleotide sequence.
+
+ Nucleic acid features (quadruplexes)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Report on nucleosome formation potential or exclusion sequence(s).
+
+
+ Nucleosome exclusion sequences
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ A report on exonic splicing enhancers (ESE) in an exon.
+
+
+ Gene features (exonic splicing enhancer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on microRNA sequence (miRNA) or precursor, microRNA targets, miRNA binding sites in an RNA sequence etc.
+
+ Nucleic acid features (microRNA)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ protein-coding regions including coding sequences (CDS), exons, translation initiation sites and open reading frames.
+
+
+ Coding region
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A report on selenocysteine insertion sequence (SECIS) element in a DNA sequence.
+
+ Gene features (SECIS element)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ transcription factor binding sites (TFBS) in a DNA sequence.
+
+
+ Transcription factor binding sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on predicted or known key residue positions (sites) in a protein sequence, such as binding or functional sites.
+
+ Use this concept for collections of specific sites which are not necessarily contiguous, rather than contiguous stretches of amino acids.
+ Protein features (sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ signal peptides or signal peptide cleavage sites in protein sequences.
+
+
+ Protein features report (signal peptides)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ cleavage sites (for a proteolytic enzyme or agent) in a protein sequence.
+
+
+ Protein features report (cleavage sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ post-translation modifications in a protein sequence, typically describing the specific sites involved.
+
+
+ Protein features (post-translation modifications)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ catalytic residues (active site) of an enzyme.
+
+
+ Protein features report (active sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ ligand-binding (non-catalytic) residues of a protein, such as sites that bind metal, prosthetic groups or lipids.
+
+
+ Protein features report (binding sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ A report on antigenic determinant sites (epitopes) in proteins, from sequence and / or structural data.
+
+
+ Epitope mapping is commonly done during vaccine design.
+ Protein features (epitopes)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ RNA and DNA-binding proteins and binding sites in protein sequences.
+
+
+ Protein features report (nucleic acid binding sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on epitopes that bind to MHC class I molecules.
+
+ MHC Class I epitopes report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on predicted epitopes that bind to MHC class II molecules.
+
+ MHC Class II epitopes report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ A report or plot of PEST sites in a protein sequence.
+
+
+ 'PEST' motifs target proteins for proteolytic degradation and reduce the half-lives of proteins dramatically.
+ Protein features (PEST sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Scores from a sequence database search (for example a BLAST search).
+
+ Sequence database hits scores list
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignments from a sequence database search (for example a BLAST search).
+
+ Sequence database hits alignments list
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on the evaluation of the significance of sequence similarity scores from a sequence database search (for example a BLAST search).
+
+ Sequence database hits evaluation data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alphabet for the motifs (patterns) that MEME will search for.
+
+ MEME motif alphabet
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ MEME background frequencies file.
+
+ MEME background frequencies file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ File of directives for ordering and spacing of MEME motifs.
+
+ MEME motifs directive file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dirichlet distribution used by hidden Markov model analysis programs.
+
+
+ Dirichlet distribution
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+
+ Emission and transition counts of a hidden Markov model, generated once HMM has been determined, for example after residues/gaps have been assigned to match, delete and insert states.
+
+ HMM emission and transition counts
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Regular expression pattern.
+
+
+ Regular expression
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any specific or conserved pattern (typically expressed as a regular expression) in a molecular sequence.
+
+
+ Sequence motif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some type of statistical model representing a (typically multiple) sequence alignment.
+
+
+ Sequence profile
+ http://semanticscience.org/resource/SIO_010531
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report about a specific or conserved protein sequence pattern.
+ InterPro entry
+ Protein domain signature
+ Protein family signature
+ Protein region signature
+ Protein repeat signature
+ Protein site signature
+
+
+ Protein signature
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A nucleotide regular expression pattern from the Prosite database.
+
+ Prosite nucleotide pattern
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A protein regular expression pattern from the Prosite database.
+
+ Prosite protein pattern
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (typically representing a sequence alignment) that is a simple matrix of nucleotide (or amino acid) counts per position.
+ PFM
+
+
+ Position frequency matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (typically representing a sequence alignment) that is a weighted matrix of nucleotide (or amino acid) counts per position.
+ PWM
+
+
+ Contributions of individual sequences to the matrix might be uneven (weighted).
+ Position weight matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (typically representing a sequence alignment) derived from a matrix of nucleotide (or amino acid) counts per position that reflects information content at each position.
+ ICM
+
+
+ Information content matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A statistical Markov model of a system which is assumed to be a Markov process with unobserved (hidden) states. For example, a hidden Markov model representation of a set or alignment of sequences.
+ HMM
+
+
+ Hidden Markov model
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more fingerprints (sequence classifiers) as used in the PRINTS database.
+
+
+ Fingerprint
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A protein signature of the type used in the EMBASSY Signature package.
+
+ Domainatrix signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ NULL hidden Markov model representation used by the HMMER package.
+
+ HMMER NULL hidden Markov model
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein family signature (sequence classifier) from the InterPro database.
+
+ Protein family signatures cover all domains in the matching proteins and span >80% of the protein length and with no adjacent protein domain signatures or protein region signatures.
+ Protein family signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein domain signature (sequence classifier) from the InterPro database.
+
+ Protein domain signatures identify structural or functional domains or other units with defined boundaries.
+ Protein domain signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein region signature (sequence classifier) from the InterPro database.
+
+ A protein region signature defines a region which cannot be described as a protein family or domain signature.
+ Protein region signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein repeat signature (sequence classifier) from the InterPro database.
+
+ A protein repeat signature is a repeated protein motif that is not expected, in single copy, to fold independently into a globular domain.
+ Protein repeat signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A protein site signature (sequence classifier) from the InterPro database.
+
+ A protein site signature is a classifier for a specific site in a protein.
+ Protein site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein conserved site signature (sequence classifier) from the InterPro database.
+
+ A protein conserved site signature is any short sequence pattern that may contain one or more unique residues and cannot be described as an active site, binding site or post-translational modification.
+ Protein conserved site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein active site signature (sequence classifier) from the InterPro database.
+
+ A protein active site signature corresponds to an enzyme catalytic pocket. An active site typically includes non-contiguous residues, therefore multiple signatures may be required to describe an active site; these are residues involved in enzymatic reactions for which mutational data is typically available.
+ Protein active site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein binding site signature (sequence classifier) from the InterPro database.
+
+ A protein binding site signature corresponds to a site that reversibly binds chemical compounds, which are not themselves substrates of the enzymatic reaction. This includes enzyme cofactors and residues involved in electron transport or protein structure modification.
+ Protein binding site signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A protein post-translational modification signature (sequence classifier) from the InterPro database.
+
+ A protein post-translational modification signature corresponds to sites that undergo modification of the primary structure, typically to activate or de-activate a function. For example, methylation, sumoylation, glycosylation etc. The modification might be permanent or reversible.
+ Protein post-translational modification signature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Alignment of exactly two molecular sequences.
+ Sequence alignment (pair)
+
+
+ Pair sequence alignment
+ http://semanticscience.org/resource/SIO_010068
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of more than two molecular sequences.
+
+ Sequence alignment (multiple)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple nucleotide sequences.
+ Sequence alignment (nucleic acid)
+ DNA sequence alignment
+ RNA sequence alignment
+
+
+ Nucleic acid sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple protein sequences.
+ Sequence alignment (protein)
+
+
+ Protein sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of multiple molecular sequences of different types.
+ Sequence alignment (hybrid)
+
+
+ Hybrid sequence alignments include for example genomic DNA to EST, cDNA or mRNA.
+ Hybrid sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment of exactly two nucleotide sequences.
+
+ Sequence alignment (nucleic acid pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment of exactly two protein sequences.
+
+ Sequence alignment (protein pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of exactly two molecular sequences of different types.
+
+ Hybrid sequence alignment (pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of more than two nucleotide sequences.
+
+ Multiple nucleotide sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of more than two protein sequences.
+
+ Multiple protein sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple floating point number defining the penalty for opening or extending a gap in an alignment.
+
+
+ Alignment score or penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Whether end gaps are scored or not.
+
+ Score end gaps control
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controls the order of sequences in an output sequence alignment.
+
+ Aligned sequence order
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for opening a gap in an alignment.
+
+
+ Gap opening penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for extending a gap in an alignment.
+
+
+ Gap extension penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for gaps that are close together in an alignment.
+
+
+ Gap separation penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ A penalty for gaps at the termini of an alignment, either from the N/C terminal of protein or 5'/3' terminal of nucleotide sequences.
+
+ Terminal gap penalty
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The score for a 'match' used in various sequence database search applications with simple scoring schemes.
+
+
+ Match reward score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The score (penalty) for a 'mismatch' used in various alignment and sequence database search applications with simple scoring schemes.
+
+
+ Mismatch penalty score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ This is the threshold drop in score at which extension of word alignment is halted.
+
+
+ Drop off score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple integer number defining the penalty for opening a gap in an alignment.
+
+ Gap opening penalty (integer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for opening a gap in an alignment.
+
+ Gap opening penalty (float)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple integer number defining the penalty for extending a gap in an alignment.
+
+ Gap extension penalty (integer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for extending a gap in an alignment.
+
+ Gap extension penalty (float)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple integer number defining the penalty for gaps that are close together in an alignment.
+
+ Gap separation penalty (integer)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple floating point number defining the penalty for gaps that are close together in an alignment.
+
+ Gap separation penalty (float)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A number defining the penalty for opening gaps at the termini of an alignment, either from the N/C terminal of protein or 5'/3' terminal of nucleotide sequences.
+
+
+ Terminal gap opening penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A number defining the penalty for extending gaps at the termini of an alignment, either from the N/C terminal of protein or 5'/3' terminal of nucleotide sequences.
+
+
+ Terminal gap extension penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence identity is the number (%) of matches (identical characters) in positions from an alignment of two molecular sequences.
+
+
+ Sequence identity
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence similarity is the similarity (expressed as a percentage) of two molecular sequences calculated from their alignment, a scoring matrix for scoring characters substitutions and penalties for gap insertion and extension.
+
+
+ Data Type is float probably.
+ Sequence similarity
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data on molecular sequence alignment quality (estimated accuracy).
+
+ Sequence alignment metadata (quality report)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on character conservation in a molecular sequence alignment.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation. Use this concept for calculated substitution rates, relative site variability, data on sites with biased properties, highly conserved or very poorly conserved sites, regions, blocks etc.
+ Sequence alignment report (site conservation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on correlations between sites in a molecular sequence alignment, typically to identify possible covarying positions and predict contacts or structural constraints in protein structures.
+
+ Sequence alignment report (site correlation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment of molecular sequences to a Domainatrix signature (representing a sequence alignment).
+
+ Sequence-profile alignment (Domainatrix signature)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment of molecular sequence(s) to a hidden Markov model(s).
+
+ Sequence-profile alignment (HMM)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment of molecular sequences to a protein fingerprint from the PRINTS database.
+
+ Sequence-profile alignment (fingerprint)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Continuous quantitative data that may be read during phylogenetic tree calculation.
+ Phylogenetic continuous quantitative characters
+ Quantitative traits
+
+
+ Phylogenetic continuous quantitative data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Character data with discrete states that may be read during phylogenetic tree calculation.
+ Discrete characters
+ Discretely coded characters
+ Phylogenetic discrete states
+
+
+ Phylogenetic discrete data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more cliques of mutually compatible characters that are generated, for example from analysis of discrete character data, and are used to generate a phylogeny.
+ Phylogenetic report (cliques)
+
+
+ Phylogenetic character cliques
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic invariants data for testing alternative tree topologies.
+ Phylogenetic report (invariants)
+
+
+ Phylogenetic invariants
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report of data concerning or derived from a phylogenetic tree, or from comparing two or more phylogenetic trees.
+
+ This is a broad data type and is used for example for reports on confidence, shape or stratigraphic (age) data derived from phylogenetic tree analysis.
+ Phylogenetic report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A model of DNA substitution that explains a DNA sequence alignment, derived from phylogenetic tree analysis.
+ Phylogenetic tree report (DNA substitution model)
+ Sequence alignment report (DNA substitution model)
+ Substitution model
+
+
+ DNA substitution model
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data about the shape of a phylogenetic tree.
+
+ Phylogenetic tree report (tree shape)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on the confidence of a phylogenetic tree.
+
+ Phylogenetic tree report (tree evaluation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Distances, such as Branch Score distance, between two or more phylogenetic trees.
+ Phylogenetic tree report (tree distances)
+
+
+ Phylogenetic tree distances
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Molecular clock and stratigraphic (age) data derived from phylogenetic tree analysis.
+
+ Phylogenetic tree report (tree stratigraphic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Independent contrasts for characters used in a phylogenetic tree, or covariances, regressions and correlations between characters for those contrasts.
+ Phylogenetic report (character contrasts)
+
+
+ Phylogenetic character contrasts
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of integer numbers for sequence comparison.
+
+ Comparison matrix (integers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of floating point numbers for sequence comparison.
+
+ Comparison matrix (floats)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of integer or floating point numbers for nucleotide comparison.
+ Nucleotide comparison matrix
+ Nucleotide substitution matrix
+
+
+ Comparison matrix (nucleotide)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix of integer or floating point numbers for amino acid comparison.
+ Amino acid comparison matrix
+ Amino acid substitution matrix
+
+
+ Comparison matrix (amino acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of integer numbers for nucleotide comparison.
+
+ Nucleotide comparison matrix (integers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of floating point numbers for nucleotide comparison.
+
+ Nucleotide comparison matrix (floats)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of integer numbers for amino acid comparison.
+
+ Amino acid comparison matrix (integers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Matrix of floating point numbers for amino acid comparison.
+
+ Amino acid comparison matrix (floats)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a nucleic acid tertiary (3D) structure.
+
+
+ Nucleic acid structure
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a protein tertiary (3D) structure, or part of a structure, possibly in complex with other molecules.
+ Protein structures
+
+
+ Protein structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The structure of a protein in complex with a ligand, typically a small molecule such as an enzyme substrate or cofactor, but possibly another macromolecule.
+
+
+ This includes interactions of proteins with atoms, ions and small molecules or macromolecules such as nucleic acids or other polypeptides. For stable inter-polypeptide interactions use 'Protein complex' instead.
+ Protein-ligand complex
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a carbohydrate (3D) structure.
+
+
+ Carbohydrate structure
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the (3D) structure of a small molecule, such as any common chemical compound.
+ CHEBI:23367
+
+
+ Small molecule structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a DNA tertiary (3D) structure.
+
+
+ DNA structure
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for an RNA tertiary (3D) structure.
+
+
+ RNA structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a tRNA tertiary (3D) structure, including tmRNA, snoRNAs etc.
+
+
+ tRNA structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the tertiary (3D) structure of a polypeptide chain.
+
+
+ Protein chain
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the tertiary (3D) structure of a protein domain.
+
+
+ Protein domain
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ 3D coordinate and associated data for a protein tertiary (3D) structure (all atoms).
+
+ Protein structure (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a protein tertiary (3D) structure (typically C-alpha atoms only).
+ Protein structure (C-alpha atoms)
+
+
+ C-beta atoms from amino acid side-chains may be included.
+ C-alpha trace
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a polypeptide chain tertiary (3D) structure (all atoms).
+
+ Protein chain (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a polypeptide chain tertiary (3D) structure (typically C-alpha atoms only).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Protein chain (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a protein domain tertiary (3D) structure (all atoms).
+
+ Protein domain (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ 3D coordinate and associated data for a protein domain tertiary (3D) structure (typically C-alpha atoms only).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Protein domain (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of exactly two molecular tertiary (3D) structures.
+ Pair structure alignment
+
+
+ Structure alignment (pair)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two molecular tertiary (3D) structures.
+
+ Structure alignment (multiple)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of protein tertiary (3D) structures.
+ Structure alignment (protein)
+
+
+ Protein structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of nucleic acid tertiary (3D) structures.
+ Structure alignment (nucleic acid)
+
+
+ Nucleic acid structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment (superimposition) of exactly two protein tertiary (3D) structures.
+
+ Structure alignment (protein pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two protein tertiary (3D) structures.
+
+ Multiple protein tertiary structure alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment (superimposition) of protein tertiary (3D) structures (all atoms considered).
+
+ Structure alignment (protein all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Alignment (superimposition) of protein tertiary (3D) structures (typically C-alpha atoms only considered).
+
+ C-beta atoms from amino acid side-chains may be considered.
+ Structure alignment (protein C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of exactly two protein tertiary (3D) structures (all atoms considered).
+
+ Pairwise protein tertiary structure alignment (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of exactly two protein tertiary (3D) structures (typically C-alpha atoms only considered).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Pairwise protein tertiary structure alignment (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two protein tertiary (3D) structures (all atoms considered).
+
+ Multiple protein tertiary structure alignment (all atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two protein tertiary (3D) structures (typically C-alpha atoms only considered).
+
+ C-beta atoms from amino acid side-chains may be included.
+ Multiple protein tertiary structure alignment (C-alpha atoms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Alignment (superimposition) of exactly two nucleic acid tertiary (3D) structures.
+
+ Structure alignment (nucleic acid pair)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Alignment (superimposition) of more than two nucleic acid tertiary (3D) structures.
+
+ Multiple nucleic acid tertiary structure alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment (superimposition) of RNA tertiary (3D) structures.
+ Structure alignment (RNA)
+
+
+ RNA structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Matrix to transform (rotate/translate) 3D coordinates, typically the transformation necessary to superimpose two molecular structures.
+
+
+ Structural transformation matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ DaliLite hit table of protein chain tertiary structure alignment data.
+
+ The significant and top-scoring hits for regions of the compared structures are shown. Data such as Z-Scores, number of aligned residues, root-mean-square deviation (RMSD) of atoms and sequence identity are given.
+ DaliLite hit table
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A score reflecting structural similarities of two molecules.
+
+ Molecular similarity score
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Root-mean-square deviation (RMSD) is calculated to measure the average distance between superimposed macromolecular coordinates.
+ RMSD
+
+
+ Root-mean-square deviation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A measure of the similarity between two ligand fingerprints.
+
+
+ A ligand fingerprint is derived from ligand structural data from a Protein DataBank file. It reflects the elements or groups present or absent, covalent bonds and bond orders and the bonded environment in terms of SATIS codes and BLEEP atom types.
+ Tanimoto similarity score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A matrix of 3D-1D scores reflecting the probability of amino acids to occur in different tertiary structural environments.
+
+
+ 3D-1D scoring matrix
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A table of 20 numerical values which quantify a property (e.g. physicochemical or biochemical) of the common amino acids.
+
+
+ Amino acid index
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Chemical classification (small, aliphatic, aromatic, polar, charged etc) of amino acids.
+ Chemical classes (amino acids)
+
+
+ Amino acid index (chemical classes)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Statistical protein contact potentials.
+ Contact potentials (amino acid pair-wise)
+
+
+ Amino acid pair-wise contact potentials
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Molecular weights of amino acids.
+ Molecular weight (amino acids)
+
+
+ Amino acid index (molecular weight)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Hydrophobic, hydrophilic or charge properties of amino acids.
+ Hydropathy (amino acids)
+
+
+ Amino acid index (hydropathy)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Experimental free energy values for the water-interface and water-octanol transitions for the amino acids.
+ White-Wimley data (amino acids)
+
+
+ Amino acid index (White-Wimley data)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Van der Waals radii of atoms for different amino acid residues.
+ van der Waals radii (amino acids)
+
+
+ Amino acid index (van der Waals radii)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on a specific enzyme.
+
+ Enzyme report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on a specific restriction enzyme such as enzyme reference data.
+
+ This might include name of enzyme, organism, isoschizomers, methylation, source, suppliers, literature references, or data on restriction enzyme patterns such as name of enzyme, recognition site, length of pattern, number of cuts made by enzyme, details of blunt or sticky end cut etc.
+ Restriction enzyme report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ List of molecular weight(s) of one or more proteins or peptides, for example cut by proteolytic enzymes or reagents.
+
+
+ The report might include associated data such as frequency of peptide fragment molecular weights.
+ Peptide molecular weights
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on the hydrophobic moment of a polypeptide sequence.
+
+
+ Hydrophobic moment is a peptide's hydrophobicity measured for different angles of rotation.
+ Peptide hydrophobic moment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The aliphatic index of a protein.
+
+
+ The aliphatic index is the relative protein volume occupied by aliphatic side chains.
+ Protein aliphatic index
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A protein sequence with annotation on hydrophobic or hydrophilic / charged regions, hydrophobicity plot etc.
+
+
+ Hydrophobic moment is a peptide's hydrophobicity measured for different angles of rotation.
+ Protein sequence hydropathy plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of the mean charge of the amino acids within a window of specified length as the window is moved along a protein sequence.
+
+
+ Protein charge plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The solubility or atomic solvation energy of a protein sequence or structure.
+ Protein solubility data
+
+
+ Protein solubility
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the crystallizability of a protein sequence.
+ Protein crystallizability data
+
+
+ Protein crystallizability
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the stability, intrinsic disorder or globularity of a protein sequence.
+ Protein globularity data
+
+
+ Protein globularity
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The titration curve of a protein.
+
+
+ Protein titration curve
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The isoelectric point of a protein.
+
+
+ Protein isoelectric point
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The pKa value of a protein.
+
+
+ Protein pKa value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The hydrogen exchange rate of a protein.
+
+
+ Protein hydrogen exchange rate
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The extinction coefficient of a protein.
+
+
+ Protein extinction coefficient
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The optical density of a protein.
+
+
+ Protein optical density
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ An informative report on protein subcellular localisation (nuclear, cytoplasmic, mitochondrial, chloroplast, plastid, membrane etc) or destination (exported / extracellular proteins).
+
+ Protein subcellular localisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on allergenicity / immunogenicity of peptides and proteins.
+ Peptide immunogenicity
+ Peptide immunogenicity report
+
+
+ This includes data on peptide ligands that elicit an immune response (immunogens), allergic cross-reactivity, predicted antigenicity (Hopp and Woods plot) etc. These data are useful in the development of peptide-specific antibodies or multi-epitope vaccines. Methods might use sequence data (for example motifs) and / or structural data.
+ Peptide immunogenicity data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A report on the immunogenicity of MHC class I or class II binding peptides.
+
+ MHC peptide immunogenicity report
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific protein 3D structure(s) or structural domains.
+ Protein property (structural)
+ Protein report (structure)
+ Protein structural property
+ Protein structure report (domain)
+ Protein structure-derived report
+
+
+ Protein structure report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on the quality of a protein three-dimensional model.
+ Protein property (structural quality)
+ Protein report (structural quality)
+ Protein structure report (quality evaluation)
+ Protein structure validation report
+
+
+ Model validation might involve checks for atomic packing, steric clashes, agreement with electron density maps etc.
+ Protein structural quality report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Data on inter-atomic or inter-residue contacts, distances and interactions in protein structure(s) or on the interactions of protein atoms or residues with non-protein groups.
+
+
+ Protein non-covalent interactions report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Informative report on flexibility or motion of a protein structure.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein flexibility or motion report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the solvent accessible or buried surface area of a protein structure.
+
+
+ This concept covers definitions of the protein surface, interior and interfaces, accessible and buried residues, surface accessible pockets, interior inaccessible cavities etc.
+ Protein solvent accessibility
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Data on the surface properties (shape, hydropathy, electrostatic patches etc) of a protein structure.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein surface report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phi/psi angle data or a Ramachandran plot of a protein structure.
+
+
+ Ramachandran plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the net charge distribution (dipole moment) of a protein structure.
+
+
+ Protein dipole moment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A matrix of distances between amino acid residues (for example the C-alpha atoms) in a protein structure.
+
+
+ Protein distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An amino acid residue contact map for a protein structure.
+
+
+ Protein contact map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report on clusters of contacting residues in protein structures such as a key structural residue network.
+
+
+ Protein residue 3D cluster
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Patterns of hydrogen bonding in protein structures.
+
+
+ Protein hydrogen bonds
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Non-canonical atomic interactions in protein structures.
+
+ Protein non-canonical interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a node from the CATH database.
+
+ The report (for example http://www.cathdb.info/cathnode/1.10.10.10) includes CATH code (of the node and upper levels in the hierarchy), classification text (of appropriate levels in hierarchy), list of child nodes, representative domain and other relevant data and links.
+ CATH node
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a node from the SCOP database.
+
+ SCOP node
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ An EMBASSY domain classification file (DCF) of classification and other data for domains from SCOP or CATH, in EMBL-like format.
+
+
+ EMBASSY domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'class' node from the CATH database.
+
+ CATH class
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'architecture' node from the CATH database.
+
+ CATH architecture
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'topology' node from the CATH database.
+
+ CATH topology
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'homologous superfamily' node from the CATH database.
+
+ CATH homologous superfamily
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'structurally similar group' node from the CATH database.
+
+ CATH structurally similar group
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a protein 'functional category' node from the CATH database.
+
+ CATH functional category
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on known protein structural domains or folds that are recognised (identified) in protein sequence(s).
+
+ Methods use some type of mapping between sequence and fold, for example secondary structure prediction and alignment, profile comparison, sequence properties, homologous sequence search, kernel machines etc. Domains and folds might be taken from SCOP or CATH.
+ Protein fold recognition report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ protein-protein interaction(s), including interactions between protein domains.
+
+
+ Protein-protein interaction report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report on protein-ligand (small molecule) interaction(s).
+ Protein-drug interaction report
+
+
+ Protein-ligand interaction report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ protein-DNA/RNA interaction(s).
+
+
+ Protein-nucleic acid interactions report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on the dissociation characteristics of a double-stranded nucleic acid molecule (DNA or a DNA/RNA hybrid) during heating.
+ Nucleic acid stability profile
+ Melting map
+ Nucleic acid melting curve
+
+
+ A melting (stability) profile calculated the free energy required to unwind and separate the nucleic acid strands, plotted for sliding windows over a sequence.
+ Nucleic acid melting curve: a melting curve of a double-stranded nucleic acid molecule (DNA or DNA/RNA). Shows the proportion of nucleic acid which are double-stranded versus temperature.
+ Nucleic acid probability profile: a probability profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA). Shows the probability of a base pair not being melted (i.e. remaining as double-stranded DNA) at a specified temperature
+ Nucleic acid stitch profile: stitch profile of hybridised or double stranded nucleic acid (DNA or RNA/DNA). A stitch profile diagram shows partly melted DNA conformations (with probabilities) at a range of temperatures. For example, a stitch profile might show possible loop openings with their location, size, probability and fluctuations at a given temperature.
+ Nucleic acid temperature profile: a temperature profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA). Plots melting temperature versus base position.
+ Nucleic acid melting profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Enthalpy of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+
+ Nucleic acid enthalpy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entropy of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+
+ Nucleic acid entropy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Melting temperature of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+ Nucleic acid melting temperature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Stitch profile of hybridised or double stranded nucleic acid (DNA or RNA/DNA).
+
+
+ Nucleic acid stitch profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA base pair stacking energies data.
+
+
+ DNA base pair stacking energies data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA base pair twist angle data.
+
+
+ DNA base pair twist angle data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA base trimer roll angles data.
+
+
+ DNA base trimer roll angles data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ RNA parameters used by the Vienna package.
+
+ Vienna RNA parameters
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Structure constraints used by the Vienna package.
+
+ Vienna RNA structure constraints
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ RNA concentration data used by the Vienna package.
+
+ Vienna RNA concentration data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ RNA calculated energy data generated by the Vienna package.
+
+ Vienna RNA calculated energy
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dotplot of RNA base pairing probability matrix.
+
+
+ Such as generated by the Vienna package.
+ Base pairing probability matrix dotplot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about RNA/DNA folding, minimum folding energies for DNA or RNA sequences, energy landscape of RNA mutants etc.
+ Nucleic acid report (folding model)
+ Nucleic acid report (folding)
+ RNA secondary structure folding classification
+ RNA secondary structure folding probabilities
+
+
+ Nucleic acid folding report
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Table of codon usage data calculated from one or more nucleic acid sequences.
+
+
+ A codon usage table might include the codon usage table name, optional comments and a table with columns for codons and corresponding codon usage data. A genetic code can be extracted from or represented by a codon usage table.
+ Codon usage table
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A genetic code for an organism.
+
+
+ A genetic code need not include detailed codon usage information.
+ Genetic code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple measure of synonymous codon usage bias often used to predict gene expression levels.
+
+ Codon adaptation index
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of the synonymous codon usage calculated for windows over a nucleotide sequence.
+ Synonymous codon usage statistic plot
+
+
+ Codon usage bias plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The effective number of codons used in a gene sequence. This reflects how far codon usage of a gene departs from equal usage of synonymous codons.
+
+ Nc statistic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The differences in codon usage fractions between two codon usage tables.
+
+
+ Codon usage fraction difference
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about the influence of genotype on drug response.
+
+
+ The report might correlate gene expression or single-nucleotide polymorphisms with drug efficacy or toxicity.
+ Pharmacogenomic test report
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific disease.
+
+
+ For example, an informative report on a specific tumor including nature and origin of the sample, anatomic site, organ or tissue, tumor type, including morphology and/or histologic type, and so on.
+ Disease report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report on linkage disequilibrium; the non-random association of alleles or polymorphisms at two or more loci (not necessarily on the same chromosome).
+
+
+ Linkage disequilibrium (report)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A graphical 2D tabular representation of expression data, typically derived from an omics experiment. A heat map is a table where rows and columns correspond to different features and contexts (for example, cells or samples) and the cell colour represents the level of expression of a gene in that context.
+
+
+ Heat map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Affymetrix library file of information about which probes belong to which probe set.
+
+ Affymetrix probe sets library file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Affymetrix library file of information about the probe sets such as the gene name with which the probe set is associated.
+ GIN file
+
+ Affymetrix probe sets information library file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Standard protonated molecular masses from trypsin (modified porcine trypsin, Promega) and keratin peptides, used in EMBOSS.
+
+
+ Molecular weights standard fingerprint
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report typically including a map (diagram) of a metabolic pathway.
+
+
+ This includes carbohydrate, energy, lipid, nucleotide, amino acid, glycan, PK/NRP, cofactor/vitamin, secondary metabolite, xenobiotics etc.
+ Metabolic pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ genetic information processing pathways.
+
+
+ Genetic information processing pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ environmental information processing pathways.
+
+
+ Environmental information processing pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report typically including a map (diagram) of a signal transduction pathway.
+
+
+ Signal transduction pathway report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Topic concerning cellular process pathways.
+
+
+ Cellular process pathways report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ disease pathways, typically of human disease.
+
+
+ Disease pathway or network report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ A report typically including a map (diagram) of drug structure relationships.
+
+
+ Drug structure relationship map
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ networks of protein interactions.
+
+ Protein interaction networks
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An entry (data type) from the Minimal Information Requested in the Annotation of Biochemical Models (MIRIAM) database of data resources.
+
+ A MIRIAM entry describes a MIRIAM data type including the official name, synonyms, root URI, identifier pattern (regular expression applied to a unique identifier of the data type) and documentation. Each data type can be associated with several resources. Each resource is a physical location of a service (typically a database) providing information on the elements of a data type. Several resources may exist for each data type, provided the same (mirrors) or different information. MIRIAM provides a stable and persistent reference to its data types.
+ MIRIAM datatype
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple floating point number defining the lower or upper limit of an expectation value (E-value).
+ Expectation value
+
+
+ An expectation value (E-Value) is the expected number of observations which are at least as extreme as observations expected to occur by random chance. The E-value describes the number of hits with a given score or better that are expected to occur at random when searching a database of a particular size. It decreases exponentially with the score (S) of a hit. A low E value indicates a more significant score.
+ E-value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The z-value is the number of standard deviations a data value is above or below a mean value.
+
+
+ A z-value might be specified as a threshold for reporting hits from database searches.
+ Z-value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The P-value is the probability of obtaining by random chance a result that is at least as extreme as an observed result, assuming a NULL hypothesis is true.
+
+
+ A P-value might be specified as a threshold for reporting hits from database searches.
+ P-value
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a database (or ontology) version, for example name, version number and release date.
+
+ Database version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on an application version, for example name, version number and release date.
+
+ Tool version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Information on a version of the CATH database.
+
+ CATH version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Cross-mapping of Swiss-Prot codes to PDB identifiers.
+
+ Swiss-Prot to PDB mapping
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Cross-references from a sequence record to other databases.
+
+ Sequence database cross-references
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Metadata on the status of a submitted job.
+
+ Values for EBI services are 'DONE' (job has finished and the results can then be retrieved), 'ERROR' (the job failed or no results were found), 'NOT_FOUND' (the job id is no longer available; job results might be deleted), 'PENDING' (the job is in a queue waiting processing), 'RUNNING' (the job is currently being processed).
+ Job status
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ The (typically numeric) unique identifier of a submitted job.
+
+ Job ID
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of job, for example interactive or non-interactive.
+
+ Job type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report of tool-specific metadata on some analysis or process performed, for example a log of diagnostic or error messages.
+
+ Tool log
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ DaliLite log file describing all the steps taken by a DaliLite alignment of two protein structures.
+
+ DaliLite log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ STRIDE log file.
+
+ STRIDE log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ NACCESS log file.
+
+ NACCESS log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS wordfinder log file.
+
+ EMBOSS wordfinder log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS (EMBASSY) domainatrix application log file.
+
+ EMBOSS domainatrix log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS (EMBASSY) sites application log file.
+
+ EMBOSS sites log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS (EMBASSY) supermatcher error file.
+
+ EMBOSS supermatcher error file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS megamerger log file.
+
+ EMBOSS megamerger log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS whichdb log file.
+
+ EMBOSS whichdb log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBOSS vectorstrip log file.
+
+ EMBOSS vectorstrip log file
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A username on a computer system or a website.
+
+
+
+ Username
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A password on a computer system, or a website.
+
+
+
+ Password
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Email
+ Moby:EmailAddress
+ A valid email address of an end-user.
+
+
+
+ Email address
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a person.
+
+
+
+ Person name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Number of iterations of an algorithm.
+
+ Number of iterations
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Number of entities (for example database hits, sequences, alignments etc) to write to an output file.
+
+ Number of output entities
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Controls the order of hits (reported matches) in an output file from a database search.
+
+ Hit sort order
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific drug.
+ Drug annotation
+ Drug structure relationship map
+
+
+ A drug structure relationship map is a report (typically a map diagram) of drug structure relationships.
+ Drug report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image (for viewing or printing) of a phylogenetic tree including (typically) a plot of rooted or unrooted phylogenies, cladograms, circular trees or phenograms and associated information.
+
+
+ See also 'Phylogenetic tree'
+ Phylogenetic tree image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of RNA secondary structure, knots, pseudoknots etc.
+
+
+ RNA secondary structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of protein secondary structure.
+
+
+ Protein secondary structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of one or more molecular tertiary (3D) structures.
+
+
+ Structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of two or more aligned molecular sequences possibly annotated with alignment features.
+
+
+ Sequence alignment image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of the structure of a small chemical compound.
+ Small molecule structure image
+ Chemical structure sketch
+ Small molecule sketch
+
+
+ The molecular identifier and formula are typically included.
+ Chemical structure image
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A fate map is a plan of an early stage of an embryo such as a blastula, showing areas that are of significance to development.
+
+
+ Fate map
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of spots from a microarray experiment.
+
+
+ Microarray spots image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the BioPax ontology.
+
+ BioPax term
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition from The Gene Ontology (GO).
+
+ GO
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the MeSH vocabulary.
+
+ MeSH
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the HGNC controlled vocabulary.
+
+ HGNC
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the NCBI taxonomy vocabulary.
+
+ NCBI taxonomy vocabulary
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the Plant Ontology (PO).
+
+ Plant ontology term
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the UMLS vocabulary.
+
+ UMLS
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from Foundational Model of Anatomy.
+
+ Classifies anatomical entities according to their shared characteristics (genus) and distinguishing characteristics (differentia). Specifies the part-whole and spatial relationships of the entities, morphological transformation of the entities during prenatal development and the postnatal life cycle and principles, rules and definitions according to which classes and relationships in the other three components of FMA are represented.
+ FMA
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the EMAP mouse ontology.
+
+ EMAP
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the ChEBI ontology.
+
+ ChEBI
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the MGED ontology.
+
+ MGED
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term from the myGrid ontology.
+
+ The ontology is provided as two components, the service ontology and the domain ontology. The domain ontology provides concepts for core bioinformatics data types and their relations. The service ontology describes the physical and operational features of web services.
+ myGrid
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition for a biological process from the Gene Ontology (GO).
+
+ Data Type is an enumerated string.
+ GO (biological process)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition for a molecular function from the Gene Ontology (GO).
+
+ Data Type is an enumerated string.
+ GO (molecular function)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term definition for a cellular component from the Gene Ontology (GO).
+
+ Data Type is an enumerated string.
+ GO (cellular component)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A relation type defined in an ontology.
+
+ Ontology relation type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The definition of a concept from an ontology.
+ Ontology class definition
+
+
+ Ontology concept definition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ A comment on a concept from an ontology.
+
+ Ontology concept comment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Reference for a concept from an ontology.
+
+ Ontology concept reference
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Information on a published article provided by the doc2loc program.
+
+ The doc2loc output includes the url, format, type and availability code of a document for every service provider.
+ doc2loc document information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:PDB_residue_no
+ WHATIF: pdb_number
+ A residue identifier (a string) from a PDB file.
+
+
+ PDB residue number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Cartesian coordinate of an atom (in a molecular structure).
+ Cartesian coordinate
+
+
+ Atomic coordinate
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Cartesian x coordinate of an atom (in a molecular structure).
+
+
+ Atomic x coordinate
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Cartesian y coordinate of an atom (in a molecular structure).
+
+
+ Atomic y coordinate
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Cartesian z coordinate of an atom (in a molecular structure).
+
+
+ Atomic z coordinate
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_atom_name
+ WHATIF: PDBx_auth_atom_id
+ WHATIF: PDBx_type_symbol
+ WHATIF: alternate_atom
+ WHATIF: atom_type
+ Identifier (a string) of a specific atom from a PDB file for a molecular structure.
+
+
+
+ PDB atom name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on a single atom from a protein structure.
+ Atom data
+ CHEBI:33250
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein atom
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data on a single amino acid residue position in a protein structure.
+ Residue
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein residue
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an atom.
+
+
+
+ Atom name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: type
+ Three-letter amino acid residue names as used in PDB files.
+
+
+
+ PDB residue name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_model_num
+ WHATIF: model_number
+ Identifier of a model structure from a PDB file.
+ Model number
+
+
+
+ PDB model number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Summary of domain classification information for a CATH domain.
+
+ The report (for example http://www.cathdb.info/domain/1cukA01) includes CATH codes for levels in the hierarchy for the domain, level descriptions and relevant data and links.
+ CATH domain report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database (based on ATOM records in PDB) for CATH domains (clustered at different levels of sequence identity).
+
+ CATH representative domain sequences (ATOM)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database (based on COMBS sequence data) for CATH domains (clustered at different levels of sequence identity).
+
+ CATH representative domain sequences (COMBS)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database for all CATH domains (based on PDB ATOM records).
+
+ CATH domain sequences (ATOM)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ FASTA sequence database for all CATH domains (based on COMBS sequence data).
+
+ CATH domain sequences (COMBS)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Information on a molecular sequence version.
+ Sequence version information
+
+
+ Sequence version
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A numerical value, that is some type of scored value arising for example from a prediction method.
+
+
+ Score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Report on general functional properties of specific protein(s).
+
+ For properties that can be mapped to a sequence, use 'Sequence report' instead.
+ Protein report (function)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Aspergillus Genome Database.
+
+ Gene name (ASPGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Candida Genome Database.
+
+ Gene name (CGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from dictyBase database.
+
+ Gene name (dictyBase)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Primary name of a gene from EcoGene Database.
+
+ Gene name (EcoGene primary)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from MaizeGDB (maize genes) database.
+
+ Gene name (MaizeGDB)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Saccharomyces Genome Database.
+
+ Gene name (SGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from Tetrahymena Genome Database.
+
+ Gene name (TGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene from E.coli Genetic Stock Center.
+
+ Gene name (CGSC)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene approved by the HUGO Gene Nomenclature Committee.
+
+ Gene name (HGNC)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene from the Mouse Genome Database.
+
+ Gene name (MGD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Symbol of a gene from Bacillus subtilis Genome Sequence Project.
+
+ Gene name (Bacillus subtilis)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: ApiDB_PlasmoDB
+ Identifier of a gene from PlasmoDB Plasmodium Genome Resource.
+
+
+
+ Gene ID (PlasmoDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a gene from EcoGene Database.
+ EcoGene Accession
+ EcoGene ID
+
+
+
+ Gene ID (EcoGene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: FB
+ http://www.geneontology.org/doc/GO.xrf_abbs: FlyBase
+ Gene identifier from FlyBase database.
+
+
+
+ Gene ID (FlyBase)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Gene identifier from Glossina morsitans GeneDB database.
+
+ Gene ID (GeneDB Glossina morsitans)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Gene identifier from Leishmania major GeneDB database.
+
+ Gene ID (GeneDB Leishmania major)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ http://www.geneontology.org/doc/GO.xrf_abbs: GeneDB_Pfalciparum
+ Gene identifier from Plasmodium falciparum GeneDB database.
+
+ Gene ID (GeneDB Plasmodium falciparum)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ http://www.geneontology.org/doc/GO.xrf_abbs: GeneDB_Spombe
+ Gene identifier from Schizosaccharomyces pombe GeneDB database.
+
+ Gene ID (GeneDB Schizosaccharomyces pombe)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ http://www.geneontology.org/doc/GO.xrf_abbs: GeneDB_Tbrucei
+ Gene identifier from Trypanosoma brucei GeneDB database.
+
+ Gene ID (GeneDB Trypanosoma brucei)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: GR_GENE
+ http://www.geneontology.org/doc/GO.xrf_abbs: GR_gene
+ Gene identifier from Gramene database.
+
+
+
+ Gene ID (Gramene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: PAMGO_VMD
+ http://www.geneontology.org/doc/GO.xrf_abbs: VMD
+ Gene identifier from Virginia Bioinformatics Institute microbial database.
+
+
+
+ Gene ID (Virginia microbial)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: SGN
+ Gene identifier from Sol Genomics Network.
+
+
+
+ Gene ID (SGN)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WBGene[0-9]{8}
+ http://www.geneontology.org/doc/GO.xrf_abbs: WB
+ http://www.geneontology.org/doc/GO.xrf_abbs: WormBase
+ Gene identifier used by WormBase database.
+
+
+
+ Gene ID (WormBase)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Any name (other than the recommended one) for a gene.
+
+ Gene synonym
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of an open reading frame attributed by a sequencing project.
+
+
+
+ ORF name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A component of a larger sequence assembly.
+
+ Sequence assembly component
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A report on a chromosome aberration such as abnormalities in chromosome structure.
+
+ Chromosome annotation (aberration)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a clone (cloned molecular sequence) from a database.
+
+
+
+ Clone ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDBML:pdbx_PDB_ins_code
+ WHATIF: insertion_code
+ An insertion code (part of the residue number) for an amino acid residue from a PDB file.
+
+
+ PDB insertion code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: PDBx_occupancy
+ The fraction of an atom type present at a site in a molecular structure.
+
+
+ The sum of the occupancies of all the atom types at a site should not normally significantly exceed 1.0.
+ Atomic occupancy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: PDBx_B_iso_or_equiv
+ Isotropic B factor (atomic displacement parameter) for an atom from a PDB file.
+
+
+ Isotropic B factor
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A cytogenetic map showing chromosome banding patterns in mutant cell lines relative to the wild type.
+ Deletion-based cytogenetic map
+
+
+ A cytogenetic map is built from a set of mutant cell lines with sub-chromosomal deletions and a reference wild-type line ('genome deletion panel'). The panel is used to map markers onto the genome by comparing mutant to wild-type banding patterns. Markers are linked (occur in the same deleted region) if they share the same banding pattern (presence or absence) as the deletion panel.
+ Deletion map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A genetic map which shows the approximate location of quantitative trait loci (QTL) between two or more markers.
+ Quantitative trait locus map
+
+
+ QTL map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Haplotyping_Study_obj
+ A map of haplotypes in a genome or other sequence, describing common patterns of genetic variation.
+
+
+ Haplotype map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ Data describing a set of multiple genetic or physical maps, typically sharing a common set of features which are mapped.
+
+
+ Map set data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+
+ A feature which may mapped (positioned) on a genetic or other type of map.
+
+ Mappable features may be based on Gramene's notion of map features; see http://www.gramene.org/db/cmap/feature_type_info.
+ Map feature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A designation of the type of map (genetic map, physical map, sequence map etc) or map set.
+
+ Map types may be based on Gramene's notion of a map type; see http://www.gramene.org/db/cmap/map_type_info.
+ Map type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a protein fold.
+
+
+
+ Protein fold name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:BriefTaxonConcept
+ Moby:PotentialTaxon
+ The name of a group of organisms belonging to the same taxonomic rank.
+ Taxonomic rank
+ Taxonomy rank
+
+
+
+ For a complete list of taxonomic ranks see https://www.phenoscape.org/wiki/Taxonomic_Rank_Vocabulary.
+ Taxon
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique identifier of a (group of) organisms.
+
+
+
+ Organism identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a genus of organism.
+
+
+
+ Genus name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GCP_Taxon
+ Moby:TaxonName
+ Moby:TaxonScientificName
+ Moby:TaxonTCS
+ Moby:iANT_organism-xml
+ The full name for a group of organisms, reflecting their biological classification and (usually) conforming to a standard nomenclature.
+ Taxonomic information
+ Taxonomic name
+
+
+
+ Name components correspond to levels in a taxonomic hierarchy (e.g. 'Genus', 'Species', etc.) Meta information such as a reference where the name was defined and a date might be included.
+ Taxonomic classification
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:iHOPorganism
+ A unique identifier for an organism used in the iHOP database.
+
+
+
+ iHOP organism ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Common name for an organism as used in the GenBank database.
+
+
+
+ Genbank common name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a taxon from the NCBI taxonomy database.
+
+
+
+ NCBI taxon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An alternative for a word.
+
+ Synonym
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A common misspelling of a word.
+
+ Misspelling
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An abbreviation of a phrase or word.
+
+ Acronym
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A term which is likely to be misleading of its meaning.
+
+ Misnomer
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Author
+ Information on the authors of a published work.
+
+
+
+ Author ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier representing an author in the DragonDB database.
+
+
+
+ DragonDB author identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:DescribedLink
+ A URI along with annotation describing the data found at the address.
+
+
+ Annotated URI
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A controlled vocabulary for words and phrases that can appear in the keywords field (KW line) of entries from the UniProt database.
+
+ UniProt keywords
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:GENEFARM_GeneID
+ Identifier of a gene from the GeneFarm database.
+
+
+
+ Gene ID (GeneFarm)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:Blattner_number
+ The blattner identifier for a gene.
+
+
+
+ Blattner number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Moby_namespace:MIPS_GE_Maize
+ Identifier for genetic elements in MIPS Maize database.
+
+ Gene ID (MIPS Maize)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Moby_namespace:MIPS_GE_Medicago
+ Identifier for genetic elements in MIPS Medicago database.
+
+ Gene ID (MIPS Medicago)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The name of an Antirrhinum Gene from the DragonDB database.
+
+ Gene name (DragonDB)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ A unique identifier for an Arabidopsis gene, which is an acronym or abbreviation of the gene name.
+
+ Gene name (Arabidopsis)
+ true
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:iHOPsymbol
+ A unique identifier of a protein or gene used in the iHOP database.
+
+
+
+ iHOP symbol
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of a gene from the GeneFarm database.
+
+ Gene name (GeneFarm)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique name or other identifier of a genetic locus, typically conforming to a scheme that names loci (such as predicted genes) depending on their position in a molecular sequence, for example a completely sequenced genome or chromosome.
+ Locus identifier
+ Locus name
+
+
+
+ Locus ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AT[1-5]G[0-9]{5}
+ http://www.geneontology.org/doc/GO.xrf_abbs:AGI_LocusCode
+ Locus identifier for Arabidopsis Genome Initiative (TAIR, TIGR and MIPS databases).
+ AGI ID
+ AGI identifier
+ AGI locus code
+ Arabidopsis gene loci number
+
+
+
+ Locus ID (AGI)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: ASPGD
+ http://www.geneontology.org/doc/GO.xrf_abbs: ASPGDID
+ Identifier for loci from ASPGD (Aspergillus Genome Database).
+
+
+
+ Locus ID (ASPGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: Broad_MGG
+ Identifier for loci from Magnaporthe grisea Database at the Broad Institute.
+
+
+
+ Locus ID (MGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: CGD
+ http://www.geneontology.org/doc/GO.xrf_abbs: CGDID
+ Identifier for loci from CGD (Candida Genome Database).
+ CGD locus identifier
+ CGDID
+
+
+
+ Locus ID (CGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: JCVI_CMR
+ http://www.geneontology.org/doc/GO.xrf_abbs: TIGR_CMR
+ Locus identifier for Comprehensive Microbial Resource at the J. Craig Venter Institute.
+
+
+
+ Locus ID (CMR)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:LocusID
+ http://www.geneontology.org/doc/GO.xrf_abbs: NCBI_locus_tag
+ Identifier for loci from NCBI database.
+ Locus ID (NCBI)
+
+
+
+ NCBI locus tag
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: SGD
+ http://www.geneontology.org/doc/GO.xrf_abbs: SGDID
+ Identifier for loci from SGD (Saccharomyces Genome Database).
+ SGDID
+
+
+
+ Locus ID (SGD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:MMP_Locus
+ Identifier of loci from Maize Mapping Project.
+
+
+
+ Locus ID (MMP)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:DDB_gene
+ Identifier of locus from DictyBase (Dictyostelium discoideum).
+
+
+
+ Locus ID (DictyBase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:EntrezGene_EntrezGeneID
+ Moby_namespace:EntrezGene_ID
+ Identifier of a locus from EntrezGene database.
+
+
+
+ Locus ID (EntrezGene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:MaizeGDB_Locus
+ Identifier of locus from MaizeGDB (Maize genome database).
+
+
+
+ Locus ID (MaizeGDB)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Moby:SO_QTL
+ A stretch of DNA that is closely linked to the genes underlying a quantitative trait (a phenotype that varies in degree and depends upon the interactions between multiple genes and their environment).
+
+ A QTL sometimes but does not necessarily correspond to a gene.
+ Quantitative trait locus
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:GeneId
+ Identifier of a gene from the KOME database.
+
+
+
+ Gene ID (KOME)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:Tropgene_locus
+ Identifier of a locus from the Tropgene database.
+
+
+
+ Locus ID (Tropgene)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An alignment of molecular sequences, structures or profiles derived from them.
+
+
+ Alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data for an atom (in a molecular structure).
+ General atomic property
+
+
+ Atomic property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby_namespace:SP_KW
+ http://www.geneontology.org/doc/GO.xrf_abbs: SP_KW
+ A word or phrase that can appear in the keywords field (KW line) of entries from the UniProt database.
+
+
+ UniProt keyword
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A name for a genetic locus conforming to a scheme that names loci (such as predicted genes) depending on their position in a molecular sequence, for example a completely sequenced genome or chromosome.
+
+ Ordered locus name
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:GCP_MapInterval
+ Moby:GCP_MapPoint
+ Moby:GCP_MapPosition
+ Moby:GenePosition
+ Moby:HitPosition
+ Moby:Locus
+ Moby:MapPosition
+ Moby:Position
+ PDBML:_atom_site.id
+ A position in a map (for example a genetic map), either a single position (point) or a region / interval.
+ Locus
+ Map position
+
+
+ This includes positions in genomes based on a reference sequence. A position may be specified for any mappable object, i.e. anything that may have positional information such as a physical position in a chromosome. Data might include sequence region name, strand, coordinate system name, assembly name, start position and end position.
+ Sequence coordinates
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the intrinsic physical (e.g. structural) or chemical properties of one, more or all amino acids.
+ Amino acid data
+
+
+ Amino acid property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A human-readable collection of information which (typically) is generated or collated by hand and which describes a biological entity, phenomena or associated primary (e.g. sequence or structural) data, as distinct from the primary data itself and computer-generated reports derived from it.
+
+ This is a broad data type and is used a placeholder for other, more specific types.
+ Annotation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data describing a molecular map (genetic or physical) or a set of such maps, including various attributes of, data extracted from or derived from the analysis of them, but excluding the map(s) themselves. This includes metadata for map sets that share a common set of features which are mapped.
+ Map attribute
+ Map set data
+
+
+ Map data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data used by the Vienna RNA analysis package.
+
+ Vienna RNA structural data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Data used to replace (mask) characters in a molecular sequence.
+
+ Sequence mask parameter
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning chemical reaction(s) catalysed by enzyme(s).
+
+
+ This is a broad data type and is used a placeholder for other, more specific types.
+ Enzyme kinetics data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot giving an approximation of the kinetics of an enzyme-catalysed reaction, assuming simple kinetics (i.e. no intermediate or product inhibition, allostericity or cooperativity). It plots initial reaction rate to the substrate concentration (S) from which the maximum rate (vmax) is apparent.
+
+
+ Michaelis Menten plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot based on the Michaelis Menten equation of enzyme kinetics plotting the ratio of the initial substrate concentration (S) against the reaction velocity (v).
+
+
+ Hanes Woolf plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Raw data from or annotation on laboratory experiments.
+
+ This is a broad data type and is used a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Experimental data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a genome version.
+
+ Genome version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Typically a human-readable summary of body of facts or information indicating why a statement is true or valid. This may include a computational prediction, laboratory experiment, literature reference etc.
+
+
+ Evidence
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A molecular sequence and minimal metadata, typically an identifier of the sequence and/or a comment.
+
+
+ Sequence record lite
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more molecular sequences, possibly with associated annotation.
+ Sequences
+
+
+ This concept is a placeholder of concepts for primary sequence data including raw sequences and sequence records. It should not normally be used for derivatives such as sequence alignments, motifs or profiles.
+ Sequence
+ http://purl.bioontology.org/ontology/MSH/D008969
+ http://purl.org/biotop/biotop.owl#BioMolecularSequenceInformation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A nucleic acid sequence and minimal metadata, typically an identifier of the sequence and/or a comment.
+
+
+ Nucleic acid sequence record (lite)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A protein sequence and minimal metadata, typically an identifier of the sequence and/or a comment.
+
+
+ Protein sequence record (lite)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information including annotation on a biological entity or phenomena, computer-generated reports of analysis of primary data (e.g. sequence or structural), and metadata (data about primary data) or any other free (essentially unformatted) text, as distinct from the primary data itself.
+ Document
+ Record
+
+
+ You can use this term by default for any textual report, in case you can't find another, more specific term. Reports may be generated automatically or collated by hand and can include metadata on the origin, source, history, ownership or location of some thing.
+ Report
+ http://semanticscience.org/resource/SIO_000148
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ General data for a molecule.
+ General molecular property
+
+
+ Molecular property (general)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning molecular structural data.
+
+ This is a broad data type and is used a placeholder for other, more specific types.
+ Structural data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A nucleotide sequence motif.
+ Nucleic acid sequence motif
+ DNA sequence motif
+ RNA sequence motif
+
+
+ Sequence motif (nucleic acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An amino acid sequence motif.
+ Protein sequence motif
+
+
+ Sequence motif (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Some simple value controlling a search operation, typically a search of a database.
+
+ Search parameter
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report of hits from searching a database of some type.
+ Database hits
+ Search results
+
+
+ Database search results
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ The secondary structure assignment (predicted or real) of a nucleic acid or protein.
+
+ Secondary structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An array of numerical values.
+ Array
+
+
+ This is a broad data type and is used a placeholder for other, more specific types.
+ Matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Data concerning, extracted from, or derived from the analysis of molecular alignment of some type.
+
+ This is a broad data type and is used a placeholder for other, more specific types.
+ Alignment data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific nucleic acid molecules.
+
+
+ Nucleic acid report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more molecular tertiary (3D) structures. It might include annotation on the structure, a computer-generated report of analysis of structural data, and metadata (data about primary data) or any other free (essentially unformatted) text, as distinct from the primary data itself.
+ Structure-derived report
+
+
+ Structure report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+
+
+ A report on nucleic acid structure-derived data, describing structural properties of a DNA molecule, or any other annotation or information about specific nucleic acid 3D structure(s).
+
+ Nucleic acid structure data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on the physical (e.g. structural) or chemical properties of molecules, or parts of a molecule.
+ Physicochemical property
+ SO:0000400
+
+
+ Molecular property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Structural data for DNA base pairs or runs of bases, such as energy or angle data.
+
+
+ DNA base structural data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Information on a database (or ontology) entry version, such as name (or other identifier) or parent database, unique identifier of entry, data, author and so on.
+
+ Database entry version information
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A persistent (stable) and unique identifier, typically identifying an object (entry) from a database.
+
+
+
+ Accession
+ http://semanticscience.org/resource/SIO_000675
+ http://semanticscience.org/resource/SIO_000731
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ single nucleotide polymorphism (SNP) in a DNA sequence.
+
+
+ SNP
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Reference to a dataset (or a cross-reference between two datasets), typically one or more entries in a biological database or ontology.
+
+
+ A list of database accessions or identifiers are usually included.
+ Data reference
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a submitted job.
+
+
+
+ Job identifier
+ http://wsio.org/data_009
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+
+ A name of a thing, which need not necessarily uniquely identify it.
+ Symbolic name
+
+
+
+ Name
+ "http://www.w3.org/2000/01/rdf-schema#label
+ http://semanticscience.org/resource/SIO_000116
+ http://usefulinc.com/ns/doap#name
+
+
+
+
+
+ Closely related, but focusing on labeling and human readability but not on identification.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing the type of a thing, typically an enumerated string (a string with one of a limited set of values).
+
+ Type
+ http://purl.org/dc/elements/1.1/type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Authentication data usually used to log in into an account on an information system such as a web application or a database.
+
+
+
+ Account authentication
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A three-letter code used in the KEGG databases to uniquely identify organisms.
+
+
+
+ KEGG organism code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the KEGG GENES database.
+
+ Gene name (KEGG GENES)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an object from one of the BioCyc databases.
+
+
+
+ BioCyc ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a compound from the BioCyc chemical compounds database.
+ BioCyc compound ID
+ BioCyc compound identifier
+
+
+
+ Compound ID (BioCyc)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a biological reaction from the BioCyc reactions database.
+
+
+
+ Reaction ID (BioCyc)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an enzyme from the BioCyc enzymes database.
+ BioCyc enzyme ID
+
+
+
+ Enzyme ID (BioCyc)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a biological reaction from a database.
+
+
+
+ Reaction ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier that is re-used for data objects of fundamentally different types (typically served from a single database).
+
+
+
+ This branch provides an alternative organisation of the concepts nested under 'Accession' and 'Name'. All concepts under here are already included under 'Accession' or 'Name'.
+ Identifier (hybrid)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a molecular property.
+
+
+
+ Molecular property identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a codon usage table, for example a genetic code.
+ Codon usage table identifier
+
+
+
+ Codon usage table ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Primary identifier of an object from the FlyBase database.
+
+
+
+ FlyBase primary identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an object from the WormBase database.
+
+
+
+ WormBase identifier
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CE[0-9]{5}
+ Protein identifier used by WormBase database.
+
+
+
+ WormBase wormpep ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on a trinucleotide sequence that encodes an amino acid including the triplet sequence, the encoded amino acid or whether it is a start or stop codon.
+
+ Nucleic acid features (codon)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a map of a molecular sequence.
+
+
+
+ Map identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a software end-user on a website or a database (typically a person or an entity).
+
+
+
+ Person identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Name or other identifier of a nucleic acid molecule.
+
+
+
+ Nucleic acid identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+
+ Frame for translation of DNA (3 forward and 3 reverse frames relative to a chromosome).
+
+ Translation frame specification
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a genetic code.
+
+
+
+ Genetic code identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Informal name for a genetic code, typically an organism name.
+
+
+
+ Genetic code name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a file format such as HTML, PNG, PDF, EMBL, GenBank and so on.
+
+
+
+ File format name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing a type of sequence profile such as frequency matrix, Gribskov profile, hidden Markov model etc.
+
+ Sequence profile type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a computer operating system such as Linux, PC or Mac.
+
+
+
+ Operating system name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A type of point or block mutation, including insertion, deletion, change, duplication and moves.
+
+ Mutation type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A logical operator such as OR, AND, XOR, and NOT.
+
+
+
+ Logical operator
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A control of the order of data that is output, for example the order of sequences in an alignment.
+
+ Possible options including sorting by score, rank, by increasing P-value (probability, i.e. most statistically significant hits given first) and so on.
+ Results sort order
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A simple parameter that is a toggle (boolean value), typically a control for a modal tool.
+
+ Toggle
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The width of an output sequence or alignment.
+
+ Sequence width
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A penalty for introducing or extending a gap in an alignment.
+
+
+ Gap penalty
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A temperature concerning nucleic acid denaturation, typically the temperature at which the two strands of a hybridised or double stranded nucleic acid (DNA or RNA/DNA) molecule separate.
+ Melting temperature
+
+
+ Nucleic acid melting temperature
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The concentration of a chemical compound.
+
+
+ Concentration
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Size of the incremental 'step' a sequence window is moved over a sequence.
+
+ Window step size
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An image of a graph generated by the EMBOSS suite.
+
+ EMBOSS graph
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An application report generated by the EMBOSS suite.
+
+ EMBOSS report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An offset for a single-point sequence position.
+
+ Sequence offset
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A value that serves as a threshold for a tool (usually to control scoring or output).
+
+ Threshold
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ An informative report on a transcription factor protein.
+
+ This might include conformational or physicochemical properties, as well as sequence information for transcription factor(s) binding sites.
+ Protein report (transcription factor)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a category of biological or bioinformatics database.
+
+ Database category name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name of a sequence profile.
+
+ Sequence profile name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Specification of one or more colors.
+
+ Color
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A parameter that is used to control rendering (drawing) to a device or image.
+
+ Rendering parameter
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any arbitrary name of a molecular sequence.
+
+
+
+ Sequence name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A temporal date.
+
+ Date
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Word composition data for a molecular sequence.
+
+ Word composition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of Fickett testcode statistic (identifying protein coding regions) in a nucleotide sequences.
+
+
+ Fickett testcode plot
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of sequence similarities identified from word-matching or character comparison.
+ Sequence conservation report
+
+
+ Use this concept for calculated substitution rates, relative site variability, data on sites with biased properties, highly conserved or very poorly conserved sites, regions, blocks etc.
+ Sequence similarity plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of peptide sequence sequence looking down the axis of the helix for highlighting amphipathicity and other properties.
+
+
+ Helical wheel
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of peptide sequence sequence in a simple 3,4,3,4 repeating pattern that emulates at a simple level the arrangement of residues around an alpha helix.
+
+
+ Useful for highlighting amphipathicity and other properties.
+ Helical net
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A plot of general physicochemical properties of a protein sequence.
+
+ Protein sequence properties plot
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of pK versus pH for a protein.
+
+
+ Protein ionisation curve
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A plot of character or word composition / frequency of a molecular sequence.
+
+
+ Sequence composition plot
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Density plot (of base composition) for a nucleotide sequence.
+
+
+ Nucleic acid density plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of a sequence trace (nucleotide sequence versus probabilities of each of the 4 bases).
+
+
+ Sequence trace image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on siRNA duplexes in mRNA.
+
+ Nucleic acid features (siRNA)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A collection of multiple molecular sequences and (typically) associated metadata that is intended for sequential processing.
+
+ This concept may be used for sequence sets that are expected to be read and processed a single sequence at a time.
+ Sequence set (stream)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Secondary identifier of an object from the FlyBase database.
+
+
+
+ Secondary identifier are used to handle entries that were merged with or split from other entries in the database.
+ FlyBase secondary identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The number of a certain thing.
+
+ Cardinality
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A single thing.
+
+ Exactly 1
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ One or more things.
+
+ 1 or more
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Exactly two things.
+
+ Exactly 2
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Two or more things.
+
+ 2 or more
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A fixed-size datum calculated (by using a hash function) for a molecular sequence, typically for purposes of error detection or indexing.
+ Hash
+ Hash code
+ Hash sum
+ Hash value
+
+
+ Sequence checksum
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ chemical modification of a protein.
+
+
+ Protein features report (chemical modifications)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Data on an error generated by computer system or tool.
+
+ Error
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Basic information on any arbitrary database entry.
+
+
+ Database entry metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A cluster of similar genes.
+
+ Gene cluster
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A molecular sequence and comprehensive metadata (such as a feature table), typically corresponding to a full entry from a molecular sequence database.
+
+
+ Sequence record full
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a plasmid in a database.
+
+
+
+ Plasmid identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique identifier of a specific mutation catalogued in a database.
+
+
+
+ Mutation ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Information describing the mutation itself, the organ site, tissue and type of lesion where the mutation has been identified, description of the patient origin and life-style.
+
+ Mutation annotation (basic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on the prevalence of mutation(s), including data on samples and mutation prevalence (e.g. by tumour type)..
+
+ Mutation annotation (prevalence)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on mutation prognostic data, such as information on patient cohort, the study settings and the results of the study.
+
+ Mutation annotation (prognostic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on the functional properties of mutant proteins including transcriptional activities, promotion of cell growth and tumorigenicity, dominant negative effects, capacity to induce apoptosis, cell-cycle arrest or checkpoints in human cells and so on.
+
+ Mutation annotation (functional)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The number of a codon, for instance, at which a mutation is located.
+
+
+ Codon number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific tumor including nature and origin of the sample, anatomic site, organ or tissue, tumor type, including morphology and/or histologic type, and so on.
+
+ Tumor annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Basic information about a server on the web, such as an SRS server.
+
+ Server metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a field in a database.
+
+
+
+ Database field name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a sequence cluster from the SYSTERS database.
+ SYSTERS cluster ID
+
+
+
+ Sequence cluster ID (SYSTERS)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning a biological ontology.
+
+
+ Ontology metadata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Raw SCOP domain classification data files.
+
+ These are the parsable data files provided by SCOP.
+ Raw SCOP domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Raw CATH domain classification data files.
+
+ These are the parsable data files provided by CATH.
+ Raw CATH domain classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on the types of small molecules or 'heterogens' (non-protein groups) that are represented in PDB files.
+
+ Heterogen annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Phylogenetic property values data.
+
+ Phylogenetic property values
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A collection of sequences output from a bootstrapping (resampling) procedure.
+
+ Bootstrapping is often performed in phylogenetic analysis.
+ Sequence set (bootstrapped)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A consensus phylogenetic tree derived from comparison of multiple trees.
+
+ Phylogenetic consensus tree
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A data schema for organising or transforming data of some type.
+
+ Schema
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A DTD (document type definition).
+
+ DTD
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An XML Schema.
+
+ XML Schema
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A relax-NG schema.
+
+ Relax-NG schema
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An XSLT stylesheet.
+
+ XSLT stylesheet
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a data type.
+
+
+
+ Data resource definition name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of an OBO file format such as OBO-XML, plain and so on.
+
+
+
+ OBO file format name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for genetic elements in MIPS database.
+ MIPS genetic element identifier
+
+
+
+ Gene ID (MIPS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of protein sequence(s) or protein sequence database entries.
+
+ Sequence identifier (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An identifier of nucleotide sequence(s) or nucleotide sequence database entries.
+
+ Sequence identifier (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An accession number of an entry from the EMBL sequence database.
+ EMBL ID
+ EMBL accession number
+ EMBL identifier
+
+
+
+ EMBL accession
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a polypeptide in the UniProt database.
+ UniProt entry name
+ UniProt identifier
+ UniProtKB entry name
+ UniProtKB identifier
+
+
+
+ UniProt ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry from the GenBank sequence database.
+ GenBank ID
+ GenBank accession number
+ GenBank identifier
+
+
+
+ GenBank accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Secondary (internal) identifier of a Gramene database entry.
+ Gramene internal ID
+ Gramene internal identifier
+ Gramene secondary ID
+
+
+
+ Gramene secondary identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of an entry from a database of molecular sequence variation.
+
+
+
+ Sequence variation ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique (and typically persistent) identifier of a gene in a database, that is (typically) different to the gene name/symbol.
+ Gene accession
+ Gene code
+
+
+
+ Gene ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the AceView genes database.
+
+ Gene name (AceView)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.geneontology.org/doc/GO.xrf_abbs: ECK
+ Identifier of an E. coli K-12 gene from EcoGene Database.
+ E. coli K-12 gene identifier
+ ECK accession
+
+
+
+ Gene ID (ECK)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for a gene approved by the HUGO Gene Nomenclature Committee.
+ HGNC ID
+
+
+
+ Gene ID (HGNC)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a gene, (typically) assigned by a person and/or according to a naming scheme. It may contain white space characters and is typically more intuitive and readable than a gene symbol. It (typically) may be used to identify similar genes in different species and to derive a gene symbol.
+ Allele name
+
+
+
+ Gene name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the NCBI genes database.
+
+ Gene name (NCBI)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A specification of a chemical structure in SMILES format.
+
+
+ SMILES string
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the STRING database of protein-protein interactions.
+
+
+
+ STRING ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific virus.
+
+ Virus annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on the taxonomy of a specific virus.
+
+ Virus annotation (taxonomy)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a biological reaction from the SABIO-RK reactions database.
+
+
+
+ Reaction ID (SABIO-RK)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific carbohydrate 3D structure(s).
+
+
+ Carbohydrate report
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A series of digits that are assigned consecutively to each sequence record processed by NCBI. The GI number bears no resemblance to the Accession number of the sequence record.
+ NCBI GI number
+
+
+
+ Nucleotide sequence GI number is shown in the VERSION field of the database record. Protein sequence GI number is shown in the CDS/db_xref field of a nucleotide database record, and the VERSION field of a protein database record.
+ GI number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier assigned to sequence records processed by NCBI, made of the accession number of the database record followed by a dot and a version number.
+ NCBI accession.version
+ accession.version
+
+
+
+ Nucleotide sequence version contains two letters followed by six digits, a dot, and a version number (or for older nucleotide sequence records, the format is one letter followed by five digits, a dot, and a version number). Protein sequence version contains three letters followed by five digits, a dot, and a version number.
+ NCBI version
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a cell line.
+
+
+
+ Cell line name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The exact name of a cell line.
+
+
+
+ Cell line name (exact)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The truncated name of a cell line.
+
+
+
+ Cell line name (truncated)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a cell line without any punctuation.
+
+
+
+ Cell line name (no punctuation)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The assonant name of a cell line.
+
+
+
+ Cell line name (assonant)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique, persistent identifier of an enzyme.
+ Enzyme accession
+
+
+
+ Enzyme ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an enzyme from the REBASE enzymes database.
+
+
+
+ REBASE enzyme number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DB[0-9]{5}
+ Unique identifier of a drug from the DrugBank database.
+
+
+
+ DrugBank ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier assigned to NCBI protein sequence records.
+ protein gi
+ protein gi number
+
+
+
+ Nucleotide sequence GI number is shown in the VERSION field of the database record. Protein sequence GI number is shown in the CDS/db_xref field of a nucleotide database record, and the VERSION field of a protein database record.
+ GI number (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A score derived from the alignment of two sequences, which is then normalised with respect to the scoring system.
+
+
+ Bit scores are normalised with respect to the scoring system and therefore can be used to compare alignment scores from different searches.
+ Bit score
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+
+ Phase for translation of DNA (0, 1 or 2) relative to a fragment of the coding sequence.
+
+ Translation phase specification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning or describing some core computational resource, as distinct from primary data. This includes metadata on the origin, source, history, ownership or location of some thing.
+ Provenance metadata
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Resource metadata
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Any arbitrary identifier of an ontology.
+
+
+
+ Ontology identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a concept in an ontology.
+
+
+
+ Ontology concept name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a build of a particular genome.
+
+
+
+ Genome build identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a biological pathway or network.
+
+
+
+ Pathway or network name
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]{2,3}[0-9]{5}
+ Identifier of a pathway from the KEGG pathway database.
+ KEGG pathway ID
+
+
+
+ Pathway ID (KEGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+
+ Identifier of a pathway from the NCI-Nature pathway database.
+
+
+
+ Pathway ID (NCI-Nature)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a pathway from the ConsensusPathDB pathway database.
+
+
+
+ Pathway ID (ConsensusPathDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef database.
+ UniRef cluster id
+ UniRef entry accession
+
+
+
+ Sequence cluster ID (UniRef)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef100 database.
+ UniRef100 cluster id
+ UniRef100 entry accession
+
+
+
+ Sequence cluster ID (UniRef100)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef90 database.
+ UniRef90 cluster id
+ UniRef90 entry accession
+
+
+
+ Sequence cluster ID (UniRef90)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of an entry from the UniRef50 database.
+ UniRef50 cluster id
+ UniRef50 entry accession
+
+
+
+ Sequence cluster ID (UniRef50)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning or derived from an ontology.
+ Ontological data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Ontology data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific RNA family or other group of classified RNA sequences.
+ RNA family annotation
+
+
+ RNA family report
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an RNA family, typically an entry from a RNA sequence classification database.
+
+
+
+ RNA family identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Stable accession number of an entry (RNA family) from the RFAM database.
+
+
+
+ RFAM accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A label (text token) describing a type of protein family signature (sequence classifier) from the InterPro database.
+
+ Protein signature type
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on protein domain-DNA/RNA interaction(s).
+
+ Domain-nucleic acid interaction report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ An informative report on protein domain-protein domain interaction(s).
+
+ Domain-domain interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data on indirect protein domain-protein domain interaction(s).
+
+ Domain-domain interaction (indirect)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a nucleotide or protein sequence database entry.
+
+
+
+ Sequence accession (hybrid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ Data concerning two-dimensional polygel electrophoresis.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ 2D PAGE data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ two-dimensional gel electrophoresis experiments, gels or spots in a gel.
+
+
+ 2D PAGE report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A persistent, unique identifier of a biological pathway or network (typically a database entry).
+
+
+
+ Pathway or network accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment of the (1D representations of) secondary structure of two or more molecules.
+
+
+ Secondary structure alignment
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an object from the ASTD database.
+
+
+
+ ASTD ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an exon from the ASTD database.
+
+
+
+ ASTD ID (exon)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an intron from the ASTD database.
+
+
+
+ ASTD ID (intron)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a polyA signal from the ASTD database.
+
+
+
+ ASTD ID (polya)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a transcription start site from the ASTD database.
+
+
+
+ ASTD ID (tss)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ An informative report on individual spot(s) from a two-dimensional (2D PAGE) gel.
+
+
+ 2D PAGE spot report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a spot from a two-dimensional (protein) gel.
+
+
+
+ Spot ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a spot from a two-dimensional (protein) gel in the SWISS-2DPAGE database.
+
+
+
+ Spot serial number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a spot from a two-dimensional (protein) gel from a HSC-2DPAGE database.
+
+
+
+ Spot ID (HSC-2DPAGE)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data on the interaction of a protein (or protein domain) with specific structural (3D) and/or sequence motifs.
+
+ Protein-motif interaction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a strain of an organism variant, typically a plant, virus or bacterium.
+
+
+
+ Strain identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of an item from the CABRI database.
+
+
+
+ CABRI accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Report of genotype experiment including case control, population, and family studies. These might use array based methods and re-sequencing methods.
+
+
+ Experiment report (genotyping)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an entry from a database of genotype experiment metadata.
+
+
+
+ Genotype experiment ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the EGA database.
+
+
+
+ EGA accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ IPI[0-9]{8}
+ Identifier of a protein entry catalogued in the International Protein Index (IPI) database.
+
+
+
+ IPI protein ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of a protein from the RefSeq database.
+ RefSeq protein ID
+
+
+
+ RefSeq accession (protein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry (promoter) from the EPD database.
+ EPD identifier
+
+
+
+ EPD ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the TAIR database.
+
+
+
+ TAIR accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an Arabidopsis thaliana gene from the TAIR database.
+
+
+
+ TAIR accession (At gene)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UniSTS database.
+
+
+
+ UniSTS accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UNITE database.
+
+
+
+ UNITE accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UTR database.
+
+
+
+ UTR accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UPI[A-F0-9]{10}
+ Accession number of a UniParc (protein sequence) database entry.
+ UPI
+ UniParc ID
+
+
+
+ UniParc accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the Rouge or HUGE databases.
+
+
+
+ mFLJ/mKIAA number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific fungus.
+
+ Fungi annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific fungus anamorph.
+
+ Fungi annotation (anamorph)
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the Ensembl database.
+ Ensembl ID (protein)
+ Protein ID (Ensembl)
+
+
+
+ Ensembl protein ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on a specific toxin.
+
+ Toxin annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on a membrane protein.
+
+ Protein report (membrane protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ An informative report on tentative or known protein-drug interaction(s).
+
+
+ Protein-drug interaction report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning a map of molecular sequence(s).
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Map data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning phylogeny, typically of molecular sequences, including reports of information concerning or derived from a phylogenetic tree, or from comparing two or more phylogenetic trees.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Phylogenetic data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning one or more protein molecules.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Protein data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning one or more nucleic acid molecules.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Nucleic acid data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning, extracted from, or derived from the analysis of a scientific text (or texts) such as a full text article from a scientific journal.
+ Article data
+ Scientific text data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation. It includes concepts that are best described as scientific text or closely concerned with or derived from text.
+ Text data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+ Typically a simple numerical or string value that controls the operation of a tool.
+
+ Parameter
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning a specific type of molecule.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Molecular data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+
+ An informative report on a specific molecule.
+
+ Molecule report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about a specific organism.
+ Organism annotation
+
+
+ Organism report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about how a scientific experiment or analysis was carried out that results in a specific set of data or results used for further analysis or to test a specific hypothesis.
+ Experiment annotation
+ Experiment metadata
+ Experiment report
+
+
+ Protocol
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An attribute of a molecular sequence, possibly in reference to some other sequence.
+ Sequence parameter
+
+
+ Sequence attribute
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Output from a serial analysis of gene expression (SAGE), massively parallel signature sequencing (MPSS) or sequencing by synthesis (SBS) experiment. In all cases this is a list of short sequence tags and the number of times each is observed.
+ Sequencing-based expression profile
+ Sequence tag profile (with gene assignment)
+
+
+ SAGE, MPSS and SBS experiments are usually performed to study gene expression. The sequence tags are typically subsequently annotated (after a database search) with the mRNA (and therefore gene) the tag was extracted from.
+ This includes tag to gene assignments (tag mapping) of SAGE, MPSS and SBS data. Typically this is the sequencing-based expression profile annotated with gene identifiers.
+ Sequence tag profile
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning a mass spectrometry measurement.
+
+
+ Mass spectrometry data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw data from experimental methods for determining protein structure.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Protein structure raw data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a mutation.
+
+
+
+ Mutation identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning an alignment of two or more molecular sequences, structures or derived data.
+
+ This is a broad data type and is used as a placeholder for other, more specific types. This includes entities derived from sequences and structures such as motifs and profiles.
+ Alignment data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning an index of data.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Data index data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Single letter amino acid identifier, e.g. G.
+
+
+
+ Amino acid name (single letter)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Three letter amino acid identifier, e.g. GLY.
+
+
+
+ Amino acid name (three letter)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Full name of an amino acid, e.g. Glycine.
+
+
+
+ Amino acid name (full name)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a toxin.
+
+
+
+ Toxin identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a toxin from the ArachnoServer database.
+
+
+
+ ArachnoServer ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A simple summary of expressed genes.
+
+ Expressed gene list
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a monomer from the BindingDB database.
+
+
+
+ BindingDB Monomer ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept from the GO ontology.
+
+ GO concept name
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a 'biological process' concept from the Gene Ontology.
+
+
+
+ GO concept ID (biological process)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a 'molecular function' concept from the Gene Ontology.
+
+
+
+ GO concept ID (molecular function)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept for a cellular component from the GO ontology.
+
+ GO concept name (cellular component)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image arising from a Northern Blot experiment.
+
+
+ Northern blot image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a blot from a Northern Blot.
+
+
+
+ Blot ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a blot from a Northern Blot from the BlotBase database.
+
+
+
+ BlotBase blot ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw data on a biological hierarchy, describing the hierarchy proper, hierarchy components and possibly associated annotation.
+ Hierarchy annotation
+
+
+ Hierarchy
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry from a database of biological hierarchies.
+
+ Hierarchy identifier
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the Brite database of biological hierarchies.
+
+
+
+ Brite hierarchy ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ A type (represented as a string) of cancer.
+
+ Cancer type
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier for an organism used in the BRENDA database.
+
+
+
+ BRENDA organism ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a taxon using the controlled vocabulary of the UniGene database.
+ UniGene organism abbreviation
+
+
+
+ UniGene taxon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a taxon using the controlled vocabulary of the UTRdb database.
+
+
+
+ UTRdb taxon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a catalogue of biological resources.
+ Catalogue identifier
+
+
+
+ Catalogue ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a catalogue of biological resources from the CABRI database.
+
+
+
+ CABRI catalogue name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on protein secondary structure alignment-derived data or metadata.
+
+ Secondary structure alignment metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Was deprecated since 1.5, but not correctly (fully) obsoleted until 1.19.
+ 1.5
+
+
+ An informative report on the physical, chemical or other information concerning the interaction of two or more molecules (or parts of molecules).
+
+ Molecule interaction report
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Primary data about a specific biological pathway or network (the nodes and connections within the pathway or network).
+ Network
+ Pathway
+
+
+ Pathway or network
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning one or more small molecules.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Small molecule data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning a particular genotype, phenotype or a genotype / phenotype relation.
+
+ Genotype and phenotype data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image, hybridisation or some other data arising from a study of feature/molecule expression, typically profiling or quantification.
+ Gene expression data
+ Gene product profile
+ Gene product quantification data
+ Gene transcription profile
+ Gene transcription quantification data
+ Metabolite expression data
+ Microarray data
+ Non-coding RNA profile
+ Non-coding RNA quantification data
+ Protein expression data
+ RNA profile
+ RNA quantification data
+ RNA-seq data
+ Transcriptome profile
+ Transcriptome quantification data
+ mRNA profile
+ mRNA quantification data
+ Protein profile
+ Protein quantification data
+ Proteome profile
+ Proteome quantification data
+
+
+ Expression data
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ C[0-9]+
+ Unique identifier of a chemical compound from the KEGG database.
+ KEGG compound ID
+ KEGG compound identifier
+
+
+
+ Compound ID (KEGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name (not necessarily stable) of an entry (RNA family) from the RFAM database.
+
+
+
+ RFAM name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ R[0-9]+
+ Identifier of a biological reaction from the KEGG reactions database.
+
+
+
+ Reaction ID (KEGG)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ D[0-9]+
+ Unique identifier of a drug from the KEGG Drug database.
+
+
+
+ Drug ID (KEGG)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ENS[A-Z]*[FPTG][0-9]{11}
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl database.
+ Ensembl IDs
+
+
+
+ Ensembl ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [A-Z][0-9]+(\.[-[0-9]+])?
+ An identifier of a disease from the International Classification of Diseases (ICD) database.
+
+
+
+ ICD identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9A-Za-z]+:[0-9]+:[0-9]{1,5}(\.[0-9])?
+ Unique identifier of a sequence cluster from the CluSTr database.
+ CluSTr ID
+ CluSTr cluster ID
+
+
+
+ Sequence cluster ID (CluSTr)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ G[0-9]+
+ Unique identifier of a glycan ligand from the KEGG GLYCAN database (a subset of KEGG LIGAND).
+
+
+
+ KEGG Glycan ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+\.[A-Z]\.[0-9]+\.[0-9]+\.[0-9]+
+ A unique identifier of a family from the transport classification database (TCDB) of membrane transport proteins.
+ TC number
+
+
+
+ OBO file for regular expression.
+ TCDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ MINT\-[0-9]{1,5}
+ Unique identifier of an entry from the MINT database of protein-protein interactions.
+
+
+
+ MINT ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DIP[\:\-][0-9]{3}[EN]
+ Unique identifier of an entry from the DIP database of protein-protein interactions.
+
+
+
+ DIP ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A[0-9]{6}
+ Unique identifier of a protein listed in the UCSD-Nature Signaling Gateway Molecule Pages database.
+
+
+
+ Signaling Gateway protein ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a protein modification catalogued in a database.
+
+
+
+ Protein modification ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AA[0-9]{4}
+ Identifier of a protein modification catalogued in the RESID database.
+
+
+
+ RESID ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{4,7}
+ Identifier of an entry from the RGD database.
+
+
+
+ RGD ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AASequence:[0-9]{10}
+ Identifier of a protein sequence from the TAIR database.
+
+
+
+ TAIR accession (protein)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ HMDB[0-9]{5}
+ Identifier of a small molecule metabolite from the Human Metabolome Database (HMDB).
+ HMDB ID
+
+
+
+ Compound ID (HMDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ LM(FA|GL|GP|SP|ST|PR|SL|PK)[0-9]{4}([0-9a-zA-Z]{4})?
+ Identifier of an entry from the LIPID MAPS database.
+ LM ID
+
+
+
+ LIPID MAPS ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PAp[0-9]{8}
+ PDBML:pdbx_PDB_strand_id
+ Identifier of a peptide from the PeptideAtlas peptide databases.
+
+
+
+ PeptideAtlas ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ Identifier of a report of molecular interactions from a database (typically).
+
+
+ Molecular interaction ID
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ A unique identifier of an interaction from the BioGRID database.
+
+
+
+ BioGRID interaction ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ S[0-9]{2}\.[0-9]{3}
+ Unique identifier of a peptidase enzyme from the MEROPS database.
+ MEROPS ID
+
+
+
+ Enzyme ID (MEROPS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a mobile genetic element.
+
+
+
+ Mobile genetic element ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ mge:[0-9]+
+ An identifier of a mobile genetic element from the Aclame database.
+
+
+
+ ACLAME ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PWY[a-zA-Z_0-9]{2}\-[0-9]{3}
+ Identifier of an entry from the Saccharomyces genome database (SGD).
+
+
+
+ SGD ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Unique identifier of a book.
+
+
+
+ Book ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (ISBN)?(-13|-10)?[:]?[ ]?([0-9]{2,3}[ -]?)?[0-9]{1,5}[ -]?[0-9]{1,7}[ -]?[0-9]{1,6}[ -]?([0-9]|X)
+ The International Standard Book Number (ISBN) is for identifying printed books.
+
+
+
+ ISBN
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ B[0-9]{5}
+ Identifier of a metabolite from the 3DMET database.
+ 3DMET ID
+
+
+
+ Compound ID (3DMET)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ([A-NR-Z][0-9][A-Z][A-Z0-9][A-Z0-9][0-9])_.*|([OPQ][0-9][A-Z0-9][A-Z0-9][A-Z0-9][0-9]_.*)|(GAG_.*)|(MULT_.*)|(PFRAG_.*)|(LIP_.*)|(CAT_.*)
+ A unique identifier of an interaction from the MatrixDB database.
+
+
+
+ MatrixDB interaction ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ A unique identifier for pathways, reactions, complexes and small molecules from the cPath (Pathway Commons) database.
+
+
+
+ These identifiers are unique within the cPath database, however, they are not stable between releases.
+ cPath ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ [0-9]+
+ Identifier of an assay from the PubChem database.
+
+
+
+ PubChem bioassay ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the PubChem database.
+ PubChem identifier
+
+
+
+ PubChem ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ M[0-9]{4}
+ Identifier of an enzyme reaction mechanism from the MACie database.
+ MACie entry number
+
+
+
+ Reaction ID (MACie)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ MI[0-9]{7}
+ Identifier for a gene from the miRBase database.
+ miRNA ID
+ miRNA identifier
+ miRNA name
+
+
+
+ Gene ID (miRBase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ZDB\-GENE\-[0-9]+\-[0-9]+
+ Identifier for a gene from the Zebrafish information network genome (ZFIN) database.
+
+
+
+ Gene ID (ZFIN)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{5}
+ Identifier of an enzyme-catalysed reaction from the Rhea database.
+
+
+
+ Reaction ID (Rhea)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UPA[0-9]{5}
+ Identifier of a biological pathway from the Unipathway database.
+ upaid
+
+
+
+ Pathway ID (Unipathway)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a small molecular from the ChEMBL database.
+ ChEMBL ID
+
+
+
+ Compound ID (ChEMBL)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+
+ Unique identifier of an entry from the Ligand-gated ion channel (LGICdb) database.
+
+
+
+ LGICdb identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a biological reaction (kinetics entry) from the SABIO-RK reactions database.
+
+
+
+ Reaction kinetics ID (SABIO-RK)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of an entry from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ PharmGKB ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of a pathway from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ Pathway ID (PharmGKB)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of a disease from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ Disease ID (PharmGKB)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PA[0-9]+
+ Identifier of a drug from the pharmacogenetics and pharmacogenomics knowledge base (PharmGKB).
+
+
+
+ Drug ID (PharmGKB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAP[0-9]+
+ Identifier of a drug from the Therapeutic Target Database (TTD).
+
+
+
+ Drug ID (TTD)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ TTDS[0-9]+
+ Identifier of a target protein from the Therapeutic Target Database (TTD).
+
+
+
+ Target ID (TTD)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A unique identifier of a type or group of cells.
+
+
+
+ Cell type identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ A unique identifier of a neuron from the NeuronDB database.
+
+
+
+ NeuronDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+
+ A unique identifier of a neuron from the NeuroMorpho database.
+
+
+
+ NeuroMorpho ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a chemical from the ChemIDplus database.
+ ChemIDplus ID
+
+
+
+ Compound ID (ChemIDplus)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SMP[0-9]{5}
+ Identifier of a pathway from the Small Molecule Pathway Database (SMPDB).
+
+
+
+ Pathway ID (SMPDB)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the BioNumbers database of key numbers and associated data in molecular biology.
+
+
+
+ BioNumbers ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ T3D[0-9]+
+ Unique identifier of a toxin from the Toxin and Toxin Target Database (T3DB) database.
+
+
+
+ T3DB ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a carbohydrate.
+
+
+
+ Carbohydrate identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the GlycomeDB database.
+
+
+
+ GlycomeDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [a-zA-Z_0-9]+[0-9]+
+ Identifier of an entry from the LipidBank database.
+
+
+
+ LipidBank ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ cd[0-9]{5}
+ Identifier of a conserved domain from the Conserved Domain Database.
+
+
+
+ CDD ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{1,5}
+ An identifier of an entry from the MMDB database.
+ MMDB accession
+
+
+
+ MMDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Unique identifier of an entry from the iRefIndex database of protein-protein interactions.
+
+
+
+ iRefIndex ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Unique identifier of an entry from the ModelDB database.
+
+
+
+ ModelDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a signaling pathway from the Database of Quantitative Cellular Signaling (DQCS).
+
+
+
+ Pathway ID (DQCS)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database (Homo sapiens division).
+
+ Ensembl ID (Homo sapiens)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Bos taurus' division).
+
+ Ensembl ID ('Bos taurus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Canis familiaris' division).
+
+ Ensembl ID ('Canis familiaris')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Cavia porcellus' division).
+
+ Ensembl ID ('Cavia porcellus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Ciona intestinalis' division).
+
+ Ensembl ID ('Ciona intestinalis')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Ciona savignyi' division).
+
+ Ensembl ID ('Ciona savignyi')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Danio rerio' division).
+
+ Ensembl ID ('Danio rerio')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Dasypus novemcinctus' division).
+
+ Ensembl ID ('Dasypus novemcinctus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Echinops telfairi' division).
+
+ Ensembl ID ('Echinops telfairi')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Erinaceus europaeus' division).
+
+ Ensembl ID ('Erinaceus europaeus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Felis catus' division).
+
+ Ensembl ID ('Felis catus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Gallus gallus' division).
+
+ Ensembl ID ('Gallus gallus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Gasterosteus aculeatus' division).
+
+ Ensembl ID ('Gasterosteus aculeatus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Homo sapiens' division).
+
+ Ensembl ID ('Homo sapiens')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Loxodonta africana' division).
+
+ Ensembl ID ('Loxodonta africana')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Macaca mulatta' division).
+
+ Ensembl ID ('Macaca mulatta')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Monodelphis domestica' division).
+
+ Ensembl ID ('Monodelphis domestica')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Mus musculus' division).
+
+ Ensembl ID ('Mus musculus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Myotis lucifugus' division).
+
+ Ensembl ID ('Myotis lucifugus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Ornithorhynchus anatinus' division).
+
+ Ensembl ID ("Ornithorhynchus anatinus")
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Oryctolagus cuniculus' division).
+
+ Ensembl ID ('Oryctolagus cuniculus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Oryzias latipes' division).
+
+ Ensembl ID ('Oryzias latipes')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Otolemur garnettii' division).
+
+ Ensembl ID ('Otolemur garnettii')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Pan troglodytes' division).
+
+ Ensembl ID ('Pan troglodytes')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Rattus norvegicus' division).
+
+ Ensembl ID ('Rattus norvegicus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Spermophilus tridecemlineatus' division).
+
+ Ensembl ID ('Spermophilus tridecemlineatus')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Takifugu rubripes' division).
+
+ Ensembl ID ('Takifugu rubripes')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Tupaia belangeri' division).
+
+ Ensembl ID ('Tupaia belangeri')
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identifier of an entry (exon, gene, transcript or protein) from the Ensembl 'core' database ('Xenopus tropicalis' division).
+
+ Ensembl ID ('Xenopus tropicalis')
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a protein domain (or other node) from the CATH database.
+
+
+
+ CATH identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 2.10.10.10
+ A code number identifying a family from the CATH database.
+
+
+
+ CATH node ID (family)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an enzyme from the CAZy enzymes database.
+ CAZy ID
+
+
+
+ Enzyme ID (CAZy)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier assigned by the I.M.A.G.E. consortium to a clone (cloned molecular sequence).
+ I.M.A.G.E. cloneID
+ IMAGE cloneID
+
+
+
+ Clone ID (IMAGE)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]{7}|GO:[0-9]{7}
+ An identifier of a 'cellular component' concept from the Gene Ontology.
+ GO concept identifier (cellular compartment)
+
+
+
+ GO concept ID (cellular component)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a chromosome as used in the BioCyc database.
+
+
+
+ Chromosome name (BioCyc)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a gene expression profile from the CleanEx database.
+
+
+
+ CleanEx entry name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of (typically a list of) gene expression experiments catalogued in the CleanEx database.
+
+
+
+ CleanEx dataset code
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information concerning a genome as a whole.
+
+
+ Genome report
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein complex from the CORUM database.
+ CORUM complex ID
+
+
+
+ Protein ID (CORUM)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a position-specific scoring matrix from the CDD database.
+
+
+
+ CDD PSSM-ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the CuticleDB database.
+ CuticleDB ID
+
+
+
+ Protein ID (CuticleDB)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a predicted transcription factor from the DBD database.
+
+
+
+ DBD ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ General annotation on an oligonucleotide probe, or a set of probes.
+ Oligonucleotide probe sets annotation
+
+
+ Oligonucleotide probe annotation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of an oligonucleotide from a database.
+
+
+
+ Oligonucleotide ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an oligonucleotide probe from the dbProbe database.
+
+
+
+ dbProbe ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Physicochemical property data for one or more dinucleotides.
+
+
+ Dinucleotide property
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an dinucleotide property from the DiProDB database.
+
+
+
+ DiProDB ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ disordered structure in a protein.
+
+
+ Protein features report (disordered structure)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the DisProt database.
+ DisProt ID
+
+
+
+ Protein ID (DisProt)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Annotation on an embryo or concerning embryological development.
+
+ Embryo report
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a gene transcript from the Ensembl database.
+ Transcript ID (Ensembl)
+
+
+
+ Ensembl transcript ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ An informative report on one or more small molecules that are enzyme inhibitors.
+
+ Inhibitor annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Moby:GeneAccessionList
+ An identifier of a promoter of a gene that is catalogued in a database.
+
+
+
+ Promoter ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an EST sequence.
+
+
+
+ EST accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an EST sequence from the COGEME database.
+
+
+
+ COGEME EST ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a unisequence from the COGEME database.
+
+
+
+ A unisequence is a single sequence assembled from ESTs.
+ COGEME unisequence ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry (protein family) from the GeneFarm database.
+ GeneFarm family ID
+
+
+
+ Protein family ID (GeneFarm)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a family of organism.
+
+
+
+ Family name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a genus of viruses.
+
+ Genus name (virus)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a family of viruses.
+
+ Family name (virus)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a SwissRegulon database.
+
+ Database name (SwissRegulon)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A feature identifier as used in the SwissRegulon database.
+
+
+
+ This can be name of a gene, the ID of a TFBS, or genomic coordinates in form "chr:start..end".
+ Sequence feature ID (SwissRegulon)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of gene in the NMPDR database.
+
+
+
+ A FIG ID consists of four parts: a prefix, genome id, locus type and id number.
+ FIG ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of gene in the Xenbase database.
+
+
+
+ Gene ID (Xenbase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of gene in the Genolist database.
+
+
+
+ Gene ID (Genolist)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Name of an entry (gene) from the Genolist genes database.
+
+ Gene name (Genolist)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry (promoter) from the ABS database.
+ ABS identifier
+
+
+
+ ABS ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a transcription factor from the AraC-XylS database.
+
+
+
+ AraC-XylS ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Name of an entry (gene) from the HUGO database.
+
+ Gene name (HUGO)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a locus from the PseudoCAP database.
+
+
+
+ Locus ID (PseudoCAP)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a locus from the UTR database.
+
+
+
+ Locus ID (UTR)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier of a monosaccharide from the MonosaccharideDB database.
+
+
+
+ MonosaccharideDB ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a subdivision of the Collagen Mutation Database (CMD) database.
+
+ Database name (CMD)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The name of a subdivision of the Osteogenesis database.
+
+ Database name (Osteogenesis)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of a particular genome.
+
+
+
+ Genome identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.26
+
+
+ An identifier of a particular genome.
+
+
+ GenomeReviews ID
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of an entry from the GlycoMapsDB (Glycosciences.de) database.
+
+
+
+ GlycoMap ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A conformational energy map of the glycosidic linkages in a carbohydrate molecule.
+
+
+ Carbohydrate conformational map
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a transcription factor.
+
+
+
+ Transcription factor name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a membrane transport proteins from the transport classification database (TCDB).
+
+
+
+ TCID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PF[0-9]{5}
+ Name of a domain from the Pfam database.
+
+
+
+ Pfam domain name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CL[0-9]{4}
+ Accession number of a Pfam clan.
+
+
+
+ Pfam clan ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for a gene from the VectorBase database.
+ VectorBase ID
+
+
+
+ Gene ID (VectorBase)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the UTRSite database of regulatory motifs in eukaryotic UTRs.
+
+
+
+ UTRSite ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report about a specific or conserved pattern in a molecular sequence, such as its context in genes or proteins, its role, origin or method of construction, etc.
+ Sequence motif report
+ Sequence profile report
+
+
+ Sequence signature report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ An informative report on a particular locus.
+
+ Locus annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Official name of a protein as used in the UniProt database.
+
+
+
+ Protein name (UniProt)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ One or more terms from one or more controlled vocabularies which are annotations on an entity.
+
+ The concepts are typically provided as a persistent identifier or some other link the source ontologies. Evidence of the validity of the annotation might be included.
+ Term ID list
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a protein family from the HAMAP database.
+
+
+
+ HAMAP ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+ Basic information concerning an identifier of data (typically including the identifier itself). For example, a gene symbol with information concerning its provenance.
+
+ Identifier with metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation about a gene symbol.
+
+ Gene symbol annotation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a RNA transcript.
+
+
+
+ Transcript ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an RNA transcript from the H-InvDB database.
+
+
+
+ HIT ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of gene cluster in the H-InvDB database.
+
+
+
+ HIX ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a antibody from the HPA database.
+
+
+
+ HPA antibody id
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a human major histocompatibility complex (HLA) or other protein from the IMGT/HLA database.
+
+
+
+ IMGT/HLA ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of gene assigned by the J. Craig Venter Institute (JCVI).
+
+
+
+ Gene ID (JCVI)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a kinase protein.
+
+
+
+ Kinase name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a physical entity from the ConsensusPathDB database.
+
+
+
+ ConsensusPathDB entity ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a physical entity from the ConsensusPathDB database.
+
+
+
+ ConsensusPathDB entity name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The number of a strain of algae and protozoa from the CCAP database.
+
+
+
+ CCAP strain number
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of stock from a catalogue of biological resources.
+
+
+
+ Stock number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A stock number from The Arabidopsis information resource (TAIR).
+
+
+
+ Stock number (TAIR)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the RNA editing database (REDIdb).
+
+
+
+ REDIdb ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a domain from the SMART database.
+
+
+
+ SMART domain name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Accession number of an entry (family) from the PANTHER database.
+ Panther family ID
+
+
+
+ Protein family ID (PANTHER)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier for a virus from the RNAVirusDB database.
+
+
+
+ Could list (or reference) other taxa here from https://www.phenoscape.org/wiki/Taxonomic_Rank_Vocabulary.
+ RNAVirusDB ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of annotation on a (group of) viruses (catalogued in a database).
+ Virus ID
+
+
+
+ Virus identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a genome project assigned by NCBI.
+
+
+
+ NCBI Genome Project ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a whole genome assigned by the NCBI.
+
+
+
+ NCBI genome accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data concerning, extracted from, or derived from the analysis of a sequence profile, such as its name, length, technical details about the profile or it's construction, the biological role or annotation, and so on.
+
+
+ Sequence profile data
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a membrane protein from the TopDB database.
+ TopDB ID
+
+
+
+ Protein ID (TopDB)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a two-dimensional (protein) gel.
+ Gel identifier
+
+
+
+ Gel ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a reference map gel from the SWISS-2DPAGE database.
+
+
+
+ Reference map name (SWISS-2DPAGE)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a peroxidase protein from the PeroxiBase database.
+ PeroxiBase ID
+
+
+
+ Protein ID (PeroxiBase)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an entry from the SISYPHUS database of tertiary structure alignments.
+
+
+
+ SISYPHUS ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an open reading frame (catalogued in a database).
+
+
+
+ ORF ID
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An identifier of an open reading frame.
+
+
+
+ ORF identifier
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [1-9][0-9]*
+ Identifier of an entry from the GlycosciencesDB database.
+ LInear Notation for Unique description of Carbohydrate Sequences ID
+
+
+
+ LINUCS ID
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a ligand-gated ion channel protein from the LGICdb database.
+ LGICdb ID
+
+
+
+ Protein ID (LGICdb)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of an EST sequence from the MaizeDB database.
+
+
+
+ MaizeDB ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of gene in the MfunGD database.
+
+
+
+ Gene ID (MfunGD)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of a disease from the Orpha database.
+
+
+
+ Orpha number
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a protein from the EcID database.
+
+
+
+ Protein ID (EcID)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier of a cDNA molecule catalogued in the RefSeq database.
+
+
+
+ Clone ID (RefSeq)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Unique identifier for a cone snail toxin protein from the ConoServer database.
+
+
+
+ Protein ID (ConoServer)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a GeneSNP database entry.
+
+
+
+ GeneSNP ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Identifier of a lipid.
+
+
+
+ Lipid identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A flat-file (textual) data archive.
+
+
+ Databank
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A web site providing data (web pages) on a common theme to a HTTP client.
+
+
+ Web portal
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier for a gene from the VBASE2 database.
+ VBASE2 ID
+
+
+
+ Gene ID (VBASE2)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A unique identifier for a virus from the DPVweb database.
+ DPVweb virus ID
+
+
+
+ DPVweb ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ [0-9]+
+ Identifier of a pathway from the BioSystems pathway database.
+
+
+
+ Pathway ID (BioSystems)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data concerning a proteomics experiment.
+
+ Experimental data (proteomics)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An abstract of a scientific article.
+
+
+ Abstract
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a lipid structure.
+
+
+ Lipid structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the (3D) structure of a drug.
+
+
+ Drug structure
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for the (3D) structure of a toxin.
+
+
+ Toxin structure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple matrix of numbers, where each value (or column of values) is derived derived from analysis of the corresponding position in a sequence alignment.
+ PSSM
+
+
+ Position-specific scoring matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A matrix of distances between molecular entities, where a value (distance) is (typically) derived from comparison of two entities and reflects their similarity.
+
+
+ Distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Distances (values representing similarity) between a group of molecular structures.
+
+
+ Structural distance matrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Bibliographic data concerning scientific article(s).
+
+ Article metadata
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A concept from a biological ontology.
+
+
+ This includes any fields from the concept definition such as concept name, definition, comments and so on.
+ Ontology concept
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A numerical measure of differences in the frequency of occurrence of synonymous codons in DNA sequences.
+
+
+ Codon usage bias
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Northern Blot experiments.
+
+
+ Northern blot report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A map showing distance between genetic markers estimated by radiation-induced breaks in a chromosome.
+ RH map
+
+
+ The radiation method can break very closely linked markers providing a more detailed map. Most genetic markers and subsequences may be located to a defined map position and with a more precise estimates of distance than a linkage map.
+ Radiation hybrid map
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A simple list of data identifiers (such as database accessions), possibly with additional basic information on the addressed data.
+
+
+ ID list
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gene frequencies data that may be read during phylogenetic tree calculation.
+
+
+ Phylogenetic gene frequencies data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A set of sub-sequences displaying some type of polymorphism, typically indicating the sequence in which they occur, their position and other metadata.
+
+ Sequence set (polymorphic)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An entry (resource) from the DRCAT bioinformatics resource catalogue.
+
+ DRCAT resource
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a multi-protein complex; two or more polypeptides chains in a stable, functional association with one another.
+
+
+ Protein complex
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 3D coordinate and associated data for a protein (3D) structural motif; any group of contiguous or non-contiguous amino acid residues but typically those forming a feature with a structural or functional role.
+
+
+ Protein structural motif
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A human-readable collection of information about one or more specific lipid 3D structure(s).
+
+
+ Lipid report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Image of one or more molecular secondary structures.
+
+ Secondary structure image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An informative report on general information, properties or features of one or more molecular secondary structures.
+
+ Secondary structure report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ DNA sequence-specific feature annotation (not in a feature table).
+
+ DNA features
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ Features concerning RNA or regions of DNA that encode an RNA molecule.
+
+ RNA features report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Biological data that has been plotted as a graph of some type, or plotting instructions for rendering such a graph.
+ Graph data
+
+
+ Plot
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A protein sequence and associated metadata.
+ Sequence record (protein)
+
+
+ Protein sequence record
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A nucleic acid sequence and associated metadata.
+ Nucleotide sequence record
+ Sequence record (nucleic acid)
+ DNA sequence record
+ RNA sequence record
+
+
+ Nucleic acid sequence record
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A protein sequence and comprehensive metadata (such as a feature table), typically corresponding to a full entry from a molecular sequence database.
+
+
+ Protein sequence record (full)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A nucleic acid sequence and comprehensive metadata (such as a feature table), typically corresponding to a full entry from a molecular sequence database.
+
+
+ Nucleic acid sequence record (full)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a mathematical model, typically an entry from a database.
+
+
+
+ Biological model accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The name of a type or group of cells.
+
+
+
+ Cell type name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a type or group of cells (catalogued in a database).
+ Cell type ID
+
+
+
+ Cell type accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of chemicals.
+ Chemical compound accession
+ Small molecule accession
+
+
+
+ Compound accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a drug.
+
+
+
+ Drug accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Name of a toxin.
+
+
+
+ Toxin name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a toxin (catalogued in a database).
+
+
+
+ Toxin accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a monosaccharide (catalogued in a database).
+
+
+
+ Monosaccharide accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Common name of a drug.
+
+
+
+ Drug name
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of carbohydrates.
+
+
+
+ Carbohydrate accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a specific molecule (catalogued in a database).
+
+
+
+ Molecule accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a data definition (catalogued in a database).
+
+
+
+ Data resource definition accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of a particular genome (in a database).
+
+
+
+ Genome accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of a map of a molecular sequence (deposited in a database).
+
+
+
+ Map accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of lipids.
+
+
+
+ Lipid accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a peptide deposited in a database.
+
+
+
+ Peptide ID
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a protein deposited in a database.
+ Protein accessions
+
+
+
+ Protein accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ An accession of annotation on a (group of) organisms (catalogued in a database).
+
+
+
+ Organism accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Moby:BriefOccurrenceRecord
+ Moby:FirstEpithet
+ Moby:InfraspecificEpithet
+ Moby:OccurrenceRecord
+ Moby:Organism_Name
+ Moby:OrganismsLongName
+ Moby:OrganismsShortName
+ The name of an organism (or group of organisms).
+
+
+
+ Organism name
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of a protein family (that is deposited in a database).
+
+
+
+ Protein family accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession of an entry from a database of transcription factors or binding sites.
+
+
+
+ Transcription factor accession
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Accession number of a strain of an organism variant, typically a plant, virus or bacterium.
+
+
+
+ Strain accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ 1.26
+
+ An accession of annotation on a (group of) viruses (catalogued in a database).
+
+
+ Virus identifier
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Metadata on sequence features.
+
+
+ Sequence features metadata
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a Gramene database entry.
+
+
+
+ Gramene identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of an entry from the DDBJ sequence database.
+ DDBJ ID
+ DDBJ accession number
+ DDBJ identifier
+
+
+
+ DDBJ accession
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An identifier of an entity from the ConsensusPathDB database.
+
+
+
+ ConsensusPathDB identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Data concerning, extracted from, or derived from the analysis of molecular sequence(s).
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Sequence data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning codon usage.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Codon usage
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+
+ Data derived from the analysis of a scientific text such as a full text article from a scientific journal.
+
+ Article report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report of information about molecular sequence(s), including basic information (metadata), and reports generated from molecular sequence analysis, including positional features and non-positional properties.
+ Sequence-derived report
+
+
+ Sequence report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the properties or features of one or more protein secondary structures.
+
+
+ Protein secondary structure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A Hopp and Woods plot of predicted antigenicity of a peptide or protein.
+
+
+ Hopp and Woods plot
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+
+ A melting curve of a double-stranded nucleic acid molecule (DNA or DNA/RNA).
+
+
+ Nucleic acid melting curve
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ A probability profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA).
+
+
+ Nucleic acid probability profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.21
+
+ A temperature profile of a double-stranded nucleic acid molecule (DNA or DNA/RNA).
+
+
+ Nucleic acid temperature profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ A report typically including a map (diagram) of a gene regulatory network.
+
+
+ Gene regulatory network report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ An informative report on a two-dimensional (2D PAGE) gel.
+
+
+ 2D PAGE gel report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.14
+
+ General annotation on a set of oligonucleotide probes, such as the gene name with which the probe set is associated and which probes belong to the set.
+
+
+ Oligonucleotide probe sets annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ An image from a microarray experiment which (typically) allows a visualisation of probe hybridisation and gene-expression data.
+
+ Microarray image
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data (typically biological or biomedical) that has been rendered into an image, typically for display on screen.
+ Image data
+
+
+ Image
+ http://semanticscience.org/resource/SIO_000079
+ http://semanticscience.org/resource/SIO_000081
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Image of a molecular sequence, possibly with sequence features or properties shown.
+
+
+ Sequence image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report on protein properties concerning hydropathy.
+ Protein hydropathy report
+
+
+ Protein hydropathy data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning a computational workflow.
+
+ Workflow data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A computational workflow.
+
+ Workflow
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning molecular secondary structure data.
+
+ Secondary structure data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possible in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw protein sequence (string of characters).
+
+
+ Protein sequence (raw)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possible in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw nucleic acid sequence.
+
+
+ Nucleic acid sequence (raw)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ One or more protein sequences, possibly with associated annotation.
+ Amino acid sequence
+ Amino acid sequences
+ Protein sequences
+
+
+ Protein sequence
+ http://purl.org/biotop/biotop.owl#AminoAcidSequenceInformation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ One or more nucleic acid sequences, possibly with associated annotation.
+ Nucleic acid sequences
+ Nucleotide sequence
+ Nucleotide sequences
+ DNA sequence
+
+
+ Nucleic acid sequence
+ http://purl.org/biotop/biotop.owl#NucleotideSequenceInformation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning a biochemical reaction, typically data and more general annotation on the kinetics of enzyme-catalysed reaction.
+ Enzyme kinetics annotation
+ Reaction annotation
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Reaction data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning small peptides.
+ Peptide data
+
+
+ Peptide property
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Was deprecated since 1.5, but not correctly (fully) obsoleted until 1.19.
+ 1.5
+
+
+ An informative report concerning the classification of protein sequences or structures.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Protein classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data concerning specific or conserved pattern in molecular sequences.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Sequence motif data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data concerning models representing a (typically multiple) sequence alignment.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Sequence profile data
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+
+ Data concerning a specific biological pathway or network.
+
+ Pathway or network data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An informative report concerning or derived from the analysis of a biological pathway or network, such as a map (diagram) or annotation.
+
+
+ Pathway or network report
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A thermodynamic or kinetic property of a nucleic acid molecule.
+ Nucleic acid property (thermodynamic or kinetic)
+ Nucleic acid thermodynamic property
+
+
+ Nucleic acid thermodynamic data
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Was deprecated since 1.5, but not correctly (fully) obsoleted until 1.19.
+ 1.5
+
+
+ Data concerning the classification of nucleic acid sequences or structures.
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Nucleic acid classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.5
+
+
+ A report on a classification of molecular sequences, structures or other entities.
+
+ This can include an entire classification, components such as classifiers, assignments of entities to a classification and so on.
+ Classification report
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ key residues involved in protein folding.
+
+
+ Protein features report (key folding sites)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Geometry data for a protein structure, for example bond lengths, bond angles, torsion angles, chiralities, planarities etc.
+ Torsion angle data
+
+
+ Protein geometry data
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An image of protein structure.
+ Structure image (protein)
+
+
+ Protein structure image
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Weights for sequence positions or characters in phylogenetic analysis where zero is defined as unweighted.
+
+
+ Phylogenetic character weights
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotation of one particular positional feature on a biomolecular (typically genome) sequence, suitable for import and display in a genome browser.
+ Genome annotation track
+ Genome track
+ Genome-browser track
+ Genomic track
+ Sequence annotation track
+
+
+ Annotation track
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ P43353|Q7M1G0|Q9C199|A5A6J6
+ [OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}
+ Accession number of a UniProt (protein sequence) database entry.
+ UniProt accession number
+ UniProt entry accession
+ UniProtKB accession
+ UniProtKB accession number
+ Swiss-Prot entry accession
+ TrEMBL entry accession
+
+
+
+ UniProt accession
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 16
+ [1-9][0-9]?
+ Identifier of a genetic code in the NCBI list of genetic codes.
+
+
+
+ NCBI genetic code ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identifier of a concept in an ontology of biological or bioinformatics concepts and relations.
+
+
+
+ Ontology concept identifier
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept for a biological process from the GO ontology.
+
+ GO concept name (biological process)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The name of a concept for a molecular function from the GO ontology.
+
+ GO concept name (molecular function)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data concerning the classification, identification and naming of organisms.
+ Taxonomic data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Taxonomy
+
+
+
+
+
+
+
+
+
+ beta13
+ EMBL/GENBANK/DDBJ coding feature protein identifier, issued by International collaborators.
+
+
+
+ This qualifier consists of a stable ID portion (3+5 format with 3 position letters and 5 numbers) plus a version number after the decimal point. When the protein sequence encoded by the CDS changes, only the version number of the /protein_id value is incremented; the stable part of the /protein_id remains unchanged and as a result will permanently be associated with a given protein; this qualifier is valid only on CDS features which translate into a valid protein.
+ Protein ID (EMBL/GenBank/DDBJ)
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+ A type of data that (typically) corresponds to entries from the primary biological databases and which is (typically) the primary input or output of a tool, i.e. the data the tool processes or generates, as distinct from metadata and identifiers which describe and identify such core data, parameters that control the behaviour of tools, reports of derivative data generated by tools and annotation.
+
+
+ Core data entities typically have a format and may be identified by an accession number.
+ Core data
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Name or other identifier of molecular sequence feature(s).
+
+
+
+ Sequence feature identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ An identifier of a molecular tertiary structure, typically an entry from a structure database.
+
+
+
+ Structure identifier
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ An identifier of an array of numerical values, such as a comparison matrix.
+
+
+
+ Matrix identifier
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ A report (typically a table) on character or word composition / frequency of protein sequence(s).
+
+
+ Protein sequence composition
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ A report (typically a table) on character or word composition / frequency of nucleic acid sequence(s).
+
+
+ Nucleic acid sequence composition (report)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A node from a classification of protein structural domain(s).
+
+ Protein domain classification node
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Duplicates http://edamontology.org/data_1002, hence deprecated.
+ 1.23
+
+ Unique numerical identifier of chemicals in the scientific literature, as assigned by the Chemical Abstracts Service.
+
+
+ CAS number
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+ Unique identifier of a drug conforming to the Anatomical Therapeutic Chemical (ATC) Classification System, a drug classification system controlled by the WHO Collaborating Centre for Drug Statistics Methodology (WHOCC).
+
+
+
+ ATC code
+
+
+
+
+
+
+
+
+ beta13
+ A unique, unambiguous, alphanumeric identifier of a chemical substance as catalogued by the Substance Registration System of the Food and Drug Administration (FDA).
+ Unique Ingredient Identifier
+
+
+
+ UNII
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Basic information concerning geographical location or time.
+
+ Geotemporal metadata
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Metadata concerning the software, hardware or other aspects of a computer system.
+
+
+ System metadata
+
+
+
+
+
+
+
+
+ beta13
+ 1.15
+
+ A name of a sequence feature, e.g. the name of a feature to be displayed to an end-user.
+
+
+ Sequence feature name
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Raw data such as measurements or other results from laboratory experiments, as generated from laboratory hardware.
+ Experimental measurement data
+ Experimentally measured data
+ Measured data
+ Measurement
+ Measurement data
+ Measurement metadata
+ Raw experimental data
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types. It is primarily intended to help navigation of EDAM and would not typically be used for annotation.
+ Experimental measurement
+
+
+
+
+
+
+
+
+
+ beta13
+ Raw data (typically MIAME-compliant) for hybridisations from a microarray experiment.
+
+
+ Such data as found in Affymetrix CEL or GPR files.
+ Raw microarray data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ Data generated from processing and analysis of probe set data from a microarray experiment.
+ Gene annotation (expression)
+ Gene expression report
+ Microarray probe set data
+
+
+ Such data as found in Affymetrix .CHP files or data from other software such as RMA or dChip.
+ Processed microarray data
+
+
+
+
+
+
+
+
+
+ beta13
+ The final processed (normalised) data for a set of hybridisations in a microarray experiment.
+ Gene expression data matrix
+ Normalised microarray data
+
+
+ This combines data from all hybridisations.
+ Gene expression matrix
+
+
+
+
+
+
+
+
+ beta13
+ Annotation on a biological sample, for example experimental factors and their values.
+
+
+ This might include compound and dose in a dose response experiment.
+ Sample annotation
+
+
+
+
+
+
+
+
+ beta13
+ Annotation on the array itself used in a microarray experiment.
+
+
+ This might include gene identifiers, genomic coordinates, probe oligonucleotide sequences etc.
+ Microarray metadata
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ Annotation on laboratory and/or data processing protocols used in a microarray experiment.
+
+
+ This might describe e.g. the normalisation methods used to process the raw data.
+ Microarray protocol annotation
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Data concerning the hybridisations measured during a microarray experiment.
+
+
+ Microarray hybridisation data
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report of regions in a molecular sequence that are biased to certain characters.
+
+ Sequence features (compositionally-biased regions)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+ A report on features in a nucleic acid sequence that indicate changes to or differences between sequences.
+
+
+ Nucleic acid features (difference and change)
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+ A human-readable collection of information about regions within a nucleic acid sequence which form secondary or tertiary (3D) structures.
+ Nucleic acid features (structure)
+ Quadruplexes (report)
+ Stem loop (report)
+ d-loop (report)
+
+
+ The report may be based on analysis of nucleic acid sequence or structural data, or any annotation or information about specific nucleic acid 3D structure(s) or such structures in general.
+ Nucleic acid structure report
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ short repetitive subsequences (repeat sequences) in a protein sequence.
+
+
+ Protein features report (repeats)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ Report on the location of matches to profiles, motifs (conserved or functional patterns) or other signatures in one or more protein sequences.
+
+
+ Sequence motif matches (protein)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ Report on the location of matches to profiles, motifs (conserved or functional patterns) or other signatures in one or more nucleic acid sequences.
+
+
+ Sequence motif matches (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report on displacement loops in a mitochondrial DNA sequence.
+
+ A displacement loop is a region of mitochondrial DNA in which one of the strands is displaced by an RNA molecule.
+ Nucleic acid features (d-loop)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report on stem loops in a DNA sequence.
+
+ A stem loop is a hairpin structure; a double-helical structure formed when two complementary regions of a single strand of RNA or DNA molecule form base-pairs.
+ Nucleic acid features (stem loop)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ An informative report on features of messenger RNA (mRNA) molecules including precursor RNA, primary (unprocessed) transcript and fully processed molecules. This includes reports on a specific gene transcript, clone or EST.
+ Clone or EST (report)
+ Gene transcript annotation
+ Nucleic acid features (mRNA features)
+ Transcript (report)
+ mRNA (report)
+ mRNA features
+
+
+ This includes 5'untranslated region (5'UTR), coding sequences (CDS), exons, intervening sequences (intron) and 3'untranslated regions (3'UTR).
+ Gene transcript report
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ features of non-coding or functional RNA molecules, including tRNA and rRNA.
+
+
+ Non-coding RNA
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Features concerning transcription of DNA into RNA including the regulation of transcription.
+
+ This includes promoters, CAAT signals, TATA signals, -35 signals, -10 signals, GC signals, primer binding sites for initiation of transcription or reverse transcription, enhancer, attenuator, terminators and ribosome binding sites.
+ Transcriptional features (report)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ A report on predicted or actual immunoglobulin gene structure including constant, switch and variable regions and diversity, joining and variable segments.
+
+ Nucleic acid features (immunoglobulin gene structure)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'class' node from the SCOP database.
+
+ SCOP class
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'fold' node from the SCOP database.
+
+ SCOP fold
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'superfamily' node from the SCOP database.
+
+ SCOP superfamily
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'family' node from the SCOP database.
+
+ SCOP family
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'protein' node from the SCOP database.
+
+ SCOP protein
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.5
+
+
+ Information on a 'species' node from the SCOP database.
+
+ SCOP species
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.8
+
+ mass spectrometry experiments.
+
+
+ Mass spectrometry experiment
+ true
+
+
+
+
+
+
+
+
+ beta13
+ Nucleic acid classification
+ A human-readable collection of information about a particular family of genes, typically a set of genes with similar sequence that originate from duplication of a common ancestor gene, or any other classification of nucleic acid sequences or structures that reflects gene structure.
+ Gene annotation (homology information)
+ Gene annotation (homology)
+ Gene family annotation
+ Gene homology (report)
+ Homology information
+
+
+ This includes reports on gene homologues between species.
+ Gene family report
+
+
+
+
+
+
+
+
+ beta13
+ An image of a protein.
+
+
+ Protein image
+
+
+
+
+
+
+
+
+ beta13
+ 1.24
+
+
+
+
+ An alignment of protein sequences and/or structures.
+
+ Protein alignment
+ true
+
+
+
+
+
+
+
+
+ 1.0
+ 1.8
+
+ sequencing experiment, including samples, sampling, preparation, sequencing, and analysis.
+
+
+ NGS experiment
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ An informative report about a DNA sequence assembly.
+ Assembly report
+
+
+ This might include an overall quality assessment of the assembly and summary statistics including counts, average length and number of bases for reads, matches and non-matches, contigs, reads in pairs etc.
+ Sequence assembly report
+
+
+
+
+
+
+
+
+ 1.1
+ An index of a genome sequence.
+
+
+ Many sequence alignment tasks involving many or very large sequences rely on a precomputed index of the sequence to accelerate the alignment.
+ Genome index
+
+
+
+
+
+
+
+
+ 1.1
+ 1.8
+
+ Report concerning genome-wide association study experiments.
+
+
+ GWAS report
+ true
+
+
+
+
+
+
+
+
+ 1.2
+ The position of a cytogenetic band in a genome.
+
+
+ Information might include start and end position in a chromosome sequence, chromosome identifier, name of band and so on.
+ Cytoband position
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ CL_[0-9]{7}
+ Cell type ontology concept ID.
+ CL ID
+
+
+
+ Cell type ontology ID
+
+
+
+
+
+
+
+
+ 1.2
+ Mathematical model of a network, that contains biochemical kinetics.
+
+
+ Kinetic model
+
+
+
+
+
+
+
+
+
+ 1.3
+ Identifier of a COSMIC database entry.
+ COSMIC identifier
+
+
+
+ COSMIC ID
+
+
+
+
+
+
+
+
+
+ 1.3
+ Identifier of a HGMD database entry.
+ HGMD identifier
+
+
+
+ HGMD ID
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Unique identifier of sequence assembly.
+ Sequence assembly version
+
+
+
+ Sequence assembly ID
+
+
+
+
+
+
+
+
+ 1.3
+ 1.5
+
+
+ A label (text token) describing a type of sequence feature such as gene, transcript, cds, exon, repeat, simple, misc, variation, somatic variation, structural variation, somatic structural variation, constrained or regulatory.
+
+ Sequence feature type
+ true
+
+
+
+
+
+
+
+
+ 1.3
+ 1.5
+
+
+ An informative report on gene homologues between species.
+
+ Gene homology (report)
+ true
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ ENSGT00390000003602
+ Unique identifier for a gene tree from the Ensembl database.
+ Ensembl ID (gene tree)
+
+
+
+ Ensembl gene tree ID
+
+
+
+
+
+
+
+
+ 1.3
+ A phylogenetic tree that is an estimate of the character's phylogeny.
+
+
+ Gene tree
+
+
+
+
+
+
+
+
+ 1.3
+ A phylogenetic tree that reflects phylogeny of the taxa from which the characters (used in calculating the tree) were sampled.
+
+
+ Species tree
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Name or other identifier of an entry from a biosample database.
+ Sample accession
+
+
+
+ Sample ID
+
+
+
+
+
+
+
+
+
+ 1.3
+ Identifier of an object from the MGI database.
+
+
+
+ MGI accession
+
+
+
+
+
+
+
+
+ 1.3
+ Name of a phenotype.
+ Phenotype
+ Phenotypes
+
+
+
+ Phenotype name
+
+
+
+
+
+
+
+
+ 1.4
+ A HMM transition matrix contains the probabilities of switching from one HMM state to another.
+ HMM transition matrix
+
+
+ Consider for example an HMM with two states (AT-rich and GC-rich). The transition matrix will hold the probabilities of switching from the AT-rich to the GC-rich state, and vice versa.
+ Transition matrix
+
+
+
+
+
+
+
+
+ 1.4
+ A HMM emission matrix holds the probabilities of choosing the four nucleotides (A, C, G and T) in each of the states of a HMM.
+ HMM emission matrix
+
+
+ Consider for example an HMM with two states (AT-rich and GC-rich). The emission matrix holds the probabilities of choosing each of the four nucleotides (A, C, G and T) in the AT-rich state and in the GC-rich state.
+ Emission matrix
+
+
+
+
+
+
+
+
+ 1.4
+ 1.15
+
+ A statistical Markov model of a system which is assumed to be a Markov process with unobserved (hidden) states.
+
+
+ Hidden Markov model
+ true
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ An identifier of a data format.
+
+
+ Format identifier
+
+
+
+
+
+
+
+
+ 1.5
+ Raw biological or biomedical image generated by some experimental technique.
+
+
+ Raw image
+ http://semanticscience.org/resource/SIO_000081
+
+
+
+
+
+
+
+
+ 1.5
+ Data concerning the intrinsic physical (e.g. structural) or chemical properties of one, more or all carbohydrates.
+ Carbohydrate data
+
+
+ Carbohydrate property
+
+
+
+
+
+
+
+
+ 1.5
+ 1.8
+
+ Report concerning proteomics experiments.
+
+
+ Proteomics experiment report
+ true
+
+
+
+
+
+
+
+
+ 1.5
+ 1.8
+
+ RNAi experiments.
+
+
+ RNAi report
+ true
+
+
+
+
+
+
+
+
+ 1.5
+ 1.8
+
+ biological computational model experiments (simulation), for example the minimum information required in order to permit its correct interpretation and reproduction.
+
+
+ Simulation experiment report
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ An imaging technique that uses magnetic fields and radiowaves to form images, typically to investigate the anatomy and physiology of the human body.
+ MRT image
+ Magnetic resonance imaging image
+ Magnetic resonance tomography image
+ NMRI image
+ Nuclear magnetic resonance imaging image
+
+
+ MRI image
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ An image from a cell migration track assay.
+
+
+ Cell migration track image
+
+
+
+
+
+
+
+
+ 1.7
+ Rate of association of a protein with another protein or some other molecule.
+ kon
+
+
+ Rate of association
+
+
+
+
+
+
+
+
+ 1.7
+ Multiple gene identifiers in a specific order.
+
+
+ Such data are often used for genome rearrangement tools and phylogenetic tree labeling.
+ Gene order
+
+
+
+
+
+
+
+
+ 1.7
+ The spectrum of frequencies of electromagnetic radiation emitted from a molecule as a result of some spectroscopy experiment.
+ Spectra
+
+
+ Spectrum
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Spectral information for a molecule from a nuclear magnetic resonance experiment.
+ NMR spectra
+
+
+ NMR spectrum
+
+
+
+
+
+
+
+
+ 1.8
+ 1.21
+
+ A sketch of a small molecule made with some specialised drawing package.
+
+
+ Chemical structure sketches are used for presentational purposes but also as inputs to various analysis software.
+ Chemical structure sketch
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ An informative report about a specific or conserved nucleic acid sequence pattern.
+
+
+ Nucleic acid signature
+
+
+
+
+
+
+
+
+ 1.8
+ A DNA sequence.
+ DNA sequences
+
+
+ DNA sequence
+
+
+
+
+
+
+
+
+ 1.8
+ An RNA sequence.
+ RNA sequences
+
+
+ RNA sequence
+
+
+
+
+
+
+
+
+ 1.8
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possible in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw RNA sequence.
+
+
+ RNA sequence (raw)
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Deprecated because this is bloat / confusing & better handled as an EDAM Format concept - "raw" sequences just imply a particular format (i.e. one with a vanilla string, possible in a particular alphabet, with no metadata).
+ 1.23
+
+ A raw DNA sequence.
+
+
+ DNA sequence (raw)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ Data on gene sequence variations resulting from large-scale genotyping and DNA sequencing projects.
+ Gene sequence variations
+
+
+ Variations are stored along with a reference genome.
+ Sequence variations
+
+
+
+
+
+
+
+
+ 1.8
+ A list of publications such as scientific papers or books.
+
+
+ Bibliography
+
+
+
+
+
+
+
+
+ 1.8
+ A mapping of supplied textual terms or phrases to ontology concepts (URIs).
+
+
+ Ontology mapping
+
+
+
+
+
+
+
+
+ 1.9
+ Any data concerning a specific biological or biomedical image.
+ Image-associated data
+ Image-related data
+
+
+ This can include basic provenance and technical information about the image, scientific annotation and so on.
+ Image metadata
+
+
+
+
+
+
+
+
+ 1.9
+ A human-readable collection of information concerning a clinical trial.
+ Clinical trial information
+
+
+ Clinical trial report
+
+
+
+
+
+
+
+
+ 1.10
+ A report about a biosample.
+ Biosample report
+
+
+ Reference sample report
+
+
+
+
+
+
+
+
+ 1.10
+ Accession number of an entry from the Gene Expression Atlas.
+
+
+
+ Gene Expression Atlas Experiment ID
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ true
+ Identifier of an entry from a database of disease.
+
+
+
+ Disease identifier
+
+
+
+
+
+
+
+
+
+ 1.12
+ The name of some disease.
+
+
+
+ Disease name
+
+
+
+
+
+
+
+
+ 1.12
+ Some material that is used for educational (training) purposes.
+ OER
+ Open educational resource
+
+
+ Training material
+
+
+
+
+
+
+
+
+ 1.12
+ A training course available for use on the Web.
+ On-line course
+ MOOC
+ Massive open online course
+
+
+ Online course
+
+
+
+
+
+
+
+
+ 1.12
+ Any free or plain text, typically for human consumption and in English. Can instantiate also as a textual search query.
+ Free text
+ Plain text
+ Textual search query
+
+
+ Text
+
+
+
+
+
+
+
+
+
+ 1.14
+ Machine-readable biodiversity data.
+ Biodiversity information
+ OTU table
+
+
+ Biodiversity data
+
+
+
+
+
+
+
+
+ 1.14
+ A human-readable collection of information concerning biosafety data.
+ Biosafety information
+
+
+ Biosafety report
+
+
+
+
+
+
+
+
+ 1.14
+ A report about any kind of isolation of biological material.
+ Geographic location
+ Isolation source
+
+
+ Isolation report
+
+
+
+
+
+
+
+
+ 1.14
+ Information about the ability of an organism to cause disease in a corresponding host.
+ Pathogenicity
+
+
+ Pathogenicity report
+
+
+
+
+
+
+
+
+ 1.14
+ Information about the biosafety classification of an organism according to corresponding law.
+ Biosafety level
+
+
+ Biosafety classification
+
+
+
+
+
+
+
+
+ 1.14
+ A report about localisation of the isolation of biological material e.g. country or coordinates.
+
+
+ Geographic location
+
+
+
+
+
+
+
+
+ 1.14
+ A report about any kind of isolation source of biological material e.g. blood, water, soil.
+
+
+ Isolation source
+
+
+
+
+
+
+
+
+ 1.14
+ Experimentally determined parameter of the physiology of an organism, e.g. substrate spectrum.
+
+
+ Physiology parameter
+
+
+
+
+
+
+
+
+ 1.14
+ Experimentally determined parameter of the morphology of an organism, e.g. size & shape.
+
+
+ Morphology parameter
+
+
+
+
+
+
+
+
+ 1.14
+ Experimentally determined parameter for the cultivation of an organism.
+ Cultivation conditions
+ Carbon source
+ Culture media composition
+ Nitrogen source
+ Salinity
+ Temperature
+ pH value
+
+
+ Cultivation parameter
+
+
+
+
+
+
+
+
+ 1.15
+ Data concerning a sequencing experiment, that may be specified as an input to some tool.
+
+
+ Sequencing metadata name
+
+
+
+
+
+
+
+
+ 1.15
+ An identifier of a flow cell of a sequencing machine.
+
+
+ A flow cell is used to immobilise, amplify and sequence millions of molecules at once. In Illumina machines, a flowcell is composed of 8 "lanes" which allows 8 experiments in a single analysis.
+ Flow cell identifier
+
+
+
+
+
+
+
+
+ 1.15
+ An identifier of a lane within a flow cell of a sequencing machine, within which millions of sequences are immobilised, amplified and sequenced.
+
+
+ Lane identifier
+
+
+
+
+
+
+
+
+ 1.15
+ A number corresponding to the number of an analysis performed by a sequencing machine. For example, if it's the 13th analysis, the run is 13.
+
+
+ Run number
+
+
+
+
+
+
+
+
+ 1.15
+ Data concerning ecology; for example measurements and reports from the study of interactions among organisms and their environment.
+
+
+ This is a broad data type and is used as a placeholder for other, more specific types.
+ Ecological data
+
+
+
+
+
+
+
+
+ 1.15
+ The mean species diversity in sites or habitats at a local scale.
+ α-diversity
+
+
+ Alpha diversity data
+
+
+
+
+
+
+
+
+ 1.15
+ The ratio between regional and local species diversity.
+ True beta diversity
+ β-diversity
+
+
+ Beta diversity data
+
+
+
+
+
+
+
+
+ 1.15
+ The total species diversity in a landscape.
+ ɣ-diversity
+
+
+ Gamma diversity data
+
+
+
+
+
+
+
+
+
+ 1.15
+ A plot in which community data (e.g. species abundance data) is summarised. Similar species and samples are plotted close together, and dissimilar species and samples are plotted far apart.
+
+
+ Ordination plot
+
+
+
+
+
+
+
+
+ 1.16
+ A ranked list of categories (usually ontology concepts), each associated with a statistical metric of over-/under-representation within the studied data.
+ Enrichment report
+ Over-representation report
+ Functional enrichment report
+
+
+ Over-representation data
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ GO-term report
+ A ranked list of Gene Ontology concepts, each associated with a p-value, concerning or derived from the analysis of e.g. a set of genes or proteins.
+ GO-term enrichment report
+ Gene ontology concept over-representation report
+ Gene ontology enrichment report
+ Gene ontology term enrichment report
+
+
+ GO-term enrichment data
+
+
+
+
+
+
+
+
+ 1.16
+ Score for localization of one or more post-translational modifications in peptide sequence measured by mass spectrometry.
+ False localisation rate
+ PTM localisation
+ PTM score
+
+
+ Localisation score
+
+
+
+
+
+
+
+
+
+ 1.16
+ Identifier of a protein modification catalogued in the Unimod database.
+
+
+
+ Unimod ID
+
+
+
+
+
+
+
+
+ 1.16
+ Identifier for mass spectrometry proteomics data in the proteomexchange.org repository.
+
+
+
+ ProteomeXchange ID
+
+
+
+
+
+
+
+
+ 1.16
+ Groupings of expression profiles according to a clustering algorithm.
+ Clustered gene expression profiles
+
+
+ Clustered expression profiles
+
+
+
+
+
+
+
+
+
+ 1.16
+ An identifier of a concept from the BRENDA ontology.
+
+
+
+ BRENDA ontology concept ID
+
+
+
+
+
+
+
+
+
+ 1.16
+ A text (such as a scientific article), annotated with notes, data and metadata, such as recognised entities, concepts, and their relations.
+
+
+ Annotated text
+
+
+
+
+
+
+
+
+ 1.16
+ A structured query, in form of a script, that defines a database search task.
+
+
+ Query script
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Structural 3D model (volume map) from electron microscopy.
+
+
+ 3D EM Map
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Annotation on a structural 3D EM Map from electron microscopy. This might include one or several locations in the map of the known features of a particular macromolecule.
+
+
+ 3D EM Mask
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Raw DDD movie acquisition from electron microscopy.
+
+
+ EM Movie
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.19
+ Raw acquisition from electron microscopy or average of an aligned DDD movie.
+
+
+ EM Micrograph
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.21
+ Data coming from molecular simulations, computer "experiments" on model molecules.
+
+
+ Typically formed by two separated but indivisible pieces of information: topology data (static) and trajectory data (dynamic).
+ Molecular simulation data
+
+
+
+
+
+
+
+
+
+ 1.21
+ Identifier of an entry from the RNA central database of annotated human miRNAs.
+
+
+
+ There are canonical and taxon-specific forms of RNAcentral ID. Canonical form e.g. urs_9or10digits identifies an RNA sequence (within the RNA central database) which may appear in multiple sequences. Taxon-specific form identifies a sequence in the specific taxon (e.g. urs_9or10digits_taxonID).
+ RNA central ID
+
+
+
+
+
+
+
+
+ 1.21
+ A human-readable systematic collection of patient (or population) health information in a digital format.
+ EHR
+ EMR
+ Electronic medical record
+
+
+ Electronic health record
+
+
+
+
+
+
+
+
+ 1.22
+ Data coming from molecular simulations, computer "experiments" on model molecules. Typically formed by two separated but indivisible pieces of information: topology data (static) and trajectory data (dynamic).
+
+
+ Simulation
+
+
+
+
+
+
+
+
+ 1.22
+ Dynamic information of a structure molecular system coming from a molecular simulation: XYZ 3D coordinates (sometimes with their associated velocities) for every atom along time.
+
+
+ Trajectory data
+
+
+
+
+
+
+
+
+ 1.22
+ Force field parameters: charges, masses, radii, bond lengths, bond dihedrals, etc. define the structural molecular system, and are essential for the proper description and simulation of a molecular system.
+
+
+ Forcefield parameters
+
+
+
+
+
+
+
+
+ 1.22
+ Static information of a structure molecular system that is needed for a molecular simulation: the list of atoms, their non-bonded parameters for Van der Waals and electrostatic interactions, and the complete connectivity in terms of bonds, angles and dihedrals.
+
+
+ Topology data
+
+
+
+
+
+
+
+
+ 1.22
+ Visualization of distribution of quantitative data, e.g. expression data, by histograms, violin plots and density plots.
+ Density plot
+
+
+ Histogram
+
+
+
+
+
+
+
+
+ 1.23
+ Report of the quality control review that was made of factors involved in a procedure.
+ QC metrics
+ QC report
+ Quality control metrics
+ Quality control report
+
+
+
+
+
+
+
+
+ 1.23
+ A table of unnormalized values representing summarised read counts per genomic region (e.g. gene, transcript, peak).
+ Read count matrix
+
+
+ Count matrix
+
+
+
+
+
+
+
+
+ 1.24
+ Alignment (superimposition) of DNA tertiary (3D) structures.
+ Structure alignment (DNA)
+
+
+ DNA structure alignment
+
+
+
+
+
+
+
+
+ 1.24
+ A score derived from the P-value to ensure correction for multiple tests. The Q-value provides an estimate of the positive False Discovery Rate (pFDR), i.e. the rate of false positives among all the cases reported positive: pFDR = FP / (FP + TP).
+ Adjusted P-value
+ FDR
+ Padj
+ pFDR
+
+
+ Q-values are widely used in high-throughput data analysis (e.g. detection of differentially expressed genes from transcriptome data).
+ Q-value
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A profile HMM is a variant of a Hidden Markov model that is derived specifically from a set of (aligned) biological sequences. Profile HMMs provide the basis for a position-specific scoring system, which can be used to align sequences and search databases for related sequences.
+
+
+ Profile HMM
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+
+ WP[0-9]+
+ Identifier of a pathway from the WikiPathways pathway database.
+ WikiPathways ID
+ WikiPathways pathway ID
+
+
+
+ Pathway ID (WikiPathways)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A ranked list of pathways, each associated with z-score, p-value or similar, concerning or derived from the analysis of e.g. a set of genes or proteins.
+ Pathway analysis results
+ Pathway enrichment report
+ Pathway over-representation report
+ Pathway report
+ Pathway term enrichment report
+
+
+ Pathway overrepresentation data
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ \d{4}-\d{4}-\d{4}-\d{3}(\d|X)
+ Identifier of a researcher registered with the ORCID database. Used to identify author IDs.
+
+
+
+ ORCID Identifier
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ Chemical structure specified in Simplified Molecular Input Line Entry System (SMILES) line notation.
+
+
+ SMILES
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Chemical structure specified in IUPAC International Chemical Identifier (InChI) line notation.
+
+
+ InChI
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Chemical structure specified by Molecular Formula (MF), including a count of each element in a compound.
+
+
+ The general MF query format consists of a series of valid atomic symbols, with an optional number or range.
+ mf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The InChIKey (hashed InChI) is a fixed length (25 character) condensed digital representation of an InChI chemical structure specification. It uniquely identifies a chemical compound.
+
+
+ An InChIKey identifier is not human- nor machine-readable but is more suitable for web searches than an InChI chemical structure specification.
+ InChIKey
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SMILES ARbitrary Target Specification (SMARTS) format for chemical structure specification, which is a subset of the SMILES line notation.
+
+
+ smarts
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ Non-sequence characters may be used for example for gaps.
+ nucleotide
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#Nucleotide_sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a protein sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ Non-sequence characters may be used for gaps and translation stop.
+ protein
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#Amino_acid_sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for the consensus of two or more molecular sequences.
+
+
+ consensus
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure nucleotide
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence (characters ACGTU only) with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure nucleotide
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ dna
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#DNA_sequence
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence with possible ambiguity, unknown positions and non-sequence characters.
+
+
+ rna
+ http://onto.eva.mpg.de/ontologies/gfo-bio.owl#RNA_sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence (characters ACGT only) with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure dna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure dna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence (characters ACGU only) with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure rna sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure rna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for any protein sequence with possible unknown positions but without ambiguity or non-sequence characters.
+
+
+ unambiguous pure protein
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for any protein sequence with possible ambiguity and unknown positions but without non-sequence characters.
+
+
+ pure protein
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from UniGene.
+
+ A UniGene entry includes a set of transcript sequences assigned to the same transcription locus (gene or expressed pseudogene), with information on protein similarities, gene expression, cDNA clone reagents, and genomic location.
+ UniGene entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the COG database of clusters of (related) protein sequences.
+
+ COG sequence cluster format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for sequence positions (feature location) as used in DDBJ/EMBL/GenBank database.
+ Feature location
+
+
+ EMBL feature location
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for tandem repeats in a nucleotide sequence (format generated by the Sanger Centre quicktandem program).
+
+
+ quicktandem
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for inverted repeats in a nucleotide sequence (format generated by the Sanger Centre inverted program).
+
+
+ Sanger inverted repeats
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for tandem repeats in a sequence (an EMBOSS report format).
+
+
+ EMBOSS repeat
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a report on exon-intron structure generated by EMBOSS est2genome.
+
+
+ est2genome format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for restriction enzyme recognition sites used by EMBOSS restrict program.
+
+
+ restrict format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for restriction enzyme recognition sites used by EMBOSS restover program.
+
+
+ restover format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format for restriction enzyme recognition sites used by REBASE database.
+
+
+ REBASE restriction sites
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using FASTA.
+
+
+ This includes (typically) score data, alignment data and a histogram (of observed and expected distribution of E values.)
+ FASTA search results format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using some variant of BLAST.
+
+
+ This includes score data, alignment data and summary table.
+ BLAST results
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using some variant of MSPCrunch.
+
+
+ mspcrunch
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a sequence database search using some variant of Smith Waterman.
+
+
+ Smith-Waterman format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of EMBASSY domain hits file (DHF) of hits (sequences) with domain classification information.
+
+
+ The hits are relatives to a SCOP or CATH family and are found from a search of a sequence database.
+ dhf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of EMBASSY ligand hits file (LHF) of database hits (sequences) with ligand classification information.
+
+
+ The hits are putative ligand-binding sequences and are found from a search of a sequence database.
+ lhf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Results format for searches of the InterPro database.
+
+
+ InterPro hits format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a search of the InterPro database showing matches of query protein sequence(s) to InterPro entries.
+
+
+ The report includes a classification of regions in a query protein sequence which are assigned to a known InterPro protein family or group.
+ InterPro protein view report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of results of a search of the InterPro database showing matches between protein sequence(s) and signatures for an InterPro entry.
+
+
+ The table presents matches between query proteins (rows) and signature methods (columns) for this entry. Alternatively the sequence(s) might be from the InterPro entry itself. The match position in the protein sequence and match status (true positive, false positive etc) are indicated.
+ InterPro match table format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dirichlet distribution HMMER format.
+
+
+ HMMER Dirichlet prior
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dirichlet distribution MEME format.
+
+
+ MEME Dirichlet prior
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a report from the HMMER package on the emission and transition counts of a hidden Markov model.
+
+
+ HMMER emission and transition
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a regular expression pattern from the Prosite database.
+
+
+ prosite-pattern
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of an EMBOSS sequence pattern.
+
+
+ EMBOSS sequence pattern
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A motif in the format generated by the MEME program.
+
+
+ meme-motif
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence profile (sequence classifier) format used in the PROSITE database.
+
+
+ prosite-profile
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A profile (sequence classifier) in the format used in the JASPAR database.
+
+
+ JASPAR format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of the model of random sequences used by MEME.
+
+
+ MEME background Markov model
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a hidden Markov model representation used by the HMMER package.
+
+
+ HMMER format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA-style format for multiple sequences aligned by HMMER package to an HMM.
+
+
+ HMMER-aln
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of multiple sequences aligned by DIALIGN package.
+
+
+ DIALIGN format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBASSY 'domain alignment file' (DAF) format, containing a sequence alignment of protein domains belonging to the same SCOP or CATH family.
+
+
+ The format is clustal-like and includes annotation of domain family classification information.
+ daf
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for alignment of molecular sequences to MEME profiles (position-dependent scoring matrices) as generated by the MAST tool from the MEME package.
+
+
+ Sequence-MEME profile alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used by the HMMER package for an alignment of a sequence against a hidden Markov model database.
+
+
+ HMMER profile alignment (sequences versus HMMs)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used by the HMMER package for an alignment of a hidden Markov model against a sequence database.
+
+
+ HMMER profile alignment (HMM versus sequences)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of PHYLIP phylogenetic distance matrix data.
+
+
+ Data Type must include the distance matrix, probably as pairs of sequence identifiers with a distance (integer or float).
+ Phylip distance matrix
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Dendrogram (tree file) format generated by ClustalW.
+
+
+ ClustalW dendrogram
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw data file format used by Phylip from which a phylogenetic tree is directly generated or plotted.
+
+
+ Phylip tree raw
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PHYLIP file format for continuous quantitative character data.
+
+
+ Phylip continuous quantitative characters
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of phylogenetic property data.
+
+ Phylogenetic property values format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PHYLIP file format for phylogenetics character frequency data.
+
+
+ Phylip character frequencies format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of PHYLIP discrete states data.
+
+
+ Phylip discrete states format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of PHYLIP cliques data.
+
+
+ Phylip cliques format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree data format used by the PHYLIP program.
+
+
+ Phylip tree format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of an entry from the TreeBASE database of phylogenetic data.
+
+
+ TreeBASE format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of an entry from the TreeFam database of phylogenetic data.
+
+
+ TreeFam format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for distances, such as Branch Score distance, between two or more phylogenetic trees as used by the Phylip package.
+
+
+ Phylip tree distance format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of an entry from the DSSP database (Dictionary of Secondary Structure in Proteins).
+
+
+ The DSSP database is built using the DSSP application which defines secondary structure, geometrical features and solvent exposure of proteins, given atomic coordinates in PDB format.
+ dssp
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of the HSSP database (Homology-derived Secondary Structure in Proteins).
+
+
+ hssp
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of RNA secondary structure in dot-bracket notation, originally generated by the Vienna RNA package/server.
+ Vienna RNA format
+ Vienna RNA secondary structure format
+
+
+ Dot-bracket format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of local RNA secondary structure components with free energy values, generated by the Vienna RNA package/server.
+
+
+ Vienna local RNA secondary structure format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of an entry (or part of an entry) from the PDB database.
+ PDB entry format
+
+
+ PDB database entry format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of PDB database in PDB format.
+ PDB format
+
+
+ PDB
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of PDB database in mmCIF format.
+
+
+ mmCIF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format of PDB database in PDBML (XML) format.
+
+
+ PDBML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Format of a matrix of 3D-1D scores used by the EMBOSS Domainatrix applications.
+
+
+ Domainatrix 3D-1D scoring matrix format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Amino acid index format used by the AAindex database.
+
+
+ aaindex
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from IntEnz (The Integrated Relational Enzyme Database).
+
+ IntEnz is the master copy of the Enzyme Nomenclature, the recommendations of the NC-IUBMB on the Nomenclature and Classification of Enzyme-Catalysed Reactions.
+ IntEnz enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the BRENDA enzyme database.
+
+ BRENDA enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the KEGG REACTION database of biochemical reactions.
+
+ KEGG REACTION enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the KEGG ENZYME database.
+
+ KEGG ENZYME enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the proto section of the REBASE enzyme database.
+
+ REBASE proto enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the withrefm section of the REBASE enzyme database.
+
+ REBASE withrefm enzyme report format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of output of the Pcons Model Quality Assessment Program (MQAP).
+
+
+ Pcons ranks protein models by assessing their quality based on the occurrence of recurring common three-dimensional structural patterns. Pcons returns a score reflecting the overall global quality and a score for each individual residue in the protein reflecting the local residue quality.
+ Pcons report format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of output of the ProQ protein model quality predictor.
+
+
+ ProQ is a neural network-based predictor that predicts the quality of a protein model based on the number of structural features.
+ ProQ report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of SMART domain assignment data.
+
+ The SMART output file includes data on genetically mobile domains / analysis of domain architectures, including phyletic distributions, functional class, tertiary structures and functionally important residues.
+ SMART domain assignment report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the BIND database of protein interaction.
+
+ BIND entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the IntAct database of protein interaction.
+
+ IntAct entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the InterPro database of protein signatures (sequence classifiers) and classified sequences.
+
+ This includes signature metadata, sequence references and a reference to the signature itself. There is normally a header (entry accession numbers and name), abstract, taxonomy information, example proteins etc. Each entry also includes a match list which gives a number of different views of the signature matches for the sequences in each InterPro entry.
+ InterPro entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the textual abstract of signatures in an InterPro entry and its protein matches.
+
+ References are included and a functional inference is made where possible.
+ InterPro entry abstract format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Gene3D protein secondary database.
+
+ Gene3D entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the PIRSF protein secondary database.
+
+ PIRSF entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the PRINTS protein secondary database.
+
+ PRINTS entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Panther library of protein families and subfamilies.
+
+ Panther Families and HMMs entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Pfam protein secondary database.
+
+ Pfam entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the SMART protein secondary database.
+
+ SMART entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the Superfamily protein secondary database.
+
+ Superfamily entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the TIGRFam protein secondary database.
+
+ TIGRFam entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the ProDom protein domain classification database.
+
+ ProDom entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the FSSP database.
+
+ FSSP entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A report format for the kinetics of enzyme-catalysed reaction(s) in a format generated by EMBOSS findkm. This includes Michaelis Menten plot, Hanes Woolf plot, Michaelis Menten constant (Km) and maximum velocity (Vmax).
+
+
+ findkm
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of Ensembl genome database.
+
+ Ensembl gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of DictyBase genome database.
+
+ DictyBase gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of Candida Genome database.
+
+ CGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of DragonDB genome database.
+
+ DragonDB gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of EcoCyc genome database.
+
+ EcoCyc gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of FlyBase genome database.
+
+ FlyBase gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of Gramene genome database.
+
+ Gramene gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of KEGG GENES genome database.
+
+ KEGG GENES gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Maize genetics and genomics database (MaizeGDB).
+
+ MaizeGDB gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Mouse Genome Database (MGD).
+
+ MGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Rat Genome Database (RGD).
+
+ RGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Saccharomyces Genome Database (SGD).
+
+ SGD gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Sanger GeneDB genome database.
+
+ GeneDB gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of The Arabidopsis Information Resource (TAIR) genome database.
+
+ TAIR gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the WormBase genomes database.
+
+ WormBase gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the Zebrafish Information Network (ZFIN) genome database.
+
+ ZFIN gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format of the TIGR genome database.
+
+ TIGR gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the dbSNP database.
+
+ dbSNP polymorphism report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the OMIM database of genotypes and phenotypes.
+
+ OMIM entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a record from the HGVbase database of genotypes and phenotypes.
+
+ HGVbase entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a record from the HIVDB database of genotypes and phenotypes.
+
+ HIVDB entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the KEGG DISEASE database.
+
+ KEGG DISEASE entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Report format on PCR primers and hybridisation oligos as generated by Whitehead primer3 program.
+
+
+ Primer3 primer
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format of raw sequence read data from an Applied Biosystems sequencing machine.
+
+
+ ABI
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of MIRA sequence trace information file.
+
+
+ mira
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ caf
+
+ Common Assembly Format (CAF). A sequence assembly format including contigs, base-call qualities, and other metadata.
+
+
+ CAF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ Sequence assembly project file EXP format.
+ Affymetrix EXP format
+
+
+ EXP
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Staden Chromatogram Files format (SCF) of base-called sequence reads, qualities, and other metadata.
+
+
+ SCF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ PHD sequence trace format to store serialised chromatogram data (reads).
+
+
+ PHD
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of Affymetrix data file of raw image data.
+ Affymetrix image data file format
+
+
+ dat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of Affymetrix data file of information about (raw) expression levels of the individual probes.
+ Affymetrix probe raw data format
+
+
+ cel
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of affymetrix gene cluster files (hc-genes.txt, hc-chips.txt) from hierarchical clustering.
+
+
+ affymetrix
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the ArrayExpress microarrays database.
+
+ ArrayExpress entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Affymetrix data file format for information about experimental conditions and protocols.
+ Affymetrix experimental conditions data file format
+
+
+ affymetrix-exp
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ chp
+ Format of Affymetrix data file of information about (normalised) expression levels of the individual probes.
+ Affymetrix probe normalised data format
+
+
+ CHP
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the Electron Microscopy DataBase (EMDB).
+
+ EMDB entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG PATHWAY database of pathway maps for molecular interactions and reaction networks.
+
+ KEGG PATHWAY entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the MetaCyc metabolic pathways database.
+
+ MetaCyc entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of a report from the HumanCyc metabolic pathways database.
+
+ HumanCyc entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the INOH signal transduction pathways database.
+
+ INOH entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the PATIKA biological pathways database.
+
+ PATIKA entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the Reactome biological pathways database.
+
+ Reactome entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the aMAZE biological pathways and molecular interactions database.
+
+ aMAZE entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the CPDB database.
+
+ CPDB entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the Panther Pathways database.
+
+ Panther Pathways entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of Taverna workflows.
+
+
+ Taverna workflow format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of mathematical models from the BioModel database.
+
+ Models are annotated and linked to relevant data resources, such as publications, databases of compounds and pathways, controlled vocabularies, etc.
+ BioModel mathematical model format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG LIGAND chemical database.
+
+ KEGG LIGAND entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG COMPOUND database.
+
+ KEGG COMPOUND entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG PLANT database.
+
+ KEGG PLANT entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG GLYCAN database.
+
+ KEGG GLYCAN entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from PubChem.
+
+ PubChem entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from a database of chemical structures and property predictions.
+
+ ChemSpider entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from Chemical Entities of Biological Interest (ChEBI).
+
+ ChEBI includes an ontological classification defining relations between entities or classes of entities.
+ ChEBI entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the MSDchem ligand dictionary.
+
+ MSDchem ligand dictionary entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of an entry from the HET group dictionary (HET groups from PDB files).
+
+
+ HET group dictionary entry format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the KEGG DRUG database.
+
+ KEGG DRUG entry format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of bibliographic reference as used by the PubMed database.
+
+
+ PubMed citation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for abstracts of scientific articles from the Medline database.
+
+
+ Bibliographic reference information including citation information is included
+ Medline Display Format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CiteXplore 'core' citation format including title, journal, authors and abstract.
+
+
+ CiteXplore-core
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ CiteXplore 'all' citation format includes all known details such as Mesh terms and cross-references.
+
+
+ CiteXplore-all
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Article format of the PubMed Central database.
+
+
+ pmc
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The format of iHOP (Information Hyperlinked over Proteins) text-mining result.
+
+
+ iHOP format
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ OSCAR format of annotated chemical text.
+
+
+ OSCAR (Open-Source Chemistry Analysis Routines) software performs chemistry-specific parsing of chemical documents. It attempts to identify chemical names, ontology concepts, and chemical data from a document.
+ OSCAR format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Format of an ATOM record (describing data for an individual atom) from a PDB file.
+
+ PDB atom record format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of CATH domain classification information for a polypeptide chain.
+
+ The report (for example http://www.cathdb.info/chain/1cukA) includes chain identifiers, domain identifiers and CATH codes for domains in a given protein chain.
+ CATH chain report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of CATH domain classification information for a protein PDB file.
+
+ The report (for example http://www.cathdb.info/pdb/1cuk) includes chain identifiers, domain identifiers and CATH codes for domains in a given PDB file.
+ CATH PDB report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry (gene) format of the NCBI database.
+
+ NCBI gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Moby:GI_Gene
+ Report format for biological functions associated with a gene name and its alternative names (synonyms, homonyms), as generated by the GeneIlluminator service.
+
+ This includes a gene name and abbreviation of the name which may be in a name space indicating the gene status and relevant organisation.
+ GeneIlluminator gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Moby:BacMapGeneCard
+ Format of a report on the DNA and protein sequences for a given gene label from bacterial chromosome maps from the BacMap database.
+
+ BacMap gene card format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on Escherichia coli genes, proteins and molecules from the CyberCell Database (CCDB).
+
+ ColiCard report format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Map of a plasmid (circular DNA) in PlasMapper TextMap format.
+
+
+ PlasMapper TextMap
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree Newick (text) format.
+ nh
+
+
+ newick
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree TreeCon (text) format.
+
+
+ TreeCon format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree Nexus (text) format.
+
+
+ Nexus format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A defined way or layout of representing and structuring data in a computer file, blob, string, message, or elsewhere.
+ Data format
+ Data model
+ Exchange format
+ File format
+
+
+ The main focus in EDAM lies on formats as means of structuring data exchanged between different tools or resources. The serialisation, compression, or encoding of concrete data formats/models is not in scope of EDAM. Format 'is format of' Data.
+ Format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Data model
+ A defined data format has its implicit or explicit data model, and EDAM does not distinguish the two. Some data models, however, do not have any standard way of serialisation into an exchange format, and those are thus not considered formats in EDAM. (Remark: an even broader - or closely related - term than 'Data model' would be 'Information model'.)
+
+
+
+
+ File format
+ File format denotes only formats of a computer file, but the same formats apply also to data blobs or exchanged messages.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Data format for an individual atom.
+
+ Atomic data format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a molecular sequence record.
+
+
+ Sequence record format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for molecular sequence feature information.
+
+
+ Sequence feature annotation format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for molecular sequence alignment information.
+
+
+ Alignment format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ACEDB sequence format.
+
+
+ acedb
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Clustalw output format.
+
+ clustal sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Codata entry format.
+
+
+ codata
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fasta format variant with database name before ID.
+
+
+ dbid
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBL entry format.
+ EMBL
+ EMBL sequence format
+
+
+ EMBL format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Staden experiment file format.
+
+
+ Staden experiment format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA format including NCBI-style IDs.
+ FASTA format
+ FASTA sequence format
+
+
+ FASTA
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ fastq
+ fq
+ FASTQ short read format ignoring quality scores.
+ FASTAQ
+ fq
+
+
+ FASTQ
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTQ Illumina 1.3 short read format.
+
+
+ FASTQ-illumina
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTQ short read format with phred quality.
+
+
+ FASTQ-sanger
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTQ Solexa/Illumina 1.0 short read format.
+
+
+ FASTQ-solexa
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fitch program format.
+
+
+ fitch program
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GCG sequence file format.
+ GCG SSF
+
+
+ GCG SSF (single sequence file) file format.
+ GCG
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Genbank entry format.
+ GenBank
+
+
+ GenBank format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Genpept protein entry format.
+
+
+ Currently identical to refseqp format
+ genpept
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GFF feature file format with sequence in the header.
+
+
+ GFF2-seq
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GFF3 feature file format with sequence.
+
+
+ GFF3-seq
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA sequence format including NCBI-style GIs.
+
+
+ giFASTA format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Hennig86 output sequence format.
+
+
+ hennig86
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Intelligenetics sequence format.
+
+
+ ig
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Intelligenetics sequence format (strict version).
+
+
+ igstrict
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Jackknifer interleaved and non-interleaved sequence format.
+
+
+ jackknifer
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mase program sequence format.
+
+
+ mase format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mega interleaved and non-interleaved sequence format.
+
+
+ mega-seq
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GCG MSF (multiple sequence file) file format.
+
+
+ GCG MSF
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ pir
+ NBRF/PIR entry sequence format.
+ nbrf
+ pir
+
+
+ nbrf/pir
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nexus/paup interleaved sequence format.
+
+
+ nexus-seq
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB sequence format (ATOM lines).
+
+
+ pdb format in EMBOSS.
+ pdbatom
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB nucleotide sequence format (ATOM lines).
+
+
+ pdbnuc format in EMBOSS.
+ pdbatomnuc
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB nucleotide sequence format (SEQRES lines).
+
+
+ pdbnucseq format in EMBOSS.
+ pdbseqresnuc
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ PDB sequence format (SEQRES lines).
+
+
+ pdbseq format in EMBOSS.
+ pdbseqres
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Plain old FASTA sequence format (unspecified format for IDs).
+
+
+ Pearson format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Phylip interleaved sequence format.
+
+ phylip sequence format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ PHYLIP non-interleaved sequence format.
+
+ phylipnon sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Raw sequence format with no non-sequence characters.
+
+
+ raw
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Refseq protein entry sequence format.
+
+
+ Currently identical to genpept format
+ refseqp
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Selex sequence format.
+
+ selex sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+
+ Staden suite sequence format.
+
+
+ Staden format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ Stockholm multiple sequence alignment format (used by Pfam and Rfam).
+
+
+ Stockholm format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA strider output sequence format.
+
+
+ strider format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ UniProtKB entry sequence format.
+ SwissProt format
+ UniProt format
+
+
+ UniProtKB format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ txt
+ Plain text sequence format (essentially unformatted).
+
+
+ plain text format (unformatted)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Treecon output sequence format.
+
+ treecon sequence format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ NCBI ASN.1-based sequence format.
+
+
+ ASN.1 sequence format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAS sequence (XML) format (any type).
+ das sequence format
+
+
+ DAS format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAS sequence (XML) format (nucleotide-only).
+
+
+ The use of this format is deprecated.
+ dasdna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS debugging trace sequence format of full internal data content.
+
+
+ debug-seq
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Jackknifer output sequence non-interleaved format.
+
+
+ jackknifernon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Mega non-interleaved output sequence format.
+
+ meganon sequence format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ NCBI FASTA sequence format with NCBI-style IDs.
+
+
+ There are several variants of this.
+ NCBI format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Nexus/paup non-interleaved sequence format.
+
+
+ nexusnon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ General Feature Format (GFF) of sequence features.
+
+
+ GFF2
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ Generic Feature Format version 3 (GFF3) of sequence features.
+
+
+ GFF3
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ PIR feature format.
+
+
+ pir
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Swiss-Prot feature format.
+
+ swiss feature
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DAS GFF (XML) feature format.
+ DASGFF feature
+ das feature
+
+
+ DASGFF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS debugging trace feature format of full internal data content.
+
+
+ debug-feat
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ EMBL feature format.
+
+ EMBL feature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Genbank feature format.
+
+ GenBank feature
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ ClustalW format for (aligned) sequences.
+ clustal
+
+
+ ClustalW format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS alignment format for debugging trace of full internal data content.
+
+
+ debug
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Fasta format for (aligned) sequences.
+
+
+ FASTA-aln
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX0 alignment format.
+
+
+ markx0
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX1 alignment format.
+
+
+ markx1
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX10 alignment format.
+
+
+ markx10
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX2 alignment format.
+
+
+ markx2
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Pearson MARKX3 alignment format.
+
+
+ markx3
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment format for start and end of matches between sequence pairs.
+
+
+ match
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mega format for (typically aligned) sequences.
+
+
+ mega
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mega non-interleaved format for (typically aligned) sequences.
+
+
+ meganon
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ MSF format for (aligned) sequences.
+
+ msf alignment format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Nexus/paup format for (aligned) sequences.
+
+ nexus alignment format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Nexus/paup non-interleaved format for (aligned) sequences.
+
+ nexusnon alignment format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS simple sequence pairwise alignment format.
+
+
+ pair
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.bioperl.org/wiki/PHYLIP_multiple_alignment_format
+ Phylip format for (aligned) sequences.
+ PHYLIP
+ PHYLIP interleaved format
+ ph
+ phy
+
+
+ PHYLIP format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ http://www.bioperl.org/wiki/PHYLIP_multiple_alignment_format
+ Phylip non-interleaved format for (aligned) sequences.
+ PHYLIP sequential format
+ phylipnon
+
+
+ PHYLIP sequential
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alignment format for score values for pairs of sequences.
+
+
+ scores format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ SELEX format for (aligned) sequences.
+
+
+ selex
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBOSS simple multiple alignment format.
+
+
+ EMBOSS simple format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Simple multiple sequence (alignment) format for SRS.
+
+
+ srs format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Simple sequence pair (alignment) format for SRS.
+
+
+ srspair
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ T-Coffee program alignment format.
+
+
+ T-Coffee format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Treecon format for (aligned) sequences.
+
+
+ TreeCon-seq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a phylogenetic tree.
+
+
+ Phylogenetic tree format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a biological pathway or network.
+
+
+ Biological pathway or network format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a sequence-profile alignment.
+
+
+ Sequence-profile alignment format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Data format for a sequence-HMM profile alignment.
+
+ Sequence-profile alignment (HMM) format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for an amino acid index.
+
+
+ Amino acid index format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a full-text scientific article.
+ Literature format
+
+
+ Article format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format of a report from text mining.
+
+
+ Text mining report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for reports on enzyme kinetics.
+
+
+ Enzyme kinetics report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on a chemical compound.
+ Chemical compound annotation format
+ Chemical structure format
+ Small molecule report format
+ Small molecule structure format
+
+
+ Chemical data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on a particular locus, gene, gene system or groups of genes.
+ Gene features format
+
+
+ Gene annotation format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a workflow.
+ Programming language
+ Script format
+
+
+ Workflow format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for a molecular tertiary structure.
+
+
+ Tertiary structure format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.2
+
+
+ Data format for a biological model.
+
+ Biological model format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Text format of a chemical formula.
+
+
+ Chemical formula format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of raw (unplotted) phylogenetic data.
+
+
+ Phylogenetic character data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic continuous quantitative character data.
+
+
+ Phylogenetic continuous quantitative character format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic discrete states data.
+
+
+ Phylogenetic discrete states format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic cliques data.
+
+
+ Phylogenetic tree report (cliques) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of phylogenetic invariants data.
+
+
+ Phylogenetic tree report (invariants) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation format for electron microscopy models.
+
+ Electron microscopy model format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for phylogenetic tree distance data.
+
+
+ Phylogenetic tree report (tree distances) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Format for sequence polymorphism data.
+
+ Polymorphism report format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for reports on a protein family.
+
+
+ Protein family report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for molecular interaction data.
+ Molecular interaction format
+
+
+ Protein interaction format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for sequence assembly data.
+
+
+ Sequence assembly format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for information about a microarray experiment per se (not the data generated from that experiment).
+
+
+ Microarray experiment data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for sequence trace data (i.e. including base call information).
+
+
+ Sequence trace format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a file of gene expression data, e.g. a gene expression matrix or profile.
+ Gene expression data format
+
+
+ Gene expression report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on genotype / phenotype information.
+
+ Genotype and phenotype annotation format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a map of (typically one) molecular sequence annotated with features.
+
+
+ Map format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on PCR primers or hybridisation oligos in a nucleic acid sequence.
+
+
+ Nucleic acid features (primers) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report of general information about a specific protein.
+
+
+ Protein report format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report of general information about a specific enzyme.
+
+ Protein report (enzyme) format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a matrix of 3D-1D scores (amino acid environment probabilities).
+
+
+ 3D-1D scoring matrix format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on the quality of a protein three-dimensional model.
+
+
+ Protein structure report (quality evaluation) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a report on sequence hits and associated data from searching a sequence database.
+
+
+ Database hits (sequence) format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a matrix of genetic distances between molecular sequences.
+
+
+ Sequence distance matrix format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a sequence motif.
+
+
+ Sequence motif format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a sequence profile.
+
+
+ Sequence profile format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a hidden Markov model.
+
+
+ Hidden Markov model format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format of a Dirichlet distribution.
+
+
+ Dirichlet distribution format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Data format for the emission and transition counts of a hidden Markov model.
+
+
+ HMM emission and transition counts format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for secondary structure (predicted or real) of an RNA molecule.
+
+
+ RNA secondary structure format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format for secondary structure (predicted or real) of a protein molecule.
+
+
+ Protein secondary structure format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used to specify range(s) of sequence positions.
+
+
+ Sequence range format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for molecular sequence with possible unknown positions but without non-sequence characters.
+
+
+ pure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions but possibly with non-sequence characters.
+
+
+ unpure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions but without ambiguity characters.
+
+
+ unambiguous sequence
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence with possible unknown positions and possible ambiguity characters.
+
+
+ ambiguous
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for map of repeats in molecular (typically nucleotide) sequences.
+
+
+ Sequence features (repeats) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for report on restriction enzyme recognition sites in nucleotide sequences.
+
+
+ Nucleic acid features (restriction sites) format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.10
+
+ Format used for report on coding regions in nucleotide sequences.
+
+
+ Gene features (coding region) format
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for clusters of molecular sequences.
+
+
+ Sequence cluster format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used for clusters of protein sequences.
+
+
+ Sequence cluster format (protein)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format used for clusters of nucleotide sequences.
+
+
+ Sequence cluster format (nucleic acid)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Format used for clusters of genes.
+
+ Gene cluster format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling EMBL entry format.
+
+
+ This concept may be used for the many non-standard EMBL-like text formats.
+ EMBL-like (text)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling FASTQ short read format.
+
+
+ This concept may be used for non-standard FASTQ short read-like formats.
+ FASTQ-like format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ true
+ XML format for EMBL entries.
+
+
+ EMBLXML
+ https://fairsharing.org/bsg-s001452/
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ true
+ Specific XML format for EMBL entries (only uses certain sections).
+
+
+ cdsxml
+ https://fairsharing.org/bsg-s001452/
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ INSDSeq provides the elements of a sequence as presented in the GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of additional structure.
+ INSD XML
+ INSDC XML
+
+
+ INSDSeq
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Geneseq sequence format.
+
+
+ geneseq
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text sequence format resembling uniprotkb entry format.
+
+
+ UniProt-like (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ UniProt entry sequence format.
+
+
+ UniProt format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ ipi sequence format.
+
+ ipi
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Abstract format used by MedLine database.
+
+
+ medline
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format used for ontologies.
+
+
+ Ontology format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A serialisation format conforming to the Open Biomedical Ontologies (OBO) model.
+
+
+ OBO format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling FASTA format.
+
+
+ This concept may also be used for the many non-standard FASTA-like formats.
+ FASTA-like (text)
+ http://filext.com/file-extension/FASTA
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data format for a molecular sequence record, typically corresponding to a full entry from a molecular sequence database.
+
+
+ Sequence record full format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Data format for a molecular sequence record 'lite', typically molecular sequence and minimal metadata, such as an identifier of the sequence and/or a comment.
+
+
+ Sequence record lite format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An XML format for EMBL entries.
+
+
+ This is a placeholder for other more specific concepts. It should not normally be used for annotation.
+ EMBL format (XML)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A text format resembling GenBank entry (plain text) format.
+
+
+ This concept may be used for the non-standard GenBank-like text formats.
+ GenBank-like format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for a sequence feature table.
+
+
+ Sequence feature table format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Format of a report on organism strain data / cell line.
+
+ Strain data format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format for a report of strain data as used for CIP database entries.
+
+ CIP strain data format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ PHYLIP file format for phylogenetic property data.
+
+ phylip property values
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format (HTML) for the STRING database of protein interaction.
+
+ STRING entry format (HTML)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Entry format (XML) for the STRING database of protein interaction.
+
+
+ STRING entry format (XML)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ GFF feature format (of indeterminate version).
+
+
+ GFF
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ Gene Transfer Format (GTF), a restricted version of GFF.
+
+
+ GTF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ FASTA format wrapped in HTML elements.
+
+
+ FASTA-HTML
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ EMBL entry format wrapped in HTML elements.
+
+
+ EMBL-HTML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the BioCyc enzyme database.
+
+ BioCyc enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of an entry from the Enzyme nomenclature database (ENZYME).
+
+ ENZYME enzyme report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on a gene from the PseudoCAP database.
+
+ PseudoCAP gene report format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on a gene from the GeneCards database.
+
+ GeneCards gene report format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Textual format.
+ Plain text format
+ txt
+
+
+ Data in text format can be compressed into binary format, or can be a value of an XML element or attribute. Markup formats are not considered textual (or more precisely, not plain-textual).
+ Textual format
+ http://filext.com/file-extension/TXT
+ http://www.iana.org/assignments/media-types/media-types.xhtml#text
+ http://www.iana.org/assignments/media-types/text/plain
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ HTML format.
+ Hypertext Markup Language
+
+
+ HTML
+ http://filext.com/file-extension/HTML
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ xml
+
+
+
+ eXtensible Markup Language (XML) format.
+ eXtensible Markup Language
+
+
+ Data in XML format can be serialised into text, or binary format.
+ XML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Binary format.
+
+
+ Only specific native binary formats are listed under 'Binary format' in EDAM. Generic binary formats - such as any data being zipped, or any XML data being serialised into the Efficient XML Interchange (EXI) format - are not modelled in EDAM. Refer to http://wsio.org/compression_004.
+ Binary format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Typical textual representation of a URI.
+
+ URI format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ The format of an entry from the NCI-Nature pathways database.
+
+ NCI-Nature pathway entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A placeholder concept for visual navigation by dividing data formats by the content of the data that is represented.
+ Format (typed)
+
+
+ This concept exists only to assist EDAM maintenance and navigation in graphical browsers. It does not add semantic information. The concept branch under 'Format (typed)' provides an alternative organisation of the concepts nested under the other top-level branches ('Binary', 'HTML', 'RDF', 'Text' and 'XML'). All concepts under here are already included under those branches.
+ Format (by type of data)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+
+
+
+ Any ontology allowed, none mandatory. Preferably with URIs but URIs are not mandatory. Non-ontology terms are also allowed as the last resort in case of a lack of suitable ontology.
+
+
+
+ BioXSD-schema-based XML format of sequence-based data and some other common data - sequence records, alignments, feature records, references to resources, and more - optimised for integrative bioinformatics, Web services, and object-oriented programming.
+ BioJSON
+ BioXSD
+ BioXSD XML
+ BioXSD XML format
+ BioXSD data model
+ BioXSD format
+ BioXSD in XML
+ BioXSD in XML format
+ BioXSD+XML
+ BioXSD/GTrack
+ BioXSD|GTrack
+ BioYAML
+
+
+ 'BioXSD' belongs to the 'BioXSD|GTrack' ecosystem of generic formats. 'BioXSD in XML' is the XML format based on the common, unified 'BioXSD data model', a.k.a. 'BioXSD|BioJSON|BioYAML'.
+ BioXSD (XML)
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A serialisation format conforming to the Resource Description Framework (RDF) model.
+ Resource Description Framework format
+ RDF
+ Resource Description Framework
+
+
+ RDF format
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Genbank entry format wrapped in HTML elements.
+
+
+ GenBank-HTML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Format of a report on protein features (domain composition).
+
+ Protein features (domains) format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling EMBL entry (plain text) format.
+
+
+ This concept may be used for the many non-standard EMBL-like formats.
+ EMBL-like format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling FASTQ short read format.
+
+
+ This concept may be used for non-standard FASTQ short read-like formats.
+ FASTQ-like format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling FASTA format.
+
+
+ This concept may be used for the many non-standard FASTA-like formats.
+ FASTA-like
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A sequence format resembling uniprotkb entry format.
+
+
+ uniprotkb-like format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format for a sequence feature table.
+
+
+ Sequence feature table format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ OBO ontology text format.
+
+
+ OBO
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ OBO ontology XML format.
+
+
+ OBO-XML
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for a molecular sequence record (text).
+
+
+ Sequence record format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for a molecular sequence record (XML).
+
+
+ Sequence record format (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ XML format for a sequence feature table.
+
+
+ Sequence feature table format (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for molecular sequence alignment information.
+
+
+ Alignment format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ XML format for molecular sequence alignment information.
+
+
+ Alignment format (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for a phylogenetic tree.
+
+
+ Phylogenetic tree format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ XML format for a phylogenetic tree.
+
+
+ Phylogenetic tree format (XML)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ An XML format resembling EMBL entry format.
+
+
+ This concept may be used for any non-standard EMBL-like XML formats.
+ EMBL-like (XML)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A format resembling GenBank entry (plain text) format.
+
+
+ This concept may be used for the non-standard GenBank-like formats.
+ GenBank-like format
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Entry format for the STRING database of protein interaction.
+
+ STRING entry format
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text format for sequence assembly data.
+
+
+ Sequence assembly format (text)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Text format (representation) of amino acid residues.
+
+ Amino acid identifier format
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence without any unknown positions or ambiguity characters.
+
+
+ completely unambiguous
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a molecular sequence without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a nucleotide sequence (characters ACGTU only) without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure nucleotide
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for a DNA sequence (characters ACGT only) without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure dna
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for an RNA sequence (characters ACGU only) without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure rna sequence
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a raw molecular sequence (i.e. the alphabet used).
+
+
+ Raw sequence format
+ http://www.onto-med.de/ontologies/gfo.owl#Symbol_sequence
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ BAM format, the binary, BGZF-formatted compressed version of SAM format for alignment of nucleotide sequences (e.g. sequencing reads) to (a) reference sequence(s). May contain base-call and alignment qualities and other data.
+
+
+ BAM
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Sequence Alignment/Map (SAM) format for alignment of nucleotide sequences (e.g. sequencing reads) to (a) reference sequence(s). May contain base-call and alignment qualities and other data.
+
+
+ The format supports short and long reads (up to 128Mbp) produced by different sequencing platforms and is used to hold mapped data within the GATK and across the Broad Institute, the Sanger Centre, and throughout the 1000 Genomes project.
+ SAM
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Systems Biology Markup Language (SBML), the standard XML format for models of biological processes such as for example metabolism, cell signaling, and gene regulation.
+
+
+ SBML
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Alphabet for any protein sequence without unknown positions, ambiguity or non-sequence characters.
+
+
+ completely unambiguous pure protein
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of a bibliographic reference.
+
+
+ Bibliographic reference format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Format of a sequence annotation track.
+
+
+ Sequence annotation track format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Data format for molecular sequence alignment information that can hold sequence alignment(s) of only 2 sequences.
+
+
+ Alignment format (pair only)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Format of sequence variation annotation.
+
+
+ Sequence variation annotation format
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some variant of Pearson MARKX alignment format.
+
+
+ markx0 variant
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some variant of Mega format for (typically aligned) sequences.
+
+
+ mega variant
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Some variant of Phylip format for (aligned) sequences.
+
+
+ Phylip format variant
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ AB1 binary format of raw DNA sequence reads (output of Applied Biosystems' sequencing analysis software). Contains an electropherogram and the DNA base sequence.
+
+
+ AB1 uses the generic binary Applied Biosystems, Inc. Format (ABIF).
+ AB1
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ ACE sequence assembly format including contigs, base-call qualities, and other metadata (version Aug 1998 and onwards).
+
+
+ ACE
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Browser Extensible Data (BED) format of sequence annotation track, typically to be displayed in a genome browser.
+
+
+ BED detail format includes 2 additional columns (http://genome.ucsc.edu/FAQ/FAQformat#format1.7) and BED 15 includes 3 additional columns for experiment scores (http://genomewiki.ucsc.edu/index.php/Microarray_track).
+ BED
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ bigBed format for large sequence annotation tracks, similar to textual BED format.
+
+
+ bigBed
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ wig
+
+ Wiggle format (WIG) of a sequence annotation track that consists of a value for each sequence position. Typically to be displayed in a genome browser.
+
+
+ WIG
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ bigWig format for large sequence annotation tracks that consist of a value for each sequence position. Similar to textual WIG format.
+
+
+ bigWig
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ PSL format of alignments, typically generated by BLAT or psLayout. Can be displayed in a genome browser like a sequence annotation track.
+
+
+ PSL
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Multiple Alignment Format (MAF) supporting alignments of whole genomes with rearrangements, directions, multiple pieces to the alignment, and so forth.
+
+
+ Typically generated by Multiz and TBA aligners; can be displayed in a genome browser like a sequence annotation track. This should not be confused with MIRA Assembly Format or Mutation Annotation Format.
+ MAF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+
+ 2bit binary format of nucleotide sequences using 2 bits per nucleotide. In addition encodes unknown nucleotides and lower-case 'masking'.
+
+
+ 2bit
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ .nib (nibble) binary format of a nucleotide sequence using 4 bits per nucleotide (including unknown) and its lower-case 'masking'.
+
+
+ .nib
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ gp
+
+ genePred table format for gene prediction tracks.
+
+
+ genePred format has 3 main variations (http://genome.ucsc.edu/FAQ/FAQformat#format9 http://www.broadinstitute.org/software/igv/genePred). They reflect UCSC Browser DB tables.
+ genePred
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Personal Genome SNP (pgSnp) format for sequence variation tracks (indels and polymorphisms), supported by the UCSC Genome Browser.
+
+
+ pgSnp
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ axt format of alignments, typically produced from BLASTZ.
+
+
+ axt
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ lav
+
+ LAV format of alignments generated by BLASTZ and LASTZ.
+
+
+ LAV
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Pileup format of alignment of sequences (e.g. sequencing reads) to (a) reference sequence(s). Contains aligned bases per base of the reference sequence(s).
+
+
+ Pileup
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ vcf
+ vcf.gz
+ Variant Call Format (VCF) is a tabular format for storing genomic sequence variations.
+
+
+ 1000 Genomes Project has its own specification for encoding structural variations in VCF (https://www.internationalgenome.org/wiki/Analysis/Variant%20Call%20Format/VCF%20(Variant%20Call%20Format)%20version%204.0/encoding-structural-variants). This is based on VCF version 4.0 and not directly compatible with VCF version 4.3.
+ VCF
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Sequence Read Format (SRF) of sequence trace data. Supports submission to the NCBI Short Read Archive.
+
+
+ SRF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ ZTR format for storing chromatogram data from DNA sequencing instruments.
+
+
+ ZTR
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+
+ Genome Variation Format (GVF). A GFF3-compatible format with defined header and attribute tags for sequence variation.
+
+
+ GVF
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+
+ bcf
+ bcf.gz
+
+ BCF is the binary version of Variant Call Format (VCF) for sequence variation (indels, polymorphisms, structural variation).
+
+
+ BCF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Format of a matrix (array) of numerical values.
+
+
+ Matrix format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Format of data concerning the classification of the sequences and/or structures of protein structural domain(s).
+
+
+ Protein domain classification format
+
+
+
+
+
+
+
+
+ beta13
+ Format of raw SCOP domain classification data files.
+
+
+ These are the parsable data files provided by SCOP.
+ Raw SCOP domain classification format
+
+
+
+
+
+
+
+
+ beta13
+ Format of raw CATH domain classification data files.
+
+
+ These are the parsable data files provided by CATH.
+ Raw CATH domain classification format
+
+
+
+
+
+
+
+
+ beta13
+ Format of summary of domain classification information for a CATH domain.
+
+
+ The report (for example http://www.cathdb.info/domain/1cukA01) includes CATH codes for levels in the hierarchy for the domain, level descriptions and relevant data and links.
+ CATH domain report format
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ Systems Biology Result Markup Language (SBRML), the standard XML format for simulated or calculated results (e.g. trajectories) of systems biology models.
+
+
+ SBRML
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ BioPAX is an exchange format for pathway data, with its data model defined in OWL.
+
+
+ BioPAX
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ EBI Application Result XML is a format returned by sequence similarity search Web services at EBI.
+
+
+ EBI Application Result XML
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ XML Molecular Interaction Format (MIF), standardised by HUPO PSI MI.
+ MIF
+
+
+ PSI MI XML (MIF)
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ phyloXML is a standardised XML format for phylogenetic trees, networks, and associated data.
+
+
+ phyloXML
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ NeXML is a standardised XML format for rich phyloinformatic data.
+
+
+ NeXML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ MAGE-ML XML format for microarray expression data, standardised by MGED (now FGED).
+
+
+ MAGE-ML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ MAGE-TAB textual format for microarray expression data, standardised by MGED (now FGED).
+
+
+ MAGE-TAB
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+ GCDML XML format for genome and metagenome metadata according to MIGS/MIMS/MIMARKS information standards, standardised by the Genomic Standards Consortium (GSC).
+
+
+ GCDML
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+
+
+
+
+
+
+
+
+
+ GTrack is a generic and optimised tabular format for genome or sequence feature tracks. GTrack unifies the power of other track formats (e.g. GFF3, BED, WIG), and while optimised in size, adds more flexibility, customisation, and automation ("machine understandability").
+ BioXSD/GTrack GTrack
+ BioXSD|GTrack GTrack
+ GTrack ecosystem of formats
+ GTrack format
+ GTrack|BTrack|GSuite GTrack
+ GTrack|GSuite|BTrack GTrack
+
+
+ 'GTrack' belongs to the 'BioXSD|GTrack' ecosystem of generic formats, and particular to its subset, the 'GTrack ecosystem' (GTrack, GSuite, BTrack). 'GTrack' is the tabular format for representing features of sequences and genomes.
+ GTrack
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+ true
+ Data format for a report of information derived from a biological pathway or network.
+
+
+ Biological pathway or network report format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.0
+ true
+ Data format for annotation on a laboratory experiment.
+
+
+ Experiment annotation format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ Cytoband format for chromosome cytobands.
+
+
+ Reflects a UCSC Browser DB table.
+ Cytoband format
+
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ CopasiML, the native format of COPASI.
+
+
+ CopasiML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+
+
+ CellML, the format for mathematical models of biological and other networks.
+
+
+ CellML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+
+
+
+ Tabular Molecular Interaction format (MITAB), standardised by HUPO PSI MI.
+
+
+ PSI MI TAB (MITAB)
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ Protein affinity format (PSI-PAR), standardised by HUPO PSI MI. It is compatible with PSI MI XML (MIF) and uses the same XML Schema.
+
+
+ PSI-PAR
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ mzML format for raw spectrometer output data, standardised by HUPO PSI MSS.
+
+
+ mzML is the successor and unifier of the mzData format developed by PSI and mzXML developed at the Seattle Proteome Center.
+ mzML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ true
+ Format for mass spectra and derived data, including peptide sequences etc.
+
+
+ Mass spectrometry data format
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ TraML (Transition Markup Language) is the format for mass spectrometry transitions, standardised by HUPO PSI MSS.
+
+
+ TraML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ mzIdentML is the exchange format for peptides and proteins identified from mass spectra, standardised by HUPO PSI PI. It can be used for outputs of proteomics search engines.
+
+
+ mzIdentML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ mzQuantML is the format for quantitation values associated with peptides, proteins and small molecules from mass spectra, standardised by HUPO PSI PI. It can be used for outputs of quantitation software for proteomics.
+
+
+ mzQuantML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ GelML is the format for describing the process of gel electrophoresis, standardised by HUPO PSI PS.
+
+
+ GelML
+
+
+
+
+
+
+
+
+
+ 1.2
+
+
+ spML is the format for describing proteomics sample processing, other than using gels, prior to mass spectrometric protein identification, standardised by HUPO PSI PS. It may also be applicable for metabolomics.
+
+
+ spML
+
+
+
+
+
+
+
+
+
+ 1.2
+ A human-readable encoding for the Web Ontology Language (OWL).
+
+
+ OWL Functional Syntax
+
+
+
+
+
+
+
+
+
+ 1.2
+ A syntax for writing OWL class expressions.
+
+
+ This format was influenced by the OWL Abstract Syntax and the DL style syntax.
+ Manchester OWL Syntax
+
+
+
+
+
+
+
+
+
+ 1.2
+ A superset of the "Description-Logic Knowledge Representation System Specification from the KRSS Group of the ARPA Knowledge Sharing Effort".
+
+
+ This format is used in Protege 4.
+ KRSS2 Syntax
+
+
+
+
+
+
+
+
+
+ 1.2
+ The Terse RDF Triple Language (Turtle) is a human-friendly serialisation format for RDF (Resource Description Framework) graphs.
+
+
+ The SPARQL Query Language incorporates a very similar syntax.
+ Turtle
+
+
+
+
+
+
+
+
+
+ 1.2
+ nt
+ A plain text serialisation format for RDF (Resource Description Framework) graphs, and a subset of the Turtle (Terse RDF Triple Language) format.
+
+
+ N-Triples should not be confused with Notation 3 which is a superset of Turtle.
+ N-Triples
+
+
+
+
+
+
+
+
+
+ 1.2
+ n3
+ A shorthand non-XML serialisation of Resource Description Framework model, designed with human-readability in mind.
+ N3
+
+
+ Notation3
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ OWL ontology XML serialisation format.
+ OWL
+
+
+ OWL/XML
+
+
+
+
+
+
+
+
+
+ 1.3
+
+
+ The A2M format is used as the primary format for multiple alignments of protein or nucleic-acid sequences in the SAM suite of tools. It is a small modification of FASTA format for sequences and is compatible with most tools that read FASTA.
+
+
+ A2M
+
+
+
+
+
+
+
+
+
+ 1.3
+
+
+ Standard flowgram format (SFF) is a binary file format used to encode results of pyrosequencing from the 454 Life Sciences platform for high-throughput sequencing.
+ Standard flowgram format
+
+
+ SFF
+
+
+
+
+
+
+
+
+ 1.3
+
+ The MAP file describes SNPs and is used by the Plink package.
+ Plink MAP
+
+
+ MAP
+
+
+
+
+
+
+
+
+ 1.3
+
+ The PED file describes individuals and genetic data and is used by the Plink package.
+ Plink PED
+
+
+ PED
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Data format for metadata on an individual and their genetic data.
+
+
+ Individual genetic data format
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ The PED/MAP file describes data used by the Plink package.
+ Plink PED/MAP
+
+
+ PED/MAP
+
+
+
+
+
+
+
+
+
+ 1.3
+
+
+ File format of a CT (Connectivity Table) file from the RNAstructure package.
+ Connect format
+ Connectivity Table file format
+
+
+ CT
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ XRNA old input style format.
+
+
+ SS
+
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ RNA Markup Language.
+
+
+ RNAML
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ Format for the Genetic Data Environment (GDE).
+
+
+ GDE
+
+
+
+
+
+
+
+
+ 1.3
+
+ A multiple alignment in vertical format, as used in the AMPS (Alignment of Multiple Protein Sequences) package.
+ Block file format
+
+
+ BLC
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Format of a data index of some type.
+
+
+ Data index format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+
+ BAM indexing format.
+
+
+ BAI
+
+
+
+
+
+
+
+
+ 1.3
+
+ HMMER profile HMM file for HMMER versions 2.x.
+
+
+ HMMER2
+
+
+
+
+
+
+
+
+ 1.3
+
+ HMMER profile HMM file for HMMER versions 3.x.
+
+
+ HMMER3
+
+
+
+
+
+
+
+
+ 1.3
+
+ PO is the output format of Partial Order Alignment program (POA) performing Multiple Sequence Alignment (MSA).
+
+
+ PO
+
+
+
+
+
+
+
+
+
+ 1.3
+ XML format as produced by the NCBI Blast package.
+
+
+ BLAST XML results format
+
+
+
+
+
+
+
+
+
+ 1.7
+ http://www.ebi.ac.uk/ena/software/cram-usage#format_specification http://samtools.github.io/hts-specs/CRAMv2.1.pdf
+ http://www.ebi.ac.uk/ena/software/cram-usage#format_specification http://samtools.github.io/hts-specs/CRAMv2.1.pdf
+ Reference-based compression of alignment format.
+
+
+ CRAM
+
+
+
+
+
+
+
+
+
+ 1.7
+ json
+
+
+
+ JavaScript Object Notation format; a lightweight, text-based format to represent tree-structured data using key-value pairs.
+ JavaScript Object Notation
+
+
+ JSON
+
+
+
+
+
+
+
+
+
+ 1.7
+ Encapsulated PostScript format.
+
+
+ EPS
+
+
+
+
+
+
+
+
+ 1.7
+ Graphics Interchange Format.
+
+
+ GIF
+
+
+
+
+
+
+
+
+
+ 1.7
+ Microsoft Excel spreadsheet format.
+ Microsoft Excel format
+
+
+ xls
+
+
+
+
+
+
+
+
+ 1.7
+ tab
+ tsv
+
+
+
+ Tabular data represented as tab-separated values in a text file.
+ Tab-delimited
+ Tab-separated values
+ tab
+
+
+ TSV
+
+
+
+
+
+
+
+
+ 1.7
+ 1.10
+
+ Format of a file of gene expression data, e.g. a gene expression matrix or profile.
+
+
+ Gene expression data format
+ true
+
+
+
+
+
+
+
+
+
+ 1.7
+ Format of the Cytoscape input file in which gene expression ratios or values are specified over one or more experiments.
+
+
+ Cytoscape input file format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ https://github.com/BenLangmead/bowtie/blob/master/MANUAL
+ Bowtie format for indexed reference genome for "small" genomes.
+ Bowtie index format
+
+
+ ebwt
+
+
+
+
+
+
+
+
+ 1.7
+ http://www.molbiol.ox.ac.uk/tutorials/Seqlab_GCG.pdf
+ Rich sequence format.
+ GCG RSF
+
+
+ RSF-format files contain one or more sequences that may or may not be related. In addition to the sequence data, each sequence can be annotated with descriptive sequence information (from the GCG manual).
+ RSF
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Some format based on the GCG format.
+
+
+ GCG format variant
+
+
+
+
+
+
+
+
+
+ 1.7
+ http://rothlab.ucdavis.edu/genhelp/chapter_2_using_sequences.html#_Creating_and_Editing_Single_Sequenc
+ Bioinformatics Sequence Markup Language format.
+
+
+ BSML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ https://github.com/BenLangmead/bowtie/blob/master/MANUAL
+ Bowtie format for indexed reference genome for "large" genomes.
+ Bowtie long index format
+
+
+ ebwtl
+
+
+
+
+
+
+
+
+
+ 1.8
+
+ Ensembl standard format for variation data.
+
+
+ Ensembl variation file format
+
+
+
+
+
+
+
+
+
+ 1.8
+ Microsoft Word format.
+ Microsoft Word format
+ doc
+
+
+ docx
+
+
+
+
+
+
+
+
+ 1.8
+ true
+ Format of documents including word processor, spreadsheet and presentation.
+
+
+ Document format
+
+
+
+
+
+
+
+
+
+ 1.8
+ Portable Document Format.
+
+
+ PDF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.9
+ true
+ Format used for images and image metadata.
+
+
+ Image format
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ Medical image format corresponding to the Digital Imaging and Communications in Medicine (DICOM) standard.
+
+
+ DICOM format
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ nii
+ An open file format from the Neuroimaging Informatics Technology Initiative (NIfTI) commonly used to store brain imaging data obtained using Magnetic Resonance Imaging (MRI) methods.
+ NIFTI format
+ NIfTI-1 format
+
+
+ nii
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ Text-based tagged file format for medical images generated using the MetaImage software package.
+ MetaImage format
+
+
+ mhd
+
+
+
+
+
+
+
+
+
+ 1.9
+
+ Nearly Raw Raster Data format designed to support scientific visualisation and image processing involving N-dimensional raster data.
+
+
+ nrrd
+
+
+
+
+
+
+
+
+ 1.9
+ File format used for scripts written in the R programming language for execution within the R software environment, typically for statistical computation and graphics.
+
+
+ R file format
+
+
+
+
+
+
+
+
+ 1.9
+ File format used for scripts for the Statistical Package for the Social Sciences.
+
+
+ SPSS
+
+
+
+
+
+
+
+
+ 1.9
+
+ eml
+ mht
+ mhtml
+
+
+
+ MIME HTML format for Web pages, which can include external resources, including images, Flash animations and so on.
+ HTML email format
+ HTML email message format
+ MHT
+ MHT format
+ MHTML format
+ MIME HTML
+ MIME HTML format
+ eml
+ MIME multipart
+ MIME multipart format
+ MIME multipart message
+ MIME multipart message format
+
+
+ MHTML is not strictly an HTML format, it is encoded as an HTML email message (although with multipart/related instead of multipart/alternative). It, however, contains the main HTML block as its core, and thus it is for practical reasons included in EDAM as a specialisation of 'HTML'.
+ MHTML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.10
+ Proprietary file format for (raw) BeadArray data used by genomewide profiling platforms from Illumina Inc. This format is output directly from the scanner and stores summary intensities for each probe-type on an array.
+
+
+ IDAT
+
+
+
+
+
+
+
+
+
+ 1.10
+
+ Joint Photographic Experts Group file format for lossy graphics files.
+ JPEG
+ jpeg
+
+
+ Sequence of segments with markers. Begins with byte of 0xFF and follows by marker type.
+ JPG
+
+
+
+
+
+
+
+
+
+ 1.10
+ Reporter Code Count-A data file (.csv) output by the Nanostring nCounter Digital Analyzer, which contains gene sample information, probe information and probe counts.
+
+
+ rcc
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ ARFF (Attribute-Relation File Format) is an ASCII text file format that describes a list of instances sharing a set of attributes.
+
+
+ This file format is for machine learning.
+ arff
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ AFG is a single text-based file assembly format that holds read and consensus information together.
+
+
+ afg
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ The bedGraph format allows display of continuous-valued data in track format. This display type is useful for probability scores and transcriptome data.
+
+
+ Holds a tab-delimited chromosome /start /end / datavalue dataset.
+ bedgraph
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Browser Extensible Data (BED) format of sequence annotation track that strictly does not contain non-standard fields beyond the first 3 columns.
+
+
+ Galaxy allows BED files to contain non-standard fields beyond the first 3 columns, some other implementations do not.
+ bedstrict
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ BED file format where each feature is described by chromosome, start, end, name, score, and strand.
+
+
+ Tab delimited data in strict BED format - no non-standard columns allowed; column count forced to 6
+ bed6
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ A BED file where each feature is described by all twelve columns.
+
+
+ Tab delimited data in strict BED format - no non-standard columns allowed; column count forced to 12
+ bed12
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Tabular format of chromosome names and sizes used by Galaxy.
+
+
+ Galaxy allows BED files to contain non-standard fields beyond the first 3 columns, some other implementations do not.
+ chrominfo
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Custom Sequence annotation track format used by Galaxy.
+
+
+ Used for tracks/track views within galaxy.
+ customtrack
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Color space FASTA format sequence variant.
+
+
+ FASTA format extended for color space information.
+ csfasta
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ HDF5 is a data model, library, and file format for storing and managing data, based on Hierarchical Data Format (HDF).
+ h5
+
+
+ An HDF5 file appears to the user as a directed graph. The nodes of this graph are the higher-level HDF5 objects that are exposed by the HDF5 APIs: Groups, Datasets, Named datatypes. Currently supported by the Python MDTraj package.
+ HDF5 is the new version, according to the HDF group, a completely different technology (https://support.hdfgroup.org/products/hdf4/) compared to HDF.
+ HDF5
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ A versatile bitmap format.
+
+
+ The TIFF format is perhaps the most versatile and diverse bitmap format in existence. Its extensible nature and support for numerous data compression schemes allow developers to customize the TIFF format to fit any peculiar data storage needs.
+ TIFF
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Standard bitmap storage format in the Microsoft Windows environment.
+
+
+ Although it is based on Windows internal bitmap data structures, it is supported by many non-Windows and non-PC applications.
+ BMP
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ IM is a format used by LabEye and other applications based on the IFUNC image processing library.
+
+
+ IFUNC library reads and writes most uncompressed interchange versions of this format.
+ im
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ pcd
+ Photo CD format, which is the highest resolution format for images on a CD.
+
+
+ PCD was developed by Kodak. A PCD file contains five different resolutions (ranging from low to high) of a slide or film negative. Because of this, PCD is often used by many photographers and graphics professionals for high-end printed applications.
+ pcd
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ PCX is an image file format that uses a simple form of run-length encoding. It is lossless.
+
+
+ pcx
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ The PPM format is a lowest common denominator color image file format.
+
+
+ ppm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ PSD (Photoshop Document) is a proprietary file that allows the user to work with the images' individual layers even after the file has been saved.
+
+
+ psd
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ X BitMap is a plain text binary image format used by the X Window System used for storing cursor and icon bitmaps used in the X GUI.
+
+
+ The XBM format was replaced by XPM for X11 in 1989.
+ xbm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ X PixMap (XPM) is an image file format used by the X Window System, it is intended primarily for creating icon pixmaps, and supports transparent pixels.
+
+
+ Sequence of segments with markers. Begins with byte of 0xFF and follows by marker type.
+ xpm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ RGB file format is the native raster graphics file format for Silicon Graphics workstations.
+
+
+ rgb
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ The PBM format is a lowest common denominator monochrome file format. It serves as the common language of a large family of bitmap image conversion filters.
+
+
+ pbm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ The PGM format is a lowest common denominator grayscale file format.
+
+
+ It is designed to be extremely easy to learn and write programs for.
+ pgm
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ png
+ PNG is a file format for image compression.
+
+
+ It is expected to replace the Graphics Interchange Format (GIF).
+ PNG
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Scalable Vector Graphics (SVG) is an XML-based vector image format for two-dimensional graphics with support for interactivity and animation.
+ Scalable Vector Graphics
+
+
+ The SVG specification is an open standard developed by the World Wide Web Consortium (W3C) since 1999.
+ SVG
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Sun Raster is a raster graphics file format used on SunOS by Sun Microsystems.
+
+
+ The SVG specification is an open standard developed by the World Wide Web Consortium (W3C) since 1999.
+ rast
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.11
+ true
+ Textual report format for sequence quality for reports from sequencing machines.
+
+
+ Sequence quality report format (text)
+
+
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences).
+
+
+ Phred quality scores are defined as a property which is logarithmically related to the base-calling error probabilities.
+ qual
+
+
+
+
+
+
+
+
+
+ 1.11
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) for Solexa/Illumina 1.0 format.
+
+
+ Solexa/Illumina 1.0 format can encode a Solexa/Illumina quality score from -5 to 62 using ASCII 59 to 126 (although in raw read data Solexa scores from -5 to 40 only are expected)
+ qualsolexa
+
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) from Illumina 1.5 and before Illumina 1.8.
+
+
+ Starting in Illumina 1.5 and before Illumina 1.8, the Phred scores 0 to 2 have a slightly different meaning. The values 0 and 1 are no longer used and the value 2, encoded by ASCII 66 "B", is used also at the end of reads as a Read Segment Quality Control Indicator.
+ qualillumina
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) for SOLiD data.
+
+
+ For SOLiD data, the sequence is in color space, except the first position. The quality values are those of the Sanger format.
+ qualsolid
+
+
+
+
+
+
+
+
+ 1.11
+ http://en.wikipedia.org/wiki/Phred_quality_score
+ FASTQ format subset for Phred sequencing quality score data only (no sequences) from 454 sequencers.
+
+
+ qual454
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Human ENCODE peak format.
+
+
+ Format that covers both the broad peak format and narrow peak format from ENCODE.
+ ENCODE peak format
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Human ENCODE narrow peak format.
+
+
+ Format that covers both the broad peak format and narrow peak format from ENCODE.
+ ENCODE narrow peak format
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ Human ENCODE broad peak format.
+
+
+ ENCODE broad peak format
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ bgz
+ Blocked GNU Zip format.
+
+
+ BAM files are compressed using a variant of GZIP (GNU ZIP), into a format called BGZF (Blocked GNU Zip Format).
+ bgzip
+
+
+
+
+
+
+
+
+
+ 1.11
+
+
+ TAB-delimited genome position file index format.
+
+
+ tabix
+
+
+
+
+
+
+
+
+ 1.11
+ true
+ Data format for graph data.
+
+
+ Graph format
+
+
+
+
+
+
+
+
+ 1.11
+
+ XML-based format used to store graph descriptions within Galaxy.
+
+
+ xgmml
+
+
+
+
+
+
+
+
+ 1.11
+
+ SIF (simple interaction file) Format - a network/pathway format used for instance in cytoscape.
+
+
+ sif
+
+
+
+
+
+
+
+
+
+ 1.11
+ MS Excel spreadsheet format consisting of a set of XML documents stored in a ZIP-compressed file.
+
+
+ xlsx
+
+
+
+
+
+
+
+
+ 1.11
+
+ Data format used by the SQLite database.
+
+
+ SQLite format
+
+
+
+
+
+
+
+
+
+ 1.11
+
+ Data format used by the SQLite database conformant to the Gemini schema.
+
+
+ Gemini SQLite format
+
+
+
+
+
+
+
+
+ 1.11
+ Duplicate of http://edamontology.org/format_3326
+ 1.20
+
+
+ Format of a data index of some type.
+
+
+ Index format
+ true
+
+
+
+
+
+
+
+
+
+ 1.11
+ An index of a genome database, indexed for use by the snpeff tool.
+
+
+ snpeffdb
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Binary format used by MATLAB files to store workspace variables.
+ .mat file format
+ MAT file format
+ MATLAB file format
+
+
+ MAT
+
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Format used by netCDF software library for writing and reading chromatography-MS data files. Also used to store trajectory atom coordinates information, such as the ones obtained by Molecular Dynamics simulations.
+ ANDI-MS
+
+
+ Network Common Data Form (NetCDF) library is supported by AMBER MD package from version 9.
+ netCDF
+
+
+
+
+
+
+
+
+ 1.12
+ mgf
+ Mascot Generic Format. Encodes multiple MS/MS spectra in a single file.
+
+
+ Files include *m*/*z*, intensity pairs separated by headers; headers can contain a bit more information, including search engine instructions.
+ MGF
+
+
+
+
+
+
+
+
+ 1.12
+ Spectral data format file where each spectrum is written to a separate file.
+
+
+ Each file contains one header line for the known or assumed charge and the mass of the precursor peptide ion, calculated from the measured *m*/*z* and the charge. This one line was then followed by all the *m*/*z*, intensity pairs that represent the spectrum.
+ dta
+
+
+
+
+
+
+
+
+ 1.12
+ Spectral data file similar to dta.
+
+
+ Differs from .dta only in subtleties of the header line format and content, and supports the added feature of being able to store multiple spectra in a single file.
+ pkl
+
+
+
+
+
+
+
+
+ 1.12
+ https://dx.doi.org/10.1038%2Fnbt1031
+ Common file format for proteomics mass spectrometric data developed at the Seattle Proteome Center/Institute for Systems Biology.
+
+
+ mzXML
+
+
+
+
+
+
+
+
+
+ 1.12
+ http://sashimi.sourceforge.net/schema_revision/pepXML/pepXML_v118.xsd
+ Open data format for the storage, exchange, and processing of peptide sequence assignments of MS/MS scans, intended to provide a common data output format for many different MS/MS search engines and subsequent peptide-level analyses.
+
+
+ pepXML
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Graphical Pathway Markup Language (GPML) is an XML format used for exchanging biological pathways.
+
+
+ GPML
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ oxlicg
+
+
+
+ A list of k-mers and their occurrences in a dataset. Can also be used as an implicit De Bruijn graph.
+ K-mer countgraph
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ mzTab is a tab-delimited format for mass spectrometry-based proteomics and metabolomics results.
+
+
+ mzTab
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+ imzml
+
+ imzML metadata is a data format for mass spectrometry imaging metadata.
+
+
+ imzML data are recorded in 2 files: '.imzXML' is a metadata XML file based on mzML by HUPO-PSI, and '.ibd' is a binary file containing the mass spectra. This entry is for the metadata XML file.
+ imzML metadata file
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ qcML is an XML format for quality-related data of mass spectrometry and other high-throughput measurements.
+
+
+ The focus of qcML is towards mass spectrometry based proteomics, but the format is suitable for metabolomics and sequencing as well.
+ qcML
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ PRIDE XML is an XML format for mass spectra, peptide and protein identifications, and metadata about a corresponding measurement, sample, experiment.
+
+
+ PRIDE XML
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Simulation Experiment Description Markup Language (SED-ML) is an XML format for encoding simulation setups, according to the MIASE (Minimum Information About a Simulation Experiment) requirements.
+
+
+ SED-ML
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Open Modeling EXchange format (OMEX) is a ZIPped format for encapsulating all information necessary for a modeling and simulation project in systems biology.
+
+
+ An OMEX file is a ZIP container that includes a manifest file, listing the content of the archive, an optional metadata file adding information about the archive and its content, and the files describing the model. OMEX is one of the standardised formats within COMBINE (Computational Modeling in Biology Network).
+ COMBINE OMEX
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ The Investigation / Study / Assay (ISA) tab-delimited (TAB) format incorporates metadata from experiments employing a combination of technologies.
+
+
+ ISA-TAB is based on MAGE-TAB. Other than tabular, the ISA model can also be represented in RDF, and in JSON (compliant with a set of defined JSON Schemata).
+ ISA-TAB
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ SBtab is a tabular format for biochemical network models.
+
+
+ SBtab
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Biological Connection Markup Language (BCML) is an XML format for biological pathways.
+
+
+ BCML
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Biological Dynamics Markup Language (BDML) is an XML format for quantitative data describing biological dynamics.
+
+
+ BDML
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ Biological Expression Language (BEL) is a textual format for representing scientific findings in life sciences in a computable form.
+
+
+ BEL
+
+
+
+
+
+
+
+
+
+ 1.13
+
+
+ SBGN-ML is an XML format for Systems Biology Graphical Notation (SBGN) diagrams of biological pathways or networks.
+
+
+ SBGN-ML
+
+
+
+
+
+
+
+
+
+ 1.13
+
+ agp
+
+ AGP is a tabular format for a sequence assembly (a contig, a scaffold/supercontig, or a chromosome).
+
+
+ AGP
+
+
+
+
+
+
+
+
+ 1.13
+ PostScript format.
+ PostScript
+
+
+ PS
+
+
+
+
+
+
+
+
+ 1.13
+
+ sra
+ SRA archive format (SRA) is the archive format used for input to the NCBI Sequence Read Archive.
+ SRA
+ SRA archive format
+
+
+ SRA format
+
+
+
+
+
+
+
+
+ 1.13
+
+ VDB ('vertical database') is the native format used for export from the NCBI Sequence Read Archive.
+ SRA native format
+
+
+ VDB
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.13
+
+ Index file format used by the samtools package to index TAB-delimited genome position files.
+
+
+ Tabix index file format
+
+
+
+
+
+
+
+
+ 1.13
+ A five-column, tab-delimited table of feature locations and qualifiers for importing annotation into an existing Sequin submission (an NCBI tool for submitting and updating GenBank entries).
+
+
+ Sequin format
+
+
+
+
+
+
+
+
+ 1.14
+ Proprietary mass-spectrometry format of Thermo Scientific's ProteomeDiscoverer software.
+ Magellan storage file format
+
+
+ This format corresponds to an SQLite database, and you can look into the files with e.g. SQLiteStudio3. There are also some readers (http://doi.org/10.1021/pr2005154) and converters (http://doi.org/10.1016/j.jprot.2015.06.015) for this format available, which re-engineered the database schema, but there is no official DB schema specification of Thermo Scientific for the format.
+ MSF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.14
+ true
+ Data format for biodiversity data.
+
+
+ Biodiversity data format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Exchange format of the Access to Biological Collections Data (ABCD) Schema; a standard for the access to and exchange of data about specimens and observations (primary biodiversity data).
+ ABCD
+
+
+ ABCD format
+
+
+
+
+
+
+
+
+
+ 1.14
+ Tab-delimited text files of GenePattern that contain a column for each sample, a row for each gene, and an expression value for each gene in each sample.
+ GCT format
+ Res format
+
+
+ GCT/Res format
+
+
+
+
+
+
+
+
+
+ 1.14
+ wiff
+ Mass spectrum file format from QSTAR and QTRAP instruments (ABI/Sciex).
+ wiff
+
+
+ WIFF format
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Output format used by X! series search engines that is based on the XML language BIOML.
+
+
+ X!Tandem XML
+
+
+
+
+
+
+
+
+
+ 1.14
+ Proprietary file format for mass spectrometry data from Thermo Scientific.
+
+
+ Proprietary format for which documentation is not available.
+ Thermo RAW
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ "Raw" result file from Mascot database search.
+
+
+ Mascot .dat file
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Format of peak list files from Andromeda search engine (MaxQuant) that consist of arbitrarily many spectra.
+ MaxQuant APL
+
+
+ MaxQuant APL peaklist format
+
+
+
+
+
+
+
+
+ 1.14
+
+ Synthetic Biology Open Language (SBOL) is an XML format for the specification and exchange of biological design information in synthetic biology.
+
+
+ SBOL introduces a standardised format for the electronic exchange of information on the structural and functional aspects of biological designs.
+ SBOL
+
+
+
+
+
+
+
+
+ 1.14
+
+ PMML uses XML to represent mining models. The structure of the models is described by an XML Schema.
+
+
+ One or more mining models can be contained in a PMML document.
+ PMML
+
+
+
+
+
+
+
+
+
+ 1.14
+
+ Image file format used by the Open Microscopy Environment (OME).
+
+
+ An OME-TIFF dataset consists of one or more files in standard TIFF or BigTIFF format, with the file extension .ome.tif or .ome.tiff, and an identical (or in the case of multiple files, nearly identical) string of OME-XML metadata embedded in the ImageDescription tag of each file's first IFD (Image File Directory). BigTIFF file extensions are also permitted, with the file extension .ome.tf2, .ome.tf8 or .ome.btf, but note these file extensions are an addition to the original specification, and software using an older version of the specification may not be able to handle these file extensions.
+ OME develops open-source software and data format standards for the storage and manipulation of biological microscopy data. It is a joint project between universities, research establishments, industry and the software development community.
+ OME-TIFF
+
+
+
+
+
+
+
+
+ 1.14
+
+ The LocARNA PP format combines sequence or alignment information and (respectively, single or consensus) ensemble probabilities into a PP 2.0 record.
+
+
+ Format for multiple aligned or single sequences together with the probabilistic description of the (consensus) RNA secondary structure ensemble by probabilities of base pairs, base pair stackings, and base pairs and unpaired bases in the loop of base pairs.
+ LocARNA PP
+
+
+
+
+
+
+
+
+ 1.14
+
+ Input format used by the Database of Genotypes and Phenotypes (dbGaP).
+
+
+ The Database of Genotypes and Phenotypes (dbGaP) is a National Institutes of Health (NIH) sponsored repository charged to archive, curate and distribute information produced by studies investigating the interaction of genotype and phenotype.
+ dbGaP format
+
+
+
+
+
+
+
+
+
+
+ 1.15
+
+ biom
+ The BIological Observation Matrix (BIOM) is a format for representing biological sample by observation contingency tables in broad areas of comparative omics. The primary use of this format is to represent OTU tables and metagenome tables.
+ BIological Observation Matrix format
+ biom
+
+
+ BIOM is a recognised standard for the Earth Microbiome Project, and is a project supported by Genomics Standards Consortium. Supported in QIIME, Mothur, MEGAN, etc.
+ BIOM format
+
+
+
+
+
+
+
+
+
+ 1.15
+
+
+ A format for storage, exchange, and processing of protein identifications created from ms/ms-derived peptide sequence data.
+
+
+ No human-consumable information about this format is available (see http://tools.proteomecenter.org/wiki/index.php?title=Formats:protXML).
+ protXML
+ http://doi.org/10.1038/msb4100024
+ http://sashimi.sourceforge.net/schema_revision/protXML/protXML_v3.xsd
+
+
+
+
+
+
+
+
+
+
+ 1.15
+ true
+ A linked data format enables publishing structured data as linked data (Linked Data), so that the data can be interlinked and become more useful through semantic queries.
+ Semantic Web format
+
+
+ Linked data format
+
+
+
+
+
+
+
+
+
+
+
+ 1.15
+
+ jsonld
+
+
+ JSON-LD, or JavaScript Object Notation for Linked Data, is a method of encoding Linked Data using JSON.
+ JavaScript Object Notation for Linked Data
+ jsonld
+
+
+ JSON-LD
+
+
+
+
+
+
+
+
+
+ 1.15
+
+ yaml
+ yml
+
+ YAML (YAML Ain't Markup Language) is a human-readable tree-structured data serialisation language.
+ YAML Ain't Markup Language
+ yml
+
+
+ Data in YAML format can be serialised into text, or binary format.
+ YAML version 1.2 is a superset of JSON; prior versions were "not strictly compatible".
+ YAML
+
+
+
+
+
+
+
+
+
+ 1.16
+ Tabular data represented as values in a text file delimited by some character.
+ Delimiter-separated values
+ Tabular format
+
+
+ DSV
+
+
+
+
+
+
+
+
+
+ 1.16
+ csv
+
+
+
+ Tabular data represented as comma-separated values in a text file.
+ Comma-separated values
+
+
+ CSV
+
+
+
+
+
+
+
+
+
+ 1.16
+ out
+ "Raw" result file from SEQUEST database search.
+
+
+ SEQUEST .out file
+
+
+
+
+
+
+
+
+
+ 1.16
+ http://ftp.mi.fu-berlin.de/pub/OpenMS/release1.9-documentation/html/classOpenMS_1_1IdXMLFile.html
+ http://open-ms.sourceforge.net/schemas/
+ XML file format for files containing information about peptide identifications from mass spectrometry data analysis carried out with OpenMS.
+
+
+ idXML
+
+
+
+
+
+
+
+
+ 1.16
+ Data table formatted such that it can be passed/streamed within the KNIME platform.
+
+
+ KNIME datatable format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ UniProtKB XML sequence features format is an XML format available for downloading UniProt entries.
+ UniProt XML
+ UniProt XML format
+ UniProtKB XML format
+
+
+ UniProtKB XML
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ UniProtKB RDF sequence features format is an RDF format available for downloading UniProt entries (in RDF/XML).
+ UniProt RDF
+ UniProt RDF format
+ UniProt RDF/XML
+ UniProt RDF/XML format
+ UniProtKB RDF format
+ UniProtKB RDF/XML
+ UniProtKB RDF/XML format
+
+
+ UniProtKB RDF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+ BioJSON is a BioXSD-schema-based JSON format of sequence-based data and some other common data - sequence records, alignments, feature records, references to resources, and more - optimised for integrative bioinformatics, web applications and APIs, and object-oriented programming.
+ BioJSON (BioXSD data model)
+ BioJSON format (BioXSD)
+ BioXSD BioJSON
+ BioXSD BioJSON format
+ BioXSD JSON
+ BioXSD JSON format
+ BioXSD in JSON
+ BioXSD in JSON format
+ BioXSD+JSON
+ BioXSD/GTrack BioJSON
+ BioXSD|BioJSON|BioYAML BioJSON
+ BioXSD|GTrack BioJSON
+
+
+ Work in progress. 'BioXSD' belongs to the 'BioXSD|GTrack' ecosystem of generic formats. 'BioJSON' is the JSON format based on the common, unified 'BioXSD data model', a.k.a. 'BioXSD|BioJSON|BioYAML'.
+ BioJSON (BioXSD)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+ BioYAML is a BioXSD-schema-based YAML format of sequence-based data and some other common data - sequence records, alignments, feature records, references to resources, and more - optimised for integrative bioinformatics, web APIs, human readability and editing, and object-oriented programming.
+ BioXSD BioYAML
+ BioXSD BioYAML format
+ BioXSD YAML
+ BioXSD YAML format
+ BioXSD in YAML
+ BioXSD in YAML format
+ BioXSD+YAML
+ BioXSD/GTrack BioYAML
+ BioXSD|BioJSON|BioYAML BioYAML
+ BioXSD|GTrack BioYAML
+ BioYAML (BioXSD data model)
+ BioYAML (BioXSD)
+ BioYAML format
+ BioYAML format (BioXSD)
+
+
+ Work in progress. 'BioXSD' belongs to the 'BioXSD|GTrack' ecosystem of generic formats. 'BioYAML' is the YAML format based on the common, unified 'BioXSD data model', a.k.a. 'BioXSD|BioJSON|BioYAML'.
+ BioYAML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ BioJSON is a JSON format of single multiple sequence alignments, with their annotations, features, and custom visualisation and application settings for the Jalview workbench.
+ BioJSON format (Jalview)
+ JSON (Jalview)
+ JSON format (Jalview)
+ Jalview BioJSON
+ Jalview BioJSON format
+ Jalview JSON
+ Jalview JSON format
+
+
+ BioJSON (Jalview)
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+
+ GSuite is a tabular format for collections of genome or sequence feature tracks, suitable for integrative multi-track analysis. GSuite contains links to genome/sequence tracks, with additional metadata.
+ BioXSD/GTrack GSuite
+ BioXSD|GTrack GSuite
+ GSuite (GTrack ecosystem of formats)
+ GSuite format
+ GTrack|BTrack|GSuite GSuite
+ GTrack|GSuite|BTrack GSuite
+
+
+ 'GSuite' belongs to the 'BioXSD|GTrack' ecosystem of generic formats, and in particular to its subset, the 'GTrack ecosystem' (GTrack, GSuite, BTrack). 'GSuite' is the tabular format for an annotated collection of individual GTrack files.
+ GSuite
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ BTrack is an HDF5-based binary format for genome or sequence feature tracks and their collections, suitable for integrative multi-track analysis. BTrack is a binary, compressed alternative to the GTrack and GSuite formats.
+ BTrack (GTrack ecosystem of formats)
+ BTrack format
+ BioXSD/GTrack BTrack
+ BioXSD|GTrack BTrack
+ GTrack|BTrack|GSuite BTrack
+ GTrack|GSuite|BTrack BTrack
+
+
+ 'BTrack' belongs to the 'BioXSD|GTrack' ecosystem of generic formats, and in particular to its subset, the 'GTrack ecosystem' (GTrack, GSuite, BTrack). 'BTrack' is the binary, optionally compressed HDF5-based version of the GTrack and GSuite formats.
+ BTrack
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+
+
+
+
+
+ The FAO/Bioversity/IPGRI Multi-Crop Passport Descriptors (MCPD) is an international standard format for exchange of germplasm information.
+ Bioversity MCPD
+ FAO MCPD
+ IPGRI MCPD
+ MCPD V.1
+ MCPD V.2
+ MCPD format
+ Multi-Crop Passport Descriptors
+ Multi-Crop Passport Descriptors format
+
+
+ Multi-Crop Passport Descriptors is a format available in 2 successive versions, V.1 (FAO/IPGRI 2001) and V.2 (FAO/Bioversity 2012).
+ MCPD
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ true
+ Data format of an annotated text, e.g. with recognised entities, concepts, and relations.
+
+
+ Annotated text format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ JSON format of annotated scientific text used by PubAnnotations and other tools.
+
+
+ PubAnnotation format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ BioC is a standardised XML format for sharing and integrating text data and annotations.
+
+
+ BioC
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ Native textual export format of annotated scientific text from PubTator.
+
+
+ PubTator format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ A format of text annotation using the linked-data Open Annotation Data Model, serialised typically in RDF or JSON-LD.
+
+
+ Open Annotation format
+
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+
+
+
+
+
+
+
+
+
+
+ A family of similar formats of text annotation, used by BRAT and other tools, known as BioNLP Shared Task format (BioNLP 2009 Shared Task on Event Extraction, BioNLP Shared Task 2011, BioNLP Shared Task 2013), BRAT format, BRAT standoff format, and similar.
+ BRAT format
+ BRAT standoff format
+
+
+ BioNLP Shared Task format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ true
+ A query language (format) for structured database queries.
+ Query format
+
+
+ Query language
+
+
+
+
+
+
+
+
+ 1.16
+ sql
+
+
+
+ SQL (Structured Query Language) is the de-facto standard query language (format of queries) for querying and manipulating data in relational databases.
+ Structured Query Language
+
+
+ SQL
+
+
+
+
+
+
+
+
+
+ 1.16
+
+ xq
+ xquery
+ xqy
+
+ XQuery (XML Query) is a query language (format of queries) for querying and manipulating structured and unstructured data, usually in the form of XML, text, and with vendor-specific extensions for other data formats (JSON, binary, etc.).
+ XML Query
+ xq
+ xqy
+
+
+ XQuery
+
+
+
+
+
+
+
+
+
+ 1.16
+
+
+ SPARQL (SPARQL Protocol and RDF Query Language) is a semantic query language for querying and manipulating data stored in Resource Description Framework (RDF) format.
+ SPARQL Protocol and RDF Query Language
+
+
+ SPARQL
+
+
+
+
+
+
+
+
+
+ 1.17
+ XML format for XML Schema.
+
+
+ xsd
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ XMFA format stands for eXtended Multi-FastA format and is used to store collinear sub-alignments that constitute a single genome alignment.
+ eXtended Multi-FastA format
+
+
+ XMFA
+
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ The GEN file format contains genetic data and describes SNPs.
+ Genotype file format
+
+
+ GEN
+
+
+
+
+
+
+
+
+ 1.20
+
+ The SAMPLE file format contains information about each individual i.e. individual IDs, covariates, phenotypes and missing data proportions, from a GWAS study.
+
+
+ SAMPLE file format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ SDF is one of a family of chemical-data file formats developed by MDL Information Systems; it is intended especially for structural information.
+
+
+ SDF
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ An MDL Molfile is a file format for holding information about the atoms, bonds, connectivity and coordinates of a molecule.
+
+
+ Molfile
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Complete, portable representation of a SYBYL molecule. ASCII file which contains all the information needed to reconstruct a SYBYL molecule.
+
+
+ Mol2
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Format for the LaTeX document preparation system.
+ LaTeX format
+
+
+ Uses the TeX typesetting program format.
+ latex
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Tab-delimited text file format used by Eland - the read-mapping program distributed by Illumina with its sequencing analysis pipeline - which maps short Solexa sequence reads to the human reference genome.
+ ELAND
+ eland
+
+
+ ELAND format
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ Phylip multiple alignment sequence format, less stringent than PHYLIP format.
+ PHYLIP Interleaved format
+
+
+ It differs from Phylip Format (format_1997) on length of the ID sequence. There are no length restrictions on the ID, but whitespaces aren't allowed in the sequence ID/Name because one space separates the longest ID and the beginning of the sequence. Sequence IDs must be padded to the longest ID length.
+ Relaxed PHYLIP Interleaved
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ Phylip multiple alignment sequence format, less stringent than PHYLIP sequential format (format_1998).
+ Relaxed PHYLIP non-interleaved
+ Relaxed PHYLIP non-interleaved format
+ Relaxed PHYLIP sequential format
+
+
+ It differs from Phylip sequential format (format_1998) on length of the ID sequence. There are no length restrictions on the ID, but whitespaces aren't allowed in the sequence ID/Name because one space separates the longest ID and the beginning of the sequence. Sequence IDs must be padded to the longest ID length.
+ Relaxed PHYLIP Sequential
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ Default XML format of VisANT, containing all the network information.
+ VisANT xml
+ VisANT xml format
+
+
+ VisML
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ GML (Graph Modeling Language) is a text file format supporting network data with a very easy syntax. It is used by Graphlet, Pajek, yEd, LEDA and NetworkX.
+ GML format
+
+
+ GML
+
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ FASTG is a format for faithfully representing genome assemblies in the face of allelic polymorphism and assembly uncertainty.
+ FASTG assembly graph format
+
+
+ It is called FASTG, like FASTA, but the G stands for "graph".
+ FASTG
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.20
+ true
+ Data format for raw data from a nuclear magnetic resonance (NMR) spectroscopy experiment.
+ NMR peak assignment data format
+ NMR processed data format
+ NMR raw data format
+ Nuclear magnetic resonance spectroscopy data format
+ Processed NMR data format
+ Raw NMR data format
+
+
+ NMR data format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ nmrML is an MSI supported XML-based open access format for metabolomics NMR raw and processed spectral data. It is accompanied by an nmrCV (controlled vocabulary) to allow ontology-based annotations.
+
+
+ nmrML
+
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ proBAM is an adaptation of BAM (format_2572), which was extended to meet specific requirements entailed by proteomics data.
+
+
+ proBAM
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ proBED is an adaptation of BED (format_3003), which was extended to meet specific requirements entailed by proteomics data.
+
+
+ proBED
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.20
+ true
+ Data format for raw microarray data.
+ Microarray data format
+
+
+ Raw microarray data format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ GenePix Results (GPR) text file format developed by Axon Instruments that is used to save GenePix Results data.
+
+
+ GPR
+
+
+
+
+
+
+
+
+
+ 1.20
+ Binary format used by the ARB software suite.
+ ARB binary format
+
+
+ ARB
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://ftp.mi.fu-berlin.de/pub/OpenMS/release1.9-documentation/html/classOpenMS_1_1ConsensusXMLFile.html
+ OpenMS format for grouping features in one map or across several maps.
+
+
+ consensusXML
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://ftp.mi.fu-berlin.de/pub/OpenMS/release1.9-documentation/html/classOpenMS_1_1FeatureXMLFile.html
+ OpenMS format for quantitation results (LC/MS features).
+
+
+ featureXML
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://www.psidev.info/mzdata-1_0_5-docs
+ Now deprecated data format of the HUPO Proteomics Standards Initiative. Replaced by mzML (format_3244).
+
+
+ mzData
+
+
+
+
+
+
+
+
+
+ 1.20
+ http://cruxtoolkit.sourceforge.net/tide-search.html
+ Format supported by the Tide tool for identifying peptides from tandem mass spectra.
+
+
+ TIDE TXT
+
+
+
+
+
+
+
+
+
+ 1.20
+ ftp://ftp.ncbi.nlm.nih.gov/blast/documents/NEWXML/ProposedBLASTXMLChanges.pdf
+ ftp://ftp.ncbi.nlm.nih.gov/blast/documents/NEWXML/xml2.pdf
+ http://www.ncbi.nlm.nih.gov/data_specs/schema/NCBI_BlastOutput2.mod.xsd
+ XML format as produced by the NCBI Blast package v2.
+
+
+ BLAST XML v2 results format
+
+
+
+
+
+
+
+
+
+ 1.20
+
+
+ Microsoft Powerpoint format.
+
+
+ pptx
+
+
+
+
+
+
+
+
+
+
+ 1.20
+
+ ibd
+
+ ibd is a data format for mass spectrometry imaging data.
+
+
+ imzML data is recorded in 2 files: '.imzXML' is a metadata XML file based on mzML by HUPO-PSI, and '.ibd' is a binary file containing the mass spectra.
+ ibd
+
+
+
+
+
+
+
+
+ 1.21
+ Data format used in Natural Language Processing.
+ Natural Language Processing format
+
+
+ NLP format
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ XML input file format for BEAST Software (Bayesian Evolutionary Analysis Sampling Trees).
+
+
+ BEAST
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ Chado-XML format is a direct mapping of the Chado relational schema into XML.
+
+
+ Chado-XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ An alignment format generated by PRANK/PRANKSTER consisting of four elements: newick, nodes, selection and model.
+
+
+ HSAML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ Output xml file from the InterProScan sequence analysis application.
+
+
+ InterProScan XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ The KEGG Markup Language (KGML) is an exchange format of the KEGG pathway maps, which is converted from internally used KGML+ (KGML+SVG) format.
+ KEGG Markup Language
+
+
+ KGML
+
+
+
+
+
+
+
+
+
+ 1.21
+ XML format for collected entries from bibliographic databases MEDLINE and PubMed.
+ MEDLINE XML
+
+
+ PubMed XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ A set of XML compliant markup components for describing multiple sequence alignments.
+
+
+ MSAML
+
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ OrthoXML is designed broadly to allow the storage and comparison of orthology data from any ortholog database. It establishes a structure for describing orthology relationships while still allowing flexibility for database-specific information to be encapsulated in the same format.
+
+
+ OrthoXML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ Tree structure of Protein Sequence Database Markup Language generated using Matra software.
+
+
+ PSDML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ SeqXML is an XML Schema to describe biological sequences, developed by the Stockholm Bioinformatics Centre.
+
+
+ SeqXML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ XML format for the UniParc database.
+
+
+ UniParc XML
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ XML format for the UniRef reference clusters.
+
+
+ UniRef XML
+
+
+
+
+
+
+
+
+
+
+ 1.21
+
+
+
+
+ cwl
+
+
+
+ Common Workflow Language (CWL) format for description of command-line tools and workflows.
+ Common Workflow Language
+ CommonWL
+
+
+ CWL
+
+
+
+
+
+
+
+
+
+ 1.21
+ Proprietary file format for mass spectrometry data from Waters.
+
+
+ Proprietary format for which documentation is not available, but used by multiple tools.
+ Waters RAW
+
+
+
+
+
+
+
+
+
+ 1.21
+
+ A standardized file format for data exchange in mass spectrometry, initially developed for infrared spectrometry.
+
+
+ JCAMP-DX is an ASCII based format and therefore not very compact even though it includes standards for file compression.
+ JCAMP-DX
+
+
+
+
+
+
+
+
+
+ 1.21
+ An NLP format used for annotated textual documents.
+
+
+ NLP annotation format
+
+
+
+
+
+
+
+
+ 1.21
+ NLP format used by a specific type of corpus (collection of texts).
+
+
+ NLP corpus format
+
+
+
+
+
+
+
+
+
+
+ 1.21
+
+
+
+ mirGFF3 is a common format for microRNA data resulting from small-RNA RNA-Seq workflows.
+ miRTop format
+
+
+ mirGFF3 is a specialisation of GFF3; produced by small-RNA-Seq analysis workflows, usable and convertible with the miRTop API (https://mirtop.readthedocs.io/en/latest/), and consumable by tools for downstream analysis.
+ mirGFF3
+
+
+
+
+
+
+
+
+ 1.21
+ A "placeholder" concept for formats of annotated RNA data, including e.g. microRNA and RNA-Seq data.
+ RNA data format
+ miRNA data format
+ microRNA data format
+
+
+ RNA annotation format
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ File format to store trajectory information for a 3D structure.
+ CG trajectory formats
+ MD trajectory formats
+ NA trajectory formats
+ Protein trajectory formats
+
+
+ Formats differ on what they are able to store (coordinates, velocities, topologies) and how they are storing it (raw, compressed, textual, binary).
+ Trajectory format
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ Binary file format to store trajectory information for a 3D structure.
+
+
+ Trajectory format (binary)
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ Textual file format to store trajectory information for a 3D structure.
+
+
+ Trajectory format (text)
+
+
+
+
+
+
+
+
+
+ 1.22
+ HDF is the name of a set of file formats and libraries designed to store and organize large amounts of numerical data, originally developed at the National Center for Supercomputing Applications at the University of Illinois.
+
+
+ HDF is currently supported by many commercial and non-commercial software platforms such as Java, MATLAB/Scilab, Octave, Python and R.
+ HDF
+
+
+
+
+
+
+
+
+
+ 1.22
+ PCAZip format is a binary compressed file to store atom coordinates based on Essential Dynamics (ED) and Principal Component Analysis (PCA).
+
+
+ The compression is made projecting the Cartesian snapshots collected along the trajectory into an orthogonal space defined by the most relevant eigenvectors obtained by diagonalization of the covariance matrix (PCA). In the compression/decompression process, part of the original information is lost, depending on the final number of eigenvectors chosen. However, with a reasonable choice of the set of eigenvectors the compression typically reduces the trajectory file to less than one tenth of their original size with very acceptable loss of information. Compression with PCAZip can only be applied to unsolvated structures.
+ PCAzip
+
+
+
+
+
+
+
+
+
+ 1.22
+ Portable binary format for trajectories produced by GROMACS package.
+
+
+ XTC uses the External Data Representation (xdr) routines for writing and reading data which were created for the Unix Network File System (NFS). XTC files use a reduced precision (lossy) algorithm which works multiplying the coordinates by a scaling factor (typically 1000), so converting them to pm (GROMACS standard distance unit is nm). This allows an integer rounding of the values. Several other tricks are performed, such as making use of atom proximity information: atoms close in sequence are usually close in space (e.g. water molecules). That makes XTC format the most efficient in terms of disk usage, in most cases reducing by a factor of 2 the size of any other binary trajectory format.
+ XTC
+
+
+
+
+
+
+
+
+
+ 1.22
+ Trajectory Next Generation (TNG) is a format for storage of molecular simulation data. It is designed and implemented by the GROMACS development group, and it is intended to replace the XTC format.
+ Trajectory Next Generation format
+
+
+ Fully architecture-independent format, regarding both endianness and the ability to mix single/double precision trajectories and I/O libraries. Self-sufficient, it should not require any other files for reading, and all the data should be contained in a single file for easy transport. Temporal compression of data, improving the compression rate of the previous XTC format. Possibility to store meta-data with information about the simulation. Direct access to a particular frame. Efficient parallel I/O.
+ TNG
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ The XYZ chemical file format is widely supported by many programs, although many slightly different XYZ file formats coexist (Tinker XYZ, UniChem XYZ, etc.). Basic information stored for each atom in the system are x, y and z coordinates and atom element/atomic number.
+
+
+ XYZ files are structured in this way: First line contains the number of atoms in the file. Second line contains a title, comment, or filename. Remaining lines contain atom information. Each line starts with the element symbol, followed by x, y and z coordinates in angstroms separated by whitespace. Multiple molecules or frames can be contained within one file, so it supports trajectory storage. XYZ files can be directly represented by a molecular viewer, as they contain all the basic information needed to build the 3D model.
+ XYZ
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER trajectory (also called mdcrd), with 10 coordinates per line and format F8.3 (fixed point notation with field width 8 and 3 decimal places).
+ AMBER trajectory format
+ inpcrd
+
+
+ mdcrd
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ true
+ Format of topology files; containing the static information of a structure molecular system that is needed for a molecular simulation.
+ CG topology format
+ MD topology format
+ NA topology format
+ Protein topology format
+
+
+ Many different file formats exist describing structural molecular topology. Typically, each MD package or simulation software works with their own implementation (e.g. GROMACS top, CHARMM psf, AMBER prmtop).
+ Topology format
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ GROMACS MD package top textual files define an entire structure system topology, either directly, or by including itp files.
+
+
+ There is currently no tool available for conversion between GROMACS topology format and other formats, due to the internal differences in both approaches. There is, however, a method to convert small molecules parameterized with AMBER force-field into GROMACS format, allowing simulations of these systems with GROMACS MD package.
+ GROMACS top
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER Prmtop file (version 7) is a structure topology text file divided in several sections designed to be parsed easily using simple Fortran code. Each section contains particular topology information, such as atom name, charge, mass, angles, dihedrals, etc.
+ AMBER Parm
+ AMBER Parm7
+ Parm7
+ Prmtop
+ Prmtop7
+
+
+ It can be modified manually, but as the size of the system increases, the hand-editing becomes increasingly complex. AMBER Parameter-Topology file format is used extensively by the AMBER software suite and is referred to as the Prmtop file for short.
+ version 7 is written to distinguish it from old versions of AMBER Prmtop. Similarly to HDF5, it is a completely different format, according to AMBER group: a drastic change to the file format occurred with the 2004 release of Amber 7 (http://ambermd.org/prmtop.pdf)
+ AMBER top
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ X-Plor Protein Structure Files (PSF) are structure topology files used by NAMD and CHARMM molecular simulations programs. PSF files contain six main sections of interest: atoms, bonds, angles, dihedrals, improper dihedrals (force terms used to maintain planarity) and cross-terms.
+
+
+ The high similarity in the functional form of the two potential energy functions used by AMBER and CHARMM force-fields gives rise to the possible use of one force-field within the other MD engine. Therefore, the conversion of PSF files to AMBER Prmtop format is possible with the use of AMBER chamber (CHARMM - AMBER) program.
+ PSF
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ GROMACS itp files (include topology) contain structure topology information, and are typically included in GROMACS topology files (GROMACS top). Itp files are used to define individual (or multiple) components of a topology as a separate file. This is particularly useful if there is a molecule that is used frequently, and also reduces the size of the system topology file, splitting it in different parts.
+
+
+ GROMACS itp files are used also to define position restrictions on the molecule, or to define the force field parameters for a particular ligand.
+ GROMACS itp
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Format of force field parameter files, which store the set of parameters (charges, masses, radii, bond lengths, bond dihedrals, etc.) that are essential for the proper description and simulation of a molecular system.
+ Many different file formats exist describing force field parameters. Typically, each MD package or simulation software works with their own implementation (e.g. GROMACS itp, CHARMM rtf, AMBER off / frcmod).
+ FF parameter format
+
+
+
+
+
+
+
+
+
+ 1.22
+ Scripps Research Institute BinPos format is a binary formatted file to store atom coordinates.
+ Scripps Research Institute BinPos
+
+
+ It is basically a translation of the ASCII atom coordinate format to binary code. The only additional information stored is a magic number that identifies the BinPos format and the number of atoms per snapshot. The remainder is the chain of coordinates binary encoded. A drawback of this format is its architecture dependency. Integers and floats codification depends on the architecture, thus it needs to be converted if working in different platforms (little endian, big endian).
+ BinPos
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER coordinate/restart file with 6 coordinates per line and decimal format F12.7 (fixed point notation with field width 12 and 7 decimal places).
+ restrt
+ rst7
+
+
+ RST
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Format of CHARMM Residue Topology Files (RTF), which define groups by including the atoms, the properties of the group, and bond and charge information.
+
+
+ There is currently no tool available for conversion between GROMACS topology format and other formats, due to the internal differences in both approaches. There is, however, a method to convert small molecules parameterized with AMBER force-field into GROMACS format, allowing simulations of these systems with GROMACS MD package.
+ CHARMM rtf
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER frcmod (Force field Modification) is a file format to store any modification to the standard force field needed for a particular molecule to be properly represented in the simulation.
+
+
+ AMBER frcmod
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ AMBER Object File Format library files (OFF library files) store residue libraries (forcefield residue parameters).
+ AMBER Object File Format
+ AMBER lib
+ AMBER off
+
+
+
+
+
+
+
+
+
+ 1.22
+ NMReData is a text-based data standard for processed NMR data. It relies on SDF molecule data and allows storing assignments of NMR peaks to molecule features. The NMR-extracted data (or "NMReDATA") includes: chemical shift, scalar coupling, 2D correlation, assignment, etc.
+
+
+ NMReData is a text based data standard for processed NMR data. It is relying on SDF molecule data and allows to store assignments of NMR peaks to molecule features. The NMR-extracted data (or "NMReDATA") includes: Chemical shift,scalar coupling, 2D correlation, assignment, etc. Find more in the paper at https://doi.org/10.1002/mrc.4527.
+ NMReDATA
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+
+
+
+ BpForms is a string format for concretely representing the primary structures of biopolymers, including DNA, RNA, and proteins that include non-canonical nucleic and amino acids. See https://www.bpforms.org for more information.
+
+
+ BpForms
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+ Format of trr files that contain the trajectory of a simulation experiment used by GROMACS.
+ The first 4 bytes of any trr file contain 1993. See https://github.com/galaxyproject/galaxy/pull/6597/files#diff-409951594551183dbf886e24de6cb129R760
+ trr
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+
+
+
+
+
+ msh
+
+
+
+ Mash sketch is a format for sequence / sequence checksum information. To make a sketch, each k-mer in a sequence is hashed, which creates a pseudo-random identifier. By sorting these hashes, a small subset from the top of the sorted list can represent the entire sequence.
+ Mash sketch
+ min-hash sketch
+
+
+ msh
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.23
+
+
+
+ loom
+ The Loom file format is based on HDF5, a standard for storing large numerical datasets. The Loom format is designed to efficiently hold large omics datasets. Typically, such data takes the form of a large matrix of numbers, along with metadata for the rows and columns.
+ Loom
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.23
+
+
+
+ zarray
+ zgroup
+ The Zarr format is an implementation of chunked, compressed, N-dimensional arrays for storing data.
+ Zarr
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.23
+
+
+ mtx
+
+ The Matrix Market matrix (MTX) format stores numerical or pattern matrices in a dense (array format) or sparse (coordinate format) representation.
+ MTX
+
+
+
+
+
+
+
+
+
+
+ 1.24
+
+
+
+
+
+ text/plain
+
+
+ BcForms is a format for abstractly describing the molecular structure (atoms and bonds) of macromolecular complexes as a collection of subunits and crosslinks. Each subunit can be described with BpForms (http://edamontology.org/format_3909) or SMILES (http://edamontology.org/data_2301). BcForms uses an ontology of crosslinks to abstract the chemical details of crosslinks from the descriptions of complexes (see https://bpforms.org/crosslink.html).
+ BcForms is related to http://edamontology.org/format_3909. (BcForms uses BpForms to describe subunits which are DNA, RNA, or protein polymers.) However, that format isn't the parent of BcForms. BcForms is similarly related to SMILES (http://edamontology.org/data_2301).
+ BcForms
+
+
+
+
+
+
+
+
+
+ 1.24
+
+ nq
+ N-Quads is a line-based, plain text format for encoding an RDF dataset. It includes information about the graph each triple belongs to.
+
+
+ N-Quads should not be confused with N-Triples which does not contain graph information.
+ N-Quads
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ json
+ application/json
+
+ Vega is a visualization grammar, a declarative language for creating, saving, and sharing interactive visualization designs. With Vega, you can describe the visual appearance and interactive behavior of a visualization in a JSON format, and generate web-based views using Canvas or SVG.
+
+
+ Vega
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ json
+ application/json
+
+ Vega-Lite is a high-level grammar of interactive graphics. It provides a concise JSON syntax for rapidly generating visualizations to support analysis. Vega-Lite specifications can be compiled to Vega specifications.
+
+
+ Vega-lite
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ application/xml
+
+ A model description language for computational neuroscience.
+
+
+ NeuroML
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+
+ bngl
+ application/xml
+ text/plain
+
+ BioNetGen is a format for the specification and simulation of rule-based models of biochemical systems, including signal transduction, metabolic, and genetic regulatory networks.
+ BioNetGen Language
+
+
+ BNGL
+
+
+
+
+
+
+
+
+ 1.25
+
+
+
+ A Docker image is a file, comprised of multiple layers, that is used to execute code in a Docker container. An image is essentially built from the instructions for a complete and executable version of an application, which relies on the host OS kernel.
+
+
+ Docker image
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+ gfa
+
+ Graphical Fragment Assembly captures sequence graphs as the product of an assembly, a representation of variation in genomes, splice graphs in genes, or even overlap between reads from long-read sequencing technology.
+ Graphical Fragment Assembly (GFA) 1.0
+
+
+ GFA 1
+
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+ gfa
+
+ Graphical Fragment Assembly captures sequence graphs as the product of an assembly, a representation of variation in genomes, splice graphs in genes, or even overlap between reads from long-read sequencing technology. GFA2 is an update of GFA1 which is not compatible with GFA1.
+ Graphical Fragment Assembly (GFA) 2.0
+
+
+ GFA 2
+
+
+
+
+
+
+
+
+
+ 1.25
+
+
+ xlsx
+ application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
+
+ ObjTables is a toolkit for creating re-usable datasets that are both human and machine-readable, combining the ease of spreadsheets (e.g., Excel workbooks) with the rigor of schemas (classes, their attributes, the type of each attribute, and the possible relationships between instances of classes). ObjTables consists of a format for describing schemas for spreadsheets, numerous data types for science, a syntax for indicating the class and attribute represented by each table and column in a workbook, and software for using schemas to rigorously validate, merge, split, compare, and revision datasets.
+
+
+ ObjTables
+
+
+
+
+
+
+
+
+
+ 1.25
+ contig
+ The CONTIG format used for output of the SOAPdenovo alignment program. It contains contig sequences generated without using mate pair information.
+
+
+ CONTIG
+
+
+
+
+
+
+
+
+
+ 1.25
+ wego
+ WEGO native format used by the Web Gene Ontology Annotation Plot application. Tab-delimited format with gene names and other GO IDs (columns) with one annotation record per line.
+
+
+ WEGO
+
+
+
+
+
+
+
+
+
+ 1.25
+ rpkm
+ Tab-delimited format for gene expression levels table, calculated as Reads Per Kilobase per Million (RPKM) mapped reads.
+ Gene expression levels table format
+
+
+ For example a 1kb transcript with 1000 alignments in a sample of 10 million reads (out of which 8 million reads can be mapped) will have RPKM = 1000/(1 * 8) = 125
+ RPKM
+
+
+
+
+
+
+
+
+ 1.25
+ tar
+ TAR archive file format generated by the Unix-based utility tar.
+ TAR
+ Tarball
+ tar
+
+
+ For example a 1kb transcript with 1000 alignments in a sample of 10 million reads (out of which 8 million reads can be mapped) will have RPKM = 1000/(1 * 8) = 125
+ TAR format
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ chain
+ The CHAIN format describes a pairwise alignment that allows gaps in both sequences simultaneously and is used by the UCSC Genome Browser.
+
+
+ CHAIN
+ https://genome.ucsc.edu/goldenPath/help/chain.html
+
+
+
+
+
+
+
+
+
+ 1.25
+ net
+ The NET file format is used to describe the data that underlie the net alignment annotations in the UCSC Genome Browser.
+
+
+ NET
+ https://genome.ucsc.edu/goldenPath/help/net.html
+
+
+
+
+
+
+
+
+
+ 1.25
+ qmap
+ Format of QMAP files generated for methylation data from an internal BGI pipeline.
+
+
+ QMAP
+
+
+
+
+
+
+
+
+
+ 1.25
+ ga
+ An emerging format for high-level Galaxy workflow description.
+ Galaxy workflow format
+ GalaxyWF
+ ga
+
+
+ gxformat2
+ https://github.com/galaxyproject/gxformat2
+
+
+
+
+
+
+
+
+
+ 1.25
+ wmv
+ The proprietary native video format of various Microsoft programs such as Windows Media Player.
+ Windows Media Video format
+ Windows movie file format
+
+
+ WMV
+
+
+
+
+
+
+
+
+
+ 1.25
+ zip
+ ZIP is an archive file format that supports lossless data compression.
+ ZIP
+
+
+ A ZIP file may contain one or more files or directories that may have been compressed.
+ ZIP format
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ lsm
+ Zeiss' proprietary image format based on TIFF.
+
+
+ LSM files are the default data export for the Zeiss LSM series confocal microscopes (e.g. LSM 510, LSM 710). In addition to the image data, LSM files contain most imaging settings.
+ LSM
+
+
+
+
+
+
+
+
+ 1.25
+ gz
+ gzip
+ GNU zip compressed file format common to Unix-based operating systems.
+ GNU Zip
+ gz
+ gzip
+
+
+ GZIP format
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ avi
+ Audio Video Interleaved (AVI) format is a multimedia container format for AVI files, that allows synchronous audio-with-video playback.
+ Audio Video Interleaved
+
+
+ AVI
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ trackdb
+ A declaration file format for UCSC browsers' track dataset display characteristics.
+
+
+ TrackDB
+
+
+
+
+
+
+
+
+
+ 1.25
+ cigar
+ Compact Idiosyncratic Gapped Alignment Report format is a compressed (run-length encoded) pairwise alignment format. It is useful for representing long (e.g. genomic) pairwise alignments.
+ CIGAR
+
+
+ CIGAR format
+ http://wiki.bits.vib.be/index.php/CIGAR/
+
+
+
+
+
+
+
+
+
+ 1.25
+ stl
+ STL is a file format native to the stereolithography CAD software created by 3D Systems. The format is used to save and share surface-rendered 3D images and also for 3D printing.
+ stl
+
+
+ Stereolithography format
+
+
+
+
+
+
+
+
+
+ 1.25
+ u3d
+ U3D (Universal 3D) is a compressed file format and data structure for 3D computer graphics. It contains 3D model information such as triangle meshes, lighting, shading, motion data, lines and points with color and structure.
+ Universal 3D
+ Universal 3D format
+
+
+ U3D
+
+
+
+
+
+
+
+
+
+ 1.25
+ tex
+ Bitmap image format used for storing textures.
+
+
+ Texture files can create the appearance of different surfaces and can be applied to both 2D and 3D objects. Note that the file extension .tex is also used for LaTeX documents, which are a completely different format; the two are NOT interchangeable.
+ Texture file format
+
+
+
+
+
+
+
+
+
+ 1.25
+ py
+ Format for scripts written in Python - a widely used high-level programming language for general-purpose programming.
+ Python
+ Python program
+ py
+
+
+ Python script
+
+
+
+
+
+
+
+
+
+ 1.25
+ mp4
+ A digital multimedia container format most commonly used to store video and audio.
+ MP4
+
+
+ MPEG-4
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ pl
+ Format for scripts written in Perl - a family of high-level, general-purpose, interpreted, dynamic programming languages.
+ Perl
+ Perl program
+ pl
+
+
+ Perl script
+
+
+
+
+
+
+
+
+
+ 1.25
+ r
+ Format for scripts written in the R language - an open source programming language and software environment for statistical computing and graphics that is supported by the R Foundation for Statistical Computing.
+ R
+ R program
+
+
+ R script
+
+
+
+
+
+
+
+
+
+ 1.25
+ rmd
+ A file format for making dynamic documents (R Markdown scripts) with the R language.
+
+
+ R markdown
+ https://rmarkdown.rstudio.com/articles_intro.html
+
+
+
+
+
+
+
+
+ 1.25
+ This duplicates an existing concept (http://edamontology.org/format_3549).
+ 1.26
+
+ An open file format from the Neuroimaging Informatics Technology Initiative (NIfTI) commonly used to store brain imaging data obtained using Magnetic Resonance Imaging (MRI) methods.
+
+
+ NIFTI format
+ true
+
+
+
+
+
+
+
+
+ 1.25
+ pickle
+ Format used by Python pickle module for serializing and de-serializing a Python object structure.
+
+
+ pickle
+ https://docs.python.org/2/library/pickle.html
+
+
+
+
+
+
+
+
+ 1.25
+ npy
+ The standard binary file format used by NumPy - a fundamental package for scientific computing with Python - for persisting a single arbitrary NumPy array on disk. The format stores all of the shape and dtype information necessary to reconstruct the array correctly.
+ NumPy
+ npy
+
+
+ NumPy format
+
+
+
+
+
+
+
+
+ 1.25
+ repz
+ Format of repertoire (archive) files that can be read by SimToolbox (a MATLAB toolbox for structured illumination fluorescence microscopy) or alternatively extracted with zip file archiver software.
+
+
+ SimTools repertoire file format
+ https://pdfs.semanticscholar.org/5f25/f1cc6cdf2225fe22dc6fd4fc0296d486a85c.pdf
+
+
+
+
+
+
+
+
+ 1.25
+ cfg
+ A configuration file used by various programs to store settings that are specific to their respective software.
+
+
+ Configuration file format
+
+
+
+
+
+
+
+
+ 1.25
+ zst
+ Format used by the Zstandard real-time compression algorithm.
+ Zstandard compression format
+ Zstandard-compressed file format
+ zst
+
+
+ Zstandard format
+ https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md
+
+
+
+
+
+
+
+
+
+ 1.25
+ m
+ The file format for MATLAB scripts or functions.
+ MATLAB
+ m
+
+
+ MATLAB script
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+
+ A data format for specifying parameter estimation problems in systems biology.
+
+
+ PEtab
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ g.vcf
+ g.vcf.gz
+ Genomic Variant Call Format (gVCF) is a version of VCF that includes not only the positions that are variant when compared to a reference genome, but also the non-variant positions as ranges, including metrics of confidence that the positions in the range are actually non-variant e.g. minimum read-depth and genotype quality.
+ g.vcf
+ g.vcf.gz
+
+
+ gVCF
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ cml
+
+ Chemical Markup Language (CML) is an XML-based format for encoding detailed information about a wide range of chemical concepts.
+ ChemML
+
+
+ cml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ cif
+
+ Crystallographic Information File (CIF) is a data exchange standard file format for Crystallographic Information and related Structural Science data.
+
+
+ cif
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ json
+
+
+
+
+
+
+
+
+
+ Format for describing the capabilities of a biosimulation tool including the modeling frameworks, simulation algorithms, and modeling formats that it supports, as well as metadata such as a list of the interfaces, programming languages, and operating systems supported by the tool; a link to download the tool; a list of the authors of the tool; and the license to the tool.
+
+
+ BioSimulators format for the specifications of biosimulation tools
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+
+ Outlines the syntax and semantics of the input and output arguments for command-line interfaces for biosimulation tools.
+
+
+ BioSimulators standard for command-line interfaces for biosimulation tools
+
+
+
+
+
+
+
+
+
+ 1.26
+ Data format derived from the standard PDB format, which enables user to incorporate parameters for charge and radius to the existing PDB data file.
+
+
+ PQR
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Data format used in AutoDock 4 for storing atomic coordinates, partial atomic charges and AutoDock atom types for both receptors and ligands.
+
+
+ PDBQT
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+
+
+ msp
+ MSP is a data format for mass spectrometry data.
+
+
+ NIST Text file format for storing MS∕MS spectra (m∕z and intensity of mass peaks) along with additional annotations for each spectrum. A single MSP file can thus contain single or multiple spectra. This format is frequently used to share spectra libraries.
+ MSP
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Function
+ A function that processes a set of inputs and results in a set of outputs, or associates arguments (inputs) with values (outputs).
+ Computational method
+ Computational operation
+ Computational procedure
+ Computational subroutine
+ Function (programming)
+ Lambda abstraction
+ Mathematical function
+ Mathematical operation
+ Computational tool
+ Process
+ sumo:Function
+
+
+ Special cases are: a) An operation that consumes no input (has no input arguments). Such operation is either a constant function, or an operation depending only on the underlying state. b) An operation that may modify the underlying state but has no output. c) The singular-case operation with no input or output, that still may modify the underlying state.
+ Operation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Function
+ Operation is a function that is computational. It typically has input(s) and output(s), which are always data.
+
+
+
+
+ Computational tool
+ Computational tool provides one or more operations.
+
+
+
+
+ Process
+ Process can have a function (as its quality/attribute), and can also perform an operation with inputs and outputs.
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search or query a data resource and retrieve entries and / or annotation.
+ Database retrieval
+ Query
+
+
+ Query and retrieval
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Search database to retrieve all relevant references to a particular entity or entry.
+
+ Data retrieval (database cross-reference)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Annotate an entity (typically a biological or biomedical database entity) with terms from a controlled vocabulary.
+
+
+ This is a broad concept and is used as a placeholder for other, more specific concepts.
+ Annotation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Generate an index of (typically a file of) biological data.
+ Data indexing
+ Database indexing
+
+
+ Indexing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Analyse an index of biological data.
+
+ Data index analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Retrieve basic information about a molecular sequence.
+
+ Annotation retrieval (sequence)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a molecular sequence by some means.
+ Sequence generation (nucleic acid)
+ Sequence generation (protein)
+
+
+ Sequence generation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Edit or change a molecular sequence, either randomly or specifically.
+
+
+ Sequence editing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Merge two or more (typically overlapping) molecular sequences.
+ Sequence splicing
+ Paired-end merging
+ Paired-end stitching
+ Read merging
+ Read stitching
+
+
+ Sequence merging
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Convert a molecular sequence from one type to another.
+
+
+ Sequence conversion
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate sequence complexity, for example to find low-complexity regions in sequences.
+
+
+ Sequence complexity calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate sequence ambiguity, for example to identify regions in protein or nucleotide sequences with many ambiguity codes.
+
+
+ Sequence ambiguity calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate character or word composition or frequency of a molecular sequence.
+
+
+ Sequence composition calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find and/or analyse repeat sequences in (typically nucleotide) sequences.
+
+
+ Repeat sequences include tandem repeats, inverted or palindromic repeats, DNA microsatellites (Simple Sequence Repeats or SSRs), interspersed repeats, maximal duplications and reverse, complemented and reverse complemented repeats etc. Repeat units can be exact or imperfect, in tandem or dispersed, of specified or unspecified length.
+ Repeat sequence analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Discover new motifs or conserved patterns in sequences or sequence alignments (de-novo discovery).
+ Motif discovery
+
+
+ Motifs and patterns might be conserved or over-represented (occur with improbable frequency).
+ Sequence motif discovery
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find (scan for) known motifs, patterns and regular expressions in molecular sequence(s).
+ Motif scanning
+ Sequence signature detection
+ Sequence signature recognition
+ Motif detection
+ Motif recognition
+ Motif search
+ Sequence motif detection
+ Sequence motif search
+ Sequence profile search
+
+
+ Sequence motif recognition
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find motifs shared by molecular sequences.
+
+
+ Sequence motif comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Analyse the sequence, conformational or physicochemical properties of transcription regulatory elements in DNA sequences.
+
+ For example transcription factor binding sites (TFBS) analysis to predict accessibility of DNA to binding factors.
+ Transcription regulatory sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Identify common, conserved (homologous) or synonymous transcriptional regulatory motifs (transcription factor binding sites).
+
+
+ Conserved transcription regulatory sequence identification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+ Extract, calculate or predict non-positional (physical or chemical) properties of a protein from processing a protein (3D) structure.
+
+
+ Protein property calculation (from structure)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse flexibility and motion in protein structure.
+ CG analysis
+ MD analysis
+ Protein Dynamics Analysis
+ Trajectory analysis
+ Nucleic Acid Dynamics Analysis
+ Protein flexibility and motion analysis
+ Protein flexibility prediction
+ Protein motion prediction
+
+
+ Use this concept for analysis of flexible and rigid residues, local chain deformability, regions undergoing conformational change, molecular vibrations or fluctuational dynamics, domain motions or other large-scale structural transitions in a protein structure.
+ Simulation analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or screen for 3D structural motifs in protein structure(s).
+ Protein structural feature identification
+ Protein structural motif recognition
+
+
+ This includes conserved substructures and conserved geometry, such as spatial arrangement of secondary structure or protein backbone. Methods might use structure alignment, structural templates, searches for similar electrostatic potential and molecular surface shape, surface-mapping of phylogenetic information etc.
+ Structural motif discovery
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify structural domains in a protein structure from first principles (for example calculations on structural compactness).
+
+
+ Protein domain recognition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the architecture (spatial arrangement of secondary structure) of protein structure(s).
+
+
+ Protein architecture analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: SymShellFiveXML
+ WHATIF: SymShellOneXML
+ WHATIF: SymShellTenXML
+ WHATIF: SymShellTwoXML
+ WHATIF:ListContactsNormal
+ WHATIF:ListContactsRelaxed
+ WHATIF:ListSideChainContactsNormal
+ WHATIF:ListSideChainContactsRelaxed
+ Calculate or extract inter-atomic, inter-residue or residue-atom contacts, distances and interactions in protein structure(s).
+
+
+ Residue interaction calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:CysteineTorsions
+ WHATIF:ResidueTorsions
+ WHATIF:ResidueTorsionsBB
+ WHATIF:ShowTauAngle
+ Calculate, visualise or analyse phi/psi angles of a protein structure.
+ Backbone torsion angle calculation
+ Cysteine torsion angle calculation
+ Tau angle calculation
+ Torsion angle calculation
+
+
+ Protein geometry calculation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Extract, calculate or predict non-positional (physical or chemical) properties of a protein, including any non-positional properties of the molecular sequence, from processing a protein sequence or 3D structure.
+ Protein property rendering
+ Protein property calculation (from sequence)
+ Protein property calculation (from structure)
+ Protein structural property calculation
+ Structural property calculation
+
+
+ This includes methods to render and visualise the properties of a protein sequence, and a residue-level search for properties such as solvent accessibility, hydropathy, secondary structure, ligand-binding etc.
+ Protein property calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Immunogen design
+ Predict antigenicity, allergenicity / immunogenicity, allergic cross-reactivity etc of peptides and proteins.
+ Antigenicity prediction
+ Immunogenicity prediction
+ B cell peptide immunogenicity prediction
+ Hopp and Woods plotting
+ MHC peptide immunogenicity prediction
+
+
+ Immunological systems are cellular or humoral. In vaccine design to induce a cellular immune response, methods must search for antigens that can be recognized by the major histocompatibility complex (MHC) molecules present in T lymphocytes. If a humoral response is required, antigens for B cells must be identified.
+ This includes methods that generate a graphical rendering of antigenicity of a protein, such as a Hopp and Woods plot.
+ This is usually done in the development of peptide-specific antibodies or multi-epitope vaccines. Methods might use sequence data (for example motifs) and / or structural data.
+ Peptide immunogenicity prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict, recognise and identify positional features in molecular sequences such as key functional sites or regions.
+ Sequence feature prediction
+ Sequence feature recognition
+ Motif database search
+ SO:0000110
+
+
+ Look at "Protein feature detection" (http://edamontology.org/operation_3092) and "Nucleic acid feature detection" (http://edamontology.org/operation_0415) in case more specific terms are needed.
+ Sequence feature detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Extract a sequence feature table from a sequence database entry.
+
+ Data retrieval (feature table)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query the features (in a feature table) of molecular sequence(s).
+
+ Feature table query
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the feature tables of two or more molecular sequences.
+ Feature comparison
+ Feature table comparison
+
+
+ Sequence feature comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Display basic information about a sequence alignment.
+
+ Data retrieval (sequence alignment)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a molecular sequence alignment.
+
+
+ Sequence alignment analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare (typically by aligning) two molecular sequence alignments.
+
+
+ See also 'Sequence profile alignment'.
+ Sequence alignment comparison
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Convert a molecular sequence alignment from one type to another (for example amino acid to coding nucleotide sequence).
+
+
+ Sequence alignment conversion
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) physicochemical property data of nucleic acids.
+
+ Nucleic acid property processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate or predict physical or chemical properties of nucleic acid molecules, including any non-positional properties of the molecular sequence.
+
+
+ Nucleic acid property calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict splicing alternatives or transcript isoforms from analysis of sequence data.
+ Alternative splicing analysis
+ Alternative splicing detection
+ Differential splicing analysis
+ Splice transcript prediction
+
+
+ Alternative splicing prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect frameshifts in DNA sequences, including frameshift sites and signals, and frameshift errors from sequencing projects.
+ Frameshift error detection
+
+
+ Methods include sequence alignment (if related sequences are available) and word-based sequence comparison.
+ Frameshift detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect vector sequences in nucleotide sequence, typically by comparison to a set of known vector sequences.
+
+
+ Vector sequence detection
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict secondary structure of protein sequences.
+ Secondary structure prediction (protein)
+
+
+ Methods might use amino acid composition, local sequence information, multiple sequence alignments, physicochemical features, estimated energy content, statistical algorithms, hidden Markov models, support vector machines, kernel machines, neural networks etc.
+ Protein secondary structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict super-secondary structure of protein sequence(s).
+
+
+ Super-secondary structures include leucine zippers, coiled coils, Helix-Turn-Helix etc.
+ Protein super-secondary structure prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict and/or classify transmembrane proteins or transmembrane (helical) domains or regions in protein sequences.
+
+
+ Transmembrane protein prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse transmembrane protein(s), typically by processing sequence and / or structural data, and write an informative report for example about the protein and its transmembrane domains / regions.
+
+
+ Use this (or child) concept for analysis of transmembrane domains (buried and exposed faces), transmembrane helices, helix topology, orientation, inter-helical contacts, membrane dipping (re-entrant) loops and other secondary structure etc. Methods might use pattern discovery, hidden Markov models, sequence alignment, structural profiles, amino acid property analysis, comparison to known domains or some combination (hybrid methods).
+ Transmembrane protein analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ This is an "organisational class" not very useful for annotation per se.
+ 1.19
+
+
+
+
+ Predict tertiary structure of a molecular (biopolymer) sequence.
+
+ Structure prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict contacts, non-covalent interactions and distance (constraints) between amino acids in protein sequences.
+ Residue interaction prediction
+ Contact map prediction
+ Protein contact map prediction
+
+
+ Methods usually involve multiple sequence alignment analysis.
+ Residue contact prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Analyse experimental protein-protein interaction data from for example yeast two-hybrid analysis, protein microarrays, immunoaffinity chromatography followed by mass spectrometry, phage display etc.
+
+
+ Protein interaction raw data analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify or predict protein-protein interactions, interfaces, binding sites etc in protein sequences.
+
+
+ Protein-protein interaction prediction (from protein sequence)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify or predict protein-protein interactions, interfaces, binding sites etc in protein structures.
+
+
+ Protein-protein interaction prediction (from protein structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a network of protein interactions.
+ Protein interaction network comparison
+
+
+ Protein interaction network analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+ Compare two or more biological pathways or networks.
+
+ Pathway or network comparison
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict RNA secondary structure (for example knots, pseudoknots, alternative structures etc).
+ RNA shape prediction
+
+
+ Methods might use RNA motifs, predicted intermolecular contacts, or RNA sequence-structure compatibility (inverse RNA folding).
+ RNA secondary structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse some aspect of RNA/DNA folding, typically by processing sequence and/or structural data. For example, compute folding energies such as minimum folding energies for DNA or RNA sequences or energy landscape of RNA mutants.
+ Nucleic acid folding
+ Nucleic acid folding modelling
+ Nucleic acid folding prediction
+ Nucleic acid folding energy calculation
+
+
+ Nucleic acid folding analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on restriction enzymes or restriction enzyme sites.
+
+ Data retrieval (restriction enzyme annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Identify genetic markers in DNA sequences.
+
+ A genetic marker is any DNA sequence of known chromosomal location that is associated with and specific to a particular gene or trait. This includes short sequences surrounding a SNP, Sequence-Tagged Sites (STS) which are well suited for PCR amplification, a longer minisatellites sequence etc.
+ Genetic marker identification
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a genetic (linkage) map of a DNA sequence (typically a chromosome) showing the relative positions of genetic markers based on estimation of non-physical distances.
+ Functional mapping
+ Genetic cartography
+ Genetic map construction
+ Genetic map generation
+ Linkage mapping
+ QTL mapping
+
+
+ Mapping involves ordering genetic loci along a chromosome and estimating the physical distance between loci. A genetic map shows the relative (not physical) position of known genes and genetic markers.
+ This includes mapping of the genetic architecture of dynamic complex traits (functional mapping), e.g. by characterisation of the underlying quantitative trait loci (QTLs) or nucleotides (QTNs).
+ Genetic mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse genetic linkage.
+
+
+ For example, estimate how close two genes are on a chromosome by calculating how often they are transmitted together to an offspring, ascertain whether two genes are linked and parental linkage, calculate linkage map distance etc.
+ Linkage analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate codon usage statistics and create a codon usage table.
+ Codon usage table construction
+
+
+ Codon usage table generation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more codon usage tables.
+
+
+ Codon usage table comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse codon usage in molecular sequences or process codon usage data (e.g. a codon usage table).
+ Codon usage data analysis
+ Codon usage table analysis
+
+
+ Codon usage analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify and plot third base position variability in a nucleotide sequence.
+
+
+ Base position variability plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find exact character or word matches between molecular sequences without full sequence alignment.
+
+
+ Sequence word comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a sequence distance matrix or otherwise estimate genetic distances between molecular sequences.
+ Phylogenetic distance matrix generation
+ Sequence distance calculation
+ Sequence distance matrix construction
+
+
+ Sequence distance matrix generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more molecular sequences, identify and remove redundant sequences based on some criteria.
+
+
+ Sequence redundancy removal
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Build clusters of similar sequences, typically using scores from pair-wise alignment or other comparison of the sequences.
+ Sequence cluster construction
+ Sequence cluster generation
+
+
+ The clusters may be output or used internally for some other purpose.
+ Sequence clustering
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (identify equivalent sites within) molecular sequences.
+ Sequence alignment construction
+ Sequence alignment generation
+ Consensus-based sequence alignment
+ Constrained sequence alignment
+ Multiple sequence alignment (constrained)
+ Sequence alignment (constrained)
+
+
+ Includes methods that align sequence profiles (representing sequence alignments): methods might perform one-to-one, one-to-many or many-to-many comparisons. See also 'Sequence alignment comparison'.
+ See also "Read mapping"
+ Sequence alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Align two or more molecular sequences of different types (for example genomic DNA to EST, cDNA or mRNA).
+
+ Hybrid sequence alignment construction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align molecular sequences using sequence and structural information.
+ Sequence alignment (structure-based)
+
+
+ Structure-based sequence alignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (superimpose) molecular tertiary structures.
+ Structural alignment
+ 3D profile alignment
+ 3D profile-to-3D profile alignment
+ Structural profile alignment
+
+
+ Includes methods that align structural (3D) profiles or templates (representing structures or structure alignments) - including methods that perform one-to-one, one-to-many or many-to-many comparisons.
+ Structure alignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate some type of sequence profile (for example a hidden Markov model) from a sequence alignment.
+ Sequence profile construction
+
+
+ Sequence profile generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate some type of structural (3D) profile or template from a structure or structure alignment.
+ Structural profile construction
+ Structural profile generation
+
+
+ 3D profile generation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align sequence profiles (representing sequence alignments).
+
+
+ Profile-profile alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align structural (3D) profiles or templates (representing structures or structure alignments).
+
+
+ 3D profile-to-3D profile alignment
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align molecular sequence(s) to sequence profile(s), or profiles to other profiles. A profile typically represents a sequence alignment.
+ Profile-profile alignment
+ Profile-to-profile alignment
+ Sequence-profile alignment
+ Sequence-to-profile alignment
+
+
+ A sequence profile typically represents a sequence alignment. Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Sequence profile alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align molecular sequence(s) to structural (3D) profile(s) or template(s) (representing a structure or structure alignment).
+
+
+ Sequence-to-3D-profile alignment
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align molecular sequence to structure in 3D space (threading).
+ Sequence-structure alignment
+ Sequence-3D profile alignment
+ Sequence-to-3D-profile alignment
+
+
+ This includes sequence-to-3D-profile alignment methods, which align molecular sequence(s) to structural (3D) profile(s) or template(s) (representing a structure or structure alignment) - methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Use this concept for methods that evaluate sequence-structure compatibility by assessing residue interactions in 3D. Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Protein threading
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Recognize (predict and identify) known protein structural domains or folds in protein sequence(s) which (typically) are not accompanied by any significant sequence similarity to known structures.
+ Domain prediction
+ Fold prediction
+ Protein domain prediction
+ Protein fold prediction
+ Protein fold recognition
+
+
+ Methods use some type of mapping between sequence and fold, for example secondary structure prediction and alignment, profile comparison, sequence properties, homologous sequence search, kernel machines etc. Domains and folds might be taken from SCOP or CATH.
+ Fold recognition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Too fine-grained, the operation (Data retrieval) hasn't changed, just what is retrieved.
+ 1.17
+
+ Search for and retrieve data concerning or describing some core data, as distinct from the primary data that is being described.
+
+
+ This includes documentation, general information and other metadata on entities such as databases, database entries and tools.
+ Metadata retrieval
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Query scientific literature, in search for articles, article data, concepts, named entities, or for statistics.
+
+
+ Literature search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Text analysis
+ Process and analyse text (typically scientific literature) to extract information from it.
+ Literature mining
+ Text analytics
+ Text data mining
+ Article analysis
+ Literature analysis
+
+
+ Text mining
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform in-silico (virtual) PCR.
+
+
+ Virtual PCR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Design or predict oligonucleotide primers for PCR and DNA amplification etc.
+ PCR primer prediction
+ Primer design
+ PCR primer design (based on gene structure)
+ PCR primer design (for conserved primers)
+ PCR primer design (for gene transcription profiling)
+ PCR primer design (for genotyping polymorphisms)
+ PCR primer design (for large scale sequencing)
+ PCR primer design (for methylation PCRs)
+ Primer quality estimation
+
+
+ Primer design involves predicting or selecting primers that are specific to a provided PCR template. Primers can be designed with certain properties such as size of product desired, primer size etc. The output might be a minimal or overlapping primer set.
+ This includes predicting primers based on gene structure, promoters, exon-exon junctions, predicting primers that are conserved across multiple genomes or species, primers for gene transcription profiling, for genotyping polymorphisms, for example single nucleotide polymorphisms (SNPs), for large scale sequencing, or for methylation PCRs.
+ PCR primer design
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict and/or optimize oligonucleotide probes for DNA microarrays, for example for transcription profiling of genes, or for genomes and gene families.
+ Microarray probe prediction
+
+
+ Microarray probe design
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Combine (align and merge) overlapping fragments of a DNA sequence to reconstruct the original sequence.
+ Metagenomic assembly
+ Sequence assembly editing
+
+
+ For example, assemble overlapping reads from paired-end sequencers into contigs (a contiguous sequence corresponding to read overlaps). Or assemble contigs, for example ESTs and genomic DNA fragments, depending on the detected fragment overlaps.
+ Sequence assembly
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+ Standardize or normalize microarray data.
+
+
+ Microarray data standardisation and normalisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) SAGE, MPSS or SBS experimental data.
+
+ Sequencing-based expression profile data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform cluster analysis of expression data to identify groups with similar expression profiles, for example by clustering.
+ Gene expression clustering
+ Gene expression profile clustering
+
+
+ Expression profile clustering
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The measurement of the activity (expression) of multiple genes in a cell, tissue, sample etc., in order to get an impression of biological function.
+ Feature expression analysis
+ Functional profiling
+ Gene expression profile construction
+ Gene expression profile generation
+ Gene expression quantification
+ Gene transcription profiling
+ Non-coding RNA profiling
+ Protein profiling
+ RNA profiling
+ mRNA profiling
+
+
+ Gene expression profiling generates some sort of gene expression profile, for example from microarray data.
+ Gene expression profiling
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Comparison of expression profiles.
+ Gene expression comparison
+ Gene expression profile comparison
+
+
+ Expression profile comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Interpret (in functional terms) and annotate gene expression data.
+
+
+ Functional profiling
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse EST or cDNA sequences.
+
+ For example, identify full-length cDNAs from EST sequences or detect potential EST antisense transcripts.
+ EST and cDNA sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identify and select targets for protein structural determination.
+
+ Methods will typically navigate a graph of protein families of known structure.
+ Structural genomics target selection
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign secondary structure from protein coordinate or experimental data.
+
+
+ Includes secondary structure assignment from circular dichroism (CD) spectroscopic data, and from protein coordinate data.
+ Protein secondary structure assignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign a protein tertiary structure (3D coordinates), or other aspects of protein structure, from raw experimental data.
+ NOE assignment
+ Structure calculation
+
+
+ Protein structure assignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: CorrectedPDBasXML
+ WHATIF: UseFileDB
+ WHATIF: UseResidueDB
+ Evaluate the quality or correctness of a protein three-dimensional model.
+ Protein model validation
+ Residue validation
+
+
+ Model validation might involve checks for atomic packing, steric clashes (bumps), volume irregularities, agreement with electron density maps, number of amino acid residues, percentage of residues with missing or bad atoms, irregular Ramachandran Z-scores, irregular Chi-1 / Chi-2 normality scores, RMS-Z score on bonds and angles etc.
+ The PDB file format has had difficulties, inconsistencies and errors. Corrections can include identifying a meaningful sequence, removal of alternate atoms, correction of nomenclature problems, removal of incomplete residues and spurious waters, addition or removal of water, modelling of missing side chains, optimisation of cysteine bonds, regularisation of bond lengths, bond angles and planarities etc.
+ This includes methods that calculate poor quality residues. The scoring function to identify poor quality residues may consider residues with bad atoms or atoms with high B-factor, residues in the N- or C-terminal position, adjacent to an unstructured residue, non-canonical residues, glycine and proline (or adjacent to these such residues).
+ Protein structure validation
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: CorrectedPDBasXML
+ Refine (after evaluation) a model of a molecular structure (typically a protein structure) to reduce steric clashes, volume irregularities etc.
+ Protein model refinement
+
+
+ Molecular model refinement
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree.
+ Phylogenetic tree construction
+ Phylogenetic reconstruction
+ Phylogenetic tree generation
+
+
+ Phylogenetic trees are usually constructed from a set of sequences from which an alignment (or data matrix) is calculated.
+ Phylogenetic inference
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse an existing phylogenetic tree or trees, typically to detect features or make predictions.
+ Phylogenetic tree analysis
+ Phylogenetic modelling
+
+
+ Phylogenetic modelling is the modelling of trait evolution and prediction of trait values using phylogeny as a basis.
+ Phylogenetic analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees.
+
+
+ For example, to produce a consensus tree, subtrees, supertrees, calculate distances between trees or test topological similarity between trees (e.g. a congruence index) etc.
+ Phylogenetic tree comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Edit a phylogenetic tree.
+
+
+ Phylogenetic tree editing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Comparison of a DNA sequence to orthologous sequences in different species and inference of a phylogenetic tree, in order to identify regulatory elements such as transcription factor binding sites (TFBS).
+ Phylogenetic shadowing
+
+
+ Phylogenetic shadowing is a type of footprinting where many closely related species are used. A phylogenetic 'shadow' represents the additive differences between individual sequences. By masking or 'shadowing' variable positions a conserved sequence is produced with few or none of the variations, which is then compared to the sequences of interest to identify significant regions of conservation.
+ Phylogenetic footprinting
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+ Simulate the folding of a protein.
+
+
+ Protein folding simulation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict the folding pathway(s) or non-native structural intermediates of a protein.
+
+
+ Protein folding pathway prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Map and model the effects of single nucleotide polymorphisms (SNPs) on protein structure(s).
+
+
+ Protein SNP mapping
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the effect of point mutation on a protein structure, in terms of structural effects and protein folding, stability and function.
+ Variant functional prediction
+ Protein SNP mapping
+ Protein mutation modelling
+ Protein stability change prediction
+
+
+ Protein SNP mapping maps and models the effects of single nucleotide polymorphisms (SNPs) on protein structure(s). Methods might predict silent or pathological mutations.
+ Variant effect prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Design molecules that elicit an immune response (immunogens).
+
+
+ Immunogen design
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Predict and optimise zinc finger protein domains for DNA/RNA binding (for example for transcription factors and nucleases).
+
+
+ Zinc finger prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate Km, Vmax and derived data for an enzyme reaction.
+
+
+ Enzyme kinetics calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Reformat a file of data (or equivalent entity in memory).
+ File format conversion
+ File formatting
+ File reformatting
+ Format conversion
+ Reformatting
+
+
+ Formatting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Test and validate the format and content of a data file.
+ File format validation
+
+
+ Format validation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Visualise, plot or render (graphically) biomolecular data such as molecular sequences or structures.
+ Data visualisation
+ Rendering
+ Molecular visualisation
+ Plotting
+
+
+ This includes methods to render and visualise molecules.
+ Visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a sequence database by sequence comparison and retrieve similar sequences, or sequences matching a given sequence motif or pattern, such as a Prosite pattern or regular expression.
+
+
+ This excludes direct retrieval methods (e.g. the dbfetch program).
+ Sequence database search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a tertiary structure database, typically by sequence and/or structure comparison, or some other means, and retrieve structures and associated data.
+
+
+ Structure database search
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Search a secondary protein database (of classification information) to assign a protein sequence(s) to a known protein family or group.
+
+
+ Protein secondary database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Screen a sequence against a motif or pattern database.
+
+ Motif database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ Search a database of sequence profiles with a query sequence.
+
+ Sequence profile database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Search a database of transmembrane proteins, for example for sequence or structural similarities.
+
+ Transmembrane protein database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a database and retrieve sequences with a given entry code or accession number.
+
+ Sequence retrieval (by code)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a database and retrieve sequences containing a given keyword.
+
+ Sequence retrieval (by keyword)
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a sequence database and retrieve sequences that are similar to a query sequence.
+ Sequence database search (by sequence)
+ Structure database search (by sequence)
+
+
+ Sequence similarity search
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Search a sequence database and retrieve sequences matching a given sequence motif or pattern, such as a Prosite pattern or regular expression.
+
+
+ Sequence database search (by motif or pattern)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database and retrieve sequences of a given amino acid composition.
+
+ Sequence database search (by amino acid composition)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a sequence database and retrieve sequences with a specified property, typically a physicochemical or compositional property.
+
+
+ Sequence database search (by property)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database and retrieve sequences that are similar to a query sequence using a word-based method.
+
+ Word-based methods (for example BLAST, gapped BLAST, MEGABLAST, WU-BLAST etc.) are usually quicker than alignment-based methods. They may or may not handle gaps.
+ Sequence database search (by sequence using word-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database and retrieve sequences that are similar to a query sequence using a sequence profile-based method, or with a supplied profile as query.
+
+ This includes tools based on PSI-BLAST.
+ Sequence database search (by sequence using profile-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a sequence database for sequences that are similar to a query sequence using a local alignment-based method.
+
+ This includes tools based on the Smith-Waterman algorithm or FASTA.
+ Sequence database search (by sequence using local alignment-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search sequence(s) or a sequence database for sequences that are similar to a query sequence using a global alignment-based method.
+
+ This includes tools based on the Needleman and Wunsch algorithm.
+ Sequence database search (by sequence using global alignment-based methods)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search a DNA database (for example a database of conserved sequence tags) for matches to Sequence-Tagged Site (STS) primer sequences.
+
+ STSs are genetic markers that are easily detected by the polymerase chain reaction (PCR) using specific primers.
+ Sequence database search (by sequence for primer sequences)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Search sequence(s) or a sequence database for sequences which match a set of peptide masses, for example a peptide mass fingerprint from mass spectrometry.
+
+
+ Sequence database search (by molecular weight)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search sequence(s) or a sequence database for sequences of a given isoelectric point.
+
+ Sequence database search (by isoelectric point)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a tertiary structure database and retrieve entries with a given entry code or accession number.
+
+ Structure retrieval (by code)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a tertiary structure database and retrieve entries containing a given keyword.
+
+ Structure retrieval (by keyword)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Search a tertiary structure database and retrieve structures with a sequence similar to a query sequence.
+
+
+ Structure database search (by sequence)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a database of molecular structure and retrieve structures that are similar to a query structure.
+ Structure database search (by structure)
+ Structure retrieval by structure
+
+
+ Structural similarity search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotate a molecular sequence record with terms from a controlled vocabulary.
+
+
+ Sequence annotation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotate a genome sequence with terms from a controlled vocabulary.
+ Functional genome annotation
+ Metagenome annotation
+ Structural genome annotation
+
+
+ Genome annotation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate the reverse and / or complement of a nucleotide sequence.
+ Nucleic acid sequence reverse and complement
+ Reverse / complement
+ Reverse and complement
+
+
+ Reverse complement
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a random sequence, for example, with a specific character composition.
+
+
+ Random sequence generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate digest fragments for a nucleotide sequence containing restriction sites.
+ Nucleic acid restriction digest
+
+
+ Restriction digest
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Cleave a protein sequence into peptide fragments (corresponding to enzymatic or chemical cleavage).
+
+
+ This is often followed by calculation of protein fragment masses (http://edamontology.org/operation_0398).
+ Protein sequence cleavage
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mutate a molecular sequence a specified amount or shuffle it to produce a randomised sequence with the same overall composition.
+
+
+ Sequence mutation and randomisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mask characters in a molecular sequence (replacing those characters with a mask character).
+
+
+ For example, SNPs or repeats in a DNA sequence might be masked.
+ Sequence masking
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Cut (remove) characters or a region from a molecular sequence.
+
+
+ Sequence cutting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Create (or remove) restriction sites in sequences, for example using silent mutations.
+
+
+ Restriction site creation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Translate a DNA sequence into protein.
+
+
+ DNA translation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Transcribe a nucleotide sequence into mRNA sequence(s).
+
+
+ DNA transcription
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Calculate base frequency or word composition of a nucleotide sequence.
+
+
+ Sequence composition calculation (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Calculate amino acid frequency or word composition of a protein sequence.
+
+
+ Sequence composition calculation (protein)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find (and possibly render) short repetitive subsequences (repeat sequences) in (typically nucleotide) sequences.
+
+
+ Repeat sequence detection
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse repeat sequence organisation such as periodicity.
+
+
+ Repeat sequence organisation analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Analyse the hydrophobic, hydrophilic or charge properties of a protein structure.
+
+
+ Protein hydropathy calculation (from structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:AtomAccessibilitySolvent
+ WHATIF:AtomAccessibilitySolventPlus
+ Calculate solvent accessible or buried surface areas in protein or other molecular structures.
+ Protein solvent accessibility calculation
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Accessible surface calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify clusters of hydrophobic or charged residues in a protein structure.
+
+
+ Protein hydropathy cluster calculation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate whether a protein structure has an unusually large net charge (dipole moment).
+
+
+ Protein dipole moment calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:AtomAccessibilityMolecular
+ WHATIF:AtomAccessibilityMolecularPlus
+ WHATIF:ResidueAccessibilityMolecular
+ WHATIF:ResidueAccessibilitySolvent
+ WHATIF:ResidueAccessibilityVacuum
+ WHATIF:ResidueAccessibilityVacuumMolecular
+ WHATIF:TotAccessibilityMolecular
+ WHATIF:TotAccessibilitySolvent
+ Calculate the molecular surface area in proteins and other macromolecules.
+ Protein atom surface calculation
+ Protein residue surface calculation
+ Protein surface and interior calculation
+ Protein surface calculation
+
+
+ Molecular surface calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify or predict catalytic residues, active sites or other ligand-binding sites in protein structures.
+
+
+ Protein binding site prediction (from structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the interaction of protein with nucleic acids, e.g. RNA or DNA-binding sites, interfaces etc.
+ Protein-nucleic acid binding site analysis
+ Protein-DNA interaction analysis
+ Protein-RNA interaction analysis
+
+
+ Protein-nucleic acid interaction analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Decompose a structure into compact or globular fragments (protein peeling).
+
+
+ Protein peeling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a matrix of distance between residues (for example the C-alpha atoms) in a protein structure.
+
+
+ Protein distance matrix calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a residue contact map (typically all-versus-all inter-residue contacts) for a protein structure.
+ Protein contact map calculation
+
+
+ Contact map calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate clusters of contacting residues in protein structures.
+
+
+ This includes for example clusters of hydrophobic or charged residues, or clusters of contacting residues which have a key structural or functional role.
+ Residue cluster calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:HasHydrogenBonds
+ WHATIF:ShowHydrogenBonds
+ WHATIF:ShowHydrogenBondsM
+ Identify potential hydrogen bonds between amino acids and other groups.
+
+
+ The output might include the atoms involved in the bond, bond geometric parameters and bond enthalpy.
+ Hydrogen bond calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+ Calculate non-canonical atomic interactions in protein structures.
+
+ Residue non-canonical interaction detection
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a Ramachandran plot of a protein structure.
+
+
+ Ramachandran plot calculation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Validate a Ramachandran plot of a protein structure.
+
+
+ Ramachandran plot validation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the molecular weight of a protein sequence or fragments.
+ Peptide mass calculation
+
+
+ Protein molecular weight calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict extinction coefficients or optical density of a protein sequence.
+
+
+ Protein extinction coefficient calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate pH-dependent properties from pKa calculations of a protein sequence.
+ Protein pH-dependent property calculation
+
+
+ Protein pKa calculation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Hydropathy calculation on a protein sequence.
+
+
+ Protein hydropathy calculation (from sequence)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Plot a protein titration curve.
+
+
+ Protein titration curve plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate isoelectric point of a protein sequence.
+
+
+ Protein isoelectric point calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Estimate hydrogen exchange rate of a protein sequence.
+
+
+ Protein hydrogen exchange rate calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate hydrophobic or hydrophilic / charged regions of a protein sequence.
+
+
+ Protein hydrophobic region calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate aliphatic index (relative volume occupied by aliphatic side chains) of a protein.
+
+
+ Protein aliphatic index calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the hydrophobic moment of a peptide sequence and recognize amphiphilicity.
+
+
+ Hydrophobic moment is a peptide's hydrophobicity measured for different angles of rotation.
+ Protein hydrophobic moment plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the stability or globularity of a protein sequence, whether it is intrinsically unfolded etc.
+
+
+ Protein globularity prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the solubility or atomic solvation energy of a protein sequence.
+
+
+ Protein solubility prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict crystallizability of a protein sequence.
+
+
+ Protein crystallizability prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Too fine-grained.
+ 1.17
+
+ Detect or predict signal peptides (and typically predict subcellular localisation) of eukaryotic proteins.
+
+
+ Protein signal peptide detection (eukaryotes)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Too fine-grained.
+ 1.17
+
+ Detect or predict signal peptides (and typically predict subcellular localisation) of bacterial proteins.
+
+
+ Protein signal peptide detection (bacteria)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Predict MHC class I or class II binding peptides, promiscuous binding peptides, immunogenicity etc.
+
+
+ MHC peptide immunogenicity prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Predict, recognise and identify positional features in protein sequences such as functional sites or regions and secondary structure.
+
+ Methods typically involve scanning for known motifs, patterns and regular expressions.
+ Protein feature prediction (from sequence)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict, recognise and identify features in nucleotide sequences such as functional sites or regions, typically by scanning for known motifs, patterns and regular expressions.
+ Sequence feature detection (nucleic acid)
+ Nucleic acid feature prediction
+ Nucleic acid feature recognition
+ Nucleic acid site detection
+ Nucleic acid site prediction
+ Nucleic acid site recognition
+
+
+ Methods typically involve scanning for known motifs, patterns and regular expressions.
+ This is a placeholder but does not comprehensively include all child concepts - please inspect other concepts under "Nucleic acid sequence analysis" for example "Gene prediction", for other feature detection operations.
+ Nucleic acid feature detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict antigenic determinant sites (epitopes) in protein sequences.
+ Antibody epitope prediction
+ Epitope prediction
+ B cell epitope mapping
+ B cell epitope prediction
+ Epitope mapping (MHC Class I)
+ Epitope mapping (MHC Class II)
+ T cell epitope mapping
+ T cell epitope prediction
+
+
+ Epitope mapping is commonly done during vaccine design.
+ Epitope mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict post-translation modification sites in protein sequences.
+ PTM analysis
+ PTM prediction
+ PTM site analysis
+ PTM site prediction
+ Post-translation modification site prediction
+ Post-translational modification analysis
+ Protein post-translation modification site prediction
+ Acetylation prediction
+ Acetylation site prediction
+ Dephosphorylation prediction
+ Dephosphorylation site prediction
+ GPI anchor prediction
+ GPI anchor site prediction
+ GPI modification prediction
+ GPI modification site prediction
+ Glycosylation prediction
+ Glycosylation site prediction
+ Hydroxylation prediction
+ Hydroxylation site prediction
+ Methylation prediction
+ Methylation site prediction
+ N-myristoylation prediction
+ N-myristoylation site prediction
+ N-terminal acetylation prediction
+ N-terminal acetylation site prediction
+ N-terminal myristoylation prediction
+ N-terminal myristoylation site prediction
+ Palmitoylation prediction
+ Palmitoylation site prediction
+ Phosphoglycerylation prediction
+ Phosphoglycerylation site prediction
+ Phosphorylation prediction
+ Phosphorylation site prediction
+ Phosphosite localization
+ Prenylation prediction
+ Prenylation site prediction
+ Pupylation prediction
+ Pupylation site prediction
+ S-nitrosylation prediction
+ S-nitrosylation site prediction
+ S-sulfenylation prediction
+ S-sulfenylation site prediction
+ Succinylation prediction
+ Succinylation site prediction
+ Sulfation prediction
+ Sulfation site prediction
+ Sumoylation prediction
+ Sumoylation site prediction
+ Tyrosine nitration prediction
+ Tyrosine nitration site prediction
+ Ubiquitination prediction
+ Ubiquitination site prediction
+
+
+ Methods might predict sites of methylation, N-terminal myristoylation, N-terminal acetylation, sumoylation, palmitoylation, phosphorylation, sulfation, glycosylation, glycosylphosphatidylinositol (GPI) modification sites (GPI lipid anchor signals) etc.
+ Post-translational modification site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect or predict signal peptides and signal peptide cleavage sites in protein sequences.
+
+
+ Methods might use sequence motifs and features, amino acid composition, profiles, machine-learned classifiers, etc.
+ Protein signal peptide detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Predict catalytic residues, active sites or other ligand-binding sites in protein sequences.
+
+
+ Protein binding site prediction (from sequence)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict or detect RNA- and DNA-binding sites in protein sequences.
+ Protein-nucleic acid binding detection
+ Protein-nucleic acid binding prediction
+ Protein-nucleic acid binding site detection
+ Protein-nucleic acid binding site prediction
+ Zinc finger prediction
+
+
+ This includes methods that predict and optimise zinc finger protein domains for DNA/RNA binding (for example for transcription factors and nucleases).
+ Nucleic acids-binding site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+ Predict protein sites that are key to protein folding, such as possible sites of nucleation or stabilisation.
+
+
+ Protein folding site prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect or predict cleavage sites (enzymatic or chemical) in protein sequences.
+
+
+ Protein cleavage site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Predict epitopes that bind to MHC class I molecules.
+
+
+ Epitope mapping (MHC Class I)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Predict epitopes that bind to MHC class II molecules.
+
+
+ Epitope mapping (MHC Class II)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Detect, predict and identify whole gene structure in DNA sequences. This includes protein coding regions, exon-intron structure, regulatory regions etc.
+
+
+ Whole gene prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Detect, predict and identify genetic elements such as promoters, coding regions, splice sites, etc in DNA sequences.
+
+
+ Methods for gene prediction might be ab initio, based on phylogenetic comparisons, use motifs, sequence features, support vector machine, alignment etc.
+ Gene component prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect or predict transposons, retrotransposons / retrotransposition signatures etc.
+
+
+ Transposon prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect polyA signals in nucleotide sequences.
+ PolyA detection
+ PolyA prediction
+ PolyA signal prediction
+ Polyadenylation signal detection
+ Polyadenylation signal prediction
+
+
+ PolyA signal detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect quadruplex-forming motifs in nucleotide sequences.
+ Quadruplex structure prediction
+
+
+ Quadruplex (4-stranded) structures are formed by guanine-rich regions and are implicated in various important biological processes and as therapeutic targets.
+ Quadruplex formation site detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find CpG rich regions in a nucleotide sequence or isochores in genome sequences.
+ CpG island and isochores detection
+ CpG island and isochores rendering
+
+
+ An isochore is a long region (> 3 KB) of DNA with very uniform GC content, in contrast to the rest of the genome. Isochores tend to have more genes, higher local melting or denaturation temperatures, and different flexibility. Methods might calculate fractional GC content or variation of GC content, predict methylation status of CpG islands etc. This includes methods that visualise CpG rich regions in a nucleotide sequence, for example plot isochores in a genome sequence.
+ CpG island and isochore detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find and identify restriction enzyme cleavage sites (restriction sites) in (typically) DNA sequences, for example to generate a restriction map.
+
+
+ Restriction site recognition
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict nucleosome exclusion sequences (nucleosome free regions) in DNA.
+ Nucleosome exclusion sequence prediction
+ Nucleosome formation sequence prediction
+
+
+ Nucleosome position prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify, predict or analyse splice sites in nucleotide sequences.
+ Splice prediction
+
+
+ Methods might require a pre-mRNA or genomic DNA sequence.
+ Splice site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict whole gene structure using a combination of multiple methods to achieve better predictions.
+
+
+ Integrated gene prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find operons (operators, promoters and genes) in bacteria genes.
+
+
+ Operon prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict protein-coding regions (CDS or exon) or open reading frames in nucleotide sequences.
+ ORF finding
+ ORF prediction
+
+
+ Coding region prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict selenocysteine insertion sequence (SECIS) in a DNA sequence.
+ Selenocysteine insertion sequence (SECIS) prediction
+
+
+ SECIS elements are around 60 nucleotides in length with a stem-loop structure that directs the cell to translate UGA codons as selenocysteines.
+ SECIS element prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict transcriptional regulatory motifs, patterns, elements or regions in DNA sequences.
+ Regulatory element prediction
+ Transcription regulatory element prediction
+ Conserved transcription regulatory sequence identification
+ Translational regulatory element prediction
+
+
+ This includes comparative genomics approaches that identify common, conserved (homologous) or synonymous transcriptional regulatory elements. For example cross-species comparison of transcription factor binding sites (TFBS). Methods might analyse co-regulated or co-expressed genes, or sets of oppositely expressed genes.
+ This includes promoters, enhancers, silencers and boundary elements / insulators, regulatory protein or transcription factor binding sites etc. Methods might be specific to a particular genome and use motifs, word-based / grammatical methods, position-specific frequency matrices, discriminative pattern analysis etc.
+ Transcriptional regulatory element prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict translation initiation sites, possibly by searching a database of sites.
+
+
+ Translation initiation site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict whole promoters or promoter elements (transcription start sites, RNA polymerase binding site, transcription factor binding sites, promoter enhancers etc) in DNA sequences.
+
+
+ Methods might recognize CG content, CpG islands, splice sites, polyA signals etc.
+ Promoter prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify, predict or analyse cis-regulatory elements in DNA sequences (TATA box, Pribnow box, SOS box, CAAT box, CCAAT box, operator etc.) or in RNA sequences (e.g. riboswitches).
+ Transcriptional regulatory element prediction (DNA-cis)
+ Transcriptional regulatory element prediction (RNA-cis)
+
+
+ Cis-regulatory elements (cis-elements) regulate the expression of genes located on the same strand from which the element was transcribed. Cis-elements are found in the 5' promoter region of the gene, in an intron, or in the 3' untranslated region. Cis-elements are often binding sites of one or more trans-acting factors. They also occur in RNA sequences, e.g. a riboswitch is a region of an mRNA molecule that binds a small target molecule that regulates the gene's activity.
+ cis-regulatory element prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Identify, predict or analyse cis-regulatory elements (for example riboswitches) in RNA sequences.
+
+
+ Transcriptional regulatory element prediction (RNA-cis)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict functional RNA sequences with a gene regulatory role (trans-regulatory elements) or targets.
+ Functional RNA identification
+ Transcriptional regulatory element prediction (trans)
+
+
+ Trans-regulatory elements regulate genes distant from the gene from which they were transcribed.
+ trans-regulatory element prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify matrix/scaffold attachment regions (MARs/SARs) in DNA sequences.
+ MAR/SAR prediction
+ Matrix/scaffold attachment site prediction
+
+
+ MAR/SAR sites often flank a gene or gene cluster and are found nearby cis-regulatory sequences. They might contribute to transcription regulation.
+ S/MAR prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict transcription factor binding sites in DNA sequences.
+
+
+ Transcription factor binding site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict exonic splicing enhancers (ESE) in exons.
+
+
+ An exonic splicing enhancer (ESE) is a 6-base DNA sequence motif in an exon that enhances or directs splicing of pre-mRNA or hetero-nuclear RNA (hnRNA) into mRNA.
+ Exonic splicing enhancer prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Evaluate molecular sequence alignment accuracy.
+ Sequence alignment quality evaluation
+
+
+ Evaluation might be purely sequence-based or use structural information.
+ Sequence alignment validation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse character conservation in a molecular sequence alignment, for example to derive a consensus sequence.
+ Residue conservation analysis
+
+
+ Use this concept for methods that calculate substitution rates, estimate relative site variability, identify sites with biased properties, derive a consensus sequence, or identify highly conserved or very poorly conserved sites, regions, blocks etc.
+ Sequence alignment analysis (conservation)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse correlations between sites in a molecular sequence alignment.
+
+
+ This is typically done to identify possible covarying positions and predict contacts or structural constraints in protein structures.
+ Sequence alignment analysis (site correlation)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect chimeric sequences (chimeras) from a sequence alignment.
+ Chimeric sequence detection
+
+
+ A chimera includes regions from two or more phylogenetically distinct sequences. They are usually artifacts of PCR and are thought to occur when a prematurely terminated amplicon reanneals to another DNA strand and is subsequently copied to completion in later PCR cycles.
+ Chimera detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect recombination (hotspots and coldspots) and identify recombination breakpoints in a sequence alignment.
+ Sequence alignment analysis (recombination detection)
+
+
+ Tools might use a genetic algorithm, quartet-mapping, bootscanning, graphical methods, random forest model and so on.
+ Recombination detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify insertion, deletion and duplication events from a sequence alignment.
+ Indel discovery
+ Sequence alignment analysis (indel detection)
+
+
+ Tools might use a genetic algorithm, quartet-mapping, bootscanning, graphical methods, random forest model and so on.
+ Indel detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Predict nucleosome formation potential of DNA sequences.
+
+ Nucleosome formation potential prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a thermodynamic property of DNA or DNA/RNA, such as melting temperature, enthalpy and entropy.
+
+
+ Nucleic acid thermodynamic property calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA melting profile.
+
+
+ A melting profile is used to visualise and analyse partly melted DNA conformations.
+ Nucleic acid melting profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA stitch profile.
+
+
+ A stitch profile represents the alternative conformations that partly melted DNA can adopt in a temperature range.
+ Nucleic acid stitch profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA melting curve.
+
+
+ Nucleic acid melting curve plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA probability profile.
+
+
+ Nucleic acid probability profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate and plot a DNA or DNA/RNA temperature profile.
+
+
+ Nucleic acid temperature profile plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate curvature and flexibility / stiffness of a nucleotide sequence.
+
+
+ This includes properties such as.
+ Nucleic acid curvature calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict microRNA sequences (miRNA) and precursors or microRNA targets / binding sites in a DNA sequence.
+ miRNA prediction
+ microRNA detection
+ microRNA target detection
+
+
+ miRNA target prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict tRNA genes in genomic sequences (tRNA).
+
+
+ tRNA gene prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assess binding specificity of putative siRNA sequence(s), for example for a functional assay, typically with respect to designing specific siRNA sequences.
+
+
+ siRNA binding specificity prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Predict secondary structure of protein sequence(s) using multiple methods to achieve better predictions.
+
+
+ Protein secondary structure prediction (integrated)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict helical secondary structure of protein sequences.
+
+
+ Protein secondary structure prediction (helices)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict turn structure (for example beta hairpin turns) of protein sequences.
+
+
+ Protein secondary structure prediction (turns)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict open coils, non-regular secondary structure and intrinsically disordered / unstructured regions of protein sequences.
+
+
+ Protein secondary structure prediction (coils)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict cysteine bonding state and disulfide bond partners in protein sequences.
+
+
+ Disulfide bond prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Not sustainable to have protein type-specific concepts.
+ 1.19
+
+ Predict G protein-coupled receptors (GPCR).
+
+
+ GPCR prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Not sustainable to have protein type-specific concepts.
+ 1.19
+
+ Analyse G-protein coupled receptor proteins (GPCRs).
+
+
+ GPCR analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict tertiary structure (backbone and side-chain conformation) of protein sequences.
+ Protein folding pathway prediction
+
+
+ This includes methods that predict the folding pathway(s) or non-native structural intermediates of a protein.
+ Protein structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict structure of DNA or RNA.
+
+
+ Methods might identify thermodynamically stable or evolutionarily conserved structures.
+ Nucleic acid structure prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict tertiary structure of protein sequence(s) without homologs of known structure.
+ de novo structure prediction
+
+
+ Ab initio structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Build a three-dimensional protein model based on known (for example homologs) structures.
+ Comparative modelling
+ Homology modelling
+ Homology structure modelling
+ Protein structure comparative modelling
+
+
+ The model might be of a whole, part or aspect of protein structure. Molecular modelling methods might use sequence-structure alignment, structural templates, molecular dynamics, energy minimisation etc.
+ Protein modelling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model the structure of a protein in complex with a small molecule or another macromolecule.
+ Docking simulation
+ Macromolecular docking
+
+
+ This includes protein-protein interactions, protein-nucleic acid, protein-ligand binding etc. Methods might predict whether the molecules are likely to bind in vivo, their conformation when bound, the strength of the interaction, possible mutations to achieve bonding and so on.
+ Molecular docking
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model protein backbone conformation.
+ Protein modelling (backbone)
+ Design optimization
+ Epitope grafting
+ Scaffold search
+ Scaffold selection
+
+
+ Methods might require a preliminary C(alpha) trace.
+ Scaffold selection, scaffold search, epitope grafting and design optimization are stages of backbone modelling done during rational vaccine design.
+ Backbone modelling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model, analyse or edit amino acid side chain conformation in protein structure, optimize side-chain packing, hydrogen bonding etc.
+ Protein modelling (side chains)
+ Antibody optimisation
+ Antigen optimisation
+ Antigen resurfacing
+ Rotamer likelihood prediction
+
+
+ Antibody optimisation is to optimize the antibody-interacting surface of the antigen (epitope). Antigen optimisation is to optimize the antigen-interacting surface of the antibody (paratope). Antigen resurfacing is to resurface the antigen by varying the sequence of non-epitope regions.
+ Methods might use a residue rotamer library.
+ This includes rotamer likelihood prediction: the prediction of rotamer likelihoods for all 20 amino acid types at each position in a protein structure, where output typically includes, for each residue position, the likelihoods for the 20 amino acid types with estimated reliability of the 20 likelihoods.
+ Side chain modelling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model loop conformation in protein structures.
+ Protein loop modelling
+ Protein modelling (loops)
+
+
+ Loop modelling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Model protein-ligand (for example protein-peptide) binding using comparative modelling or other techniques.
+ Ligand-binding simulation
+ Protein-peptide docking
+
+
+ Methods aim to predict the position and orientation of a ligand bound to a protein receptor or enzyme.
+ Virtual screening is used in drug discovery to search libraries of small molecules in order to identify those molecules which are most likely to bind to a drug target (typically a protein receptor or enzyme).
+ Protein-ligand docking
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict or optimise RNA sequences (sequence pools) with likely secondary and tertiary structure for in vitro selection.
+ Nucleic acid folding family identification
+ Structured RNA prediction and optimisation
+
+
+ RNA inverse folding
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Find single nucleotide polymorphisms (SNPs) - single nucleotide change in base positions - between sequences. Typically done for sequences from a high-throughput sequencing experiment that differ from a reference genome and which might, especially by reference to population frequency or functional data, indicate a polymorphism.
+ SNP calling
+ SNP discovery
+ Single nucleotide polymorphism detection
+
+
+ This includes functional SNPs for large-scale genotyping purposes, disease-associated non-synonymous SNPs etc.
+ SNP detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a physical (radiation hybrid) map of genetic markers in a DNA sequence using provided radiation hybrid (RH) scores for one or more markers.
+
+
+ Radiation Hybrid Mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Map the genetic architecture of dynamic complex traits.
+
+ This can involve characterisation of the underlying quantitative trait loci (QTLs) or nucleotides (QTNs).
+ Functional mapping
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Infer haplotypes, either alleles at multiple loci that are transmitted together on the same chromosome, or a set of single nucleotide polymorphisms (SNPs) on a single chromatid that are statistically associated.
+ Haplotype inference
+ Haplotype map generation
+ Haplotype reconstruction
+
+
+ Haplotype inference can help in population genetic studies and the identification of complex disease genes, and is typically based on aligned single nucleotide polymorphism (SNP) fragments. Haplotype comparison is a useful way to characterize the genetic variation between individuals. An individual's haplotype describes which nucleotide base occurs at each position for a set of common SNPs. Tools might use combinatorial functions (for example parsimony) or a likelihood function or model with optimisation such as minimum error correction (MEC) model, expectation-maximisation algorithm (EM), genetic algorithm or Markov chain Monte Carlo (MCMC).
+ Haplotype mapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate linkage disequilibrium; the non-random association of alleles or polymorphisms at two or more loci (not necessarily on the same chromosome).
+
+
+ Linkage disequilibrium is identified where a combination of alleles (or genetic markers) occurs more or less frequently in a population than expected by chance formation of haplotypes.
+ Linkage disequilibrium calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict genetic code from analysis of codon usage data.
+
+
+ Genetic code prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a representation of a distribution that consists of a group of data points plotted on a simple scale.
+ Categorical plot plotting
+ Dotplot plotting
+
+
+ Dot plots are useful when having not too many (e.g. 20) data points for each category. Example: draw a dotplot of sequence similarities identified from word-matching or character comparison.
+ Dot plot plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align exactly two molecular sequences.
+ Pairwise alignment
+
+
+ Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Pairwise sequence alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align more than two molecular sequences.
+ Multiple alignment
+
+
+ This includes methods that use an existing alignment, for example to incorporate sequences into an alignment, or combine several multiple alignments into a single, improved alignment.
+ Multiple sequence alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Locally align exactly two molecular sequences.
+
+ Local alignment methods identify regions of local similarity.
+ Pairwise sequence alignment generation (local)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Globally align exactly two molecular sequences.
+
+ Global alignment methods identify similarity across the entire length of the sequences.
+ Pairwise sequence alignment generation (global)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Locally align two or more molecular sequences.
+ Local sequence alignment
+ Sequence alignment (local)
+ Smith-Waterman
+
+
+ Local alignment methods identify regions of local similarity.
+ Local alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Globally align two or more molecular sequences.
+ Global sequence alignment
+ Sequence alignment (global)
+
+
+ Global alignment methods identify similarity across the entire length of the sequences.
+ Global alignment
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Align two or more molecular sequences with user-defined constraints.
+
+
+ Constrained sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+ Align two or more molecular sequences using multiple methods to achieve higher quality.
+
+
+ Consensus-based sequence alignment
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align multiple sequences using relative gap costs calculated from neighbors in a supplied phylogenetic tree.
+ Multiple sequence alignment (phylogenetic tree-based)
+ Multiple sequence alignment construction (phylogenetic tree-based)
+ Phylogenetic tree-based multiple sequence alignment construction
+ Sequence alignment (phylogenetic tree-based)
+ Sequence alignment generation (phylogenetic tree-based)
+
+
+ This is supposed to give a more biologically meaningful alignment than standard alignments.
+ Tree-based sequence alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Align molecular secondary structure (represented as a 1D string).
+
+ Secondary structure alignment generation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Align protein secondary structures.
+
+
+ Protein secondary structure alignment generation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align RNA secondary structures.
+ RNA secondary structure alignment construction
+ RNA secondary structure alignment generation
+ Secondary structure alignment construction (RNA)
+
+
+ RNA secondary structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (superimpose) exactly two molecular tertiary structures.
+ Structure alignment (pairwise)
+ Pairwise protein structure alignment
+
+
+ Pairwise structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Align (superimpose) more than two molecular tertiary structures.
+ Structure alignment (multiple)
+ Multiple protein structure alignment
+
+
+ This includes methods that use an existing alignment.
+ Multiple structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Align protein tertiary structures.
+
+ Structure alignment (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Align RNA tertiary structures.
+
+ Structure alignment (RNA)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Locally align (superimpose) exactly two molecular tertiary structures.
+
+ Local alignment methods identify regions of local similarity, common substructures etc.
+ Pairwise structure alignment generation (local)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Globally align (superimpose) exactly two molecular tertiary structures.
+
+ Global alignment methods identify similarity across the entire structures.
+ Pairwise structure alignment generation (global)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Locally align (superimpose) two or more molecular tertiary structures.
+ Structure alignment (local)
+ Local protein structure alignment
+
+
+ Local alignment methods identify regions of local similarity, common substructures etc.
+ Local structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Globally align (superimpose) two or more molecular tertiary structures.
+ Structure alignment (global)
+ Global protein structure alignment
+
+
+ Global alignment methods identify similarity across the entire structures.
+ Global structure alignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+
+ Align exactly two molecular profiles.
+
+ Methods might perform one-to-one, one-to-many or many-to-many comparisons.
+ Profile-profile alignment (pairwise)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Align two or more molecular profiles.
+
+ Sequence alignment generation (multiple profile)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+
+
+ Align exactly two molecular Structural (3D) profiles.
+
+ 3D profile-to-3D profile alignment (pairwise)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+
+ Align two or more molecular 3D profiles.
+
+ Structural profile alignment generation (multiple)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search and retrieve names of or documentation on bioinformatics tools, for example by keyword or which perform a particular function.
+
+ Data retrieval (tool metadata)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Search and retrieve names of or documentation on bioinformatics databases or query terms, for example by keyword.
+
+ Data retrieval (database metadata)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for large scale sequencing.
+
+
+ PCR primer design (for large scale sequencing)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for genotyping polymorphisms, for example single nucleotide polymorphisms (SNPs).
+
+
+ PCR primer design (for genotyping polymorphisms)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for gene transcription profiling.
+
+
+ PCR primer design (for gene transcription profiling)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers that are conserved across multiple genomes or species.
+
+
+ PCR primer design (for conserved primers)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers based on gene structure.
+
+
+ PCR primer design (based on gene structure)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Predict primers for methylation PCRs.
+
+
+ PCR primer design (for methylation PCRs)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence assembly by combining fragments using an existing backbone sequence, typically a reference genome.
+ Sequence assembly (mapping assembly)
+
+
+ The final sequence will resemble the backbone sequence. Mapping assemblers are usually much faster and less memory intensive than de-novo assemblers.
+ Mapping assembly
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence assembly by combining fragments without the aid of a reference sequence or genome.
+ De Bruijn graph
+ Sequence assembly (de-novo assembly)
+
+
+ De-novo assemblers are much slower and more memory intensive than mapping assemblers.
+ De-novo assembly
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The process of assembling many short DNA sequences together such that they represent the original chromosomes from which the DNA originated.
+ Genomic assembly
+ Sequence assembly (genome assembly)
+ Breakend assembly
+
+
+ Genome assembly
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Sequence assembly for EST sequences (transcribed mRNA).
+ Sequence assembly (EST assembly)
+
+
+ Assemblers must handle (or be complicated by) alternative splicing, trans-splicing, single-nucleotide polymorphism (SNP), recoding, and post-transcriptional modification.
+ EST assembly
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Make sequence tag to gene assignments (tag mapping) of SAGE, MPSS and SBS data.
+ Tag to gene assignment
+
+
+ Sequence tag mapping assigns experimentally obtained sequence tags to known transcripts or annotates potential virtual sequence tags in a genome.
+ Sequence tag mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) serial analysis of gene expression (SAGE) data.
+
+ SAGE data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) massively parallel signature sequencing (MPSS) data.
+
+ MPSS data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) sequencing by synthesis (SBS) data.
+
+ SBS data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a heat map of expression data from e.g. microarray data.
+ Heat map construction
+ Heatmap generation
+
+
+ The heat map usually uses a coloring scheme to represent expression values. They can show how quantitative measurements were influenced by experimental conditions.
+ Heat map generation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Analyse one or more gene expression profiles, typically to interpret them in functional terms.
+
+ Gene expression profile analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Map an expression profile to known biological pathways, for example, to identify or reconstruct a pathway.
+ Pathway mapping
+ Gene expression profile pathway mapping
+ Gene to pathway mapping
+ Gene-to-pathway mapping
+
+
+ Expression profile pathway mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Assign secondary structure from protein coordinate data.
+
+
+ Protein secondary structure assignment (from coordinate data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Assign secondary structure from circular dichroism (CD) spectroscopic data.
+
+
+ Protein secondary structure assignment (from CD data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ Assign a protein tertiary structure (3D coordinates) from raw X-ray crystallography data.
+
+
+ Protein structure assignment (from X-ray crystallographic data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ Assign a protein tertiary structure (3D coordinates) from raw NMR spectroscopy data.
+
+
+ Protein structure assignment (from NMR data)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Construct a phylogenetic tree from a specific type of data.
+ Phylogenetic tree construction (data centric)
+ Phylogenetic tree generation (data centric)
+
+
+ Subconcepts of this concept reflect different types of data used to generate a tree, and provide an alternate axis for curation.
+ Phylogenetic inference (data centric)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Construct a phylogenetic tree using a specific method.
+ Phylogenetic tree construction (method centric)
+ Phylogenetic tree generation (method centric)
+
+
+ Subconcepts of this concept reflect different computational methods used to generate a tree, and provide an alternate axis for curation.
+ Phylogenetic inference (method centric)
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from molecular sequences.
+ Phylogenetic tree construction (from molecular sequences)
+ Phylogenetic tree generation (from molecular sequences)
+
+
+ Methods typically compare multiple molecular sequences and estimate evolutionary distances and relationships to infer gene families or make functional predictions.
+ Phylogenetic inference (from molecular sequences)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from continuous quantitative character data.
+ Phylogenetic tree construction (from continuous quantitative characters)
+ Phylogenetic tree generation (from continuous quantitative characters)
+
+
+ Phylogenetic inference (from continuous quantitative characters)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from gene frequency data.
+ Phylogenetic tree construction (from gene frequencies)
+ Phylogenetic tree generation (from gene frequencies)
+
+
+ Phylogenetic inference (from gene frequencies)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Phylogenetic tree construction from polymorphism data including microsatellites, RFLP (restriction fragment length polymorphisms), RAPD (random-amplified polymorphic DNA) and AFLP (amplified fragment length polymorphisms) data.
+ Phylogenetic tree construction (from polymorphism data)
+ Phylogenetic tree generation (from polymorphism data)
+
+
+ Phylogenetic inference (from polymorphism data)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic species tree, for example, from a genome-wide sequence comparison.
+ Phylogenetic species tree construction
+ Phylogenetic species tree generation
+
+
+ Species tree construction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by computing a sequence alignment and searching for the tree with the fewest number of character-state changes from the alignment.
+ Phylogenetic tree construction (parsimony methods)
+ Phylogenetic tree generation (parsimony methods)
+
+
+ This includes evolutionary parsimony (invariants) methods.
+ Phylogenetic inference (parsimony methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by computing (or using precomputed) distances between sequences and searching for the tree with minimal discrepancies between pairwise distances.
+ Phylogenetic tree construction (minimum distance methods)
+ Phylogenetic tree generation (minimum distance methods)
+
+
+ This includes neighbor joining (NJ) clustering method.
+ Phylogenetic inference (minimum distance methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by relating sequence data to a hypothetical tree topology using a model of sequence evolution.
+ Phylogenetic tree construction (maximum likelihood and Bayesian methods)
+ Phylogenetic tree generation (maximum likelihood and Bayesian methods)
+
+
+ Maximum likelihood methods search for a tree that maximizes a likelihood function, i.e. that is most likely given the data and model. Bayesian analysis estimates the probability of a tree for branch lengths and topology, typically using a Monte Carlo algorithm.
+ Phylogenetic inference (maximum likelihood and Bayesian methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by computing four-taxon trees (4-trees) and searching for the phylogeny that matches most closely.
+ Phylogenetic tree construction (quartet methods)
+ Phylogenetic tree generation (quartet methods)
+
+
+ Phylogenetic inference (quartet methods)
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a phylogenetic tree by using artificial-intelligence methods, for example genetic algorithms.
+ Phylogenetic tree construction (AI methods)
+ Phylogenetic tree generation (AI methods)
+
+
+ Phylogenetic inference (AI methods)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify a plausible model of DNA substitution that explains a molecular (DNA or protein) sequence alignment.
+ Nucleotide substitution modelling
+
+
+ DNA substitution modelling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the shape (topology) of a phylogenetic tree.
+ Phylogenetic tree analysis (shape)
+
+
+ Phylogenetic tree topology analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Apply bootstrapping or other measures to estimate confidence of a phylogenetic tree.
+
+
+ Phylogenetic tree bootstrapping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Construct a "gene tree" which represents the evolutionary history of the genes included in the study. This can be used to predict families of genes and gene function based on their position in a phylogenetic tree.
+ Phylogenetic tree analysis (gene family prediction)
+
+
+ Gene trees can provide evidence for gene duplication events, as well as speciation events. Where sequences from different homologs are included in a gene tree, subsequent clustering of the orthologs can demonstrate evolutionary history of the orthologs.
+ Gene tree construction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a phylogenetic tree to identify allele frequency distribution and change that is subject to evolutionary pressures (natural selection, genetic drift, mutation and gene flow). Identify type of natural selection (such as stabilizing, balancing or disruptive).
+ Phylogenetic tree analysis (natural selection)
+
+
+ Stabilizing/purifying (directional) selection favors a single phenotype and tends to decrease genetic diversity as a population stabilizes on a particular trait, selecting out trait extremes or deleterious mutations. In contrast, balancing selection maintains genetic polymorphisms (or multiple alleles), whereas disruptive (or diversifying) selection favors individuals at both extremes of a trait.
+ Allele frequency distribution analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees to produce a consensus tree.
+ Phylogenetic tree construction (consensus)
+ Phylogenetic tree generation (consensus)
+
+
+ Methods typically test for topological similarity between trees using for example a congruence index.
+ Consensus tree construction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees to detect subtrees or supertrees.
+ Phylogenetic sub/super tree detection
+ Subtree construction
+ Supertree construction
+
+
+ Phylogenetic sub/super tree construction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more phylogenetic trees to calculate distances between trees.
+
+
+ Phylogenetic tree distances calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Annotate a phylogenetic tree with terms from a controlled vocabulary.
+
+
+ Phylogenetic tree annotation
+ http://www.evolutionaryontology.org/cdao.owl#CDAOAnnotation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Predict and optimise peptide ligands that elicit an immunological response.
+
+
+ Immunogenicity prediction
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict or optimise DNA to elicit (via DNA vaccination) an immunological response.
+
+
+ DNA vaccine design
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Reformat (a file or other report of) molecular sequence(s).
+
+
+ Sequence formatting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Reformat (a file or other report of) molecular sequence alignment(s).
+
+
+ Sequence alignment formatting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Reformat a codon usage table.
+
+
+ Codon usage table formatting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise, format or render a molecular sequence or sequences such as a sequence alignment, possibly with sequence features or properties shown.
+ Sequence rendering
+ Sequence alignment visualisation
+
+
+ Sequence visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.15
+
+ Visualise, format or print a molecular sequence alignment.
+
+
+ Sequence alignment visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise, format or render sequence clusters.
+ Sequence cluster rendering
+
+
+ Sequence cluster visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render or visualise a phylogenetic tree.
+ Phylogenetic tree rendering
+
+
+ Phylogenetic tree visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.15
+
+ Visualise RNA secondary structure, knots, pseudoknots etc.
+
+
+ RNA secondary structure visualisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.15
+
+ Render and visualise protein secondary structure.
+
+
+ Protein secondary structure visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise or render molecular 3D structure, for example a high-quality static picture or animation.
+ Structure rendering
+ Protein secondary structure visualisation
+ RNA secondary structure visualisation
+
+
+ This includes visualisation of protein secondary structure such as knots, pseudoknots etc. as well as tertiary and quaternary structure.
+ Structure visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise microarray or other expression data.
+ Expression data rendering
+ Gene expression data visualisation
+ Microarray data rendering
+
+
+ Expression data visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Identify and analyse networks of protein interactions.
+
+
+ Protein interaction network visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Draw or visualise a DNA map.
+ DNA map drawing
+ Map rendering
+
+
+ Map drawing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Render a sequence with motifs.
+
+ Sequence motif rendering
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Draw or visualise restriction maps in DNA sequences.
+
+
+ Restriction map drawing
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Draw a linear map of DNA.
+
+ DNA linear map rendering
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA circular map rendering
+ Draw a circular map of DNA, for example a plasmid map.
+
+
+ Plasmid map drawing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise operon structure etc.
+ Operon rendering
+
+
+ Operon drawing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Identify folding families of related RNAs.
+
+ Nucleic acid folding family identification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.20
+
+ Compute energies of nucleic acid folding, e.g. minimum folding energies for DNA or RNA sequences or energy landscape of RNA mutants.
+
+
+ Nucleic acid folding energy calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Retrieve existing annotation (or documentation), typically annotation on a database entity.
+
+ Use this concept for tools which retrieve pre-existing annotations, not for example prediction methods that might make annotations.
+ Annotation retrieval
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the biological or biochemical role of a protein, or other aspects of a protein function.
+ Protein function analysis
+ Protein functional analysis
+
+
+ For functional properties that can be mapped to a sequence, use 'Sequence feature detection (protein)' instead.
+ Protein function prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the functional properties of two or more proteins.
+
+
+ Protein function comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Submit a molecular sequence to a database.
+
+ Sequence submission
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a known network of gene regulation.
+ Gene regulatory network comparison
+ Gene regulatory network modelling
+ Regulatory network comparison
+ Regulatory network modelling
+
+
+ Gene regulatory network analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:UploadPDB
+ Parse, prepare or load a user-specified data file so that it is available for use.
+ Data loading
+ Loading
+
+
+ Parsing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Query a sequence data resource (typically a database) and retrieve sequences and / or annotation.
+
+ This includes direct retrieval methods (e.g. the dbfetch program) but not those that perform calculations on the sequence.
+ Sequence retrieval
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ WHATIF:DownloadPDB
+ WHATIF:EchoPDB
+ Query a tertiary structure data resource (typically a database) and retrieve structures, structure-related data and annotation.
+
+ This includes direct retrieval methods but not those that perform calculations on the sequence or structure.
+ Structure retrieval
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:GetSurfaceDots
+ Calculate the positions of dots that are homogeneously distributed over the surface of a molecule.
+
+
+ A dot has three coordinates (x,y,z) and (typically) a color.
+ Surface rendering
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible surface') for each atom in a structure.
+
+
+ Waters are not considered.
+ Protein atom surface calculation (accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible molecular surface') for each atom in a structure.
+
+
+ Waters are not considered.
+ Protein atom surface calculation (accessible molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible surface') for each residue in a structure.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('vacuum accessible surface') for each residue in a structure. This is the accessibility of the residue when taken out of the protein together with the backbone atoms of any residue it is covalently bound to.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (vacuum accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible molecular surface') for each residue in a structure.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (accessible molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('vacuum molecular surface') for each residue in a structure. This is the accessibility of the residue when taken out of the protein together with the backbone atoms of any residue it is covalently bound to.
+
+
+ Solvent accessibility might be calculated for the backbone, sidechain and total (backbone plus sidechain).
+ Protein residue surface calculation (vacuum molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible molecular surface') for a structure as a whole.
+
+
+ Protein surface calculation (accessible molecular)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility ('accessible surface') for a structure as a whole.
+
+
+ Protein surface calculation (accessible)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate for each residue in a protein structure all its backbone torsion angles.
+
+
+ Backbone torsion angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate for each residue in a protein structure all its torsion angles.
+
+
+ Full torsion angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate for each cysteine (bridge) all its torsion angles.
+
+
+ Cysteine torsion angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ For each amino acid in a protein structure calculate the backbone angle tau.
+
+
+ Tau is the backbone angle N-Calpha-C (angle over the C-alpha).
+ Tau angle calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowCysteineBridge
+ Detect cysteine bridges (from coordinate data) in a protein structure.
+
+
+ Cysteine bridge detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowCysteineFree
+ Detect free cysteines in a protein structure.
+
+
+ A free cysteine is neither involved in a cysteine bridge, nor functions as a ligand to a metal.
+ Free cysteine detection
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowCysteineMetal
+ Detect cysteines that are bound to metal in a protein structure.
+
+
+ Metal-bound cysteine detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate protein residue contacts with nucleic acids in a structure.
+
+
+ Residue contact calculation (residue-nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate protein residue contacts with metal in a structure.
+ Residue-metal contact calculation
+
+
+ Protein-metal contact calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate ion contacts in a structure (all ions for all side chain atoms).
+
+
+ Residue contact calculation (residue-negative ion)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:ShowBumps
+ Detect 'bumps' between residues in a structure, i.e. those with pairs of atoms whose Van der Waals' radii interpenetrate more than a defined distance.
+
+
+ Residue bump detection
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF:SymmetryContact
+ Calculate the number of symmetry contacts made by residues in a protein structure.
+
+
+ A symmetry contact is a contact between two atoms in different asymmetric units.
+ Residue symmetry contact calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate contacts between residues and ligands in a protein structure.
+
+
+ Residue contact calculation (residue-ligand)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF:HasSaltBridge
+ WHATIF:HasSaltBridgePlus
+ WHATIF:ShowSaltBridges
+ WHATIF:ShowSaltBridgesH
+ Calculate (and possibly score) salt bridges in a protein structure.
+
+
+ Salt bridges are interactions between oppositely charged atoms in different residues. The output might include the inter-atomic distance.
+ Salt bridge calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF:ShowLikelyRotamers
+ WHATIF:ShowLikelyRotamers100
+ WHATIF:ShowLikelyRotamers200
+ WHATIF:ShowLikelyRotamers300
+ WHATIF:ShowLikelyRotamers400
+ WHATIF:ShowLikelyRotamers500
+ WHATIF:ShowLikelyRotamers600
+ WHATIF:ShowLikelyRotamers700
+ WHATIF:ShowLikelyRotamers800
+ WHATIF:ShowLikelyRotamers900
+ Predict rotamer likelihoods for all 20 amino acid types at each position in a protein structure.
+
+
+ Output typically includes, for each residue position, the likelihoods for the 20 amino acid types with estimated reliability of the 20 likelihoods.
+ Rotamer likelihood prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF:ProlineMutationValue
+ Calculate for each position in a protein structure the chance that a proline, when introduced at this position, would increase the stability of the whole protein.
+
+
+ Proline mutation value calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: PackingQuality
+ Identify poorly packed residues in protein structures.
+
+
+ Residue packing validation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: ImproperQualityMax
+ WHATIF: ImproperQualitySum
+ Validate protein geometry, for example bond lengths, bond angles, torsion angles, chiralities, planarities etc. An example is validation of a Ramachandran plot of a protein structure.
+ Ramachandran plot validation
+
+
+ Protein geometry validation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ WHATIF: PDB_sequence
+ Extract a molecular sequence from a PDB file.
+
+
+ PDB file sequence retrieval
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify HET groups in PDB files.
+
+
+ A HET group usually corresponds to ligands, lipids, but might also (not consistently) include groups that are attached to amino acids. Each HET group is supposed to have a unique three letter code and a unique name which might be given in the output.
+ HET group detection
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Determine for each residue the DSSP-determined secondary structure in three-state (HSC).
+
+ DSSP secondary structure assignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ WHATIF: PDBasXML
+ Reformat (a file or other report of) tertiary structure data.
+
+
+ Structure formatting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign cysteine bonding state and disulfide bond partners in protein structures.
+
+
+ Protein cysteine and disulfide bond assignment
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify poor quality amino acid positions in protein structures.
+
+
+ Residue validation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ WHATIF:MovedWaterPDB
+ Query a tertiary structure database and retrieve water molecules.
+
+ Structure retrieval (water)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict siRNA duplexes in RNA.
+
+
+ siRNA duplex prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Refine an existing sequence alignment.
+
+
+ Sequence alignment refinement
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process an EMBOSS listfile (list of EMBOSS Uniform Sequence Addresses).
+
+ Listfile processing
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform basic (non-analytical) operations on a report or file of sequences (which might include features), such as file concatenation, removal or ordering of sequences, creation of subset or a new file of sequences.
+
+
+ Sequence file editing
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Perform basic (non-analytical) operations on a sequence alignment file, such as copying or removal and ordering of sequences.
+
+ Sequence alignment file processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) physicochemical property data for small molecules.
+
+ Small molecule data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Search and retrieve documentation on a bioinformatics ontology.
+
+ Data retrieval (ontology annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query an ontology and retrieve concepts or relations.
+
+ Data retrieval (ontology concept)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify a representative sequence from a set of sequences, typically using scores from pair-wise alignment or other comparison of the sequences.
+
+
+ Representative sequence identification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Perform basic (non-analytical) operations on a file of molecular tertiary structural data.
+
+ Structure file processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query a profile data resource and retrieve one or more profile(s) and / or associated annotation.
+
+ This includes direct retrieval methods that retrieve a profile by, e.g. the profile name.
+ Data retrieval (sequence profile)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Perform a statistical data operation of some type, e.g. calibration or validation.
+ Significance testing
+ Statistical analysis
+ Statistical test
+ Statistical testing
+ Expectation maximisation
+ Gibbs sampling
+ Hypothesis testing
+ Omnibus test
+
+
+ Statistical calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a 3D-1D scoring matrix from analysis of protein sequence and structural data.
+ 3D-1D scoring matrix construction
+
+
+ A 3D-1D scoring matrix scores the probability of amino acids occurring in different structural environments.
+ 3D-1D scoring matrix generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise transmembrane proteins, typically the transmembrane regions within a sequence.
+ Transmembrane protein rendering
+
+
+ Transmembrane protein visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ An operation performing purely illustrative (pedagogical) purposes.
+
+ Demonstration
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query a biological pathways database and retrieve annotation on one or more pathways.
+
+ Data retrieval (pathway or network)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Query a database and retrieve one or more data identifiers.
+
+ Data retrieval (identifier)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate a density plot (of base composition) for a nucleotide sequence.
+
+
+ Nucleic acid density plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse one or more known molecular sequences.
+ Sequence analysis (general)
+
+
+ Sequence analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse molecular sequence motifs.
+ Sequence motif processing
+
+
+ Sequence motif analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) protein interaction data.
+
+ Protein interaction data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse protein structural data.
+ Structure analysis (protein)
+
+
+ Protein structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) annotation of some type, typically annotation on an entry from a biological or biomedical database entity.
+
+ Annotation processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse features in molecular sequences.
+
+ Sequence feature analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Basic (non-analytical) operations of some data, either a file or equivalent entity in memory, such that the same basic type of data is consumed as input and generated as output.
+ File handling
+ File processing
+ Report handling
+ Utility operation
+ Processing
+
+
+ Data handling
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse gene expression and regulation data.
+
+ Gene expression analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) one or more structural (3D) profile(s) or template(s) of some type.
+
+ Structural profile processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) an index of (typically a file of) biological data.
+
+ Data index processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) some type of sequence profile.
+
+ Sequence profile processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Analyse protein function, typically by processing protein sequence and/or structural data, and generate an informative report.
+
+
+ Protein function analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse, simulate or predict protein folding, typically by processing sequence and / or structural data. For example, predict sites of nucleation or stabilisation key to protein folding.
+ Protein folding modelling
+ Protein folding simulation
+ Protein folding site prediction
+
+
+ Protein folding analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse protein secondary structure data.
+ Secondary structure analysis (protein)
+
+
+ Protein secondary structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) data on the physicochemical property of a molecule.
+
+ Physicochemical property data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict oligonucleotide primers or probes.
+ Primer and probe prediction
+
+
+ Primer and probe design
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Process (read and / or write) data of a specific type, for example applying analytical methods.
+
+
+ Operation (typed)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Search a database (or other data resource) with a supplied query and retrieve entries (or parts of entries) that are similar to the query.
+ Search
+
+
+ Typically the query is compared to each entry and high scoring matches (hits) are returned. For example, a BLAST search of a sequence database.
+ Database search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Retrieve an entry (or part of an entry) from a data resource that matches a supplied query. This might include some primary data and annotation. The query is a data identifier or other indexed term. For example, retrieve a sequence record with the specified accession number, or matching supplied keywords.
+ Data extraction
+ Retrieval
+ Data retrieval (metadata)
+ Metadata retrieval
+
+
+ Data retrieval
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Predict, recognise, detect or identify some properties of a biomolecule.
+ Detection
+ Prediction
+ Recognition
+
+
+ Prediction and recognition
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Compare two or more things to identify similarities.
+
+
+ Comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Refine or optimise some data model.
+
+
+ Optimisation and refinement
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Model or simulate some biological entity or system, typically using mathematical techniques including dynamical systems, statistical models, differential equations, and game theoretic models.
+ Mathematical modelling
+
+
+ Modelling and simulation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ Perform basic operations on some data or a database.
+
+
+ Data handling
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Validate some data.
+ Quality control
+
+
+ Validation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Map properties to positions on a biological entity (typically a molecular sequence or structure), or assemble such an entity from constituent parts.
+ Cartography
+
+
+ Mapping
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Design a biological entity (typically a molecular sequence or structure) with specific properties.
+
+
+ Design
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Process (read and / or write) microarray data.
+
+ Microarray data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Process (read and / or write) a codon usage table.
+
+ Codon usage table processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve a codon usage table and / or associated annotation.
+
+ Data retrieval (codon usage table)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a gene expression profile.
+
+ Gene expression profile processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Gene set testing
+ Identify classes of genes or proteins that are over or under-represented in a large set of genes or proteins. For example analysis of a set of genes corresponding to a gene expression profile, annotated with Gene Ontology (GO) concepts, where eventual over-/under-representation of certain GO concept within the studied set of genes is revealed.
+ Functional enrichment analysis
+ GSEA
+ Gene-set over-representation analysis
+ Gene set analysis
+ GO-term enrichment
+ Gene Ontology concept enrichment
+ Gene Ontology term enrichment
+
+
+ "Gene set analysis" (often used interchangeably or in an overlapping sense with "gene-set enrichment analysis") refers to the functional analysis (term enrichment) of a differentially expressed set of genes, rather than all genes analysed.
+ Analyse gene expression patterns to identify sets of genes that are associated with a specific trait, condition, clinical outcome etc.
+ Gene sets can be defined beforehand by biological function, chromosome locations and so on.
+ The Gene Ontology (GO) is typically used, the input is a set of Gene IDs, and the output of the analysis is typically a ranked list of GO concepts, each associated with a p-value.
+ Gene-set enrichment analysis
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict a network of gene regulation.
+
+
+ Gene regulatory network prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+
+
+ Generate, analyse or handle a biological pathway or network.
+
+ Pathway or network processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Process (read and / or write) RNA secondary structure data.
+
+
+ RNA secondary structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ Process (read and / or write) RNA tertiary structure data.
+
+
+ Structure processing (RNA)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict RNA tertiary structure.
+
+
+ RNA structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict DNA tertiary structure.
+
+
+ DNA structure prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Generate, process or analyse phylogenetic tree or trees.
+
+
+ Phylogenetic tree processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) protein secondary structure data.
+
+ Protein secondary structure processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a network of protein interactions.
+
+ Protein interaction network processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) one or more molecular sequences and associated annotation.
+
+ Sequence processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Process (read and / or write) a protein sequence and associated annotation.
+
+
+ Sequence processing (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a nucleotide sequence and associated annotation.
+
+ Sequence processing (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more molecular sequences.
+
+
+ Sequence comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a sequence cluster.
+
+ Sequence cluster processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a sequence feature table.
+
+ Feature table processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Detect, predict and identify genes or components of genes in DNA sequences, including promoters, coding regions, splice sites, etc.
+ Gene calling
+ Gene finding
+ Whole gene prediction
+
+
+ Includes methods that predict whole gene structure using a combination of multiple methods to achieve better predictions.
+ Methods for gene prediction might be ab initio, based on phylogenetic comparisons, use motifs, sequence features, support vector machine, alignment etc.
+ Gene prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+ Classify G-protein coupled receptors (GPCRs) into families and subfamilies.
+
+
+ GPCR classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Not sustainable to have protein type-specific concepts.
+ 1.19
+
+
+ Predict G-protein coupled receptor (GPCR) coupling selectivity.
+
+ GPCR coupling selectivity prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Process (read and / or write) a protein tertiary structure.
+
+
+ Structure processing (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility for each atom in a structure.
+
+
+ Waters are not considered.
+ Protein atom surface calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility for each residue in a structure.
+
+
+ Protein residue surface calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate the solvent accessibility of a structure as a whole.
+
+
+ Protein surface calculation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular sequence alignment.
+
+ Sequence alignment processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict protein-protein binding sites.
+ Protein-protein binding site detection
+
+
+ Protein-protein binding site prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular tertiary structure.
+
+ Structure processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Annotate a DNA map of some type with terms from a controlled vocabulary.
+
+ Map annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a protein.
+
+ Data retrieval (protein annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve a phylogenetic tree from a data resource.
+
+ Data retrieval (phylogenetic tree)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a protein interaction.
+
+ Data retrieval (protein interaction annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a protein family.
+
+ Data retrieval (protein family annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on an RNA family.
+
+ Data retrieval (RNA family annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a specific gene.
+
+ Data retrieval (gene annotation)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Retrieve information on a specific genotype or phenotype.
+
+ Data retrieval (genotype and phenotype annotation)
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the architecture of two or more protein structures.
+
+
+ Protein architecture comparison
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify the architecture of a protein structure.
+
+
+ Includes methods that try to suggest the most likely biological unit for a given protein X-ray crystal structure based on crystal symmetry and scoring of putative protein-protein interfaces.
+ Protein architecture recognition
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The simulation of molecular (typically protein) conformation using a computational model of physical forces and computer simulation.
+ Molecular dynamics simulation
+ Protein dynamics
+
+
+ Molecular dynamics
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a nucleic acid sequence (using methods that are only applicable to nucleic acid sequences).
+ Sequence analysis (nucleic acid)
+ Nucleic acid sequence alignment analysis
+ Sequence alignment analysis (nucleic acid)
+
+
+ Nucleic acid sequence analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse a protein sequence (using methods that are only applicable to protein sequences).
+ Sequence analysis (protein)
+ Protein sequence alignment analysis
+ Sequence alignment analysis (protein)
+
+
+ Protein sequence analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse known molecular tertiary structures.
+
+
+ Structure analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse nucleic acid tertiary structural data.
+
+
+ Nucleic acid structure analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular secondary structure.
+
+ Secondary structure processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more molecular tertiary structures.
+
+
+ Structure comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a helical wheel representation of protein secondary structure.
+ Helical wheel rendering
+
+
+ Helical wheel drawing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a topology diagram of protein secondary structure.
+ Topology diagram rendering
+
+
+ Topology diagram drawing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare protein tertiary structures.
+ Structure comparison (protein)
+
+
+ Methods might identify structural neighbors, find structural similarities or define a structural core.
+ Protein structure comparison
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare protein secondary structures.
+ Protein secondary structure
+ Secondary structure comparison (protein)
+ Protein secondary structure alignment
+
+
+ Protein secondary structure comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the subcellular localisation of a protein sequence.
+ Protein cellular localization prediction
+ Protein subcellular localisation prediction
+ Protein targeting prediction
+
+
+ The prediction might include subcellular localisation (nuclear, cytoplasmic, mitochondrial, chloroplast, plastid, membrane etc) or export (extracellular proteins) of a protein.
+ Subcellular localisation prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Calculate contacts between residues in a protein structure.
+
+
+ Residue contact calculation (residue-residue)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Identify potential hydrogen bonds between amino acid residues.
+
+
+ Hydrogen bond calculation (inter-residue)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict the interactions of proteins with other proteins.
+ Protein-protein interaction detection
+ Protein-protein binding prediction
+ Protein-protein interaction prediction
+
+
+ Protein interaction prediction
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) codon usage data.
+
+ Codon usage data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Process (read and/or write) expression data from experiments measuring molecules (e.g. omics data), including analysis of one or more expression profiles, typically to interpret them in functional terms.
+ Expression data analysis
+ Gene expression analysis
+ Gene expression data analysis
+ Gene expression regulation analysis
+ Metagenomic inference
+ Microarray data analysis
+ Protein expression analysis
+
+
+ Metagenomic inference is the profiling of phylogenetic marker genes in order to predict metagenome function.
+ Expression analysis
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+ Process (read and / or write) a network of gene regulation.
+
+
+ Gene regulatory network processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+ Generate, process or analyse a biological pathway or network.
+
+ Pathway or network analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse SAGE, MPSS or SBS experimental data, typically to identify or quantify mRNA transcripts.
+
+ Sequencing-based expression profile data analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Predict, analyse, characterize or model splice sites, splicing events and so on, typically by comparing multiple nucleic acid sequences.
+ Splicing model analysis
+
+
+ Splicing analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Analyse raw microarray data.
+
+ Microarray raw data analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+
+
+ Process (read and / or write) nucleic acid sequence or structural data.
+
+ Nucleic acid analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+
+
+ Process (read and / or write) protein sequence or structural data.
+
+ Protein analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+ Process (read and / or write) molecular sequence data.
+
+
+ Sequence data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) molecular structural data.
+
+ Structural data processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Process (read and / or write) text.
+
+ Text processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Analyse a protein sequence alignment, typically to detect features or make predictions.
+
+
+ Protein sequence alignment analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Analyse a nucleic acid sequence alignment, typically to detect features or make predictions.
+
+
+ Nucleic acid sequence alignment analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Compare two or more nucleic acid sequences.
+
+
+ Nucleic acid sequence comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Compare two or more protein sequences.
+
+
+ Protein sequence comparison
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Back-translate a protein sequence into DNA.
+
+
+ DNA back-translation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Edit or change a nucleic acid sequence, either randomly or specifically.
+
+
+ Sequence editing (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Edit or change a protein sequence, either randomly or specifically.
+
+
+ Sequence editing (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Generate a nucleic acid sequence by some means.
+
+
+ Sequence generation (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Generate a protein sequence by some means.
+
+
+ Sequence generation (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Visualise, format or render a nucleic acid sequence.
+
+
+ Various nucleic acid sequence analysis methods might generate a sequence rendering but are not (for brevity) listed under here.
+ Nucleic acid sequence visualisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Visualise, format or render a protein sequence.
+
+
+ Various protein sequence analysis methods might generate a sequence rendering but are not (for brevity) listed under here.
+ Protein sequence visualisation
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare nucleic acid tertiary structures.
+ Structure comparison (nucleic acid)
+
+
+ Nucleic acid structure comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) nucleic acid tertiary structure data.
+
+ Structure processing (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a map of a DNA sequence annotated with positional or non-positional features of some type.
+
+
+ DNA mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a DNA map of some type.
+
+ Map data processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the hydrophobic, hydrophilic or charge properties of a protein (from analysis of sequence or structural information).
+
+
+ Protein hydropathy calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Identify or predict catalytic residues, active sites or other ligand-binding sites in protein sequences or structures.
+ Protein binding site detection
+ Protein binding site prediction
+
+
+ Binding site prediction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Build clusters of similar structures, typically using scores from structural alignment methods.
+ Structural clustering
+
+
+ Structure clustering
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a physical DNA map (sequence map) from analysis of sequence tagged sites (STS).
+ Sequence mapping
+
+
+ An STS is a short subsequence of known sequence and location that occurs only once in the chromosome or genome that is being mapped. Sources of STSs include expressed sequence tags (ESTs), simple sequence length polymorphisms (SSLPs), and random genomic sequences from cloned genomic DNA or database sequences.
+ Sequence tagged site (STS) mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Compare two or more entities, typically the sequence or structure (or derivatives) of macromolecules, to identify equivalent subunits.
+ Alignment construction
+ Alignment generation
+
+
+ Alignment
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the molecular weight of a protein (or fragments) and compare it to another protein or reference data. Generally used for protein identification.
+ PMF
+ Peptide mass fingerprinting
+ Protein fingerprinting
+
+
+ Protein fragment weight comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare the physicochemical properties of two or more proteins (or reference data).
+
+
+ Protein property comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+
+
+ Compare two or more molecular secondary structures.
+
+ Secondary structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ Generate a Hopp and Woods plot of antigenicity of a protein.
+
+
+ Hopp and Woods plotting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Generate a view of clustered quantitative data, annotated with textual information.
+
+
+ Cluster textual view generation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise clustered quantitative data as a set of different profiles, where each profile is plotted versus different entities or samples on the X-axis.
+ Clustered quantitative data plotting
+ Clustered quantitative data rendering
+ Wave graph plotting
+ Microarray cluster temporal graph rendering
+ Microarray wave graph plotting
+ Microarray wave graph rendering
+
+
+ In the case of microarray data, visualise clustered gene expression data as a set of profiles, where each profile shows the gene expression values of a cluster across samples on the X-axis.
+ Clustering profile plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Generate a dendrograph of raw, preprocessed or clustered expression (e.g. microarray) data.
+
+
+ Dendrograph plotting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a plot of distances (distance or correlation matrix) between expression values.
+ Distance map rendering
+ Distance matrix plotting
+ Distance matrix rendering
+ Proximity map rendering
+ Correlation matrix plotting
+ Correlation matrix rendering
+ Microarray distance map rendering
+ Microarray proximity map plotting
+ Microarray proximity map rendering
+
+
+ Proximity map plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise clustered expression data using a tree diagram.
+ Dendrogram plotting
+ Dendrograph plotting
+ Dendrograph visualisation
+ Expression data tree or dendrogram rendering
+ Expression data tree visualisation
+ Microarray 2-way dendrogram rendering
+ Microarray checks view rendering
+ Microarray matrix tree plot rendering
+ Microarray tree or dendrogram rendering
+
+
+ Dendrogram visualisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualize the results of a principal component analysis (orthogonal data transformation). For example, visualization of the principal components (essential subspace) coming from a Principal Component Analysis (PCA) on the trajectory atomistic coordinates of a molecular structure.
+ PCA plotting
+ Principal component plotting
+ ED visualization
+ Essential Dynamics visualization
+ Microarray principal component plotting
+ Microarray principal component rendering
+ PCA visualization
+ Principal modes visualization
+
+
+ Examples for visualization are the distribution of variance over the components, loading and score plots.
+ The use of Principal Component Analysis (PCA), a multivariate statistical analysis to obtain collective variables on the atomic positional fluctuations, helps to separate the configurational space in two subspaces: an essential subspace containing relevant motions, and another one containing irrelevant local fluctuations.
+ Principal component visualisation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Render a graph in which the values of two variables are plotted along two axes; the pattern of the points reveals any correlation.
+ Scatter chart plotting
+ Microarray scatter plot plotting
+ Microarray scatter plot rendering
+
+
+ Comparison of two sets of quantitative data such as two samples of gene expression values.
+ Scatter plot plotting
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.18
+
+ Visualise gene expression data where each band (or line graph) corresponds to a sample.
+
+
+ Whole microarray graph plotting
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Visualise gene expression data after hierarchical clustering for representing hierarchical relationships.
+ Expression data tree-map rendering
+ Treemapping
+ Microarray tree-map rendering
+
+
+ Treemap visualisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a box plot, i.e. a depiction of groups of numerical data through their quartiles.
+ Box plot plotting
+ Microarray Box-Whisker plot plotting
+
+
+ In the case of microarray data, visualise raw and pre-processed gene expression data, via a plot showing over- and under-expression along with mean, upper and lower quartiles.
+ Box-Whisker plot plotting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Generate a physical (sequence) map of a DNA sequence showing the physical distance (base pairs) between features or landmarks such as restriction sites, cloned DNA fragments, genes and other genetic markers.
+ Physical cartography
+
+
+ Physical mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Apply analytical methods to existing data of a specific type.
+
+
+ This excludes non-analytical methods that read and write the same basic type of data (for that, see 'Data handling').
+ Analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+
+ Process or analyse an alignment of molecular sequences or structures.
+
+ Alignment analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.16
+
+
+
+ Analyse a body of scientific text (typically a full text article from a scientific journal).
+
+ Article analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Analyse the interactions of two or more molecules (or parts of molecules) that are known to interact.
+
+ Molecular interaction analysis
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Analyse the interactions of proteins with other proteins.
+ Protein interaction analysis
+ Protein interaction raw data analysis
+ Protein interaction simulation
+
+
+ Includes analysis of raw experimental protein-protein interaction data from for example yeast two-hybrid analysis, protein microarrays, immunoaffinity chromatography followed by mass spectrometry, phage display etc.
+ Protein-protein interaction analysis
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ WHATIF: HETGroupNames
+ WHATIF:HasMetalContacts
+ WHATIF:HasMetalContactsPlus
+ WHATIF:HasNegativeIonContacts
+ WHATIF:HasNegativeIonContactsPlus
+ WHATIF:HasNucleicContacts
+ WHATIF:ShowDrugContacts
+ WHATIF:ShowDrugContactsShort
+ WHATIF:ShowLigandContacts
+ WHATIF:ShowProteiNucleicContacts
+ Calculate contacts between residues, or between residues and other groups, in a protein structure, on the basis of distance calculations.
+ HET group detection
+ Residue contact calculation (residue-ligand)
+ Residue contact calculation (residue-metal)
+ Residue contact calculation (residue-negative ion)
+ Residue contact calculation (residue-nucleic acid)
+ WHATIF:SymmetryContact
+
+
+ This includes identifying HET groups, which usually correspond to ligands, lipids, but might also (not consistently) include groups that are attached to amino acids. Each HET group is supposed to have a unique three letter code and a unique name which might be given in the output. It can also include calculation of symmetry contacts, i.e. a contact between two atoms in different asymmetric unit.
+ Residue distance calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+
+ Process (read and / or write) an alignment of two or more molecular sequences, structures or derived data.
+
+ Alignment processing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.6
+
+
+ Process (read and / or write) a molecular tertiary (3D) structure alignment.
+
+ Structure alignment processing
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate codon usage bias, e.g. generate a codon usage bias plot.
+ Codon usage bias plotting
+
+
+ Codon usage bias calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.22
+
+ Generate a codon usage bias plot.
+
+
+ Codon usage bias plotting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Calculate the differences in codon usage fractions between two sequences, sets of sequences, codon usage tables etc.
+
+
+ Codon usage fraction calculation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Assign molecular sequences, structures or other biological data to a specific group or category according to qualities it shares with that group or category.
+
+
+ Classification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Process (read and / or write) molecular interaction data.
+
+ Molecular interaction data processing
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign molecular sequence(s) to a group or category.
+
+
+ Sequence classification
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Assign molecular structure(s) to a group or category.
+
+
+ Structure classification
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more proteins (or some aspect) to identify similarities.
+
+
+ Protein comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Compare two or more nucleic acids to identify similarities.
+
+
+ Nucleic acid comparison
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict, recognise, detect or identify some properties of proteins.
+
+
+ Prediction and recognition (protein)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.19
+
+ Predict, recognise, detect or identify some properties of nucleic acids.
+
+
+ Prediction and recognition (nucleic acid)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ Edit, convert or otherwise change a molecular tertiary structure, either randomly or specifically.
+
+
+ Structure editing
+
+
+
+
+
+
+
+
+ beta13
+ Edit, convert or otherwise change a molecular sequence alignment, either randomly or specifically.
+
+
+ Sequence alignment editing
+
+
+
+
+
+
+
+
+ beta13
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+ Render (visualise) a biological pathway or network.
+
+ Pathway or network visualisation
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.6
+
+
+ Predict general (non-positional) functional properties of a protein from analysing its sequence.
+
+ For functional properties that are positional, use 'Protein site detection' instead.
+ Protein function prediction (from sequence)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ (jison)This is a distinction made on basis of input; all features exist can be mapped to a sequence so this isn't needed (consolidate with "Protein feature detection").
+ 1.17
+
+
+
+ Predict, recognise and identify functional or other key sites within protein sequences, typically by scanning for known motifs, patterns and regular expressions.
+
+
+ Protein sequence feature detection
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.18
+
+
+ Calculate (or predict) physical or chemical properties of a protein, including any non-positional properties of the molecular sequence, from processing a protein sequence.
+
+
+ Protein property calculation (from sequence)
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.6
+
+
+ Predict, recognise and identify positional features in proteins from analysing protein structure.
+
+ Protein feature prediction (from structure)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta13
+ Predict, recognise and identify positional features in proteins from analysing protein sequences or structures.
+ Protein feature prediction
+ Protein feature recognition
+ Protein secondary database search
+ Protein site detection
+ Protein site prediction
+ Protein site recognition
+ Sequence feature detection (protein)
+ Sequence profile database search
+
+
+ Features includes functional sites or regions, secondary structure, structural domains and so on. Methods might use fingerprints, motifs, profiles, hidden Markov models, sequence alignment etc to provide a mapping of a query protein sequence to a discriminatory element. This includes methods that search a secondary protein database (Prosite, Blocks, ProDom, Prints, Pfam etc.) to assign a protein sequence(s) to a known protein family or group.
+ Protein feature detection
+
+
+
+
+
+
+
+
+ beta13
+ 1.6
+
+
+ Screen a molecular sequence(s) against a database (of some type) to identify similarities between the sequence and database entries.
+
+ Database search (by sequence)
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+ Predict a network of protein interactions.
+
+
+ Protein interaction network prediction
+
+
+
+
+
+
+
+
+
+ beta13
+ Design (or predict) nucleic acid sequences with specific chemical or physical properties.
+ Gene design
+
+
+ Nucleic acid design
+
+
+
+
+
+
+
+
+
+ beta13
+ Edit a data entity, either randomly or specifically.
+
+
+ Editing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Evaluate a DNA sequence assembly, typically for purposes of quality control.
+ Assembly QC
+ Assembly quality evaluation
+ Sequence assembly QC
+ Sequence assembly quality evaluation
+
+
+ Sequence assembly validation
+
+
+
+
+
+
+
+
+
+ 1.1
+ Align two or more (typically huge) molecular sequences that represent genomes.
+ Genome alignment construction
+ Whole genome alignment
+
+
+ Genome alignment
+
+
+
+
+
+
+
+
+ 1.1
+ Reconstruction of a sequence assembly in a localised area.
+
+
+ Localised reassembly
+
+
+
+
+
+
+
+
+ 1.1
+ Render and visualise a DNA sequence assembly.
+ Assembly rendering
+ Assembly visualisation
+ Sequence assembly rendering
+
+
+ Sequence assembly visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Identify base (nucleobase) sequence from a fluorescence 'trace' data generated by an automated DNA sequencer.
+ Base calling
+ Phred base calling
+ Phred base-calling
+
+
+ Base-calling
+
+
+
+
+
+
+
+
+
+ 1.1
+ The mapping of methylation sites in a DNA (genome) sequence. Typically, the mapping of high-throughput bisulfite reads to the reference genome.
+ Bisulfite read mapping
+ Bisulfite sequence alignment
+ Bisulfite sequence mapping
+
+
+ Bisulfite mapping follows high-throughput sequencing of DNA which has undergone bisulfite treatment followed by PCR amplification; unmethylated cytosines are specifically converted to thymine, allowing the methylation status of cytosine in the DNA to be detected.
+ Bisulfite mapping
+
+
+
+
+
+
+
+
+ 1.1
+ Identify and filter a (typically large) sequence data set to remove sequences from contaminants in the sample that was sequenced.
+
+
+ Sequence contamination filtering
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Trim sequences (typically from an automated DNA sequencer) to remove misleading ends.
+
+
+ For example trim polyA tails, introns and primer sequence flanking the sequence of amplified exons, or other unwanted sequence.
+ Trim ends
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Trim sequences (typically from an automated DNA sequencer) to remove sequence-specific end regions, typically contamination from vector sequences.
+
+
+ Trim vector
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Trim sequences (typically from an automated DNA sequencer) to remove the sequence ends that extend beyond an assembled reference sequence.
+
+
+ Trim to reference
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ Cut (remove) the end from a molecular sequence.
+ Trimming
+ Barcode sequence removal
+ Trim ends
+ Trim to reference
+ Trim vector
+
+
+ This includes end trimming, -- Trim sequences (typically from an automated DNA sequencer) to remove misleading ends. For example trim polyA tails, introns and primer sequence flanking the sequence of amplified exons, or other unwanted sequence.-- trimming to a reference sequence, --Trim sequences (typically from an automated DNA sequencer) to remove the sequence ends that extend beyond an assembled reference sequence. -- vector trimming -- Trim sequences (typically from an automated DNA sequencer) to remove sequence-specific end regions, typically contamination from vector sequences.
+ Sequence trimming
+
+
+
+
+
+
+
+
+
+ 1.1
+ Compare the features of two genome sequences.
+
+
+ Genomic elements that might be compared include genes, indels, single nucleotide polymorphisms (SNPs), retrotransposons, tandem repeats and so on.
+ Genome feature comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Detect errors in DNA sequences generated from sequencing projects.
+ Short read error correction
+ Short-read error correction
+
+
+ Sequencing error detection
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse DNA sequence data to identify differences between the genetic composition (genotype) of an individual compared to other individuals or a reference sequence.
+
+
+ Methods might consider cytogenetic analyses, copy number polymorphism (and calculate copy number calls for copy-number variation (CNV) regions), single nucleotide polymorphism (SNP), rare copy number variation (CNV) identification, loss of heterozygosity data and so on.
+ Genotyping
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse a genetic variation, for example to annotate its location, alleles, classification, and effects on individual transcripts predicted for a gene model.
+ Genetic variation annotation
+ Sequence variation analysis
+ Variant analysis
+ Transcript variant analysis
+
+
+ Genetic variation annotation provides contextual interpretation of coding SNP consequences in transcripts. It allows comparisons to be made between variation data in different populations or strains for the same transcript.
+ Genetic variation analysis
+
+
+
+
+
+
+
+
+
+ 1.1
+ Align short oligonucleotide sequences (reads) to a larger (genomic) sequence.
+ Oligonucleotide alignment
+ Oligonucleotide alignment construction
+ Oligonucleotide alignment generation
+ Oligonucleotide mapping
+ Read alignment
+ Short oligonucleotide alignment
+ Short read alignment
+ Short read mapping
+ Short sequence read mapping
+
+
+ The purpose of read mapping is to identify the location of sequenced fragments within a reference genome and assumes that there is, in fact, at least local similarity between the fragment and reference sequences.
+ Read mapping
+
+
+
+
+
+
+
+
+ 1.1
+ A variant of oligonucleotide mapping where a read is mapped to two separate locations because of possible structural variation.
+ Split-read mapping
+
+
+ Split read mapping
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse DNA sequences in order to identify a DNA 'barcode'; marker genes or any short fragment(s) of DNA that are useful to diagnose the taxa of biological organisms.
+ Community profiling
+ Sample barcoding
+
+
+ DNA barcoding
+
+
+
+
+
+
+
+
+
+ 1.1
+ 1.19
+
+ Identify single nucleotide change in base positions in sequencing data that differ from a reference genome and which might, especially by reference to population frequency or functional data, indicate a polymorphism.
+
+
+ SNP calling
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ "Polymorphism detection" and "Variant calling" are essentially the same thing - keeping the latter as a more prevalent term nowadays.
+ 1.24
+
+
+ Detect mutations in multiple DNA sequences, for example, from the alignment and comparison of the fluorescent traces produced by DNA sequencing hardware.
+
+
+ Polymorphism detection
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ Visualise, format or render an image of a Chromatogram.
+ Chromatogram viewing
+
+
+ Chromatogram visualisation
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse cytosine methylation states in nucleic acid sequences.
+ Methylation profile analysis
+
+
+ Methylation analysis
+
+
+
+
+
+
+
+
+ 1.1
+ 1.19
+
+ Determine cytosine methylation status of specific positions in a nucleic acid sequence.
+
+
+ Methylation calling
+ true
+
+
+
+
+
+
+
+
+
+ 1.1
+ Measure the overall level of methyl cytosines in a genome from analysis of experimental data, typically from chromatographic methods and methyl accepting capacity assay.
+ Genome methylation analysis
+ Global methylation analysis
+ Methylation level analysis (global)
+
+
+ Whole genome methylation analysis
+
+
+
+
+
+
+
+
+ 1.1
+ Analysing the DNA methylation of specific genes or regions of interest.
+ Gene-specific methylation analysis
+ Methylation level analysis (gene-specific)
+
+
+ Gene methylation analysis
+
+
+
+
+
+
+
+
+
+ 1.1
+ Visualise, format or render a nucleic acid sequence that is part of (and in context of) a complete genome sequence.
+ Genome browser
+ Genome browsing
+ Genome rendering
+ Genome viewing
+
+
+ Genome visualisation
+
+
+
+
+
+
+
+
+
+ 1.1
+ Compare the sequence or features of two or more genomes, for example, to find matching regions.
+ Genomic region matching
+
+
+ Genome comparison
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Generate an index of a genome sequence.
+ Burrows-Wheeler
+ Genome indexing (Burrows-Wheeler)
+ Genome indexing (suffix arrays)
+ Suffix arrays
+
+
+ Many sequence alignment tasks involving many or very large sequences rely on a precomputed index of the sequence to accelerate the alignment. The Burrows-Wheeler Transform (BWT) is a permutation of the genome based on a suffix array algorithm. A suffix array consists of the lexicographically sorted list of suffixes of a genome.
+ Genome indexing
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Generate an index of a genome sequence using the Burrows-Wheeler algorithm.
+
+
+ The Burrows-Wheeler Transform (BWT) is a permutation of the genome based on a suffix array algorithm.
+ Genome indexing (Burrows-Wheeler)
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.12
+
+ Generate an index of a genome sequence using a suffix arrays algorithm.
+
+
+ A suffix array consists of the lexicographically sorted list of suffixes of a genome.
+ Genome indexing (suffix arrays)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse one or more spectra from mass spectrometry (or other) experiments.
+ Mass spectrum analysis
+ Spectrum analysis
+
+
+ Spectral analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Identify peaks in a spectrum from a mass spectrometry, NMR, or some other spectrum-generating experiment.
+ Peak assignment
+ Peak finding
+
+
+ Peak detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Link together a non-contiguous series of genomic sequences into a scaffold, consisting of sequences separated by gaps of known length. The sequences that are linked are typically contigs; contiguous sequences corresponding to read overlaps.
+ Scaffold construction
+ Scaffold generation
+
+
+ Scaffold may be positioned along a chromosome physical map to create a "golden path".
+ Scaffolding
+
+
+
+
+
+
+
+
+ 1.1
+ Fill the gaps in a sequence assembly (scaffold) by merging in additional sequences.
+
+
+ Different techniques are used to generate gap sequences to connect contigs, depending on the size of the gap. For small (5-20kb) gaps, PCR amplification and sequencing is used. For large (>20kb) gaps, fragments are cloned (e.g. in BAC (Bacterial artificial chromosomes) vectors) and then sequenced.
+ Scaffold gap completion
+
+
+
+
+
+
+
+
+
+ 1.1
+ Raw sequence data quality control.
+ Sequencing QC
+ Sequencing quality assessment
+
+
+ Analyse raw sequence data from a sequencing pipeline and identify (and possibly fix) problems.
+ Sequencing quality control
+
+
+
+
+
+
+
+
+
+ 1.1
+ Pre-process sequence reads to ensure (or improve) quality and reliability.
+ Sequence read pre-processing
+
+
+ For example process paired end reads to trim low quality ends, remove short sequences, identify sequence inserts, detect chimeric reads, or remove low quality sequences including vector, adaptor, low complexity and contaminant sequences. Sequences might come from genomic DNA library, EST libraries, SSH library and so on.
+ Read pre-processing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Estimate the frequencies of different species from analysis of the molecular sequences, typically of DNA recovered from environmental samples.
+
+
+ Species frequency estimation
+
+
+
+
+
+
+
+
+ 1.1
+ Identify putative protein-binding regions in a genome sequence from analysis of Chip-sequencing data or ChIP-on-chip data.
+ Protein binding peak detection
+ Peak-pair calling
+
+
+ Chip-sequencing combines chromatin immunoprecipitation (ChIP) with massively parallel DNA sequencing to generate a set of reads, which are aligned to a genome sequence. The enriched areas contain the binding sites of DNA-associated proteins. For example, a transcription factor binding site. ChIP-on-chip in contrast combines chromatin immunoprecipitation ('ChIP') with microarray ('chip'). "Peak-pair calling" is similar to "Peak calling" in the context of ChIP-exo.
+ Peak calling
+
+
+
+
+
+
+
+
+ 1.1
+ Identify from molecular sequence analysis (typically from analysis of microarray or RNA-seq data) genes whose expression levels are significantly different between two sample groups.
+ Differential expression analysis
+ Differential gene analysis
+ Differential gene expression analysis
+ Differentially expressed gene identification
+
+
+ Differential gene expression analysis is used, for example, to identify which genes are up-regulated (increased expression) or down-regulated (decreased expression) between a group treated with a drug and a control group.
+ Differential gene expression profiling
+
+
+
+
+
+
+
+
+ 1.1
+ 1.21
+
+ Analyse gene expression patterns (typically from DNA microarray datasets) to identify sets of genes that are associated with a specific trait, condition, clinical outcome etc.
+
+
+ Gene set testing
+ true
+
+
+
+
+
+
+
+
+
+ 1.1
+ Classify variants based on their potential effect on genes, especially functional effects on the expressed proteins.
+
+
+ Variants are typically classified by their position (intronic, exonic, etc.) in a gene transcript and (for variants in coding exons) by their effect on the protein sequence (synonymous, non-synonymous, frameshifting, etc.)
+ Variant classification
+
+
+
+
+
+
+
+
+ 1.1
+ Identify biologically interesting variants by prioritizing individual variants, for example, homozygous variants absent in control genomes.
+
+
+ Variant prioritisation can be used for example to produce a list of variants responsible for 'knocking out' genes in specific genomes. Methods include amino acid substitution, aggregative approaches, probabilistic approach, inheritance and unified likelihood-frameworks.
+ Variant prioritisation
+
+
+
+
+
+
+
+
+
+ 1.1
+ Detect, identify and map mutations, such as single nucleotide polymorphisms, short indels and structural variants, in multiple DNA sequences. Typically the alignment and comparison of the fluorescent traces produced by DNA sequencing hardware, to study genomic alterations.
+ Variant mapping
+ Allele calling
+ Exome variant detection
+ Genome variant detection
+ Germ line variant calling
+ Mutation detection
+ Somatic variant calling
+ de novo mutation detection
+
+
+ Methods often utilise a database of aligned reads.
+ Somatic variant calling is the detection of variations established in somatic cells and hence not inherited as a germ line variant.
+ Variant detection
+ Variant calling
+
+
+
+
+
+
+
+
+ 1.1
+ Detect large regions in a genome subject to copy-number variation, or other structural variations in genome(s).
+ Structural variation discovery
+
+
+ Methods might involve analysis of whole-genome array comparative genome hybridisation or single-nucleotide polymorphism arrays, paired-end mapping of sequencing data, or from analysis of short reads from new sequencing technologies.
+ Structural variation detection
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse sequencing data from experiments aiming to selectively sequence the coding regions of the genome.
+ Exome sequence analysis
+
+
+ Exome assembly
+
+
+
+
+
+
+
+
+ 1.1
+ Analyse mapping density (read depth) of (typically) short reads from sequencing platforms, for example, to detect deletions and duplications.
+
+
+ Read depth analysis
+
+
+
+
+
+
+
+
+ 1.1
+ Combine classical quantitative trait loci (QTL) analysis with gene expression profiling, for example, to describe cis- and trans-controlling elements for the expression of phenotype associated genes.
+ Gene expression QTL profiling
+ Gene expression quantitative trait loci profiling
+ eQTL profiling
+
+
+ Gene expression QTL analysis
+
+
+
+
+
+
+
+
+ 1.1
+ Estimate the number of copies of loci of particular gene(s) in DNA sequences typically from gene-expression profiling technology based on microarray hybridisation-based experiments. For example, estimate copy number (or marker dosage) of a dominant marker in samples from polyploid plant cells or tissues, or chromosomal gains and losses in tumors.
+ Transcript copy number estimation
+
+
+ Methods typically implement some statistical model for hypothesis testing, and methods estimate total copy number, i.e. do not distinguish the two inherited chromosomes quantities (specific copy number).
+ Copy number estimation
+
+
+
+
+
+
+
+
+ 1.2
+ Adapter removal
+ Remove forward and/or reverse primers from nucleic acid sequences (typically PCR products).
+
+
+ Primer removal
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ Infer a transcriptome sequence by analysis of short sequence reads.
+
+
+ Transcriptome assembly
+
+
+
+
+
+
+
+
+ 1.2
+ 1.6
+
+
+ Infer a transcriptome sequence without the aid of a reference genome, i.e. by comparing short sequences (reads) to each other.
+
+ Transcriptome assembly (de novo)
+ true
+
+
+
+
+
+
+
+
+ 1.2
+ 1.6
+
+
+ Infer a transcriptome sequence by mapping short reads to a reference genome.
+
+ Transcriptome assembly (mapping)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ Convert one set of sequence coordinates to another, e.g. convert coordinates of one assembly to another, cDNA to genomic, CDS to genomic, protein translation to genomic etc.
+
+
+ Sequence coordinate conversion
+
+
+
+
+
+
+
+
+ 1.3
+ Calculate similarity between 2 or more documents.
+
+
+ Document similarity calculation
+
+
+
+
+
+
+
+
+
+ 1.3
+ Cluster (group) documents on the basis of their calculated similarity.
+
+
+ Document clustering
+
+
+
+
+
+
+
+
+
+ 1.3
+ Recognise named entities, ontology concepts, tags, events, and dictionary terms within documents.
+ Concept mining
+ Entity chunking
+ Entity extraction
+ Entity identification
+ Event extraction
+ NER
+ Named-entity recognition
+
+
+ Named-entity and concept recognition
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ Map data identifiers to one another for example to establish a link between two biological databases for the purposes of data integration.
+ Accession mapping
+ Identifier mapping
+
+
+ The mapping can be achieved by comparing identifier values or some other means, e.g. exact matches to a provided sequence.
+ ID mapping
+
+
+
+
+
+
+
+
+ 1.3
+ Process data in such a way that makes it hard to trace to the person which the data concerns.
+ Data anonymisation
+
+
+ Anonymisation
+
+
+
+
+
+
+
+
+ 1.3
+ (jison)Too fine-grained, the operation (Data retrieval) hasn't changed, just what is retrieved.
+ 1.17
+
+ Search for and retrieve a data identifier of some kind, e.g. a database entry accession.
+
+
+ ID retrieval
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Generate a checksum of a molecular sequence.
+
+
+ Sequence checksum generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Construct a bibliography from the scientific literature.
+ Bibliography construction
+
+
+ Bibliography generation
+
+
+
+
+
+
+
+
+ 1.4
+ Predict the structure of a multi-subunit protein and particularly how the subunits fit together.
+
+
+ Protein quaternary structure prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Analyse the surface properties of proteins or other macromolecules, including surface accessible pockets, interior inaccessible cavities etc.
+
+
+ Molecular surface analysis
+
+
+
+
+
+
+
+
+ 1.4
+ Compare two or more ontologies, e.g. identify differences.
+
+
+ Ontology comparison
+
+
+
+
+
+
+
+
+ 1.4
+ 1.9
+
+ Compare two or more ontologies, e.g. identify differences.
+
+
+ Ontology comparison
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Recognition of which format the given data is in.
+ Format identification
+ Format inference
+ Format recognition
+
+
+ 'Format recognition' is not a bioinformatics-specific operation, but of great relevance in bioinformatics. Should be removed from EDAM if/when captured satisfactorily in a suitable domain-generic ontology.
+ Format detection
+
+
+
+
+
+ The has_input "Data" (data_0006) may cause visualisation or other problems although ontologically correct. But on the other hand it may be useful to distinguish from nullary operations without inputs.
+
+
+
+
+
+
+
+
+ 1.4
+ Split a file containing multiple data items into many files, each containing one item.
+ File splitting
+
+
+ Splitting
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Construct some data entity.
+ Construction
+
+
+ For non-analytical operations, see the 'Processing' branch.
+ Generation
+
+
+
+
+
+
+
+
+ 1.6
+ (jison)This is a distinction made on basis of input; all features exist can be mapped to a sequence so this isn't needed.
+ 1.17
+
+
+ Predict, recognise and identify functional or other key sites within nucleic acid sequences, typically by scanning for known motifs, patterns and regular expressions.
+
+
+ Nucleic acid sequence feature detection
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ Deposit some data in a database or some other type of repository or software system.
+ Data deposition
+ Data submission
+ Database deposition
+ Database submission
+ Submission
+
+
+ For non-analytical operations, see the 'Processing' branch.
+ Deposition
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Group together some data entities on the basis of similarities such that entities in the same group (cluster) are more similar to each other than to those in other groups (clusters).
+
+
+ Clustering
+
+
+
+
+
+
+
+
+ 1.6
+ 1.19
+
+ Construct some entity (typically a molecule sequence) from component pieces.
+
+
+ Assembly
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Convert a data set from one form to another.
+
+
+ Conversion
+
+
+
+
+
+
+
+
+ 1.6
+ Standardize or normalize data by some statistical method.
+ Normalisation
+ Standardisation
+
+
+ In the simplest case, normalisation means adjusting values measured on different scales to a common scale (often between 0.0 and 1.0), but can refer to more sophisticated adjustment whereby entire probability distributions of adjusted values are brought into alignment. Standardisation typically refers to an operation whereby a range of values are standardised to measure how many standard deviations a value is from its mean.
+ Standardisation and normalisation
+
+
+
+
+
+
+
+
+ 1.6
+ Combine multiple files or data items into a single file or object.
+
+
+ Aggregation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.6
+ Compare two or more scientific articles.
+
+
+ Article comparison
+
+
+
+
+
+
+
+
+ 1.6
+ true
+ Mathematical determination of the value of something, typically a property of a molecule.
+
+
+ Calculation
+
+
+
+
+
+
+
+
+ 1.6
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ 1.24
+
+
+
+
+ Predict a molecular pathway or network.
+
+ Pathway or network prediction
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ 1.12
+
+ The process of assembling many short DNA sequences together such that they represent the original chromosomes from which the DNA originated.
+
+
+ Genome assembly
+ true
+
+
+
+
+
+
+
+
+ 1.6
+ 1.19
+
+ Generate a graph, or other visual representation, of data, showing the relationship between two or more variables.
+
+
+ Plotting
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Image processing
+ The analysis of an image (typically a digital image) of some type in order to extract information from it.
+
+
+ Image analysis
+
+
+
+
+
+
+
+
+
+ 1.7
+ Analysis of data from a diffraction experiment.
+
+
+ Diffraction data analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Analysis of cell migration images in order to study cell migration, typically in order to study the processes that play a role in the disease progression.
+
+
+ Cell migration analysis
+
+
+
+
+
+
+
+
+
+ 1.7
+ Processing of diffraction data into a corrected, ordered, and simplified form.
+
+
+ Diffraction data reduction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Measurement of neurites; projections (axons or dendrites) from the cell body of a neuron, from analysis of neuron images.
+
+
+ Neurite measurement
+
+
+
+
+
+
+
+
+ 1.7
+ The evaluation of diffraction intensities and integration of diffraction maxima from a diffraction experiment.
+ Diffraction profile fitting
+ Diffraction summation integration
+
+
+ Diffraction data integration
+
+
+
+
+
+
+
+
+ 1.7
+ Phase a macromolecular crystal structure, for example by using molecular replacement or experimental phasing methods.
+
+
+ Phasing
+
+
+
+
+
+
+
+
+ 1.7
+ A technique used to construct an atomic model of an unknown structure from diffraction data, based upon an atomic model of a known structure, either a related protein or the same protein from a different crystal form.
+
+
+ The technique solves the phase problem, i.e. retrieves information concerning the phases of the structure.
+ Molecular replacement
+
+
+
+
+
+
+
+
+ 1.7
+ A method used to refine a structure by moving the whole molecule or parts of it as a rigid unit, rather than moving individual atoms.
+
+
+ Rigid body refinement usually follows molecular replacement in the assignment of a structure from diffraction data.
+ Rigid body refinement
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ An image processing technique that combines and analyzes multiple images of a particulate sample, in order to produce an image with clearer features that are more easily interpreted.
+
+
+ Single particle analysis is used to improve the information that can be obtained by relatively low resolution techniques, e.g. an image of a protein or virus from transmission electron microscopy (TEM).
+ Single particle analysis
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ true
+ This is two related concepts.
+ Compare (align and classify) multiple particle images from a micrograph in order to produce a representative image of the particle.
+
+
+ A micrograph can include particles in multiple different orientations and/or conformations. Particles are compared and organised into sets based on their similarity. Typically iterations of classification and alignment are performed to optimise the final 3D EM map.
+ Single particle alignment and classification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Clustering of molecular sequences on the basis of their function, typically using information from an ontology of gene function, or some other measure of functional phenotype.
+ Functional sequence clustering
+
+
+ Functional clustering
+
+
+
+
+
+
+
+
+ 1.7
+ Classification (typically of molecular sequences) by assignment to some taxonomic hierarchy.
+ Taxonomy assignment
+ Taxonomic profiling
+
+
+ Taxonomic classification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ The prediction of the degree of pathogenicity of a microorganism from analysis of molecular sequences.
+ Pathogenicity prediction
+
+
+ Virulence prediction
+
+
+
+
+
+
+
+
+
+ 1.7
+ Analyse the correlation patterns among features/molecules across a variety of experiments, samples etc.
+ Co-expression analysis
+ Gene co-expression network analysis
+ Gene expression correlation
+ Gene expression correlation analysis
+
+
+ Expression correlation analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ true
+ Identify a correlation, i.e. a statistical relationship between two random variables or two sets of data.
+
+
+ Correlation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Compute the covariance model for (a family of) RNA secondary structures.
+
+
+ RNA structure covariance model generation
+
+
+
+
+
+
+
+
+ 1.7
+ 1.18
+
+ Predict RNA secondary structure by analysis, e.g. probabilistic analysis, of the shape of RNA folds.
+
+
+ RNA secondary structure prediction (shape-based)
+ true
+
+
+
+
+
+
+
+
+ 1.7
+ 1.18
+
+ Prediction of nucleic-acid folding using sequence alignments as a source of data.
+
+
+ Nucleic acid folding prediction (alignment-based)
+ true
+
+
+
+
+
+
+
+
+ 1.7
+ Count k-mers (substrings of length k) in DNA sequence data.
+
+
+ k-mer counting is used in genome and transcriptome assembly, metagenomic sequencing, and for error correction of sequence reads.
+ k-mer counting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Reconstructing the inner node labels of a phylogenetic tree from its leaves.
+ Phylogenetic tree reconstruction
+ Gene tree reconstruction
+ Species tree reconstruction
+
+
+ Note that this is somewhat different from simply analysing an existing tree or constructing a completely new one.
+ Phylogenetic reconstruction
+
+
+
+
+
+
+
+
+ 1.7
+ Generate some data from a chosen probabilistic model, possibly to evaluate algorithms.
+
+
+ Probabilistic data generation
+
+
+
+
+
+
+
+
+
+ 1.7
+ Generate sequences from some probabilistic model, e.g. a model that simulates evolution.
+
+
+ Probabilistic sequence generation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ Identify or predict causes for antibiotic resistance from molecular sequence analysis.
+
+
+ Antimicrobial resistance prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ Analysis of a set of objects, such as genes, annotated with given categories, where eventual over-/under-representation of certain categories within the studied set of objects is revealed.
+ Enrichment
+ Over-representation analysis
+ Functional enrichment
+
+
+ Categories from a relevant ontology can be used. The input is typically a set of genes or other biological objects, possibly represented by their identifiers, and the output of the analysis is typically a ranked list of categories, each associated with a statistical metric of over-/under-representation within the studied data.
+ Enrichment analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ Analyse a dataset with respect to concepts from an ontology of chemical structure, leveraging chemical similarity information.
+ Chemical class enrichment
+
+
+ Chemical similarity enrichment
+
+
+
+
+
+
+
+
+ 1.8
+ Plot an incident curve such as a survival curve, death curve, mortality curve.
+
+
+ Incident curve plotting
+
+
+
+
+
+
+
+
+ 1.8
+ Identify and map patterns of genomic variations.
+
+
+ Methods often utilise a database of aligned reads.
+ Variant pattern analysis
+
+
+
+
+
+
+
+
+ 1.8
+ 1.12
+
+ Model some biological system using mathematical techniques including dynamical systems, statistical models, differential equations, and game theoretic models.
+
+
+ Mathematical modelling
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.9
+ Visualise images resulting from various types of microscopy.
+
+
+ Microscope image visualisation
+
+
+
+
+
+
+
+
+ 1.9
+ Annotate an image of some sort, typically with terms from a controlled vocabulary.
+
+
+ Image annotation
+
+
+
+
+
+
+
+
+ 1.9
+ Replace missing data with substituted values, usually by using some statistical or other mathematical approach.
+ Data imputation
+
+
+ Imputation
+
+
+
+
+
+
+
+
+
+ 1.9
+ Visualise, format or render data from an ontology, typically a tree of terms.
+ Ontology browsing
+
+
+ Ontology visualisation
+
+
+
+
+
+
+
+
+ 1.9
+ A method for making numerical assessments about the maximum percent of time that a conformer of a flexible macromolecule can exist and still be compatible with the experimental data.
+
+
+ Maximum occurrence analysis
+
+
+
+
+
+
+
+
+
+ 1.9
+ Compare the models or schemas used by two or more databases, or any other general comparison of databases rather than a detailed comparison of the entries themselves.
+ Data model comparison
+ Schema comparison
+
+
+ Database comparison
+
+
+
+
+
+
+
+
+ 1.9
+ 1.24
+
+
+
+ Simulate the behaviour of a biological pathway or network.
+
+ Notions of pathway and network were mixed up, EDAM 1.24 disentangles them.
+ Network simulation
+ true
+
+
+
+
+
+
+
+
+ 1.9
+ Analyze read counts from RNA-seq experiments.
+
+
+ RNA-seq read count analysis
+
+
+
+
+
+
+
+
+ 1.9
+ Identify and remove redundancy from a set of small molecule structures.
+
+
+ Chemical redundancy removal
+
+
+
+
+
+
+
+
+ 1.9
+ Analyze time series data from an RNA-seq experiment.
+
+
+ RNA-seq time series data analysis
+
+
+
+
+
+
+
+
+ 1.9
+ Simulate gene expression data, e.g. for purposes of benchmarking.
+
+
+ Simulated gene expression data generation
+
+
+
+
+
+
+
+
+ 1.12
+ Identify semantic relations among entities and concepts within a text, using text mining techniques.
+ Relation discovery
+ Relation inference
+ Relationship discovery
+ Relationship extraction
+ Relationship inference
+
+
+ Relation extraction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Re-adjust the output of mass spectrometry experiments with shifted ppm values.
+
+
+ Mass spectra calibration
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Align multiple data sets using information from chromatography and/or peptide identification, from mass spectrometry experiments.
+
+
+ Chromatographic alignment
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ The removal of isotope peaks in a spectrum, to represent the fragment ion as one data point.
+ Deconvolution
+
+
+ Deisotoping is commonly done to reduce complexity, and done in conjunction with the charge state deconvolution.
+ Deisotoping
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Technique for determining the amount of proteins in a sample.
+ Protein quantitation
+
+
+ Protein quantification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Determination of peptide sequence from mass spectrum.
+ Peptide-spectrum-matching
+
+
+ Peptide identification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Calculate the isotope distribution of a given chemical species.
+
+
+ Isotopic distributions calculation
+
+
+
+
+
+
+
+
+ 1.12
+ Prediction of retention time in a mass spectrometry experiment based on compositional and structural properties of the separated species.
+ Retention time calculation
+
+
+ Retention time prediction
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification without the use of chemical tags.
+
+
+ Label-free quantification
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification based on the use of chemical tags.
+
+
+ Labeled quantification
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification by Selected/multiple Reaction Monitoring workflow (XIC quantitation of precursor / fragment mass pair).
+
+
+ MRM/SRM
+
+
+
+
+
+
+
+
+ 1.12
+ Calculate number of identified MS2 spectra as approximation of peptide / protein quantity.
+
+
+ Spectral counting
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using stable isotope labeling by amino acids in cell culture.
+
+
+ SILAC
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using the AB SCIEX iTRAQ isobaric labelling workflow, wherein 2-8 reporter ions are measured in MS2 spectra near 114 m/z.
+
+
+ iTRAQ
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using labeling based on 18O-enriched H2O.
+
+
+ 18O labeling
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using the Thermo Fisher tandem mass tag labelling workflow.
+
+
+ TMT-tag
+
+
+
+
+
+
+
+
+ 1.12
+ Quantification analysis using chemical labeling by stable isotope dimethylation.
+
+
+ Stable isotope dimethyl labelling
+
+
+
+
+
+
+
+
+ 1.12
+ Peptide sequence tags are used as a piece of information about a peptide obtained by tandem mass spectrometry.
+
+
+ Tag-based peptide identification
+
+
+
+
+
+
+
+
+
+ 1.12
+ Analytical process that derives a peptide's amino acid sequence from its tandem mass spectrum (MS/MS) without the assistance of a sequence database.
+
+
+ de Novo sequencing
+
+
+
+
+
+
+
+
+ 1.12
+ Identification of post-translational modifications (PTMs) of peptides/proteins in mass spectrum.
+
+
+ PTM identification
+
+
+
+
+
+
+
+
+
+ 1.12
+ Determination of best matches between MS/MS spectrum and a database of protein or nucleic acid sequences.
+
+
+ Peptide database search
+
+
+
+
+
+
+
+
+ 1.12
+ Peptide database search for identification of known and unknown PTMs looking for mass difference mismatches.
+ Modification-tolerant peptide database search
+ Unrestricted peptide database search
+
+
+ Blind peptide database search
+
+
+
+
+
+
+
+
+ 1.12
+ 1.19
+
+
+ Statistical estimation of false discovery rate from score distribution for peptide-spectrum-matches, following a peptide database search.
+
+
+ Validation of peptide-spectrum matches
+ true
+
+
+
+
+
+
+
+
+
+ 1.12
+ Validation of peptide-spectrum matches
+ Statistical estimation of false discovery rate from score distribution for peptide-spectrum-matches, following a peptide database search, and by comparison to search results with a database containing incorrect information.
+
+
+ Target-Decoy
+
+
+
+
+
+
+
+
+ 1.12
+ Analyse data in order to deduce properties of an underlying distribution or population.
+ Empirical Bayes
+
+
+ Statistical inference
+
+
+
+
+
+
+
+
+
+ 1.12
+ A statistical calculation to estimate the relationships among variables.
+ Regression
+
+
+ Regression analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Model a metabolic network. This can include 1) reconstruction to break down metabolic pathways into reactions, enzymes, and other relevant information, and compilation of this into a mathematical model and 2) simulations of metabolism based on the model.
+
+
+ Metabolic network reconstruction
+ Metabolic network simulation
+ Metabolic pathway simulation
+ Metabolic reconstruction
+
+
+ The terms and synonyms here reflect that for practical intents and purposes, "pathway" and "network" can be treated the same.
+ Metabolic network modelling
+
+
+
+
+
+
+
+
+
+ 1.12
+ Predict the effect or function of an individual single nucleotide polymorphism (SNP).
+
+
+ SNP annotation
+
+
+
+
+
+
+
+
+ 1.12
+ Prediction of genes or gene components from first principles, i.e. without reference to existing genes.
+ Gene prediction (ab-initio)
+
+
+ Ab-initio gene prediction
+
+
+
+
+
+
+
+
+
+ 1.12
+ Prediction of genes or gene components by reference to homologous genes.
+ Empirical gene finding
+ Empirical gene prediction
+ Evidence-based gene prediction
+ Gene prediction (homology-based)
+ Similarity-based gene prediction
+ Homology prediction
+ Orthology prediction
+
+
+ Homology-based gene prediction
+
+
+
+
+
+
+
+
+
+ 1.12
+ Construction of a statistical model, or a set of assumptions around some observed data, usually by describing a set of probability distributions which approximate the distribution of data.
+
+
+ Statistical modelling
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ Compare two or more molecular surfaces.
+
+
+ Molecular surface comparison
+
+
+
+
+
+
+
+
+ 1.12
+ Annotate one or more sequences with functional information, such as cellular processes or metabolic pathways, by reference to a controlled vocabulary - invariably the Gene Ontology (GO).
+ Sequence functional annotation
+
+
+ Gene functional annotation
+
+
+
+
+
+
+
+
+ 1.12
+ Variant filtering is used to eliminate false positive variants based for example on base calling quality, strand and position information, and mapping info.
+
+
+ Variant filtering
+
+
+
+
+
+
+
+
+ 1.12
+ Identify binding sites in nucleic acid sequences that are statistically significantly differentially bound between sample groups.
+
+
+ Differential binding analysis
+
+
+
+
+
+
+
+
+
+ 1.13
+ Analyze data from RNA-seq experiments.
+
+
+ RNA-Seq analysis
+
+
+
+
+
+
+
+
+ 1.13
+ Visualise, format or render a mass spectrum.
+
+
+ Mass spectrum visualisation
+
+
+
+
+
+
+
+
+ 1.13
+ Filter a set of files or data items according to some property.
+ Sequence filtering
+ rRNA filtering
+
+
+ Filtering
+
+
+
+
+
+
+
+
+ 1.14
+ Identification of the best reference for mapping for a specific dataset from a list of potential references, when performing genetic variation analysis.
+
+
+ Reference identification
+
+
+
+
+
+
+
+
+ 1.14
+ Label-free quantification by integration of ion current (ion counting).
+ Ion current integration
+
+
+ Ion counting
+
+
+
+
+
+
+
+
+ 1.14
+ Chemical tagging free amino groups of intact proteins with stable isotopes.
+ ICPL
+
+
+ Isotope-coded protein label
+
+
+
+
+
+
+
+
+ 1.14
+ Labeling all proteins and (possibly) all amino acids using C-13 or N-15 enriched grown medium or feed.
+ C-13 metabolic labeling
+ N-15 metabolic labeling
+
+
+ This includes N-15 metabolic labeling (labeling all proteins and (possibly) all amino acids using N-15 enriched grown medium or feed) and C-13 metabolic labeling (labeling all proteins and (possibly) all amino acids using C-13 enriched grown medium or feed).
+ Metabolic labeling
+
+
+
+
+
+
+
+
+ 1.15
+ Construction of a single sequence assembly of all reads from different samples, typically as part of a comparative metagenomic analysis.
+ Sequence assembly (cross-assembly)
+
+
+ Cross-assembly
+
+
+
+
+
+
+
+
+ 1.15
+ The comparison of samples from a metagenomics study, for example, by comparison of metagenome shotgun reads or assembled contig sequences, by comparison of functional profiles, or some other method.
+
+
+ Sample comparison
+
+
+
+
+
+
+
+
+
+ 1.15
+ Differential protein analysis
+ The analysis, using proteomics techniques, to identify proteins whose encoding genes are differentially expressed under a given experimental setup.
+ Differential protein expression analysis
+
+
+ Differential protein expression profiling
+
+
+
+
+
+
+
+
+ 1.15
+ 1.17
+
+ The analysis, using any of diverse techniques, to identify genes that are differentially expressed under a given experimental setup.
+
+
+ Differential gene expression analysis
+ true
+
+
+
+
+
+
+
+
+ 1.15
+ Visualise, format or render data arising from an analysis of multiple samples from a metagenomics/community experiment.
+
+
+ Multiple sample visualisation
+
+
+
+
+
+
+
+
+ 1.15
+ The extrapolation of empirical characteristics of individuals or populations, backwards in time, to their common ancestors.
+ Ancestral sequence reconstruction
+ Character mapping
+ Character optimisation
+
+
+ Ancestral reconstruction is often used to recover possible ancestral character states of ancient, extinct organisms.
+ Ancestral reconstruction
+
+
+
+
+
+
+
+
+ 1.16
+ Site localisation of post-translational modifications in peptide or protein mass spectra.
+ PTM scoring
+ Site localisation
+
+
+ PTM localisation
+
+
+
+
+
+
+
+
+ 1.16
+ Operations concerning the handling and use of other tools.
+ Endpoint management
+
+
+ Service management
+
+
+
+
+
+
+
+
+ 1.16
+ An operation supporting the browsing or discovery of other tools and services.
+
+
+ Service discovery
+
+
+
+
+
+
+
+
+ 1.16
+ An operation supporting the aggregation of other services (at least two) into a functional unit, for the automation of some task.
+
+
+ Service composition
+
+
+
+
+
+
+
+
+ 1.16
+ An operation supporting the calling (invocation) of other tools and services.
+
+
+ Service invocation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ A data mining method typically used for studying biological networks based on pairwise correlations between variables.
+ WGCNA
+ Weighted gene co-expression network analysis
+
+
+ Weighted correlation network analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ Identification of protein, for example from one or more peptide identifications by tandem mass spectrometry.
+ Protein inference
+
+
+ Protein identification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.16
+ Text annotation is the operation of adding notes, data and metadata, recognised entities and concepts, and their relations to a text (such as a scientific article).
+ Article annotation
+ Literature annotation
+
+
+ Text annotation
+
+
+
+
+
+
+
+
+
+ 1.17
+ A method whereby data on several variants are "collapsed" into a single covariate based on regions such as genes.
+
+
+ Genome-wide association studies (GWAS) analyse a genome-wide set of genetic variants in different individuals to see if any variant is associated with a trait. Traditional association techniques can lack the power to detect the significance of rare variants individually, or measure their compound effect (rare variant burden). "Collapsing methods" were developed to overcome these problems.
+ Collapsing methods
+
+
+
+
+
+
+
+
+ 1.17
+ miRNA analysis
+ The analysis of microRNAs (miRNAs): short, highly conserved small noncoding RNA molecules that are naturally occurring in plant and animal genomes.
+ miRNA expression profiling
+
+
+ miRNA expression analysis
+
+
+
+
+
+
+
+
+ 1.17
+ Counting and summarising the number of short sequence reads that map to genomic features.
+
+
+ Read summarisation
+
+
+
+
+
+
+
+
+ 1.17
+ A technique whereby molecules with desired properties and function are isolated from libraries of random molecules, through iterative cycles of selection, amplification, and mutagenesis.
+
+
+ In vitro selection
+
+
+
+
+
+
+
+
+ 1.17
+ The calculation of species richness for a number of individual samples, based on plots of the number of species as a function of the number of samples (rarefaction curves).
+ Species richness assessment
+
+
+ Rarefaction
+
+
+
+
+
+
+
+
+
+ 1.17
+ An operation which groups reads or contigs and assigns them to operational taxonomic units.
+ Binning
+ Binning shotgun reads
+
+
+ Binning methods use one or a combination of compositional features or sequence similarity.
+ Read binning
+
+
+
+
+
+
+
+
+
+ 1.17
+ true
+ Counting and measuring experimentally determined observations into quantities.
+ Quantitation
+
+
+ Quantification
+
+
+
+
+
+
+
+
+ 1.17
+ Quantification of data arising from RNA-Seq high-throughput sequencing, typically the quantification of transcript abundances during transcriptome analysis in a gene expression study.
+ RNA-Seq quantitation
+
+
+ RNA-Seq quantification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.17
+ Match experimentally measured mass spectrum to a spectrum in a spectral library or database.
+
+
+ Spectral library search
+
+
+
+
+
+
+
+
+ 1.17
+ Sort a set of files or data items according to some property.
+
+
+ Sorting
+
+
+
+
+
+
+
+
+ 1.17
+ Mass spectra identification of compounds that are produced by living systems. Including polyketides, terpenoids, phenylpropanoids, alkaloids and antibiotics.
+ De novo metabolite identification
+ Fragmentation tree generation
+ Metabolite identification
+
+
+ Natural product identification
+
+
+
+
+
+
+
+
+ 1.19
+ Identify and assess specific genes or regulatory regions of interest that are differentially methylated.
+ Differentially-methylated region identification
+
+
+ DMR identification
+
+
+
+
+
+
+
+
+ 1.21
+
+
+ Genotyping of multiple loci, typically characterizing microbial species isolates using internal fragments of multiple housekeeping genes.
+ MLST
+
+
+ Multilocus sequence typing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.21
+ Calculate theoretical mass spectrometry spectra for given sequences.
+ Spectrum prediction
+
+
+ Spectrum calculation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ 3D visualization of a molecular trajectory.
+
+
+ Trajectory visualization
+
+
+
+
+
+
+
+
+
+ 1.22
+ Compute Essential Dynamics (ED) on a simulation trajectory: an analysis of molecule dynamics using PCA (Principal Component Analysis) applied to the atomic positional fluctuations.
+ ED
+ PCA
+ Principal modes
+
+
+ Principal Component Analysis (PCA) is a multivariate statistical analysis to obtain collective variables and reduce the dimensionality of the system.
+ Essential dynamics
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Obtain force field parameters (charge, bonds, dihedrals, etc.) from a molecule, to be used in molecular simulations.
+ Ligand parameterization
+ Molecule parameterization
+
+
+ Forcefield parameterisation
+
+
+
+
+
+
+
+
+ 1.22
+ Analyse DNA sequences in order to determine an individual's DNA characteristics, for example in criminal forensics, parentage testing and so on.
+ DNA fingerprinting
+ DNA profiling
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect active sites in proteins; the region of an enzyme which binds a substrate and catalyses a reaction.
+ Active site detection
+
+
+ Active site prediction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect ligand-binding sites in proteins; a region of a protein which reversibly binds a ligand for some biochemical purpose, such as transport or regulation of protein function.
+ Ligand-binding site detection
+ Peptide-protein binding prediction
+
+
+ Ligand-binding site prediction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect metal ion-binding sites in proteins.
+ Metal-binding site detection
+ Protein metal-binding site prediction
+
+
+ Metal-binding site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Model or simulate protein-protein binding using comparative modelling or other techniques.
+ Protein docking
+
+
+ Protein-protein docking
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict DNA-binding proteins.
+ DNA-binding protein detection
+ DNA-protein interaction prediction
+ Protein-DNA interaction prediction
+
+
+ DNA-binding protein prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Predict RNA-binding proteins.
+ Protein-RNA interaction prediction
+ RNA-binding protein detection
+ RNA-protein interaction prediction
+
+
+ RNA-binding protein prediction
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect RNA-binding sites in protein sequences.
+ Protein-RNA binding site detection
+ Protein-RNA binding site prediction
+ RNA binding site detection
+
+
+ RNA binding site prediction
+
+
+
+
+
+
+
+
+ 1.22
+ Predict or detect DNA-binding sites in protein sequences.
+ Protein-DNA binding site detection
+ Protein-DNA binding site prediction
+ DNA binding site detection
+
+
+ DNA binding site prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ Identify or predict intrinsically disordered regions in proteins.
+
+
+ Protein disorder prediction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Extract structured information from unstructured ("free") or semi-structured textual documents.
+ IE
+
+
+ Information extraction
+
+
+
+
+
+
+
+
+
+ 1.22
+ Retrieve resources from information systems matching a specific information need.
+
+
+ Information retrieval
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Study of genomic feature structure, variation, function and evolution at a genomic scale.
+ Genomic analysis
+ Genome analysis
+
+
+
+
+
+
+
+
+ 1.24
+ The determination of cytosine methylation status of specific positions in nucleic acid sequences (usually reads from a bisulfite sequencing experiment).
+
+
+ Methylation calling
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The identification of changes in DNA sequence or chromosome structure, usually in the context of diagnostic tests for disease, or to study ancestry or phylogeny.
+ Genetic testing
+
+
+ This can include indirect methods which reveal the results of genetic changes, such as RNA analysis to indicate gene expression, or biochemical analysis to identify expressed proteins.
+ DNA testing
+
+
+
+
+
+
+
+
+
+ 1.24
+ The processing of reads from high-throughput sequencing machines.
+
+
+ Sequence read processing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Render (visualise) a network - typically a biological network of some sort.
+ Network rendering
+ Protein interaction network rendering
+ Protein interaction network visualisation
+ Network visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Render (visualise) a biological pathway.
+ Pathway rendering
+
+
+ Pathway visualisation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Generate, process or analyse a biological network.
+ Biological network analysis
+ Biological network modelling
+ Biological network prediction
+ Network comparison
+ Network modelling
+ Network prediction
+ Network simulation
+ Network topology simulation
+
+
+ Network analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Generate, process or analyse a biological pathway.
+ Biological pathway analysis
+ Biological pathway modelling
+ Biological pathway prediction
+ Functional pathway analysis
+ Pathway comparison
+ Pathway modelling
+ Pathway prediction
+ Pathway simulation
+
+
+ Pathway analysis
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Predict a metabolic pathway.
+
+
+ Metabolic pathway prediction
+
+
+
+
+
+
+
+
+ 1.24
+ Assigning sequence reads to separate groups / files based on their index tag (sample origin).
+ Sequence demultiplexing
+
+
+ NGS sequence runs are often performed with multiple samples pooled together. In such cases, an index tag (or "barcode") - a unique sequence of between 6 and 12bp - is ligated to each sample's genetic material so that the sequence reads from different samples can be identified. The process of demultiplexing (dividing sequence reads into separate files for each index tag/sample) may be performed automatically by the sequencing hardware. Alternatively the reads may be lumped together in one file with barcodes still attached, requiring you to do the splitting using software. In such cases, a "mapping" file is used which indicates which barcodes correspond to which samples.
+ Demultiplexing
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A process used in statistics, machine learning, and information theory that reduces the number of random variables by obtaining a set of principal variables.
+ Dimension reduction
+
+
+ Dimensionality reduction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A dimensionality reduction process that selects a subset of relevant features (variables, predictors) for use in model construction.
+ Attribute selection
+ Variable selection
+ Variable subset selection
+
+
+ Feature selection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ A dimensionality reduction process which builds (ideally) informative and non-redundant values (features) from an initial set of measured data, to aid subsequent generalization, learning or interpretation.
+ Feature projection
+
+
+ Feature extraction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Virtual screening is used in drug discovery to identify potential drug compounds. It involves searching libraries of small molecules in order to identify those molecules which are most likely to bind to a drug target (typically a protein receptor or enzyme).
+ Ligand-based screening
+ Ligand-based virtual screening
+ Structure-based screening
+ Structure-based virtual screening
+ Virtual ligand screening
+
+
+ Virtual screening is widely used for lead identification, lead optimization, and scaffold hopping during drug design and discovery.
+ Virtual screening
+
+
+
+
+
+
+
+
+ 1.24
+ The application of phylogenetic and other methods to estimate paleogeographical events such as speciation.
+ Biogeographic dating
+ Speciation dating
+ Species tree dating
+ Tree-dating
+
+
+ Tree dating
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The development and use of mathematical models and systems analysis for the description of ecological processes, and applications such as the sustainable management of resources.
+
+
+ Ecological modelling
+
+
+
+
+
+
+
+
+ 1.24
+ Mapping between gene tree nodes and species tree nodes or branches, to analyse and account for possible differences between gene histories and species histories, explaining this in terms of gene-scale events such as duplication, loss, transfer etc.
+ Gene tree / species tree reconciliation
+
+
+ Methods typically test for topological similarity between trees using for example a congruence index.
+ Phylogenetic tree reconciliation
+
+
+
+
+
+
+
+
+ 1.24
+ The detection of genetic selection, or (the end result of) the process by which certain traits become more prevalent in a species than other traits.
+
+
+ Selection detection
+
+
+
+
+
+
+
+
+ 1.25
+ A statistical procedure that uses an orthogonal transformation to convert a set of observations of possibly correlated variables into a set of values of linearly uncorrelated variables called principal components.
+
+
+ Principal component analysis
+
+
+
+
+
+
+
+
+
+ 1.25
+ Identify where sections of the genome are repeated and the number of repeats in the genome varies between individuals.
+ CNV detection
+
+
+ Copy number variation detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify deletion events causing the number of repeats in the genome to vary between individuals.
+
+
+ Deletion detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify duplication events causing the number of repeats in the genome to vary between individuals.
+
+
+ Duplication detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify copy number variations which are complex, e.g. multi-allelic variations that have many structural alleles and have rearranged multiple times in the ancestral genomes.
+
+
+ Complex CNV detection
+
+
+
+
+
+
+
+
+ 1.25
+ Identify amplification events causing the number of repeats in the genome to vary between individuals.
+
+
+ Amplification detection
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.25
+ Predict adhesins in protein sequences.
+
+
+ An adhesin is a cell-surface component that facilitates the adherence of a microorganism to a cell or surface. Adhesins are important virulence factors during establishment of infection and thus are targeted during vaccine development approaches that seek to block adhesin function and prevent adherence to host cells.
+ Adhesin prediction
+
+
+
+
+
+
+
+
+ 1.25
+ Design new protein molecules with specific structural or functional properties.
+ Protein redesign
+ Rational protein design
+ de novo protein design
+
+
+ Protein design
+
+
+
+
+
+
+
+
+
+ 1.25
+ The design of small molecules with specific biological activity, such as inhibitors or modulators for proteins that are of therapeutic interest. This can involve the modification of individual atoms, the addition or removal of molecular fragments, and the use of reaction-based design to explore tractable synthesis options for the small molecule.
+ Drug design
+ Ligand-based drug design
+ Structure-based drug design
+ Structure-based small molecule design
+ Small molecule design can involve assessment of target druggability and flexibility, molecular docking, in silico fragment screening, molecular dynamics, and homology modeling.
+ There are two broad categories of small molecule design techniques when applied to the design of drugs: ligand-based drug design (e.g. ligand similarity) and structure-based drug design (ligand docking) methods. Ligand similarity methods exploit structural similarities to known active ligands, whereas ligand docking methods use the 3D structure of a target protein to predict the binding modes and affinities of ligands to it.
+ Small molecule design
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The estimation of the power of a test; that is the probability of correctly rejecting the null hypothesis when it is false.
+ Estimation of statistical power
+ Power analysis
+
+
+ Power test
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The prediction of DNA modifications (e.g. N4-methylcytosine and N6-Methyladenine) using, for example, statistical models.
+
+
+ DNA modification prediction
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The analysis and simulation of disease transmission using, for example, statistical methods such as the SIR-model.
+
+
+ Disease transmission analysis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The correction of p-values from multiple statistical tests to correct for false positives.
+ FDR estimation
+ False discovery rate estimation
+
+
+ Multiple testing correction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A category denoting a rather broad domain or field of interest, of study, application, work, data, or technology. Topics have no clearly defined borders between each other.
+ sumo:FieldOfStudy
+
+
+ Topic
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The processing and analysis of nucleic acid sequence, structural and other data.
+ Nucleic acid bioinformatics
+ Nucleic acid informatics
+ Nucleic_acids
+ Nucleic acid physicochemistry
+ Nucleic acid properties
+
+
+ Nucleic acids
+
+ http://purl.bioontology.org/ontology/MSH/D017422
+ http://purl.bioontology.org/ontology/MSH/D017423
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Archival, processing and analysis of protein data, typically molecular sequence and structural data.
+ Protein bioinformatics
+ Protein informatics
+ Proteins
+ Protein databases
+
+
+ Proteins
+
+ http://purl.bioontology.org/ontology/MSH/D020539
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The structures of reactants or products of metabolism, for example small molecules such as vitamins, polyols, nucleotides and amino acids.
+
+
+ Metabolites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The archival, processing and analysis of molecular sequences (monomer composition of polymers) including molecular sequence data resources, sequence sites, alignments, motifs and profiles.
+ Sequence_analysis
+ Biological sequences
+ Sequence databases
+
+
+
+ Sequence analysis
+
+ http://purl.bioontology.org/ontology/MSH/D017421
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The curation, processing, analysis and prediction of data about the structure of biological molecules, typically proteins and nucleic acids and other macromolecules.
+ Biomolecular structure
+ Structural bioinformatics
+ Structure_analysis
+ Computational structural biology
+ Molecular structure
+ Structure data resources
+ Structure databases
+ Structures
+
+
+
+ This includes related concepts such as structural properties, alignments and structural motifs.
+ Structure analysis
+
+ http://purl.bioontology.org/ontology/MSH/D015394
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The prediction of molecular structure, including the prediction, modelling, recognition or design of protein secondary or tertiary structure or other structural features, and the folding of nucleic acid molecules and the prediction or design of nucleic acid (typically RNA) sequences with specific conformations.
+ Structure_prediction
+ DNA structure prediction
+ Nucleic acid design
+ Nucleic acid folding
+ Nucleic acid structure prediction
+ Protein fold recognition
+ Protein structure prediction
+ RNA structure prediction
+
+
+ This includes the recognition (prediction and assignment) of known protein structural domains or folds in protein sequence(s), for example by threading, or the alignment of molecular sequences to structures, structural (3D) profiles or templates (representing a structure or structure alignment).
+ Structure prediction
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ The alignment (equivalence between sites) of molecular sequences, structures or profiles (representing a sequence or structure alignment).
+
+ Alignment
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of evolutionary relationships amongst organisms.
+ Phylogeny
+ Phylogenetic clocks
+ Phylogenetic dating
+ Phylogenetic simulation
+ Phylogenetic stratigraphy
+ Phylogeny reconstruction
+
+
+
+ This includes diverse phylogenetic methods, including phylogenetic tree construction, typically from molecular sequence or morphological data, methods that simulate DNA sequence evolution, a phylogenetic tree or the underlying data, or which estimate or use molecular clock and stratigraphic (age) data, methods for studying gene evolution etc.
+ Phylogeny
+
+ http://purl.bioontology.org/ontology/MSH/D010802
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of gene or protein functions and their interactions in totality in a given organism, tissue, cell etc.
+ Functional_genomics
+
+
+
+ Functional genomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The conceptualisation, categorisation and nomenclature (naming) of entities or phenomena within biology or bioinformatics. This includes formal ontologies, controlled vocabularies, structured glossary, symbols and terminology or other related resource.
+ Ontology_and_terminology
+ Applied ontology
+ Ontologies
+ Ontology
+ Ontology relations
+ Terminology
+ Upper ontology
+
+
+
+ Ontology and terminology
+
+ http://purl.bioontology.org/ontology/MSH/D002965
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+
+ The search and query of data sources (typically databases or ontologies) in order to retrieve entries or other information.
+
+ Information retrieval
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.6 Bioinformatics
+ The archival, curation, processing and analysis of complex biological data.
+ Bioinformatics
+
+
+
+ This includes data processing in general, including basic handling of files and databases, datatypes, workflows and annotation.
+ Bioinformatics
+
+ http://purl.bioontology.org/ontology/MSH/D016247
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Computer graphics
+ VT 1.2.5 Computer graphics
+ Rendering (drawing on a computer screen) or visualisation of molecular sequences, structures or other biomolecular data.
+ Data rendering
+ Data_visualisation
+
+
+ Data visualisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The study of the thermodynamic properties of a nucleic acid.
+
+ Nucleic acid thermodynamics
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The archival, curation, processing and analysis of nucleic acid structural information, such as whole structures, structural features and alignments, and associated annotation.
+ Nucleic acid structure
+ Nucleic_acid_structure_analysis
+ DNA melting
+ DNA structure
+ Nucleic acid denaturation
+ Nucleic acid thermodynamics
+ RNA alignment
+ RNA structure
+ RNA structure alignment
+
+
+ Includes secondary and tertiary nucleic acid structural data, nucleic acid thermodynamic, thermal and conformational properties including DNA or DNA/RNA denaturation (melting) etc.
+ Nucleic acid structure analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ RNA sequences and structures.
+ RNA
+ Small RNA
+
+
+ RNA
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Topic for the study of restriction enzymes, their cleavage sites and the restriction of nucleic acids.
+
+ Nucleic acid restriction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The mapping of complete (typically nucleotide) sequences. Mapping (in the sense of short read alignment, or more generally, just alignment) has application in RNA-Seq analysis (mapping of transcriptomics reads), variant discovery (e.g. mapping of exome capture), and re-sequencing (mapping of WGS reads).
+ Mapping
+ Genetic linkage
+ Linkage
+ Linkage mapping
+ Synteny
+
+
+ This includes resources that aim to identify, map or analyse genetic markers in DNA sequences, for example to produce a genetic (linkage) map of a chromosome or genome or to analyse genetic linkage and synteny. It also includes resources for physical (sequence) maps of a DNA sequence showing the physical distance (base pairs) between features or landmarks such as restriction sites, cloned DNA fragments, genes and other genetic markers. It also covers for example the alignment of sequences of (typically millions of) short reads to a reference genome.
+ Mapping
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The study of codon usage in nucleotide sequence(s), genetic codes and so on.
+
+ Genetic codes and codon usage
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The translation of mRNA into protein and subsequent protein processing in the cell.
+ Protein_expression
+ Translation
+
+
+
+ Protein expression
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Methods that aim to identify, predict, model or analyse genes or gene structure in DNA sequences.
+
+ This includes the study of promoters, coding regions, splice sites, etc. Methods for gene prediction might be ab initio, based on phylogenetic comparisons, use motifs, sequence features, support vector machine, alignment etc.
+ Gene finding
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The transcription of DNA into mRNA.
+
+ Transcription
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Promoters in DNA sequences (region of DNA that facilitates the transcription of a particular gene by binding RNA polymerase and transcription factor proteins).
+
+ Promoters
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ The folding (in 3D space) of nucleic acid molecules.
+
+
+ Nucleic acid folding
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Gene structure, regions which make an RNA product and features such as promoters, coding regions, gene fusion, splice sites etc.
+ Gene features
+ Gene_structure
+ Fusion genes
+
+
+ This includes operons (operators, promoters and genes) from a bacterial genome. For example the operon leader and trailer gene, gene composition of the operon and associated information.
+ This includes the study of promoters, coding regions etc.
+ Gene structure
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein and peptide identification, especially in the study of whole proteomes of organisms.
+ Proteomics
+ Bottom-up proteomics
+ Discovery proteomics
+ MS-based targeted proteomics
+ MS-based untargeted proteomics
+ Metaproteomics
+ Peptide identification
+ Protein and peptide identification
+ Quantitative proteomics
+ Targeted proteomics
+ Top-down proteomics
+
+
+
+ Includes metaproteomics: proteomics analysis of an environmental sample.
+ Proteomics includes any methods (especially high-throughput) that separate, characterize and identify expressed proteins such as mass spectrometry, two-dimensional gel electrophoresis and protein microarrays, as well as in-silico methods that perform proteolytic or mass calculations on a protein sequence and other analyses of protein production data, for example in different cells or tissues.
+ Proteomics
+
+ http://purl.bioontology.org/ontology/MSH/D040901
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The elucidation of the three dimensional structure for all (available) proteins in a given organism.
+ Structural_genomics
+
+
+
+ Structural genomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of the physical and biochemical properties of peptides and proteins, for example the hydrophobic, hydrophilic and charge properties of a protein.
+ Protein physicochemistry
+ Protein_properties
+ Protein hydropathy
+
+
+ Protein properties
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein-protein, protein-DNA/RNA and protein-ligand interactions, including analysis of known interactions and prediction of putative interactions.
+ Protein_interactions
+ Protein interaction map
+ Protein interaction networks
+ Protein interactome
+ Protein-DNA interaction
+ Protein-DNA interactions
+ Protein-RNA interaction
+ Protein-RNA interactions
+ Protein-ligand interactions
+ Protein-nucleic acid interactions
+ Protein-protein interactions
+
+
+ This includes experimental (e.g. yeast two-hybrid) and computational analysis techniques.
+ Protein interactions
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein stability, folding (in 3D space) and protein sequence-structure-function relationships. This includes for example study of inter-atomic or inter-residue interactions in protein (3D) structures, the effect of mutation, and the design of proteins with specific properties, typically by designing changes (via site-directed mutagenesis) to an existing protein.
+ Protein_folding_stability_and_design
+ Protein design
+ Protein folding
+ Protein residue interactions
+ Protein stability
+ Rational protein design
+
+
+ Protein folding, stability and design
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Two-dimensional gel electrophoresis image and related data.
+
+ Two-dimensional gel electrophoresis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ An analytical chemistry technique that measures the mass-to-charge ratio and abundance of ions in the gas phase.
+
+
+ Mass spectrometry
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Protein microarray data.
+
+ Protein microarrays
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The study of the hydrophobic, hydrophilic and charge properties of a protein.
+
+ Protein hydropathy
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The study of how proteins are transported within and without the cell, including signal peptides, protein subcellular localisation and export.
+ Protein_targeting_and_localisation
+ Protein localisation
+ Protein sorting
+ Protein targeting
+
+
+ Protein targeting and localisation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Enzyme or chemical cleavage sites and proteolytic or mass calculations on a protein sequence.
+
+ Protein cleavage sites and proteolysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ The comparison of two or more protein structures.
+
+
+ Use this concept for methods that are exclusively for protein structure.
+ Protein structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The processing and analysis of inter-atomic or inter-residue interactions in protein (3D) structures.
+
+ Protein residue interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein-protein interactions, individual interactions and networks, protein complexes, protein functional coupling etc.
+
+ Protein-protein interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein-ligand (small molecule) interactions.
+
+ Protein-ligand interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein-DNA/RNA interactions.
+
+ Protein-nucleic acid interactions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The design of proteins with specific properties, typically by designing changes (via site-directed mutagenesis) to an existing protein.
+
+ Protein design
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ G-protein coupled receptors (GPCRs).
+
+ G protein-coupled receptors (GPCR)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Carbohydrates, typically including structural information.
+ Carbohydrates
+
+
+ Carbohydrates
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Lipids and their structures.
+ Lipidomics
+ Lipids
+
+
+ Lipids
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Small molecules of biological significance, typically archival, curation, processing and analysis of structural information.
+ Small_molecules
+ Amino acids
+ Chemical structures
+ Drug structures
+ Drug targets
+ Drugs and target structures
+ Metabolite structures
+ Peptides
+ Peptides and amino acids
+ Target structures
+ Targets
+ Toxins
+ Toxins and targets
+ CHEBI:23367
+
+
+ Small molecules include organic molecules, metal-organic compounds, small polypeptides, small polysaccharides and oligonucleotides. Structural data is usually included.
+ This concept excludes macromolecules such as proteins and nucleic acids.
+ This includes the structures of drugs, drug target, their interactions and binding affinities. Also the structures of reactants or products of metabolism, for example small molecules such as vitamins, polyols, nucleotides and amino acids. Also the physicochemical, biochemical or structural properties of amino acids or peptides. Also structural and associated data for toxic chemical substances.
+ Small molecules
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Edit, convert or otherwise change a molecular sequence, either randomly or specifically.
+
+ Sequence editing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The archival, processing and analysis of the basic character composition of molecular sequences, for example character or word frequency, ambiguity, complexity, particularly regions of low complexity, and repeats or the repetitive nature of molecular sequences.
+ Sequence_composition_complexity_and_repeats
+ Low complexity sequences
+ Nucleic acid repeats
+ Protein repeats
+ Protein sequence repeats
+ Repeat sequences
+ Sequence complexity
+ Sequence composition
+ Sequence repeats
+
+
+ This includes repetitive elements within a nucleic acid sequence, e.g. long terminal repeats (LTRs); sequences (typically retroviral) directly repeated at both ends of a sequence and other types of repeating unit.
+ This includes short repetitive subsequences (repeat sequences) in a protein sequence.
+ Sequence composition, complexity and repeats
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Conserved patterns (motifs) in molecular sequences, that (typically) describe functional or other key sites.
+
+ Sequence motifs
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The comparison of two or more molecular sequences, for example sequence alignment and clustering.
+
+
+ The comparison might be on the basis of sequence, physico-chemical or some other properties of the sequences.
+ Sequence comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The archival, detection, prediction and analysis of positional features such as functional and other key sites, in molecular sequences and the conserved patterns (motifs, profiles etc.) that may be used to describe them.
+ Sequence_sites_features_and_motifs
+ Functional sites
+ HMMs
+ Sequence features
+ Sequence motifs
+ Sequence profiles
+ Sequence sites
+
+
+ Sequence sites, features and motifs
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Search and retrieve molecular sequences that are similar to a sequence-based query (typically a simple sequence).
+
+ The query is a sequence-based entity such as another sequence, a motif or profile.
+ Sequence database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The comparison and grouping together of molecular sequences on the basis of their similarities.
+
+
+ This includes systems that generate, process and analyse sequence clusters.
+ Sequence clustering
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Structural features or common 3D motifs within protein structures, including the surface of a protein structure, such as biological interfaces with other molecules.
+ Protein 3D motifs
+ Protein_structural_motifs_and_surfaces
+ Protein structural features
+ Protein structural motifs
+ Protein surfaces
+ Structural motifs
+
+
+ This includes conformation of conserved substructures, conserved geometry (spatial arrangement) of secondary structure or protein backbone, solvent-exposed surfaces, internal cavities, the analysis of shape, hydropathy, electrostatic patches, role and functions etc.
+ Protein structural motifs and surfaces
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The processing, analysis or use of some type of structural (3D) profile or template; a computational entity (typically a numerical matrix) that is derived from and represents a structure or structure alignment.
+
+ Structural (3D) profiles
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The prediction, modelling, recognition or design of protein secondary or tertiary structure or other structural features.
+
+
+ Protein structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The folding of nucleic acid molecules and the prediction or design of nucleic acid (typically RNA) sequences with specific conformations.
+
+
+ Nucleic acid structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The prediction of three-dimensional structure of a (typically protein) sequence from first principles, using a physics-based or empirical scoring function and without using explicit structural templates.
+
+
+ Ab initio structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.4
+
+
+ The modelling of the three-dimensional structure of a protein using known sequence and structural data.
+
+ Homology modelling
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Molecular flexibility
+ Molecular motions
+ The study and simulation of molecular (typically protein) conformation using a computational model of physical forces and computer simulation.
+ Molecular_dynamics
+ Protein dynamics
+
+
+ This includes methods such as Molecular Dynamics, Coarse-grained dynamics, metadynamics, Quantum Mechanics, QM/MM, Markov State Models, etc. This includes resources concerning flexibility and motion in protein and other molecular structures.
+ Molecular dynamics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ 1.12
+
+ The modelling of the structure of proteins in complex with small molecules or other macromolecules.
+
+
+ Molecular docking
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The prediction of secondary or supersecondary structure of protein sequences.
+
+
+ Protein secondary structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The prediction of tertiary structure of protein sequences.
+
+
+ Protein tertiary structure prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The recognition (prediction and assignment) of known protein structural domains or folds in protein sequence(s).
+
+
+ Protein fold recognition
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The alignment of molecular sequences or sequence profiles (representing sequence alignments).
+
+
+ This includes the generation of alignments (the identification of equivalent sites), the analysis of alignments, editing, visualisation, alignment databases, the alignment (equivalence between sites) of sequence profiles (representing sequence alignments) and so on.
+ Sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.7
+
+ The superimposition of molecular tertiary structures or structural (3D) profiles (representing a structure or structure alignment).
+
+
+ This includes the generation, storage, analysis, rendering etc. of structure alignments.
+ Structure alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The alignment of molecular sequences to structures, structural (3D) profiles or templates (representing a structure or structure alignment).
+
+
+ Threading
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Sequence profiles; typically a positional, numerical matrix representing a sequence alignment.
+
+ Sequence profiles include position-specific scoring matrix (position weight matrix), hidden Markov models etc.
+ Sequence profiles and HMMs
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The reconstruction of a phylogeny (evolutionary relatedness amongst organisms), for example, by building a phylogenetic tree.
+
+ Currently too specific for the topic sub-ontology (but might be unobsoleted).
+ Phylogeny reconstruction
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The integrated study of evolutionary relationships and whole genome data, for example, in the analysis of species trees, horizontal gene transfer and evolutionary reconstruction.
+ Phylogenomics
+
+
+
+ Phylogenomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Simulated polymerase chain reaction (PCR).
+
+ Virtual PCR
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The assembly of fragments of a DNA sequence to reconstruct the original sequence.
+ Sequence_assembly
+ Assembly
+
+
+ Assembly has two broad types, de-novo and re-sequencing. Re-sequencing is a specialised case of assembly, where an assembled (typically de-novo assembled) reference genome is available and is about 95% identical to the re-sequenced genome. All other cases of assembly are 'de-novo'.
+ Sequence assembly
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Stable, naturally occurring mutations in a nucleotide sequence including alleles, naturally occurring mutations such as single base nucleotide substitutions, deletions and insertions, RFLPs and other polymorphisms.
+ DNA variation
+ Genetic_variation
+ Genomic variation
+ Mutation
+ Polymorphism
+ Somatic mutations
+
+
+ Genetic variation
+
+ http://purl.bioontology.org/ontology/MSH/D014644
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Microarrays, for example, to process microarray data or design probes and experiments.
+
+ Microarrays
+ http://purl.bioontology.org/ontology/MSH/D046228
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.7 Pharmacology and pharmacy
+ The study of drugs and their effects or responses in living systems.
+ Pharmacology
+ Computational pharmacology
+ Pharmacoinformatics
+
+
+
+ Pharmacology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ http://edamontology.org/topic_0197
+ The analysis of levels and patterns of synthesis of gene products (proteins and functional RNA) including interpretation in functional terms of gene expression data.
+ Expression
+ Gene_expression
+ Codon usage
+ DNA chips
+ DNA microarrays
+ Gene expression profiling
+ Gene transcription
+ Gene translation
+ Transcription
+
+
+
+ Gene expression levels are analysed by identifying, quantifying or comparing mRNA transcripts, for example using microarrays, RNA-seq, northern blots, gene-indexed expression profiles etc.
+ This includes the study of codon usage in nucleotide sequence(s), genetic codes and so on.
+ Gene expression
+
+ http://purl.bioontology.org/ontology/MSH/D015870
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The regulation of gene expression.
+ Regulatory genomics
+
+
+ Gene regulation
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The influence of genotype on drug response, for example by correlating gene expression or single-nucleotide polymorphisms with drug efficacy or toxicity.
+ Pharmacogenomics
+ Pharmacogenetics
+
+
+
+ Pharmacogenomics
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.4 Medicinal chemistry
+ The design and chemical synthesis of bioactive molecules, for example drugs or potential drug compounds, for medicinal purposes.
+ Drug design
+ Medicinal_chemistry
+
+
+
+ This includes methods that search compound collections, generate or analyse drug 3D conformations, identify drug targets with structural docking etc.
+ Medicinal chemistry
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Information on a specific fish genome including molecular sequences, genes and annotation.
+
+ Fish
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Information on a specific fly genome including molecular sequences, genes and annotation.
+
+ Flies
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Information on a specific mouse or rat genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a group of mice / rats or all mice / rats.
+ Mice or rats
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Information on a specific worm genome including molecular sequences, genes and annotation.
+
+ Worms
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ The processing and analysis of the bioinformatics literature and bibliographic data, such as literature search and query.
+
+
+ Literature analysis
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The processing and analysis of natural language, such as scientific literature in English, in order to extract data and information, or to enable human-computer interaction.
+ NLP
+ Natural_language_processing
+ BioNLP
+ Literature mining
+ Text analytics
+ Text data mining
+ Text mining
+
+
+
+ Natural language processing
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Deposition and curation of database accessions, including annotation, typically with terms from a controlled vocabulary.
+ Data_submission_annotation_and_curation
+ Data curation
+ Data provenance
+ Database curation
+
+
+
+ Data submission, annotation, and curation
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The management and manipulation of digital documents, including database records, files and reports.
+
+
+ Document, record and content management
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Annotation of a molecular sequence.
+
+ Sequence annotation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+
+ Annotation of a genome.
+
+ Genome annotation
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Spectroscopy
+ An analytical technique that exploits the magnetic properties of certain atomic nuclei to provide information on the structure, dynamics, reaction state and chemical environment of molecules.
+ NMR spectroscopy
+ Nuclear magnetic resonance spectroscopy
+ NMR
+ HOESY
+ Heteronuclear Overhauser Effect Spectroscopy
+ NOESY
+ Nuclear Overhauser Effect Spectroscopy
+ ROESY
+ Rotational Frame Nuclear Overhauser Effect Spectroscopy
+
+
+
+ NMR
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.12
+
+ The classification of molecular sequences based on some measure of their similarity.
+
+
+ Methods including sequence motifs, profile and other diagnostic elements which (typically) represent conserved patterns (of residues or properties) in molecular sequences.
+ Sequence classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ primarily the classification of proteins (from sequence or structural data) into clusters, groups, families etc.
+
+ Protein classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Sequence motifs, or sequence profiles derived from an alignment of molecular sequences of a particular type.
+
+ This includes comparison, discovery, recognition etc. of sequence motifs.
+ Sequence motif or profile
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein chemical modifications, e.g. post-translational modifications.
+ PTMs
+ Post-translational modifications
+ Protein post-translational modification
+ Protein_modifications
+ Post-translation modifications
+ Protein chemical modifications
+ Protein post-translational modifications
+ GO:0006464
+ MOD:00000
+
+
+ EDAM does not describe all possible protein modifications. For fine-grained annotation of protein modification use the Gene Ontology (children of concept GO:0006464) and/or the Protein Modifications ontology (children of concept MOD:00000)
+ Protein modifications
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ http://edamontology.org/topic_3076
+ Molecular interactions, biological pathways, networks and other models.
+ Molecular_interactions_pathways_and_networks
+ Biological models
+ Biological networks
+ Biological pathways
+ Cellular process pathways
+ Disease pathways
+ Environmental information processing pathways
+ Gene regulatory networks
+ Genetic information processing pathways
+ Interactions
+ Interactome
+ Metabolic pathways
+ Molecular interactions
+ Networks
+ Pathways
+ Signal transduction pathways
+ Signaling pathways
+
+
+
+ Molecular interactions, pathways and networks
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.3 Information sciences
+ VT 1.3.3 Information retrieval
+ VT 1.3.4 Information management
+ VT 1.3.5 Knowledge management
+ VT 1.3.99 Other
+ The study and practice of information processing and use of computer information systems.
+ Information management
+ Information science
+ Knowledge management
+ Informatics
+
+
+ Informatics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ Data resources for the biological or biomedical literature, either a primary source of literature or some derivative.
+
+
+ Literature data resources
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Laboratory management and resources, for example, catalogues of biological resources for use in the lab including cell lines, viruses, plasmids, phages, DNA probes and primers and so on.
+ Laboratory_Information_management
+ Laboratory resources
+
+
+
+ Laboratory information management
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ General cell culture or data on specific cell lines.
+
+ Cell and tissue culture
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.15 Ecology
+ The ecological and environmental sciences and especially the application of information technology (ecoinformatics).
+ Ecology
+ Computational ecology
+ Ecoinformatics
+ Ecological informatics
+ Ecosystem science
+
+
+
+ Ecology
+
+ http://purl.bioontology.org/ontology/MSH/D004777
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Electron diffraction experiment
+ The study of matter by studying the interference pattern from firing electrons at a sample, to analyse structures at resolutions higher than can be achieved using light.
+ Electron_microscopy
+ Electron crystallography
+ SEM
+ Scanning electron microscopy
+ Single particle electron microscopy
+ TEM
+ Transmission electron microscopy
+
+
+
+ Electron microscopy
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ The cell cycle including key genes and proteins.
+
+ Cell cycle
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The physicochemical, biochemical or structural properties of amino acids or peptides.
+
+
+ Peptides and amino acids
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ A specific organelle, or organelles in general, typically the genes and proteins (or genome and proteome).
+
+ Organelles
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Ribosomes, typically of ribosome-related genes and proteins.
+
+ Ribosomes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A database about scents.
+
+ Scents
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The structures of drugs, drug target, their interactions and binding affinities.
+
+
+ Drugs and target structures
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ A specific organism, or group of organisms, used to study a particular aspect of biology.
+ Organisms
+ Model_organisms
+
+
+
+ This may include information on the genome (including molecular sequences and map, genes and annotation), proteome, as well as more general information about an organism.
+ Model organisms
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Whole genomes of one or more organisms, or genomes in general, such as meta-information on genomes, genome projects, gene names etc.
+ Genomics
+ Exomes
+ Genome annotation
+ Genomes
+ Personal genomics
+ Synthetic genomics
+ Viral genomics
+ Whole genomes
+
+
+
+ Genomics
+
+ http://purl.bioontology.org/ontology/MSH/D023281
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Particular gene(s), gene family or other gene group or system and their encoded proteins. Primarily the classification of proteins (from sequence or structural data) into clusters, groups, families etc., curation of a particular protein or protein family, or any other proteins that have been classified as members of a common group.
+ Genes, gene family or system
+ Gene_and protein_families
+ Gene families
+ Gene family
+ Gene system
+ Protein families
+ Protein sequence classification
+
+
+
+ A protein families database might include the classifier (e.g. a sequence profile) used to build the classification.
+ Gene and protein families
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Study of chromosomes.
+
+
+ Chromosomes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of genetic constitution of a living entity, such as an individual, an organism, a cell and so on, typically with respect to particular observable phenotypic traits, or resources concerning such traits, which might be an aspect of biochemistry, physiology, morphology, anatomy, development and so on.
+ Genotype and phenotype resources
+ Genotype-phenotype
+ Genotype-phenotype analysis
+ Genotype_and_phenotype
+ Genotype
+ Genotyping
+ Phenotype
+ Phenotyping
+
+
+
+ Genotype and phenotype
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Gene expression e.g. microarray data, northern blots, gene-indexed expression profiles etc.
+
+ Gene expression and microarray
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Molecular probes (e.g. a peptide probe or DNA microarray probe) or PCR primers and hybridisation oligos in a nucleic acid sequence.
+ Probes_and_primers
+ Primer quality
+ Primers
+ Probes
+
+
+ This includes the design of primers for PCR and DNA amplification or the design of molecular probes.
+ Probes and primers
+ http://purl.bioontology.org/ontology/MSH/D015335
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.6 Pathology
+ Diseases, including diseases in general and the genes, gene variations and proteins involved in one or more specific diseases.
+ Disease
+ Pathology
+
+
+
+ Pathology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ A particular protein, protein family or other group of proteins.
+
+ Specific protein resources
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.25 Taxonomy
+ Organism classification, identification and naming.
+ Taxonomy
+
+
+ Taxonomy
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Archival, processing and analysis of protein sequences and sequence-based entities such as alignments, motifs and profiles.
+
+
+ Protein sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ The archival, processing and analysis of nucleotide sequences and sequence-based entities such as alignments, motifs and profiles.
+
+
+ Nucleic acid sequence analysis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The repetitive nature of molecular sequences.
+
+ Repeat sequences
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The (character) complexity of molecular sequences, particularly regions of low complexity.
+
+ Low complexity sequences
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ A specific proteome including protein sequences and annotation.
+
+ Proteome
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA sequences and structure, including processes such as methylation and replication.
+ DNA analysis
+ DNA
+ Ancient DNA
+ Chromosomes
+
+
+ The DNA sequences might be coding or non-coding sequences.
+ DNA
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Protein-coding regions including coding sequences (CDS), exons, translation initiation sites and open reading frames.
+
+
+ Coding RNA
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Non-coding or functional RNA sequences, including regulatory RNA sequences, ribosomal RNA (rRNA) and transfer RNA (tRNA).
+ Functional_regulatory_and_non-coding_RNA
+ Functional RNA
+ Long ncRNA
+ Long non-coding RNA
+ Non-coding RNA
+ Regulatory RNA
+ Small and long non-coding RNAs
+ Small interfering RNA
+ Small ncRNA
+ Small non-coding RNA
+ Small nuclear RNA
+ Small nucleolar RNA
+ lncRNA
+ miRNA
+ microRNA
+ ncRNA
+ piRNA
+ piwi-interacting RNA
+ siRNA
+ snRNA
+ snoRNA
+
+
+ Non-coding RNA includes piwi-interacting RNA (piRNA), small nuclear RNA (snRNA) and small nucleolar RNA (snoRNA). Regulatory RNA includes microRNA (miRNA) - short single stranded RNA molecules that regulate gene expression, and small interfering RNA (siRNA).
+ Functional, regulatory and non-coding RNA
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ One or more ribosomal RNA (rRNA) sequences.
+
+ rRNA
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ One or more transfer RNA (tRNA) sequences.
+
+ tRNA
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Protein secondary structure or secondary structure alignments.
+
+
+ This includes assignment, analysis, comparison, prediction, rendering etc. of secondary structure data.
+ Protein secondary structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ RNA secondary or tertiary structure and alignments.
+
+ RNA structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.8
+
+ Protein tertiary structures.
+
+
+ Protein tertiary structure
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Classification of nucleic acid sequences and structures.
+
+ Nucleic acid classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.14
+
+ Primarily the classification of proteins (from sequence or structural data) into clusters, groups, families etc., curation of a particular protein or protein family, or any other proteins that have been classified as members of a common group.
+
+
+ Protein families
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Protein tertiary structural domains and folds in a protein or polypeptide chain.
+ Protein_folds_and_structural_domains
+ Intramembrane regions
+ Protein domains
+ Protein folds
+ Protein membrane regions
+ Protein structural domains
+ Protein topological domains
+ Protein transmembrane regions
+ Transmembrane regions
+
+
+ This includes topological domains such as cytoplasmic regions in a protein.
+ This includes trans- or intra-membrane regions of a protein, typically describing physicochemical properties of the secondary structure elements. For example, the location and size of the membrane spanning segments and intervening loop regions, transmembrane region IN/OUT orientation relative to the membrane, plus the following data for each amino acid: A Z-coordinate (the distance to the membrane center), the free energy of membrane insertion (calculated in a sliding window over the sequence) and a reliability score. The z-coordinate implies information about re-entrant helices, interfacial helices, the tilt of a transmembrane helix and loop lengths.
+ Protein folds and structural domains
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+ Nucleotide sequence alignments.
+
+
+ Nucleic acid sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein sequence alignments.
+
+ A sequence profile typically represents a sequence alignment.
+ Protein sequence alignment
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+
+ The archival, detection, prediction and analysis of positional features such as functional sites in nucleotide sequences.
+
+ Nucleic acid sites and features
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+
+ The detection, identification and analysis of positional features in proteins, such as functional sites.
+
+ Protein sites and features
+ true
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Proteins that bind to DNA and control transcription of DNA to mRNA (transcription factors) and also transcriptional regulatory sites, elements and regions (such as promoters, enhancers, silencers and boundary elements / insulators) in nucleotide sequences.
+ Transcription_factors_and_regulatory_sites
+ -10 signals
+ -35 signals
+ Attenuators
+ CAAT signals
+ CAT box
+ CCAAT box
+ CpG islands
+ Enhancers
+ GC signals
+ Isochores
+ Promoters
+ TATA signals
+ TFBS
+ Terminators
+ Transcription factor binding sites
+ Transcription factors
+ Transcriptional regulatory sites
+
+
+ This includes CpG rich regions (isochores) in a nucleotide sequence.
+ This includes promoters, CAAT signals, TATA signals, -35 signals, -10 signals, GC signals, primer binding sites for initiation of transcription or reverse transcription, enhancer, attenuator, terminators and ribosome binding sites.
+ Transcription factor proteins either promote (as an activator) or block (as a repressor) the binding to DNA of RNA polymerase. Regulatory sites including transcription factor binding site as well as promoters, enhancers, silencers and boundary elements / insulators.
+ Transcription factors and regulatory sites
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+
+ Protein phosphorylation and phosphorylation sites in protein sequences.
+
+ Phosphorylation sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Metabolic pathways.
+
+
+ Metabolic pathways
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Signaling pathways.
+
+
+ Signaling pathways
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein and peptide identification.
+
+ Protein and peptide identification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Biological or biomedical analytical workflows or pipelines.
+ Pipelines
+ Workflows
+ Software integration
+ Tool integration
+ Tool interoperability
+
+
+ Workflows
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.0
+
+
+ Structuring data into basic types and (computational) objects.
+
+ Data types and objects
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Theoretical biology.
+
+ Theoretical biology
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Mitochondria, typically of mitochondrial genes and proteins.
+
+ Mitochondria
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ VT 1.5.10 Botany
+ VT 1.5.22 Plant science
+ Plants, e.g. information on a specific plant genome including molecular sequences, genes and annotation.
+ Botany
+ Plant
+ Plant science
+ Plants
+ Plant_biology
+ Plant anatomy
+ Plant cell biology
+ Plant ecology
+ Plant genetics
+ Plant physiology
+
+
+ The resource may be specific to a plant, a group of plants or all plants.
+ Plant biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ VT 1.5.28
+ Study of viruses, e.g. sequence and structural data, interactions of viral proteins, or a viral genome including molecular sequences, genes and annotation.
+ Virology
+
+
+ Virology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Fungi and molds, e.g. information on a specific fungal genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a fungus, a group of fungi or all fungi.
+ Fungi
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset). Definition is wrong anyway.
+ 1.17
+
+
+ Pathogens, e.g. information on a specific vertebrate genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a pathogen, a group of pathogens or all pathogens.
+ Pathogens
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Arabidopsis-specific data.
+
+ Arabidopsis
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Rice-specific data.
+
+ Rice
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Informatics resources that aim to identify, map or analyse genetic markers in DNA sequences, for example to produce a genetic (linkage) map of a chromosome or genome or to analyse genetic linkage and synteny.
+
+ Genetic mapping and linkage
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study (typically comparison) of the sequence, structure or function of multiple genomes.
+ Comparative_genomics
+
+
+
+ Comparative genomics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Mobile genetic elements, such as transposons, Plasmids, Bacteriophage elements and Group II introns.
+ Mobile_genetic_elements
+ Transposons
+
+
+ Mobile genetic elements
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Human diseases, typically describing the genes, mutations and proteins implicated in disease.
+
+ Human disease
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.3 Immunology
+ The application of information technology to immunology such as immunological processes, immunological genes, proteins and peptide ligands, antigens and so on.
+ Immunology
+
+
+
+ Immunology
+
+ http://purl.bioontology.org/ontology/MSH/D007120
+ http://purl.bioontology.org/ontology/MSH/D007125
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Lipoproteins (protein-lipid assemblies), and proteins or regions of a protein that span or are associated with a membrane.
+ Membrane_and_lipoproteins
+ Lipoproteins
+ Membrane proteins
+ Transmembrane proteins
+
+
+ Membrane and lipoproteins
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ Proteins that catalyze chemical reactions, the kinetics of enzyme-catalysed reactions, enzyme nomenclature etc.
+ Enzymology
+ Enzymes
+
+
+ Enzymes
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ PCR primers and hybridisation oligos in a nucleic acid sequence.
+
+
+ Primers
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Regions or sites in a eukaryotic and eukaryotic viral RNA sequence which directs endonuclease cleavage or polyadenylation of an RNA transcript.
+
+
+ PolyA signal or sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ CpG rich regions (isochores) in a nucleotide sequence.
+
+
+ CpG island and isochores
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Restriction enzyme recognition sites (restriction sites) in a nucleic acid sequence.
+
+
+ Restriction sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+
+ Splice sites in a nucleotide sequence or alternative RNA splicing events.
+
+ Splice sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Matrix/scaffold attachment regions (MARs/SARs) in a DNA sequence.
+
+
+ Matrix/scaffold attachment sites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Operons (operators, promoters and genes) from a bacterial genome.
+
+
+ Operon
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Whole promoters or promoter elements (transcription start sites, RNA polymerase binding site, transcription factor binding sites, promoter enhancers etc) in a DNA sequence.
+
+
+ Promoters
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.24 Structural biology
+ The molecular structure of biological molecules, particularly macromolecules such as proteins and nucleic acids.
+ Structural_biology
+ Structural assignment
+ Structural determination
+ Structure determination
+
+
+
+ This includes experimental methods for biomolecular structure determination, such as X-ray crystallography, nuclear magnetic resonance (NMR), circular dichroism (CD) spectroscopy, microscopy etc., including the assignment or modelling of molecular structure from such data.
+ Structural biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Trans- or intra-membrane regions of a protein, typically describing physicochemical properties of the secondary structure elements.
+
+
+ Protein membrane regions
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ The comparison of two or more molecular structures, for example structure alignment and clustering.
+
+
+ This might involve comparison of secondary or tertiary (3D) structural information.
+ Structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The study of gene and protein function including the prediction of functional properties of a protein.
+ Functional analysis
+ Function_analysis
+ Protein function analysis
+ Protein function prediction
+
+
+
+ Function analysis
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Specific bacteria or archaea, e.g. information on a specific prokaryote genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a prokaryote, a group of prokaryotes or all prokaryotes.
+ Prokaryotes and Archaea
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein data resources.
+
+ Protein databases
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Experimental methods for biomolecular structure determination, such as X-ray crystallography, nuclear magnetic resonance (NMR), circular dichroism (CD) spectroscopy, microscopy etc., including the assignment or modelling of molecular structure from such data.
+
+ Structure determination
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 1.5.11 Cell biology
+ Cells, such as key genes and proteins involved in the cell cycle.
+ Cell_biology
+ Cells
+ Cellular processes
+ Protein subcellular localization
+
+
+ Cell biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Topic focused on identifying, grouping, or naming things in a structured way according to some schema based on observable relationships.
+
+ Classification
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Lipoproteins (protein-lipid assemblies).
+
+ Lipoproteins
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Visualise a phylogeny, for example, render a phylogenetic tree.
+
+ Phylogeny visualisation
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The application of information technology to chemistry in biological research environment.
+ Chemical informatics
+ Chemoinformatics
+ Cheminformatics
+
+
+
+ Cheminformatics
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The holistic modelling and analysis of complex biological systems and the interactions therein.
+ Systems_biology
+ Biological modelling
+ Biological system modelling
+ Systems modelling
+
+
+
+ This includes databases of models and methods to construct or analyse a model.
+ Systems biology
+
+ http://purl.bioontology.org/ontology/MSH/D049490
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The application of statistical methods to biological problems.
+ Statistics_and_probability
+ Bayesian methods
+ Biostatistics
+ Descriptive statistics
+ Gaussian processes
+ Inferential statistics
+ Markov processes
+ Multivariate statistics
+ Probabilistic graphical model
+ Probability
+ Statistics
+
+
+
+ Statistics and probability
+
+
+
+ http://purl.bioontology.org/ontology/MSH/D056808
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Search for and retrieve molecular structures that are similar to a structure-based query (typically another structure or part of a structure).
+
+ The query is a structure-based entity such as another structure, a 3D (structural) motif, 3D profile or template.
+ Structure database search
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ The construction, analysis, evaluation, refinement etc. of models of a molecule's properties or behaviour, including modelling the structure of proteins in complex with small molecules or other macromolecules (docking).
+ Molecular_modelling
+ Comparative modelling
+ Docking
+ Homology modeling
+ Homology modelling
+ Molecular docking
+
+
+ Molecular modelling
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.2
+
+
+ The prediction of functional properties of a protein.
+
+ Protein function prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Single nucleotide polymorphisms (SNP) and associated data, for example, the discovery and annotation of SNPs.
+
+
+ SNP
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ Predict transmembrane domains and topology in protein sequences.
+
+ Transmembrane protein prediction
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+
+ The comparison of two or more nucleic acid (typically RNA) secondary or tertiary structures.
+
+ Use this concept for methods that are exclusively for nucleic acid structures.
+ Nucleic acid structure comparison
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Exons in a nucleotide sequence.
+
+
+ Exons
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Transcription of DNA into RNA including the regulation of transcription.
+
+
+ Gene transcription
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ DNA mutation.
+ DNA_mutation
+
+
+ DNA mutation
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.2.16 Oncology
+ The study of cancer, for example, genes and proteins implicated in cancer.
+ Cancer biology
+ Oncology
+ Cancer
+ Neoplasm
+ Neoplasms
+
+
+
+ Oncology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Structural and associated data for toxic chemical substances.
+
+
+ Toxins and targets
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Introns in a nucleotide sequence.
+
+
+ Introns
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A topic concerning primarily bioinformatics software tools, typically the broad function or purpose of a tool.
+
+
+ Tool topic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+ A general area of bioinformatics study, typically the broad scope or category of content of a bioinformatics journal or conference proceeding.
+
+
+ Study topic
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Biological nomenclature (naming), symbols and terminology.
+
+ Nomenclature
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ The genes, gene variations and proteins involved in one or more specific diseases.
+
+ Disease genes and proteins
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ http://edamontology.org/topic_3040
+ Protein secondary or tertiary structural data and/or associated annotation.
+ Protein structure
+ Protein_structure_analysis
+ Protein tertiary structure
+
+
+
+ Protein structure analysis
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The study of human beings in general, including the human genome and proteome.
+ Humans
+ Human_biology
+
+
+ Human biology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Informatics resource (typically a database) primarily focused on genes.
+
+ Gene resources
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Yeast, e.g. information on a specific yeast genome including molecular sequences, genes and annotation.
+
+ Yeast
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison) Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Eukaryotes or data concerning eukaryotes, e.g. information on a specific eukaryote genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a eukaryote, a group of eukaryotes or all eukaryotes.
+ Eukaryotes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Invertebrates, e.g. information on a specific invertebrate genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to an invertebrate, a group of invertebrates or all invertebrates.
+ Invertebrates
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Vertebrates, e.g. information on a specific vertebrate genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a vertebrate, a group of vertebrates or all vertebrates.
+ Vertebrates
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ (jison)Out of EDAM scope. While very useful to have a basic set of IDs for organisms, should find a better way to provide this e.g. in bio.tools (NCBI taxon ID subset).
+ 1.17
+
+
+ Unicellular eukaryotes, e.g. information on a unicellular eukaryote genome including molecular sequences, genes and annotation.
+
+ The resource may be specific to a unicellular eukaryote, a group of unicellular eukaryotes or all unicellular eukaryotes.
+ Unicellular eukaryotes
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Protein secondary or tertiary structure alignments.
+
+ Protein structure alignment
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ The study of matter and its structure by means of the diffraction of X-rays, typically the diffraction pattern caused by the regularly spaced atoms of a crystalline sample.
+ Crystallography
+ X-ray_diffraction
+ X-ray crystallography
+ X-ray microscopy
+
+
+
+ X-ray diffraction
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Conceptualisation, categorisation and naming of entities or phenomena within biology or bioinformatics.
+
+ Ontologies, nomenclature and classification
+ http://purl.bioontology.org/ontology/MSH/D002965
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ Immunity-related proteins and their ligands.
+ Immunoproteins_and_antigens
+ Antigens
+ Immunopeptides
+ Immunoproteins
+ Therapeutic antibodies
+
+
+
+ This includes T cell receptors (TR), major histocompatibility complex (MHC), immunoglobulin superfamily (IgSF) / antibodies, major histocompatibility complex superfamily (MhcSF), etc.
+ Immunoproteins and antigens
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Specific molecules, including large molecules built from repeating subunits (macromolecules) and small molecules of biological significance.
+ CHEBI:23367
+
+ Molecules
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ VT 3.1.9 Toxicology
+ Toxins and the adverse effects of these chemical substances on living organisms.
+ Toxicology
+ Computational toxicology
+ Toxicoinformatics
+
+
+
+ Toxicology
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta13
+
+
+ Parallelised sequencing processes that are capable of sequencing many thousands of sequences simultaneously.
+
+ High-throughput sequencing
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Gene regulatory networks.
+
+
+ Gene regulatory networks
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ beta12orEarlier
+
+
+ Informatics resources dedicated to one or more specific diseases (not diseases in general).
+
+ Disease (specific)
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+ Variable number of tandem repeat (VNTR) polymorphism in a DNA sequence.
+
+
+ VNTR
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+ Microsatellite polymorphism in a DNA sequence.
+
+
+ Microsatellites
+ true
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.13
+
+
+ Restriction fragment length polymorphisms (RFLP) in a DNA sequence.
+
+
+ RFLP
+ true
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ true
+ DNA polymorphism.
+ DNA_polymorphism
+ Microsatellites
+ RFLP
+ SNP
+ Single nucleotide polymorphism
+ VNTR
+ Variable number of tandem repeat polymorphism
+ snps
+
+
+ Includes microsatellite polymorphism in a DNA sequence. A microsatellite polymorphism is a very short subsequence that is repeated a variable number of times between individuals. These repeats consist of the nucleotides cytosine and adenosine.
+ Includes restriction fragment length polymorphisms (RFLP) in a DNA sequence. An RFLP is defined by the presence or absence of a specific restriction site of a bacterial restriction enzyme.
+ Includes single nucleotide polymorphisms (SNP) and associated data, for example, the discovery and annotation of SNPs. A SNP is a DNA sequence variation where a single nucleotide differs between members of a species or paired chromosomes in an individual.
+ Includes variable number of tandem repeat (VNTR) polymorphism in a DNA sequence. VNTRs occur in non-coding regions of DNA and consist of a sub-sequence that is repeated a multiple (and varied) number of times.
+ DNA polymorphism
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ 1.3
+
+
+ Topic for the design of nucleic acid sequences with specific conformations.
+
+ Nucleic acid design
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The design of primers for PCR and DNA amplification or the design of molecular probes.
+
+ Primer or probe design
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.2
+
+
+ Molecular secondary or tertiary (3D) structural data resources, typically of proteins and nucleic acids.
+
+ Structure databases
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.2
+
+
+ Nucleic acid (secondary or tertiary) structure, such as whole structures, structural features and associated annotation.
+
+ Nucleic acid structure
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Molecular sequence data resources, including sequence sites, alignments, motifs and profiles.
+
+ Sequence databases
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Nucleotide sequences and associated concepts such as sequence sites, alignments, motifs and profiles.
+
+ Nucleic acid sequences
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+ Protein sequences and associated concepts such as sequence sites, alignments, motifs and profiles.
+
+
+ Protein sequences
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Protein interaction networks.
+
+ Protein interaction networks
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5.4 Biochemistry and molecular biology
+ The molecular basis of biological activity, particularly the macromolecules (e.g. proteins and nucleic acids) that are essential to life.
+ Molecular_biology
+ Biological processes
+
+
+
+ Molecular biology
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Mammals, e.g. information on a specific mammal genome including molecular sequences, genes and annotation.
+
+ Mammals
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5.5 Biodiversity conservation
+ The degree of variation of life forms within a given ecosystem, biome or an entire planet.
+ Biodiversity
+
+
+
+ Biodiversity
+
+ http://purl.bioontology.org/ontology/MSH/D044822
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The comparison, grouping together and classification of macromolecules on the basis of sequence similarity.
+
+ This includes the results of sequence clustering, ortholog identification, assignment to families, annotation etc.
+ Sequence clusters and classification
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The study of genes, genetic variation and heredity in living organisms.
+ Genetics
+ Genes
+ Heredity
+
+
+
+ Genetics
+
+ http://purl.bioontology.org/ontology/MSH/D005823
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The genes and genetic mechanisms such as Mendelian inheritance that underlie continuous phenotypic traits (such as height or weight).
+ Quantitative_genetics
+
+
+ Quantitative genetics
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The distribution of allele frequencies in a population of organisms and its change subject to evolutionary processes including natural selection, genetic drift, mutation and gene flow.
+ Population_genetics
+
+
+
+ Population genetics
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+ Regulatory RNA sequences including microRNA (miRNA) and small interfering RNA (siRNA).
+
+
+ Regulatory RNA
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ The documentation of resources such as tools, services and databases and how to get help.
+
+
+ Documentation and help
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The structural and functional organisation of genes and other genetic elements.
+
+ Genetic organisation
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The application of information technology to health, disease and biomedicine.
+ Biomedical informatics
+ Clinical informatics
+ Health and disease
+ Health informatics
+ Healthcare informatics
+ Medical_informatics
+
+
+
+ Medical informatics
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5.14 Developmental biology
+ How organisms grow and develop.
+ Developmental_biology
+ Development
+
+
+
+ Developmental biology
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The development of organisms between the one-cell stage (typically the zygote) and the end of the embryonic stage.
+ Embryology
+
+
+
+ Embryology
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 3.1.1 Anatomy and morphology
+ The form and function of the structures of living organisms.
+ Anatomy
+
+
+
+ Anatomy
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ The scientific literature, language processing, reference information, and documentation.
+ Language
+ Literature
+ Literature_and_language
+ Bibliography
+ Citations
+ Documentation
+ References
+ Scientific literature
+
+
+
+ This includes the documentation of resources such as tools, services and databases, user support, how to get help etc.
+ Literature and language
+ http://purl.bioontology.org/ontology/MSH/D011642
+
+
+
+
+
+
+
+
+ beta13
+ true
+ VT 1.5 Biological sciences
+ VT 1.5.1 Aerobiology
+ VT 1.5.13 Cryobiology
+ VT 1.5.23 Reproductive biology
+ VT 1.5.3 Behavioural biology
+ VT 1.5.7 Biological rhythm
+ VT 1.5.8 Biology
+ VT 1.5.99 Other
+ The study of life and living organisms, including their morphology, biochemistry, physiology, development, evolution, and so on.
+ Biological science
+ Biology
+ Aerobiology
+ Behavioural biology
+ Biological rhythms
+ Chronobiology
+ Cryobiology
+ Reproductive biology
+
+
+
+ Biology
+
+
+
+
+
+
+
+
+
+ beta13
+ true
+ Data stewardship
+ VT 1.3.1 Data management
+ Data management comprises the practices and principles of taking care of data, other than analysing them. This includes for example taking care of the associated metadata, formatting, storage, archiving, or access.
+ Metadata management
+
+
+
+ Data management
+
+
+ http://purl.bioontology.org/ontology/MSH/D000079803
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The detection of the positional features, such as functional and other key sites, in molecular sequences.
+
+ Sequence feature detection
+ http://purl.bioontology.org/ontology/MSH/D058977
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The detection of positional features such as functional sites in nucleotide sequences.
+
+ Nucleic acid feature detection
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ The detection, identification and analysis of positional protein sequence features, such as functional sites.
+
+ Protein feature detection
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.2
+
+
+ Topic for modelling biological systems in mathematical terms.
+
+ Biological system modelling
+ true
+
+
+
+
+
+
+
+
+ beta13
+ The acquisition of data, typically measurements of physical systems using any type of sampling system, or by any other means.
+ Data collection
+
+
+ Data acquisition
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.3
+
+
+ Specific genes and/or their encoded proteins or a family or other grouping of related genes and proteins.
+
+ Genes and proteins resources
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Topological domains such as cytoplasmic regions in a protein.
+
+
+ Protein topological domains
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+
+ Protein sequence variants produced e.g. from alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting.
+ Protein_variants
+
+
+ Protein variants
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.12
+
+
+ Regions within a nucleic acid sequence containing a signal that alters a biological function.
+
+ Expression signals
+ true
+
+
+
+
+
+
+
+
+
+ beta13
+
+ Nucleic acids binding to some other molecule.
+ DNA_binding_sites
+ Matrix-attachment region
+ Matrix/scaffold attachment region
+ Nucleosome exclusion sequences
+ Restriction sites
+ Ribosome binding sites
+ Scaffold-attachment region
+
+
+ This includes ribosome binding sites (Shine-Dalgarno sequence in prokaryotes), restriction enzyme recognition sites (restriction sites) etc.
+ This includes sites involved with DNA replication and recombination. This includes binding sites for initiation of replication (origin of replication), regions where transfer is initiated during the conjugation or mobilisation (origin of transfer), starting sites for DNA duplication (origin of replication) and regions which are eliminated through any kind of recombination. Also nucleosome exclusion regions, i.e. specific patterns or regions which exclude nucleosomes (the basic structural units of eukaryotic chromatin which play a significant role in regulating gene expression).
+ DNA binding sites
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Repetitive elements within a nucleic acid sequence.
+
+
+ This includes long terminal repeats (LTRs); sequences (typically retroviral) directly repeated at both ends of a defined sequence and other types of repeating unit.
+ Nucleic acid repeats
+ true
+
+
+
+
+
+
+
+
+ beta13
+ true
+ DNA replication or recombination.
+ DNA_replication_and_recombination
+
+
+ DNA replication and recombination
+
+
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Coding sequences for a signal or transit peptide.
+
+
+ Signal or transit peptide
+ true
+
+
+
+
+
+
+
+
+ beta13
+ 1.13
+
+ Sequence tagged sites (STS) in nucleic acid sequences.
+
+
+ Sequence tagged sites
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ The determination of complete (typically nucleotide) sequences, including those of genomes (full genome sequencing, de novo sequencing and resequencing), amplicons and transcriptomes.
+ DNA-Seq
+ Sequencing
+ Chromosome walking
+ Clone verification
+ DNase-Seq
+ High throughput sequencing
+ High-throughput sequencing
+ NGS
+ NGS data analysis
+ Next gen sequencing
+ Next generation sequencing
+ Panels
+ Primer walking
+ Sanger sequencing
+ Targeted next-generation sequencing panels
+
+
+
+ Sequencing
+
+ http://purl.bioontology.org/ontology/MSH/D059014
+
+
+
+
+
+
+
+
+
+ 1.1
+ The analysis of protein-DNA interactions where chromatin immunoprecipitation (ChIP) is used in combination with massively parallel DNA sequencing to identify the binding sites of DNA-associated proteins.
+ ChIP-sequencing
+ Chip Seq
+ Chip sequencing
+ Chip-sequencing
+ ChIP-seq
+ ChIP-exo
+
+
+ ChIP-seq
+
+
+
+
+
+
+
+
+
+ 1.1
+ A topic concerning high-throughput sequencing of cDNA to measure the RNA content (transcriptome) of a sample, for example, to investigate how different alleles of a gene are expressed, detect post-transcriptional mutations or identify gene fusions.
+ RNA sequencing
+ RNA-Seq analysis
+ Small RNA sequencing
+ Small RNA-Seq
+ Small-Seq
+ Transcriptome profiling
+ WTSS
+ Whole transcriptome shotgun sequencing
+ RNA-Seq
+ MicroRNA sequencing
+ miRNA-seq
+
+
+ This includes small RNA profiling (small RNA-Seq), for example to find novel small RNAs, characterize mutations and analyze expression of small RNAs.
+ RNA-Seq
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ 1.3
+
+ DNA methylation including bisulfite sequencing, methylation sites and analysis, for example of patterns and profiles of DNA methylation in a population, tissue etc.
+
+
+ DNA methylation
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ The systematic study of metabolites, the chemical processes they are involved in, and the chemical fingerprints of specific cellular processes in a whole cell, tissue, organ or organism.
+ Metabolomics
+ Exometabolomics
+ LC-MS-based metabolomics
+ MS-based metabolomics
+ MS-based targeted metabolomics
+ MS-based untargeted metabolomics
+ Mass spectrometry-based metabolomics
+ Metabolites
+ Metabolome
+ Metabonomics
+ NMR-based metabolomics
+
+
+
+ Metabolomics
+
+ http://purl.bioontology.org/ontology/MSH/D055432
+
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ The study of the epigenetic modifications of a whole cell, tissue, organism etc.
+ Epigenomics
+
+
+
+ Epigenetics concerns the heritable changes in gene expression owing to mechanisms other than DNA sequence variation.
+ Epigenomics
+
+ http://purl.bioontology.org/ontology/MSH/D057890
+
+
+
+
+
+
+
+
+
+ 1.1
+ true
+ Environmental DNA (eDNA)
+ Environmental sequencing
+ Biome sequencing
+ Community genomics
+ Ecogenomics
+ Environmental genomics
+ Environmental omics
+ The study of genetic material recovered from environmental samples, and associated environmental data.
+ Metagenomics
+ Shotgun metagenomics
+
+
+
+ Metagenomics
+
+
+
+
+
+
+
+
+
+
+
+ 1.1
+ Variation in chromosome structure including microscopic and submicroscopic types of variation such as deletions, duplications, copy-number variants, insertions, inversions and translocations.
+ DNA structural variation
+ Genomic structural variation
+ DNA_structural_variation
+ Deletion
+ Duplication
+ Insertion
+ Inversion
+ Translocation
+
+
+ Structural variation
+
+
+
+
+
+
+
+
+
+ 1.1
+ DNA-histone complexes (chromatin), organisation of chromatin into nucleosomes and packaging into higher-order structures.
+ DNA_packaging
+ Nucleosome positioning
+
+
+ DNA packaging
+
+ http://purl.bioontology.org/ontology/MSH/D042003
+
+
+
+
+
+
+
+
+ 1.1
+ 1.3
+
+
+ A topic concerning high-throughput sequencing of randomly fragmented genomic DNA, for example, to investigate whole-genome sequencing and resequencing, SNP discovery, identification of copy number variations and chromosomal rearrangements.
+
+ DNA-Seq
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ 1.3
+
+
+ The alignment of sequences of (typically millions of) short reads to a reference genome. This is a specialised topic within sequence alignment, especially because of complications arising from RNA splicing.
+
+ RNA-Seq alignment
+ true
+
+
+
+
+
+
+
+
+ 1.1
+ Experimental techniques that combine chromatin immunoprecipitation ('ChIP') with microarray ('chip'). ChIP-on-chip is used for high-throughput study of protein-DNA interactions.
+ ChIP-chip
+ ChIP-on-chip
+ ChiP
+
+
+ ChIP-on-chip
+
+
+
+
+
+
+
+
+
+ 1.3
+ The protection of data, such as patient health data, from damage or unwanted access from unauthorised users.
+ Data privacy
+ Data_security
+
+
+ Data security
+
+
+
+
+
+
+
+
+
+ 1.3
+ Biological samples and specimens.
+ Specimen collections
+ Sample_collections
+ biosamples
+ samples
+
+
+
+ Sample collections
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.4 Biochemistry and molecular biology
+ Chemical substances and physico-chemical processes that occur within living organisms.
+ Biological chemistry
+ Biochemistry
+ Glycomics
+ Pathobiochemistry
+ Phytochemistry
+
+
+
+ Biochemistry
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The study of evolutionary relationships amongst organisms from analysis of genetic information (typically gene or protein sequences).
+ Phylogenetics
+
+
+ Phylogenetics
+
+ http://purl.bioontology.org/ontology/MSH/D010802
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Topic concerning the study of heritable changes, for example in gene expression or phenotype, caused by mechanisms other than changes in the DNA sequence.
+ Epigenetics
+ DNA methylation
+ Histone modification
+ Methylation profiles
+
+
+
+ This includes sub-topics such as histone modification and DNA methylation (methylation sites and analysis, for example of patterns and profiles of DNA methylation in a population, tissue etc.)
+ Epigenetics
+
+ http://purl.bioontology.org/ontology/MSH/D019175
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The exploitation of biological process, structure and function for industrial purposes, for example the genetic manipulation of microorganisms for antibody production.
+ Biotechnology
+ Applied microbiology
+
+
+
+ Biotechnology
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Phenomes, or the study of the change in phenotype (the physical and biochemical traits of organisms) in response to genetic and environmental factors.
+ Phenomics
+
+
+
+ Phenomics
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.16 Evolutionary biology
+ The evolutionary processes, from the genetic to environmental scale, that produced life in all its diversity.
+ Evolution
+ Evolutionary_biology
+
+
+
+ Evolutionary biology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.1.8 Physiology
+ The functions of living organisms and their constituent parts.
+ Physiology
+ Electrophysiology
+
+
+
+ Physiology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.20 Microbiology
+ The biology of microorganisms.
+ Microbiology
+ Antimicrobial stewardship
+ Medical microbiology
+ Microbial genetics
+ Microbial physiology
+ Microbial surveillance
+ Microbiological surveillance
+ Molecular infection biology
+ Molecular microbiology
+
+
+
+ Microbiology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The biology of parasites.
+ Parasitology
+
+
+
+ Parasitology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.1 Basic medicine
+ VT 3.2 Clinical medicine
+ VT 3.2.9 General and internal medicine
+ Research in support of healing by diagnosis, treatment, and prevention of disease.
+ Biomedical research
+ Clinical medicine
+ Experimental medicine
+ Medicine
+ General medicine
+ Internal medicine
+
+
+
+ Medicine
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Neuroscience
+ VT 3.1.5 Neuroscience
+ The study of the nervous system and brain; its anatomy, physiology and function.
+ Neurobiology
+ Molecular neuroscience
+ Neurophysiology
+ Systemetic neuroscience
+
+
+
+ Neurobiology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.3.1 Epidemiology
+ Topic concerning the patterns, cause, and effect of disease within populations.
+ Public_health_and_epidemiology
+ Epidemiology
+ Public health
+
+
+
+ Public health and epidemiology
+
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.9 Biophysics
+ The use of physics to study biological systems.
+ Biophysics
+ Medical physics
+
+
+
+ Biophysics
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.5.12 Computational biology
+ VT 1.5.19 Mathematical biology
+ VT 1.5.26 Theoretical biology
+ The development and application of theory, analytical methods, mathematical models and computational simulation of biological systems.
+ Computational_biology
+ Biomathematics
+ Mathematical biology
+ Theoretical biology
+
+
+
+ This includes the modeling and treatment of biological processes and systems in mathematical terms (theoretical biology).
+ Computational biology
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The analysis of transcriptomes, or a set of all the RNA molecules in a specific cell, tissue etc.
+ Transcriptomics
+ Comparative transcriptomics
+ Transcriptome
+
+
+
+ Transcriptomics
+
+
+
+
+
+
+
+
+
+ 1.3
+ Chemical science
+ Polymer science
+ VT 1.7.10 Polymer science
+ VT 1.7 Chemical sciences
+ VT 1.7.2 Chemistry
+ VT 1.7.3 Colloid chemistry
+ VT 1.7.5 Electrochemistry
+ VT 1.7.6 Inorganic and nuclear chemistry
+ VT 1.7.7 Mathematical chemistry
+ VT 1.7.8 Organic chemistry
+ VT 1.7.9 Physical chemistry
+ The composition and properties of matter, reactions, and the use of reactions to create new substances.
+ Chemistry
+ Inorganic chemistry
+ Mathematical chemistry
+ Nuclear chemistry
+ Organic chemistry
+ Physical chemistry
+
+
+
+ Chemistry
+
+
+
+
+
+
+
+
+
+ 1.3
+ VT 1.1.99 Other
+ VT:1.1 Mathematics
+ The study of numbers (quantity) and other topics including structure, space, and change.
+ Maths
+ Mathematics
+ Dynamic systems
+ Dynamical systems
+ Dynamical systems theory
+ Graph analytics
+ Monte Carlo methods
+ Multivariate analysis
+
+
+
+ Mathematics
+
+
+
+
+
+
+
+
+
+ 1.3
+ VT 1.2 Computer sciences
+ VT 1.2.99 Other
+ The theory and practical use of computer systems.
+ Computer_science
+ Cloud computing
+ HPC
+ High performance computing
+ High-performance computing
+
+
+
+ Computer science
+
+
+
+
+
+
+
+
+
+ 1.3
+ The study of matter, space and time, and related concepts such as energy and force.
+ Physics
+
+
+
+ Physics
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ RNA splicing; post-transcription RNA modification involving the removal of introns and joining of exons.
+ Alternative splicing
+ RNA_splicing
+ Splice sites
+
+
+ This includes the study of splice sites, splicing patterns, alternative splicing events and variants, isoforms, etc.
+ RNA splicing
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The structure and function of genes at a molecular level.
+ Molecular_genetics
+
+
+
+ Molecular genetics
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.2.25 Respiratory systems
+ The study of the respiratory system.
+ Pulmonary medicine
+ Pulmonology
+ Respiratory_medicine
+ Pulmonary disorders
+ Respiratory disease
+
+
+
+ Respiratory medicine
+
+
+
+
+
+
+
+
+
+ 1.3
+ 1.4
+
+
+ The study of metabolic diseases.
+
+ Metabolic disease
+ true
+
+
+
+
+
+
+
+
+ 1.3
+ VT 3.3.4 Infectious diseases
+ The branch of medicine that deals with the prevention, diagnosis and management of transmissible disease with clinically evident illness resulting from infection with pathogenic biological agents (viruses, bacteria, fungi, protozoa, parasites and prions).
+ Communicable disease
+ Transmissible disease
+ Infectious_disease
+
+
+
+ Infectious disease
+
+
+
+
+
+
+
+
+
+ 1.3
+ The study of rare diseases.
+ Rare_diseases
+
+
+
+ Rare diseases
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 1.7.4 Computational chemistry
+ Topic concerning the development and application of theory, analytical methods, mathematical models and computational simulation of chemical systems.
+ Computational_chemistry
+
+
+
+ Computational chemistry
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The branch of medicine that deals with the anatomy, functions and disorders of the nervous system.
+ Neurology
+ Neurological disorders
+
+
+
+ Neurology
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.2.22 Peripheral vascular disease
+ VT 3.2.4 Cardiac and Cardiovascular systems
+ The diseases and abnormalities of the heart and circulatory system.
+ Cardiovascular medicine
+ Cardiology
+ Cardiovascular disease
+ Heart disease
+
+
+
+ Cardiology
+
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ The discovery and design of drugs or potential drug compounds.
+ Drug_discovery
+
+
+
+ This includes methods that search compound collections, generate or analyse drug 3D conformations, identify drug targets with structural docking etc.
+ Drug discovery
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ Repositories of biological samples, typically human, for basic biological and clinical research.
+ Tissue collection
+ biobanking
+ Biobank
+
+
+
+ Biobank
+
+
+
+
+
+
+
+
+
+ 1.3
+ Laboratory study of mice, for example, phenotyping, and mutagenesis of mouse cell lines.
+ Laboratory mouse
+ Mouse_clinic
+
+
+
+ Mouse clinic
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of microbial cells including bacteria, yeasts and moulds.
+ Microbial_collection
+
+
+
+ Microbial collection
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of cells grown under laboratory conditions, specifically, cells from multi-cellular eukaryotes and especially animal cells.
+ Cell_culture_collection
+
+
+
+ Cell culture collection
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of DNA, including both collections of cloned molecules, and populations of micro-organisms that store and propagate cloned DNA.
+ Clone_library
+
+
+
+ Clone library
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ 'translating' the output of basic and biomedical research into better diagnostic tools, medicines, medical procedures, policies and advice.
+ Translational_medicine
+
+
+
+ Translational medicine
+
+
+
+
+
+
+
+
+
+ 1.3
+ Collections of chemicals, typically for use in high-throughput screening experiments.
+ Compound_libraries_and_screening
+ Chemical library
+ Chemical screening
+ Compound library
+ Small chemical compounds libraries
+ Small compounds libraries
+ Target identification and validation
+
+
+
+ Compound libraries and screening
+
+
+
+
+
+
+
+
+
+ 1.3
+ true
+ VT 3.3 Health sciences
+ Topic concerning biological science that is (typically) performed in the context of medicine.
+ Biomedical sciences
+ Health science
+ Biomedical_science
+
+
+
+ Biomedical science
+
+
+
+
+
+
+
+
+
+ 1.3
+ Topic concerning the identity of biological entities, or reports on such entities, and the mapping of entities and records in different databases.
+ Data_identity_and_mapping
+
+
+
+ Data identity and mapping
+
+
+
+
+
+
+
+
+ 1.3
+ 1.12
+
+ The search and retrieval from a database on the basis of molecular sequence similarity.
+
+
+ Sequence search
+ true
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ Objective indicators of biological state often used to assess health, and determine treatment.
+ Diagnostic markers
+ Biomarkers
+
+
+ Biomarkers
+
+
+
+
+
+
+
+
+
+ 1.4
+ The procedures used to conduct an experiment.
+ Experimental techniques
+ Lab method
+ Lab techniques
+ Laboratory method
+ Laboratory_techniques
+ Experiments
+ Laboratory experiments
+
+
+
+ Laboratory techniques
+
+
+
+
+
+
+
+
+
+ 1.4
+ The development of policies, models and standards that cover data acquisition, storage and integration, such that it can be put to use, typically through a process of systematically applying statistical and / or logical techniques to describe, illustrate, summarise or evaluate data.
+ Data_architecture_analysis_and_design
+ Data analysis
+ Data architecture
+ Data design
+
+
+
+ Data architecture, analysis and design
+
+
+
+
+
+
+
+
+
+ 1.4
+ The combination and integration of data from different sources, for example into a central repository or warehouse, to provide users with a unified view of these data.
+ Data_integration_and_warehousing
+ Data integration
+ Data warehousing
+
+
+
+ Data integration and warehousing
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Any matter, surface or construct that interacts with a biological system.
+ Biomaterials
+
+
+
+ Biomaterials
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The use of synthetic chemistry to study and manipulate biological systems.
+ Chemical_biology
+
+
+
+ Chemical biology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 1.7.1 Analytical chemistry
+ The study of the separation, identification, and quantification of the chemical components of natural and artificial materials.
+ Analytical_chemistry
+
+
+
+ Analytical chemistry
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of chemistry to create new compounds.
+ Synthetic_chemistry
+ Synthetic organic chemistry
+
+
+
+ Synthetic chemistry
+
+
+
+
+
+
+
+
+
+ 1.4
+ 1.2.12 Programming languages
+ Software engineering
+ VT 1.2.1 Algorithms
+ VT 1.2.14 Software engineering
+ VT 1.2.7 Data structures
+ The process that leads from an original formulation of a computing problem to executable programs.
+ Computer programming
+ Software development
+ Software_engineering
+ Algorithms
+ Data structures
+ Programming languages
+
+
+
+ Software engineering
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The process of bringing a new drug to market once a lead compound has been identified through drug discovery.
+ Drug development science
+ Medicine development
+ Medicines development
+ Drug_development
+
+
+
+ Drug development
+
+
+
+
+
+
+
+
+
+ 1.4
+ Drug delivery
+ Drug formulation
+ Drug formulation and delivery
+ The process of formulating and administering a pharmaceutical compound to achieve a therapeutic effect.
+ Biotherapeutics
+
+
+
+ Biotherapeutics
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The study of how a drug interacts with the body.
+ Drug_metabolism
+ ADME
+ Drug absorption
+ Drug distribution
+ Drug excretion
+ Pharmacodynamics
+ Pharmacokinetics
+ Pharmacokinetics and pharmacodynamics
+
+
+
+ Drug metabolism
+
+
+
+
+
+
+
+
+
+ 1.4
+ Health care research
+ Health care science
+ The discovery, development and approval of medicines.
+ Drug discovery and development
+ Medicines_research_and_development
+
+
+
+ Medicines research and development
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ The safety (or lack) of drugs and other medical interventions.
+ Patient safety
+ Safety_sciences
+ Drug safety
+
+
+
+ Safety sciences
+
+
+
+
+
+
+
+
+
+ 1.4
+ The detection, assessment, understanding and prevention of adverse effects of medicines.
+ Pharmacovigilence
+
+
+
+ Pharmacovigilance concerns safety once a drug has gone to market.
+ Pharmacovigilance
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ The testing of new medicines, vaccines or procedures on animals (preclinical) and humans (clinical) prior to their approval by regulatory authorities.
+ Preclinical_and_clinical_studies
+ Clinical studies
+ Clinical study
+ Clinical trial
+ Drug trials
+ Preclinical studies
+ Preclinical study
+
+
+
+ Preclinical and clinical studies
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The visual representation of an object.
+ Imaging
+ Diffraction experiment
+ Microscopy
+ Microscopy imaging
+ Optical super resolution microscopy
+ Photonic force microscopy
+ Photonic microscopy
+
+
+
+ This includes diffraction experiments that are based upon the interference of waves, typically electromagnetic waves such as X-rays or visible light, by some object being studied, typically in order to produce an image of the object or determine its structure.
+ Imaging
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of imaging techniques to understand biology.
+ Biological imaging
+ Biological_imaging
+
+
+
+ Bioimaging
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.13 Medical imaging
+ VT 3.2.14 Nuclear medicine
+ VT 3.2.24 Radiology
+ The use of imaging techniques for clinical purposes for medical research.
+ Medical_imaging
+ Neuroimaging
+ Nuclear medicine
+ Radiology
+
+
+
+ Medical imaging
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of optical instruments to magnify the image of an object.
+ Light_microscopy
+
+
+
+ Light microscopy
+
+
+
+
+
+
+
+
+
+ 1.4
+ The use of animals and alternatives in experimental research.
+ Animal experimentation
+ Animal research
+ Animal testing
+ In vivo testing
+ Laboratory_animal_science
+
+
+
+ Laboratory animal science
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 1.5.18 Marine and Freshwater biology
+ The study of organisms in the ocean or brackish waters.
+ Marine_biology
+
+
+
+ Marine biology
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The identification of molecular and genetic causes of disease and the development of interventions to correct them.
+ Molecular_medicine
+
+
+
+ Molecular medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.3.7 Nutrition and Dietetics
+ The study of the effects of food components on the metabolism, health, performance and disease resistance of humans and animals. It also includes the study of human behaviours related to food choices.
+ Nutrition
+ Nutrition science
+ Nutritional_science
+ Dietetics
+
+
+
+ Nutritional science
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The collective characterisation and quantification of pools of biological molecules that translate into the structure, function, and dynamics of an organism or organisms.
+ Omics
+
+
+
+ Omics
+
+
+
+
+
+
+
+
+
+ 1.4
+ The processes that need to be in place to ensure the quality of products for human or animal use.
+ Quality assurance
+ Quality_affairs
+ Good clinical practice
+ Good laboratory practice
+ Good manufacturing practice
+
+
+
+ Quality affairs
+
+
+
+
+
+
+
+
+ 1.4
+ The protection of public health by controlling the safety and efficacy of products in areas including pharmaceuticals, veterinary medicine, medical devices, pesticides, agrochemicals, cosmetics, and complementary medicines.
+ Healthcare RA
+ Regulatory_affairs
+
+
+
+ Regulatory affairs
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ Biomedical approaches to clinical interventions that involve the use of stem cells.
+ Stem cell research
+ Regenerative_medicine
+
+
+
+ Regenerative medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ An interdisciplinary field of study that looks at the dynamic systems of the human body as part of an integrated whole, incorporating biochemical, physiological, and environmental interactions that sustain life.
+ Systems_medicine
+
+
+
+ Systems medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ Topic concerning the branch of medicine that deals with the prevention, diagnosis, and treatment of disease, disorder and injury in animals.
+ Veterinary_medicine
+ Clinical veterinary medicine
+
+
+
+ Veterinary medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ The application of biological concepts and methods to the analytical and synthetic methodologies of engineering.
+ Biological engineering
+ Bioengineering
+
+
+
+ Bioengineering
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ Ageing
+ Aging
+ Gerontology
+ VT 3.2.10 Geriatrics and gerontology
+ The branch of medicine dealing with the diagnosis, treatment and prevention of disease in older people, and the problems specific to aging.
+ Geriatrics
+ Geriatric_medicine
+
+
+
+ Geriatric medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.1 Allergy
+ Health issues related to the immune system and their prevention, diagnosis and management.
+ Allergy_clinical_immunology_and_immunotherapeutics
+ Allergy
+ Clinical immunology
+ Immune disorders
+ Immunomodulators
+ Immunotherapeutics
+
+
+
+ Allergy, clinical immunology and immunotherapeutics
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The prevention of pain and the evaluation, treatment and rehabilitation of persons in pain.
+ Algiatry
+ Pain management
+ Pain_medicine
+
+
+
+ Pain medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.2 Anaesthesiology
+ Anaesthesia and anaesthetics.
+ Anaesthetics
+ Anaesthesiology
+
+
+
+ Anaesthesiology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.5 Critical care/Emergency medicine
+ The multidisciplinary field that cares for patients with acute, life-threatening illness or injury.
+ Acute medicine
+ Emergency medicine
+ Intensive care medicine
+ Critical_care_medicine
+
+
+
+ Critical care medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.7 Dermatology and venereal diseases
+ The branch of medicine that deals with prevention, diagnosis and treatment of disorders of the skin, scalp, hair and nails.
+ Dermatology
+ Dermatological disorders
+
+
+
+ Dermatology
+
+
+
+
+
+
+
+
+
+ 1.4
+ The study, diagnosis, prevention and treatments of disorders of the oral cavity, maxillofacial area and adjacent structures.
+ Dentistry
+
+
+
+ Dentistry
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.20 Otorhinolaryngology
+ The branch of medicine that deals with the prevention, diagnosis, and treatment of disorders of the ear, nose and throat.
+ Audiovestibular medicine
+ Otolaryngology
+ Otorhinolaryngology
+ Ear_nose_and_throat_medicine
+ Head and neck disorders
+
+
+
+ Ear, nose and throat medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The branch of medicine dealing with diseases of endocrine organs, hormone systems, their target organs, and disorders of the pathways of glucose and lipid metabolism.
+ Endocrinology_and_metabolism
+ Endocrine disorders
+ Endocrinology
+ Metabolic disorders
+ Metabolism
+
+
+
+ Endocrinology and metabolism
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.11 Hematology
+ The branch of medicine that deals with the blood, blood-forming organs and blood diseases.
+ Haematology
+ Blood disorders
+ Haematological disorders
+
+
+
+ Haematology
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.8 Gastroenterology and hepatology
+ The branch of medicine that deals with disorders of the oesophagus, stomach, duodenum, jejunum, ileum, large intestine, sigmoid colon and rectum.
+ Gastroenterology
+ Gastrointestinal disorders
+
+
+
+ Gastroenterology
+
+
+
+
+
+
+
+
+
+ 1.4
+ The study of the biological and physiological differences between males and females and how they effect differences in disease presentation and management.
+ Gender_medicine
+
+
+
+ Gender medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ VT 3.2.15 Obstetrics and gynaecology
+ The branch of medicine that deals with the health of the female reproductive system, pregnancy and birth.
+ Gynaecology_and_obstetrics
+ Gynaecological disorders
+ Gynaecology
+ Obstetrics
+
+
+
+ Gynaecology and obstetrics
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The branch of medicine that deals with the liver, gallbladder, bile ducts and bile.
+ Hepatology
+ Hepatic_and_biliary_medicine
+ Liver disorders
+
+
+
+ Hepatic and biliary medicine
+
+ Hepatobiliary medicine
+
+
+
+
+
+
+
+
+ 1.4
+ 1.13
+
+ The branch of medicine that deals with the infectious diseases of the tropics.
+
+
+ Infectious tropical disease
+ true
+
+
+
+
+
+
+
+
+ 1.4
+ The branch of medicine that treats body wounds or shock produced by sudden physical injury, as from violence or accident.
+ Traumatology
+ Trauma_medicine
+
+
+
+ Trauma medicine
+
+
+
+
+
+
+
+
+
+ 1.4
+ true
+ The branch of medicine that deals with the diagnosis, management and prevention of poisoning and other adverse health effects caused by medications, occupational and environmental toxins, and biological agents.
+ Medical_toxicology
+
+
+
+ Medical toxicology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.19 Orthopaedics
+ VT 3.2.26 Rheumatology
+ The branch of medicine that deals with the prevention, diagnosis, and treatment of disorders of the muscle, bone and connective tissue. It incorporates aspects of orthopaedics, rheumatology, rehabilitation medicine and pain medicine.
+ Musculoskeletal_medicine
+ Musculoskeletal disorders
+ Orthopaedics
+ Rheumatology
+
+
+
+ Musculoskeletal medicine
+
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Optometry
+ VT 3.2.17 Ophthalmology
+ VT 3.2.18 Optometry
+ The branch of medicine that deals with disorders of the eye, including eyelid, optic nerve/visual pathways and ocular muscles.
+ Ophthalmology
+ Eye disorders
+
+
+
+ Ophthalmology
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.21 Paediatrics
+ The branch of medicine that deals with the medical care of infants, children and adolescents.
+ Child health
+ Paediatrics
+
+
+
+ Paediatrics
+
+
+
+
+
+
+
+
+
+ 1.4
+ Mental health
+ VT 3.2.23 Psychiatry
+ The branch of medicine that deals with the management of mental illness, emotional disturbance and abnormal behaviour.
+ Psychiatry
+ Psychiatric disorders
+
+
+
+ Psychiatry
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.3 Andrology
+ The health of the reproductive processes, functions and systems at all stages of life.
+ Reproductive_health
+ Andrology
+ Family planning
+ Fertility medicine
+ Reproductive disorders
+
+
+
+ Reproductive health
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.28 Transplantation
+ The use of operative, manual and instrumental techniques on a patient to investigate and/or treat a pathological condition or help improve bodily function or appearance.
+ Surgery
+ Transplantation
+
+
+
+ Surgery
+
+
+
+
+
+
+
+
+
+ 1.4
+ VT 3.2.29 Urology and nephrology
+ The branches of medicine and physiology focussing on the function and disorders of the urinary system in males and females, the reproductive system in males, and the kidney.
+ Urology_and_nephrology
+ Kidney disease
+ Nephrology
+ Urological disorders
+ Urology
+
+
+
+ Urology and nephrology
+
+
+
+
+
+
+
+
+
+
+ 1.4
+ Alternative medicine
+ Holistic medicine
+ Integrative medicine
+ VT 3.2.12 Integrative and Complementary medicine
+ Medical therapies that fall beyond the scope of conventional medicine but may be used alongside it in the treatment of disease and ill health.
+ Complementary_medicine
+
+
+
+ Complementary medicine
+
+
+
+
+
+
+
+
+
+ 1.7
+ Techniques that use magnetic fields and radiowaves to form images, typically to investigate the anatomy and physiology of the human body.
+ MRT
+ Magnetic resonance imaging
+ Magnetic resonance tomography
+ NMRI
+ Nuclear magnetic resonance imaging
+ MRI
+
+
+ MRI
+
+
+
+
+
+
+
+
+
+
+ 1.7
+ The study of matter by studying the diffraction pattern from firing neutrons at a sample, typically to determine atomic and/or magnetic structure.
+ Neutron diffraction experiment
+ Neutron_diffraction
+ Elastic neutron scattering
+ Neutron microscopy
+
+
+ Neutron diffraction
+
+
+
+
+
+
+
+
+
+ 1.7
+ Imaging in sections (sectioning), through the use of a wave-generating device (tomograph) that generates an image (a tomogram).
+ CT
+ Computed tomography
+ TDM
+ Tomography
+ Electron tomography
+ PET
+ Positron emission tomography
+ X-ray tomography
+
+
+ Tomography
+
+
+
+
+
+
+
+
+
+ 1.7
+ true
+ KDD
+ Knowledge discovery in databases
+ VT 1.3.2 Data mining
+ The discovery of patterns in large data sets and the extraction and transformation of those patterns into a useful format.
+ Data_mining
+ Pattern recognition
+
+
+ Data mining
+
+
+
+
+
+
+
+
+
+ 1.7
+ Artificial Intelligence
+ VT 1.2.2 Artificial Intelligence (expert systems, machine learning, robotics)
+ A topic concerning the application of artificial intelligence methods to algorithms, in order to create methods that can learn from data in order to generate an output, rather than relying on explicitly encoded information only.
+ Machine_learning
+ Active learning
+ Ensemble learning
+ Kernel methods
+ Knowledge representation
+ Neural networks
+ Recommender system
+ Reinforcement learning
+ Supervised learning
+ Unsupervised learning
+
+
+ Machine learning
+
+
+
+
+
+
+
+
+
+ 1.8
+ Database administration
+ Information systems
+ Databases
+ The general handling of data stored in digital archives such as databases, databanks, web portals, and other data resources.
+ Database_management
+ Content management
+ Document management
+ File management
+ Record management
+
+
+ This includes databases for the results of scientific experiments, the application of high-throughput technology, computational analysis and the scientific literature. It covers the management and manipulation of digital documents, including database records, files, and reports.
+ Database management
+
+
+
+
+
+
+
+
+
+ 1.8
+ VT 1.5.29 Zoology
+ Animals, e.g. information on a specific animal genome including molecular sequences, genes and annotation.
+ Animal
+ Animal biology
+ Animals
+ Metazoa
+ Zoology
+ Animal genetics
+ Animal physiology
+ Entomology
+
+
+ The study of the animal kingdom.
+ Zoology
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ The biology, archival, detection, prediction and analysis of positional features such as functional and other key sites, in protein sequences and the conserved patterns (motifs, profiles etc.) that may be used to describe them.
+ Protein_sites_features_and_motifs
+ Protein sequence features
+ Signal peptide cleavage sites
+
+
+ A signal peptide coding sequence encodes an N-terminal domain of a secreted protein, which is involved in attaching the polypeptide to a membrane leader sequence. A transit peptide coding sequence encodes an N-terminal domain of a nuclear-encoded organellar protein; which is involved in import of the protein into the organelle.
+ Protein sites, features and motifs
+
+
+
+
+
+
+
+
+
+ 1.8
+ The biology, archival, detection, prediction and analysis of positional features such as functional and other key sites, in nucleic acid sequences and the conserved patterns (motifs, profiles etc.) that may be used to describe them.
+ Nucleic_acid_sites_features_and_motifs
+ Nucleic acid functional sites
+ Nucleic acid sequence features
+ Primer binding sites
+ Sequence tagged sites
+
+
+ Sequence tagged sites are short DNA sequences that are unique within a genome and serve as a mapping landmark, detectable by PCR they allow a genome to be mapped via an ordering of STSs.
+ Nucleic acid sites, features and motifs
+
+
+
+
+
+
+
+
+
+ 1.8
+ Transcription of DNA into RNA and features of messenger RNA (mRNA) molecules including precursor RNA, primary (unprocessed) transcript and fully processed molecules.
+ Gene_transcripts
+ Coding RNA
+ EST
+ Exons
+ Fusion transcripts
+ Gene transcript features
+ Introns
+ PolyA signal
+ PolyA site
+ Signal peptide coding sequence
+ Transit peptide coding sequence
+ cDNA
+ mRNA
+ mRNA features
+
+
+ This includes 5'untranslated region (5'UTR), coding sequences (CDS), exons, intervening sequences (intron) and 3'untranslated regions (3'UTR).
+ This includes Introns, and protein-coding regions including coding sequences (CDS), exons, translation initiation sites and open reading frames. Also expressed sequence tag (EST) or complementary DNA (cDNA) sequences.
+ This includes coding sequences for a signal or transit peptide. A signal peptide coding sequence encodes an N-terminal domain of a secreted protein, which is involved in attaching the polypeptide to a membrane leader sequence. A transit peptide coding sequence encodes an N-terminal domain of a nuclear-encoded organellar protein; which is involved in import of the protein into the organelle.
+ This includes regions or sites in a eukaryotic and eukaryotic viral RNA sequence which directs endonuclease cleavage or polyadenylation of an RNA transcript. A polyA signal is required for endonuclease cleavage of an RNA transcript that is followed by polyadenylation. A polyA site is a site on an RNA transcript to which adenine residues will be added during post-transcriptional polyadenylation.
+ Gene transcripts
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-ligand (small molecule) interaction(s).
+
+
+ Protein-ligand interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-drug interaction(s).
+
+
+ Protein-drug interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Genotype experiment including case control, population, and family studies. These might use array based methods and re-sequencing methods.
+ Genotyping_experiment
+
+
+ Genotyping experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ Genome-wide association study experiments.
+ GWAS
+ GWAS analysis
+ Genome-wide association study
+ GWAS_study
+
+
+ GWAS study
+
+
+
+
+
+
+
+
+
+ 1.8
+ Microarray experiments including conditions, protocol, sample:data relationships etc.
+ Microarrays
+ Microarray_experiment
+ Gene expression microarray
+ Genotyping array
+ Methylation array
+ MicroRNA array
+ Multichannel microarray
+ One channel microarray
+ Proprietary platform microarray
+ RNA chips
+ RNA microarrays
+ Reverse phase protein array
+ SNP array
+ Tiling arrays
+ Tissue microarray
+ Two channel microarray
+ aCGH microarray
+ mRNA microarray
+ miRNA array
+
+
+ This might specify which raw data file relates to which sample and information on hybridisations, e.g. which are technical and which are biological replicates.
+ Microarray experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ PCR experiments, e.g. quantitative real-time PCR.
+ Polymerase chain reaction
+ PCR_experiment
+ Quantitative PCR
+ RT-qPCR
+ Real Time Quantitative PCR
+
+
+ PCR experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ Proteomics experiments.
+ Proteomics_experiment
+ 2D PAGE experiment
+ DIA
+ Data-independent acquisition
+ MS
+ MS experiments
+ Mass spectrometry
+ Mass spectrometry experiments
+ Northern blot experiment
+ Spectrum demultiplexing
+
+
+ This includes two-dimensional gel electrophoresis (2D PAGE) experiments, gels or spots in a gel. Also mass spectrometry - an analytical chemistry technique that measures the mass-to-charge ratio and abundance of ions in the gas phase. Also Northern blot experiments.
+ Proteomics experiment
+
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Two-dimensional gel electrophoresis experiments, gels or spots in a gel.
+
+
+ 2D PAGE experiment
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Northern Blot experiments.
+
+
+ Northern blot experiment
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ RNAi experiments.
+ RNAi_experiment
+
+
+ RNAi experiment
+
+
+
+
+
+
+
+
+
+ 1.8
+ Biological computational model experiments (simulation), for example the minimum information required in order to permit its correct interpretation and reproduction.
+ Simulation_experiment
+
+
+ Simulation experiment
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-DNA/RNA interaction(s).
+
+
+ Protein-nucleic acid interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Protein-protein interaction(s), including interactions between protein domains.
+
+
+ Protein-protein interactions
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Cellular process pathways.
+
+
+ Cellular process pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Disease pathways, typically of human disease.
+
+
+ Disease pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Environmental information processing pathways.
+
+
+ Environmental information processing pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Genetic information processing pathways.
+
+
+ Genetic information processing pathways
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Super-secondary structure of protein sequence(s).
+
+
+ Protein super-secondary structure
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Catalytic residues (active site) of an enzyme.
+
+
+ Protein active sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Binding sites in proteins, including cleavage sites (for a proteolytic enzyme or agent), key residues involved in protein folding, catalytic residues (active site) of an enzyme, ligand-binding (non-catalytic) residues of a protein, such as sites that bind metal, prosthetic groups or lipids, RNA and DNA-binding proteins and binding sites etc.
+ Protein_binding_sites
+ Enzyme active site
+ Protein cleavage sites
+ Protein functional sites
+ Protein key folding sites
+ Protein-nucleic acid binding sites
+
+
+ Protein binding sites
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ RNA and DNA-binding proteins and binding sites in protein sequences.
+
+
+ Protein-nucleic acid binding sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Cleavage sites (for a proteolytic enzyme or agent) in a protein sequence.
+
+
+ Protein cleavage sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Chemical modification of a protein.
+
+
+ Protein chemical modifications
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Disordered structure in a protein.
+ Protein features (disordered structure)
+ Protein_disordered_structure
+
+
+ Protein disordered structure
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Structural domains or 3D folds in a protein or polypeptide chain.
+
+
+ Protein domains
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Key residues involved in protein folding.
+
+
+ Protein key folding sites
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Post-translation modifications in a protein sequence, typically describing the specific sites involved.
+
+
+ Protein post-translational modifications
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ Secondary structure (predicted or real) of a protein, including super-secondary structure.
+ Protein features (secondary structure)
+ Protein_secondary_structure
+ Protein super-secondary structure
+
+
+ Super-secondary structures include leucine zippers, coiled coils, Helix-Turn-Helix etc.
+ The location and size of the secondary structure elements and intervening loop regions is typically given. The report can include disulphide bonds and post-translationally formed peptide bonds (crosslinks).
+ Protein secondary structure
+
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Short repetitive subsequences (repeat sequences) in a protein sequence.
+
+
+ Protein sequence repeats
+ true
+
+
+
+
+
+
+
+
+ 1.8
+ 1.13
+
+ Signal peptides or signal peptide cleavage sites in protein sequences.
+
+
+ Protein signal peptides
+ true
+
+
+
+
+
+
+
+
+ 1.10
+ VT 1.1.1 Applied mathematics
+ The application of mathematics to specific problems in science, typically by the formulation and analysis of mathematical models.
+ Applied_mathematics
+
+
+ Applied mathematics
+
+
+
+
+
+
+
+
+
+ 1.10
+ VT 1.1.1 Pure mathematics
+ The study of abstract mathematical concepts.
+ Pure_mathematics
+ Linear algebra
+
+
+ Pure mathematics
+
+
+
+
+
+
+
+
+
+ 1.10
+ The control of data entry and maintenance to ensure the data meets defined standards, qualities or constraints.
+ Data_governance
+ Data stewardship
+
+
+ Data governance
+
+ http://purl.bioontology.org/ontology/MSH/D030541
+
+
+
+
+
+
+
+
+ 1.10
+ The quality, integrity, and cleaning up of data.
+ Data_quality_management
+ Data clean-up
+ Data cleaning
+ Data integrity
+ Data quality
+
+
+ Data quality management
+
+
+
+
+
+
+
+
+
+ 1.10
+ Freshwater science
+ VT 1.5.18 Marine and Freshwater biology
+ The study of organisms in freshwater ecosystems.
+ Freshwater_biology
+
+
+
+ Freshwater biology
+
+
+
+
+
+
+
+
+
+ 1.10
+ true
+ VT 3.1.2 Human genetics
+ The study of inheritance in human beings.
+ Human_genetics
+
+
+
+ Human genetics
+
+
+
+
+
+
+
+
+
+ 1.10
+ VT 3.3.14 Tropical medicine
+ Health problems that are prevalent in tropical and subtropical regions.
+ Tropical_medicine
+
+
+
+ Tropical medicine
+
+
+
+
+
+
+
+
+
+ 1.10
+ true
+ VT 3.3.14 Tropical medicine
+ VT 3.4 Medical biotechnology
+ VT 3.4.1 Biomedical devices
+ VT 3.4.2 Health-related biotechnology
+ Biotechnology applied to the medical sciences and the development of medicines.
+ Medical_biotechnology
+ Pharmaceutical biotechnology
+
+
+
+ Medical biotechnology
+
+
+
+
+
+
+
+
+
+ 1.10
+ true
+ VT 3.4.5 Molecular diagnostics
+ An approach to medicine whereby decisions and practices are tailored to the individual patient based on their predicted response or risk of disease.
+ Precision medicine
+ Personalised_medicine
+ Molecular diagnostics
+
+
+
+ Personalised medicine
+
+
+
+
+
+
+
+
+
+ 1.12
+ Experimental techniques to purify a protein-DNA crosslinked complex. Usually sequencing follows e.g. in the techniques ChIP-chip, ChIP-seq and MeDIP-seq.
+ Chromatin immunoprecipitation
+ Immunoprecipitation_experiment
+
+
+ Immunoprecipitation experiment
+
+
+
+
+
+
+
+
+
+ 1.12
+ Laboratory technique to sequence the complete DNA sequence of an organism's genome at a single time.
+ Genome sequencing
+ WGS
+ Whole_genome_sequencing
+ De novo genome sequencing
+ Whole genome resequencing
+
+
+ Whole genome sequencing
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ Laboratory technique to sequence the methylated regions in DNA.
+ MeDIP-chip
+ MeDIP-seq
+ mDIP
+ Methylated_DNA_immunoprecipitation
+ BS-Seq
+ Bisulfite sequencing
+ MeDIP
+ Methylated DNA immunoprecipitation (MeDIP)
+ Methylation sequencing
+ WGBS
+ Whole-genome bisulfite sequencing
+ methy-seq
+ methyl-seq
+
+
+ Methylated DNA immunoprecipitation
+
+
+
+
+
+
+
+
+
+ 1.12
+ Laboratory technique to sequence all the protein-coding regions in a genome, i.e., the exome.
+ Exome
+ Exome analysis
+ Exome capture
+ Targeted exome capture
+ WES
+ Whole exome sequencing
+ Exome_sequencing
+
+
+ Exome sequencing is considered a cheap alternative to whole genome sequencing.
+ Exome sequencing
+
+
+
+
+
+
+
+
+
+ 1.12
+
+ true
+ The design of an experiment intended to test a hypothesis, and describe or explain empirical data obtained under various experimental conditions.
+ Design of experiments
+ Experimental design
+ Studies
+ Experimental_design_and_studies
+
+
+ Experimental design and studies
+
+
+
+
+
+
+
+
+
+
+ 1.12
+ The design of an experiment involving non-human animals.
+ Animal_study
+ Challenge study
+
+
+ Animal study
+
+
+
+
+
+
+
+
+
+
+ 1.13
+ true
+ The ecology of microorganisms including their relationship with one another and their environment.
+ Environmental microbiology
+ Microbial_ecology
+ Community analysis
+ Microbiome
+ Molecular community analysis
+
+
+ Microbial ecology
+
+
+
+
+
+
+
+
+
+ 1.17
+ An antibody-based technique used to map in vivo RNA-protein interactions.
+ RIP
+ RNA_immunoprecipitation
+ CLIP
+ CLIP-seq
+ HITS-CLIP
+ PAR-CLIP
+ iCLIP
+
+
+ RNA immunoprecipitation
+
+
+
+
+
+
+
+
+
+ 1.17
+ Large-scale study (typically comparison) of DNA sequences of populations.
+ Population_genomics
+
+
+
+ Population genomics
+
+
+
+
+
+
+
+
+
+ 1.20
+ Agriculture
+ Agroecology
+ Agronomy
+ Multidisciplinary study, research and development within the field of agriculture.
+ Agricultural_science
+ Agricultural biotechnology
+ Agricultural economics
+ Animal breeding
+ Animal husbandry
+ Animal nutrition
+ Farming systems research
+ Food process engineering
+ Food security
+ Horticulture
+ Phytomedicine
+ Plant breeding
+ Plant cultivation
+ Plant nutrition
+ Plant pathology
+ Soil science
+
+
+ Agricultural science
+
+
+
+
+
+
+
+
+
+ 1.20
+ Approach which samples, in parallel, all genes in all organisms present in a given sample, e.g. to provide insight into biodiversity and function.
+ Shotgun metagenomic sequencing
+ Metagenomic_sequencing
+
+
+ Metagenomic sequencing
+
+
+
+
+
+
+
+
+
+ 1.21
+ Environment
+ Study of the environment, the interactions between its physical, chemical, and biological components and its effect on life. Also how humans impact upon the environment, and how we can manage and utilise natural resources.
+ Environmental_science
+
+
+ Environmental sciences
+
+
+
+
+
+
+
+
+
+ 1.22
+ The study and simulation of molecular conformations using a computational model and computer simulations.
+
+
+ This includes methods such as Molecular Dynamics, Coarse-grained dynamics, metadynamics, Quantum Mechanics, QM/MM, Markov State Models, etc.
+ Biomolecular simulation
+
+
+
+
+
+
+
+
+
+ 1.22
+ The application of multi-disciplinary science and technology for the construction of artificial biological systems for diverse applications.
+ Biomimetic chemistry
+
+
+ Synthetic biology
+
+
+
+
+
+
+
+
+
+
+ 1.22
+ The application of biotechnology to directly manipulate an organism's genes.
+ Genetic manipulation
+ Genetic modification
+ Genetic_engineering
+ Genome editing
+ Genome engineering
+
+
+ Genetic engineering
+
+
+
+
+
+
+
+
+
+ 1.24
+ A field of biological research focused on the discovery and identification of peptides, typically by comparing mass spectra against a protein database.
+ Proteogenomics
+
+
+ Proteogenomics
+
+
+
+
+
+
+
+
+
+ 1.24
+ Amplicon panels
+ Resequencing
+ Laboratory experiment to identify the differences between a specific genome (of an individual) and a reference genome (developed typically from many thousands of individuals). WGS re-sequencing is used as the gold standard to detect variations compared to a given reference genome, including small variants (SNP and InDels) as well as larger genome re-organisations (CNVs, translocations, etc.).
+ Highly targeted resequencing
+ Whole genome resequencing (WGR)
+ Whole-genome re-sequencing (WGSR)
+ Amplicon sequencing
+ Amplicon-based sequencing
+ Ultra-deep sequencing
+ Amplicon sequencing is the ultra-deep sequencing of PCR products (amplicons), usually for the purpose of efficient genetic variant identification and characterisation in specific genomic regions.
+ Genome resequencing
+
+
+
+
+
+
+
+
+
+ 1.24
+ A biomedical field that bridges immunology and genetics, to study the genetic basis of the immune system.
+ Immune system genetics
+ Immungenetics
+ Immunology and genetics
+ Immunogenetics
+ Immunogenes
+
+
+ This involves the study of often complex genetic traits underlying diseases involving defects in the immune system. For example, identifying target genes for therapeutic approaches, or genetic variations involved in immunological pathology.
+ Immunogenetics
+
+
+
+
+
+
+
+
+
+ 1.24
+ Interdisciplinary science focused on extracting information from chemical systems by data analytical approaches, for example multivariate statistics, applied mathematics, and computer science.
+ Chemometrics
+
+
+ Chemometrics
+
+
+
+
+
+
+
+
+
+ 1.24
+ Cytometry is the measurement of the characteristics of cells.
+ Cytometry
+ Flow cytometry
+ Image cytometry
+ Mass cytometry
+
+
+ Cytometry
+
+
+
+
+
+
+
+
+
+ 1.24
+ Biotechnology approach that seeks to optimize cellular genetic and regulatory processes in order to increase the cells' production of a certain substance.
+
+
+ Metabolic engineering
+
+
+
+
+
+
+
+
+
+ 1.24
+ Molecular biology methods used to analyze the spatial organization of chromatin in a cell.
+ 3C technologies
+ 3C-based methods
+ Chromosome conformation analysis
+ Chromosome_conformation_capture
+ Chromatin accessibility
+ Chromatin accessibility assay
+ Chromosome conformation capture
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The study of microbe gene expression within natural environments (i.e. the metatranscriptome).
+ Metatranscriptomics
+
+
+ Metatranscriptomics methods can be used for whole gene expression profiling of complex microbial communities.
+ Metatranscriptomics
+
+
+
+
+
+
+
+
+
+ 1.24
+ The reconstruction and analysis of genomic information in extinct species.
+ Paleogenomics
+ Ancestral genomes
+ Paleogenetics
+ Paleogenomics
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The biological classification of organisms by categorizing them in groups ("clades") based on their most recent common ancestor.
+ Cladistics
+ Tree of life
+
+
+ Cladistics
+
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ The study of the process and mechanism of change of biomolecules such as DNA, RNA, and proteins across generations.
+ Molecular_evolution
+
+
+ Molecular evolution
+
+
+
+
+
+
+
+
+
+
+ 1.24
+ Immunoinformatics is the field of computational biology that deals with the study of immunological questions. Immunoinformatics is at the interface between immunology and computer science. It takes advantage of computational, statistical, mathematical approaches and enhances the understanding of immunological knowledge.
+ Computational immunology
+ Immunoinformatics
+ This involves the study of often complex genetic traits underlying diseases involving defects in the immune system. For example, identifying target genes for therapeutic approaches, or genetic variations involved in immunological pathology.
+ Immunoinformatics
+
+
+
+
+
+
+
+
+
+ 1.24
+ A diagnostic imaging technique based on the application of ultrasound.
+ Standardized echography
+ Ultrasound imaging
+ Echography
+ Diagnostic sonography
+ Medical ultrasound
+ Standard echography
+ Ultrasonography
+
+
+ Echography
+
+
+
+
+
+
+
+
+
+ 1.24
+ Experimental approaches to determine the rates of metabolic reactions - the metabolic fluxes - within a biological entity.
+ Fluxomics
+ The "fluxome" is the complete set of metabolic fluxes in a cell, and is a dynamic aspect of phenotype.
+ Fluxomics
+
+
+
+
+
+
+
+
+
+ 1.12
+ An experiment for studying protein-protein interactions.
+ Protein_interaction_experiment
+ Co-immunoprecipitation
+ Phage display
+ Yeast one-hybrid
+ Yeast two-hybrid
+
+
+ This used to have the ID http://edamontology.org/topic_3557 but the numerical part (owing to an error) duplicated http://edamontology.org/operation_3557 ('Imputation'). ID of this concept set to http://edamontology.org/topic_3957 in EDAM 1.24.
+ Protein interaction experiment
+
+
+
+
+
+
+
+
+
+ 1.25
+ A DNA structural variation, specifically a duplication or deletion event, resulting in sections of the genome to be repeated, or the number of repeats in the genome to vary between individuals.
+ Copy_number_variation
+ CNV deletion
+ CNV duplication
+ CNV insertion / amplification
+ Complex CNV
+ Copy number variant
+ Copy number variation
+
+
+
+
+
+
+
+
+
+ 1.25
+ The branch of genetics concerned with the relationships between chromosomes and cellular behaviour, especially during mitosis and meiosis.
+
+
+ Cytogenetics
+
+
+
+
+
+
+
+
+
+ 1.25
+ The design of vaccines to protect against a particular pathogen, including antigens, delivery systems, and adjuvants to elicit a predictable immune response against specific epitopes.
+ Vaccinology
+ Rational vaccine design
+ Reverse vaccinology
+ Structural vaccinology
+ Structure-based immunogen design
+ Vaccine design
+
+
+ Vaccinology
+
+
+
+
+
+
+
+
+
+ 1.25
+ The study of immune system as a whole, its regulation and response to pathogens using genome-wide approaches.
+
+
+ Immunomics
+
+
+
+
+
+
+
+
+
+ 1.25
+ Epistasis can be defined as the ability of the genotype at one locus to supersede the phenotypic effect of a mutation at another locus. This interaction between genes can occur at different levels: gene expression, protein levels, etc.
+ Epistatic genetic interaction
+ Epistatic interactions
+
+
+ Epistasis
+
+ http://purl.bioontology.org/ontology/MSH/D004843
+
+
+
+
+
+
+
+
+ 1.26
+ Open science encompasses the practices of making scientific research transparent and participatory, and its outputs publicly accessible.
+
+
+ Open science
+
+
+
+
+
+
+
+
+
+ 1.26
+ Data rescue denotes digitalisation, formatting, archival, and publication of data that were not available in accessible or usable form. Examples are data from private archives, data inside publications, or in paper records stored privately or publicly.
+
+
+ Data rescue
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ FAIR data principles
+ FAIRification
+ FAIR data is data that meets the principles of being findable, accessible, interoperable, and reusable.
+ Findable, accessible, interoperable, reusable data
+ Open data
+
+
+ A substantially overlapping term is 'open data', i.e. publicly available data that is free to use, distribute, and create derivative work from, without restrictions. Open data does not automatically have to be FAIR (e.g. findable or interoperable), while FAIR data does in some cases not have to be publicly available without restrictions (especially sensitive personal data).
+ FAIR data
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Microbial mechanisms for protecting microorganisms against antimicrobial agents.
+ AMR
+ Antifungal resistance
+ Antiprotozoal resistance
+ Antiviral resistance
+ Extensive drug resistance (XDR)
+ Multidrug resistance
+ Multiple drug resistance (MDR)
+ Multiresistance
+ Pandrug resistance (PDR)
+ Total drug resistance (TDR)
+
+
+ Antimicrobial Resistance
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The monitoring method for measuring electrical activity in the brain.
+ EEG
+
+
+ Electroencephalography
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The monitoring method for measuring electrical activity in the heart.
+ ECG
+ EKG
+
+
+ Electrocardiography
+
+
+
+
+
+
+
+
+
+ 1.26
+ A method for studying biomolecules and other structures at very low (cryogenic) temperature using electron microscopy.
+ cryo-EM
+
+
+ Cryogenic electron microscopy
+
+
+
+
+
+
+
+
+
+ 1.26
+ Biosciences, or life sciences, include fields of study related to life, living beings, and biomolecules.
+ Life sciences
+
+
+ Biosciences
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Biogeochemical cycle
+ The carbon cycle is the biogeochemical pathway of carbon moving through the different parts of the Earth (such as ocean, atmosphere, soil), or eventually another planet.
+
+
+ Note that the carbon-nitrogen-oxygen (CNO) cycle (https://en.wikipedia.org/wiki/CNO_cycle) is a completely different, thermonuclear reaction in stars.
+ Carbon cycle
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Multiomics concerns integration of data from multiple omics (e.g. transcriptomics, proteomics, epigenomics).
+ Integrative omics
+ Multi-omics
+ Pan-omics
+ Panomics
+
+
+ Multiomics
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ With ribosome profiling, ribosome-protected mRNA fragments are analyzed with RNA-seq techniques leading to a genome-wide measurement of the translation landscape.
+ RIBO-seq
+ Ribo-Seq
+ RiboSeq
+ ribo-seq
+ ribosomal footprinting
+ translation footprinting
+
+
+ Ribosome Profiling
+
+
+
+
+
+
+
+
+
+ 1.26
+ Combined with NGS (Next Generation Sequencing) technologies, single-cell sequencing allows the study of genetic information (DNA, RNA, epigenome...) at a single cell level. It is often used for differential analysis and gene expression profiling.
+ Single Cell Genomics
+
+
+ Single-Cell Sequencing
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ The study of mechanical waves in liquids, solids, and gases.
+
+
+ Acoustics
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Interdisciplinary study of behavior, precise control, and manipulation of low (microlitre) volume fluids in constrained space.
+ Fluidics
+
+
+ Microfluidics
+
+
+
+
+
+
+
+
+
+ 1.26
+ Genomic imprinting is a gene regulation mechanism by which a subset of genes are expressed from one of the two parental chromosomes only. Imprinted genes are organized in clusters, their silencing/activation of the imprinted loci involves epigenetic marks (DNA methylation, etc) and so-called imprinting control regions (ICR). It has been described in mammals, but also plants and insects.
+ Gene imprinting
+
+
+ Genomic imprinting
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.26
+ Environmental DNA (eDNA)
+ Environmental RNA (eRNA)
+ Environmental sequencing
+ Taxonomic profiling
+ Metabarcoding is the barcoding of (environmental) DNA or RNA to identify multiple taxa from the same sample.
+ DNA metabarcoding
+ Environmental metabarcoding
+ RNA metabarcoding
+ eDNA metabarcoding
+ eRNA metabarcoding
+
+
+ Typically, high-throughput sequencing is performed and the resulting sequence reads are matched to DNA barcodes in a reference database.
+ Metabarcoding
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+
+ An obsolete concept (redefined in EDAM).
+
+ Needed for conversion to the OBO format.
+ Obsolete concept (EDAM)
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ beta12orEarlier
+ A serialisation format conforming to the Web Ontology Language (OWL) model.
+
+
+ OWL format
+
+
+ 1.2
+ rdf
+
+ Resource Description Framework (RDF) XML format.
+
+
+ RDF/XML can be used as a standard serialisation syntax for OWL DL, but not for OWL Full.
+ RDF/XML
+ http://www.ebi.ac.uk/SWO/data/SWO_3000006
+
+
+
+
+
+
+
diff --git a/edamfu/tests/output.xml b/edamfu/tests/output.xml
new file mode 100644
index 0000000..793a6cb
--- /dev/null
+++ b/edamfu/tests/output.xml
@@ -0,0 +1,2 @@
+
+Text with '
\ No newline at end of file
diff --git a/edamfu/tests/test_core.py b/edamfu/tests/test_core.py
new file mode 100644
index 0000000..70abd8d
--- /dev/null
+++ b/edamfu/tests/test_core.py
@@ -0,0 +1,113 @@
+import filecmp
+import os
+import tempfile
+import unittest
+from xml.dom import minidom
+import xml.etree.ElementTree as ET
+
+from rdflib import Graph, URIRef, RDF, OWL, Namespace
+
+from edamfu.core import load, save, CANONICAL_NAMESPACES
+
+
+def get_temporary_file_path():
+ # Create a temporary file and return its path
+ temp_file = tempfile.NamedTemporaryFile(delete=False)
+ temp_file_path = temp_file.name
+ temp_file.close()
+ return temp_file_path
+
+
+def get_ontology_subject(graph):
+ # Test for the presence of owl:Ontology
+ ontology_subjects = [
+ subject
+ for subject in graph.subjects(RDF.type, OWL.Ontology)
+ if all(
+ triple_object != OWL.Class
+ for _, _, triple_object in graph.triples((subject, RDF.type, None))
+ )
+ ]
+ return ontology_subjects[0] if ontology_subjects else None
+
+
+def retrieve_element_from_xml(xml_file_path, element_path):
+ # Parse the XML file
+ tree = ET.parse(xml_file_path)
+ root = tree.getroot()
+ return root.findall(element_path)
+
+
+def compare_xml_elements(elem1, elem2, tag=''):
+ if elem1.tag != elem2.tag:
+ return False, f"tag `{tag}`: Tags {elem1.tag} and {elem2.tag} are different"
+ if elem1.text and elem2.text and elem1.text != elem2.text:
+ text1 = elem1.text.replace(' ', '\u2423').replace(' ', '\u2192').replace('\n', '\u240A')
+ text2 = elem2.text.replace(' ', '\u2423').replace(' ', '\u2192').replace('\n', '\u240A')
+ return False, f"tag `{tag}`: Text `{text1}` and `{text2}` are different"
+ elif (elem1.text and not elem2.text) or (not elem1.text and elem2.text):
+ return False, f"tag `{tag}`: Text {elem1.text} and {elem2.text} are different"
+ if elem1.attrib != elem2.attrib:
+ return False, f"tag `{tag}`: Attributes {elem1.attrib} and {elem2.attrib} are different"
+ if len(elem1) != len(elem2):
+ return False, f"tag `{tag}`: Number of children {len(elem1)} and {len(elem2)} are different"
+ for child1, child2 in zip(elem1, elem2):
+ res, msg = compare_xml_elements(child1, child2, tag=tag+'/'+elem1.tag)
+ if not res:
+ return res, msg
+ return True, "all elements are identical"
+
+def get_pretty_xml(element):
+ rough_string = ET.tostring(element, 'utf-8')
+ reparsed = minidom.parseString(rough_string)
+ return reparsed.toprettyxml(indent=" ")
+
+class CoreTestCase(unittest.TestCase):
+ # Test loading the "raw" EDAM ontology from a file, and then saving it to another file
+
+ @classmethod
+ def setUpClass(cls):
+ # Set up any necessary test data or resources
+ cls.edam_file_raw_path = "EDAM_dev.owl"
+ g = load(cls.edam_file_raw_path)
+ cls.result_file_path = get_temporary_file_path()
+ save(g, cls.result_file_path)
+ cls.result_graph = Graph()
+ cls.result_graph.parse(cls.result_file_path, format="xml")
+
+ @classmethod
+ def tearDownClass(cls):
+ #print(retrieve_element_from_xml(cls.edam_file_raw_path, ".//{http://www.w3.org/2002/07/owl#}Ontology"))
+ #print(retrieve_element_from_xml(cls.result_file_path, ".//{http://www.w3.org/2002/07/owl#}Ontology"))
+ #if cls.result_file_path and os.path.exists(cls.result_file_path):
+ # os.remove(cls.result_file_path)
+ return
+
+ def test_ontology_class_exists(self):
+ # there should be an ontology class in the result graph
+ self.assertIsNotNone(
+ get_ontology_subject(self.result_graph),
+ f"Cannot find the ontology class in the result graph of file {self.result_file_path}",
+ )
+
+ def test_ontology_elements_identical(self):
+ # the result should be the same as the original file
+ raw_onto_el = retrieve_element_from_xml(self.edam_file_raw_path, ".//{http://www.w3.org/2002/07/owl#}Ontology")[0]
+ result_onto_el = retrieve_element_from_xml(self.result_file_path, ".//{http://www.w3.org/2002/07/owl#}Ontology")[0]
+ res, msg = compare_xml_elements(raw_onto_el, result_onto_el)
+ self.assertTrue(
+ res,
+ f"Ontology elements in {self.edam_file_raw_path} and {self.result_file_path} are not identical because {msg}",
+ #f"Ontology elements in {self.edam_file_raw_path} and {self.result_file_path} are not identical because {msg}:\n\n {get_pretty_xml(raw_onto_el)}\n\n vs \n\n{get_pretty_xml(result_onto_el)}",
+ )
+
+ def test_files_identical(self):
+ # the result should be the same as the original file
+ self.assertTrue(
+ filecmp.cmp(self.edam_file_raw_path, self.result_file_path),
+ f"Files {self.edam_file_raw_path} and {self.result_file_path} are not identical",
+ )
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/src/webapp/app.py b/src/webapp/app.py
index c4f82ad..ac86123 100644
--- a/src/webapp/app.py
+++ b/src/webapp/app.py
@@ -1,7 +1,7 @@
import csv
from flask import Flask, redirect, url_for, request, render_template
import random
-import nbformat
+import os
from rdflib import ConjunctiveGraph, Namespace
@@ -51,11 +51,27 @@ def index():
new_formats=res['nb_formats'] - res_last['nb_formats']
)
-@app.route('/expert_curation')
-def expert_curation():
- # 1. select a topic
- # 2. select topic-specific curation actions (subclasses of the identified topic)
- return render_template('expert_curation.html')
+@app.route('/current')
+def current():
+ res = get_edam_numbers(g)
+
+ return render_template('current.html',
+ topics=res['nb_topics'],
+ operations=res['nb_operations'],
+ data=res['nb_data'],
+ formats=res['nb_formats']
+ )
+
+@app.route('/quality')
+def quality():
+ res = get_edam_numbers(g)
+
+ return render_template('quality.html',
+ topics=res['nb_topics'],
+ operations=res['nb_operations'],
+ data=res['nb_data'],
+ formats=res['nb_formats']
+ )
def get_edam_numbers(g):
query_op = """
@@ -140,49 +156,182 @@ def edam_last_report():
return render_template('edam_last_report.html', output_edamci_list=edamci_output_list, robot_output_list=robot_output_list)
+#################################################
+# How to contribute
+#################################################
+@app.route('/high_priority')
+def high_priority():
+ dir_queries = "./queries"
+
+ ## Checks that all mandatory properties are filled in.
+ query = dir_queries + "/mandatory_property_missing.rq"
+ with open(query, "r") as f:
+ query = f.read()
+ results = g.query(query)
+ f.close()
+
+ mandatory_property_missing = []
+ for r in results:
+ mandatory_property_missing.append({"term": r["label"], "class": r["entity"]})
+
+ ## Checks that all IDs have a unique number.
+ query = dir_queries + "/get_uri.rq"
+ with open(query, "r") as f:
+ query = f.read()
+ results = g.query(query)
+ f.close()
+
+ id_unique = []
+ for r in results:
+ id_unique.append({"term": r["label"], "class": r["entity"]})
+
+ return render_template('high_priority.html',
+ mandatory_property_missing = mandatory_property_missing,
+ id_unique = id_unique,
+ random = random)
+
+####################################
@app.route('/quick_curation')
def quick_curation():
- # NO wikipedia
- q_no_wikipedia = """
- SELECT (count(?term) as ?nb_no_wikipedia) WHERE {
- ?c rdfs:subClassOf+ edam:topic_0003 ;
- rdfs:label ?term .
-
- FILTER NOT EXISTS {
- ?c rdfs:seeAlso ?seealso .
- FILTER (regex(str(?seealso), "wikipedia.org", "i"))
- } .
- }
- """
- results = g.query(q_no_wikipedia, initNs=ns)
- count_no_wikipedia = 0
+ dir_queries = "./queries"
+
+ ## Checks that all webpage and doi are declared as literal links.
+ query = dir_queries + "/literal_links.rq"
+ with open(query, "r") as f:
+ query = f.read()
+ results = g.query(query)
+ f.close()
+
+ literal_links = []
+ for r in results:
+ literal_links.append({"term": r["label"], "class": r["entity"]})
+
+ ## Formatting of def and labels
+ # end_dot_def_missing.rq;end_dot_label.rq;end_space_annotation.rq;eol_in_annotation.rq;start_space_annotation.rq;tab_in_annotation.rq
+ queries = [ dir_queries + "/end_dot_def_missing.rq", dir_queries + "/end_dot_label.rq", dir_queries + "/end_space_annotation.rq", dir_queries + "/eol_in_annotation.rq",
+ dir_queries + "/start_space_annotation.rq", dir_queries + "/tab_in_annotation.rq"]
+ results = {}
+ for q in queries:
+ with open(q, "r") as f:
+ q = f.read()
+ results.update(g.query(q))
+ f.close()
+
+ formatting = []
for r in results:
- count_no_wikipedia = str(r["nb_no_wikipedia"])
-
- #########
- q_no_wikipedia_all = """
- SELECT ?c ?term WHERE {
- ?c rdfs:subClassOf+ edam:topic_0003 ;
- rdfs:label ?term .
-
- FILTER NOT EXISTS {
- ?c rdfs:seeAlso ?seealso .
- FILTER (regex(str(?seealso), "wikipedia.org", "i"))
- } .
- }
- """
- results = g.query(q_no_wikipedia_all, initNs=ns)
- no_wikipedia = []
+ formatting.append({"term": r["label"], "class": r["entity"]})
+
+ ## Get topics without a wikipedia link (WARNING)
+ query = dir_queries + "/no_wikipedia_link_topic.rq"
+ with open(query, "r") as f:
+ query = f.read()
+ results = g.query(query)
+ f.close()
+
+ no_wikipedia_link_topic = []
for r in results:
- no_wikipedia.append({"term": r["term"], "class": r["c"]})
+ no_wikipedia_link_topic.append({"term": r["term"], "class": r["concept"]})
+
+ # ## Get operations without a wikipedia link (WARNING)
+ # query = dir_queries + "/no_wikipedia_link_operation.rq"
+ # with open(query, "r") as f:
+ # query = f.read()
+ # results = g.query(query)
+ # f.close()
+
+ # no_wikipedia_link_operation = []
+ # for r in results:
+ # no_wikipedia_link_operation.append({"term": r["term"], "class": r["concept"]})
+
+ # ## Get topics without any broad synonym (OPTIONAL)
+ # query = dir_queries + "/no_broad_synonym_topic.rq"
+ # with open(query, "r") as f:
+ # query = f.read()
+ # results = g.query(query)
+ # f.close()
+
+ # no_broad_synonym_topic = []
+ # for r in results:
+ # no_broad_synonym_topic.append({"term": r["term"], "class": r["concept"]})
- if len(no_wikipedia) > 5:
- no_wikipedia = random.sample(no_wikipedia, 5)
+ # ## Get topics without a definition (ERROR)
+ # query = dir_queries + "/no_definition_topic.rq"
+ # with open(query, "r") as f:
+ # query = f.read()
+ # results = g.query(query)
+ # f.close()
+
+ # no_definition_topic = []
+ # for r in results:
+ # no_definition_topic.append({"term": r["term"], "class": r["concept"]})
+
+
+ # NO wikipedia
+ # q_no_wikipedia = """
+ # SELECT (count(?term) as ?nb_no_wikipedia) WHERE {
+ # ?c rdfs:subClassOf+ edam:topic_0003 ;
+ # rdfs:label ?term .
+ #
+ # FILTER NOT EXISTS {
+ # ?c rdfs:seeAlso ?seealso .
+ # FILTER (regex(str(?seealso), "wikipedia.org", "i"))
+ # } .
+ # }
+ # """
+ #
+ # results = g.query(q_no_wikipedia, initNs=ns)
+ # count_no_wikipedia = 0
+ # for r in results:
+ # count_no_wikipedia = str(r["nb_no_wikipedia"])
+
+ # #########
+ # q_no_wikipedia_all = """
+ # SELECT ?c ?term WHERE {
+ # ?c rdfs:subClassOf+ edam:topic_0003 ;
+ # rdfs:label ?term .
+ #
+ # FILTER NOT EXISTS {
+ # ?c rdfs:seeAlso ?seealso .
+ # FILTER (regex(str(?seealso), "wikipedia.org", "i"))
+ # } .
+ # }
+ # """
+ # results = g.query(q_no_wikipedia_all, initNs=ns)
+ # no_wikipedia = []
+ # for r in results:
+ # no_wikipedia.append({"term": r["term"], "class": r["c"]})
+ #
+ # if len(no_wikipedia) > 5:
+ # no_wikipedia = random.sample(no_wikipedia, 5)
return render_template('quick_curation.html',
- count_no_wikipedia = count_no_wikipedia,
- missing_wikipedia = no_wikipedia)
+ #count_no_wikipedia = count_no_wikipedia,
+ no_wikipedia_link_topic = no_wikipedia_link_topic,
+ literal_links = literal_links,
+ formatting = formatting,
+ random = random)
+
+##############################################
+@app.route('/field_specific')
+def field_specific():
+ dir_queries = "./queries"
+ ## Get identifiers (hybrid) without a regex (WARNING)
+ query = dir_queries + "/no_regex_identifier.rq"
+ with open(query, "r") as f:
+ query = f.read()
+ results = g.query(query)
+ f.close()
+
+ no_regex_identifier = []
+ for r in results:
+ no_regex_identifier.append({"term": r["term"], "class": r["concept"]})
+
+
+ return render_template('field_specific.html',
+ no_regex_identifier = no_regex_identifier,
+ random = random)
+
if __name__ == "__main__":
diff --git a/src/webapp/queries/end_dot_def_missing.rq b/src/webapp/queries/end_dot_def_missing.rq
new file mode 100644
index 0000000..8ae3ab5
--- /dev/null
+++ b/src/webapp/queries/end_dot_def_missing.rq
@@ -0,0 +1,21 @@
+PREFIX obo:
+PREFIX owl:
+PREFIX rdfs:
+PREFIX rdf:
+PREFIX oboInOwl:
+PREFIX edam:
+PREFIX xsd:
+
+SELECT DISTINCT ?entity ?property ?def ?label WHERE {
+ VALUES ?property {oboInOwl:hasDefinition}
+ ?entity ?property ?def .
+ ?entity rdfs:label ?label .
+
+ FILTER NOT EXISTS {
+ FILTER REGEX(str(?def), "['.']+$")
+ }
+ FILTER (!isBlank(?def))
+
+}
+ORDER BY ?entity
+
diff --git a/src/webapp/queries/end_dot_label.rq b/src/webapp/queries/end_dot_label.rq
new file mode 100644
index 0000000..358bd3e
--- /dev/null
+++ b/src/webapp/queries/end_dot_label.rq
@@ -0,0 +1,19 @@
+PREFIX obo:
+PREFIX owl:
+PREFIX rdfs:
+PREFIX rdf:
+PREFIX oboInOwl:
+PREFIX edam:
+PREFIX xsd:
+
+SELECT DISTINCT ?entity ?property ?value ?label WHERE {
+ VALUES ?property {rdfs:label}
+ ?entity ?property ?value .
+ ?entity rdfs:label ?label .
+
+ FILTER REGEX(str(?value), "['.']+$")
+ FILTER (!isBlank(?entity))
+
+}
+ORDER BY ?entity
+
diff --git a/src/webapp/queries/end_space_annotation.rq b/src/webapp/queries/end_space_annotation.rq
new file mode 100644
index 0000000..a0fe042
--- /dev/null
+++ b/src/webapp/queries/end_space_annotation.rq
@@ -0,0 +1,18 @@
+PREFIX obo:
+PREFIX owl:
+PREFIX rdfs:
+PREFIX rdf:
+PREFIX oboInOwl:
+PREFIX edam:
+PREFIX xsd:
+
+SELECT DISTINCT ?entity ?property ?value ?label WHERE {
+ ?entity ?property ?value .
+ ?entity rdfs:label ?label .
+
+ FILTER REGEX(str(?value), "[\\s\r\n]+$")
+ FILTER (!isBlank(?entity))
+
+}
+ORDER BY ?entity
+
diff --git a/src/webapp/queries/eol_in_annotation.rq b/src/webapp/queries/eol_in_annotation.rq
new file mode 100644
index 0000000..3f3cf88
--- /dev/null
+++ b/src/webapp/queries/eol_in_annotation.rq
@@ -0,0 +1,17 @@
+PREFIX obo:
+PREFIX owl:
+PREFIX rdfs:
+PREFIX rdf:
+PREFIX oboInOwl:
+PREFIX edam:
+PREFIX xsd:
+
+SELECT DISTINCT ?entity ?property ?value ?label WHERE {
+ ?entity ?property ?value .
+ ?entity rdfs:label ?label .
+ FILTER regex(?value, "\n")
+ FILTER (!isBlank(?entity))
+
+}
+ORDER BY ?entity
+
diff --git a/src/webapp/queries/get_uri.rq b/src/webapp/queries/get_uri.rq
new file mode 100644
index 0000000..4e409fc
--- /dev/null
+++ b/src/webapp/queries/get_uri.rq
@@ -0,0 +1,15 @@
+PREFIX obo:
+PREFIX owl:
+PREFIX rdfs:
+PREFIX rdf:
+PREFIX oboInOwl:
+PREFIX edam:
+PREFIX xsd:
+
+SELECT DISTINCT ?entity ?label WHERE {
+
+ ?entity a owl:Class .
+ OPTIONAL {?entity rdfs:label ?label .}
+
+}
+ORDER BY ?entity
diff --git a/src/webapp/queries/literal_links.rq b/src/webapp/queries/literal_links.rq
new file mode 100644
index 0000000..e0078a0
--- /dev/null
+++ b/src/webapp/queries/literal_links.rq
@@ -0,0 +1,38 @@
+PREFIX obo:
+PREFIX owl:
+PREFIX rdfs:
+PREFIX rdf:
+PREFIX oboInOwl:
+PREFIX edam:
+PREFIX xsd:
+
+SELECT DISTINCT ?entity ?property ?label ?value WHERE {
+ VALUES ?property {
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+}
+
+?entity ?property ?value .
+
+FILTER isLiteral(?value)
+?entity rdfs:label ?label .
+
+}
+ORDER BY ?entity
\ No newline at end of file
diff --git a/src/webapp/queries/mandatory_property_missing.rq b/src/webapp/queries/mandatory_property_missing.rq
new file mode 100644
index 0000000..084bf7e
--- /dev/null
+++ b/src/webapp/queries/mandatory_property_missing.rq
@@ -0,0 +1,37 @@
+PREFIX obo:
+PREFIX owl:
+PREFIX rdfs:
+PREFIX rdf:
+PREFIX oboInOwl:
+PREFIX edam:
+PREFIX xsd:
+
+SELECT DISTINCT ?entity ?property ?label ?property_subs_edam WHERE {
+
+ VALUES ?property {oboInOwl:hasDefinition
+ edam:created_in
+ #oboInOwl:inSubset
+ rdfs:label
+ rdfs:subClassOf }
+ ?entity a owl:Class .
+
+ FILTER NOT EXISTS {?entity owl:deprecated true .}
+ OPTIONAL {?entity rdfs:label ?label .}
+ FILTER ( ?entity != )
+ FILTER ( ?entity != )
+ FILTER ( ?entity != )
+ FILTER ( ?entity != )
+ FILTER ( ?entity != )
+
+ FILTER NOT EXISTS {?entity ?property ?value .
+ MINUS { ?value rdf:type owl:Restriction .} #to prevent concept with rdfs:subClassOf property being only restriction (e.g. has_topic)
+ }
+ FILTER (!isBlank(?entity))
+ # UNION
+ # {
+ # VALUES ?property { oboInOwl:inSubset
+ # }
+ # FILTER NOT EXISTS {?entity ?property .}
+ # }
+}
+ORDER BY ?entity
\ No newline at end of file
diff --git a/src/webapp/queries/no_broad_synonym_topic.rq b/src/webapp/queries/no_broad_synonym_topic.rq
new file mode 100644
index 0000000..5efaa10
--- /dev/null
+++ b/src/webapp/queries/no_broad_synonym_topic.rq
@@ -0,0 +1,12 @@
+## Get topics that don't have any 'broadSynonym' attribute
+
+PREFIX edam:
+PREFIX oboInOwl:
+
+SELECT ?term ?concept WHERE {
+ ?concept rdfs:subClassOf+ edam:topic_0003 ;
+ rdfs:label ?term .
+ FILTER NOT EXISTS {
+ ?concept oboInOwl:hasBroadSynonym ?hasBroadSynonym .
+ } .
+}
\ No newline at end of file
diff --git a/src/webapp/queries/no_definition_topic.rq b/src/webapp/queries/no_definition_topic.rq
new file mode 100644
index 0000000..7a24ef0
--- /dev/null
+++ b/src/webapp/queries/no_definition_topic.rq
@@ -0,0 +1,13 @@
+## Get topics that don't have a 'hasDefinition' attribute (ERROR level)
+
+PREFIX edam:
+PREFIX oboInOwl:
+
+SELECT ?term ?concept WHERE {
+ ?concept rdfs:subClassOf+ edam:topic_0003 ;
+ rdfs:label ?term .
+
+ FILTER NOT EXISTS {
+ ?concept oboInOwl:hasDefinition ?def
+ } .
+}
\ No newline at end of file
diff --git a/src/webapp/queries/no_regex_identifier.rq b/src/webapp/queries/no_regex_identifier.rq
new file mode 100644
index 0000000..ee7309f
--- /dev/null
+++ b/src/webapp/queries/no_regex_identifier.rq
@@ -0,0 +1,14 @@
+## Hybrid Identifiers with no regex attribute (WARNING)
+
+PREFIX edam:
+PREFIX oboInOwl:
+
+SELECT ?term ?concept ?regex WHERE {
+ ?concept rdfs:subClassOf+ edam:data_2109 ;
+ rdfs:label ?term .
+
+ FILTER NOT EXISTS {
+ ?concept edam:regex ?regex
+ } .
+
+}
\ No newline at end of file
diff --git a/src/webapp/queries/no_wikipedia_link_operation.rq b/src/webapp/queries/no_wikipedia_link_operation.rq
new file mode 100644
index 0000000..446cfea
--- /dev/null
+++ b/src/webapp/queries/no_wikipedia_link_operation.rq
@@ -0,0 +1,13 @@
+## Get operations that don't have a wikipedia link as a 'seeAlso' attribute
+
+PREFIX edam:
+PREFIX oboInOwl:
+
+SELECT ?term ?concept WHERE {
+ ?concept rdfs:subClassOf+ edam:operation_0004 ;
+ rdfs:label ?term .
+ FILTER NOT EXISTS {
+ ?concept rdfs:seeAlso ?seealso .
+ FILTER (regex(str(?seealso), "wikipedia.org", "i"))
+ } .
+}
\ No newline at end of file
diff --git a/src/webapp/queries/no_wikipedia_link_topic.rq b/src/webapp/queries/no_wikipedia_link_topic.rq
new file mode 100644
index 0000000..9af413e
--- /dev/null
+++ b/src/webapp/queries/no_wikipedia_link_topic.rq
@@ -0,0 +1,13 @@
+## Get topics that don't have a wikipedia link as a 'seeAlso' attribute
+
+PREFIX edam:
+PREFIX oboInOwl:
+
+SELECT ?term ?concept WHERE {
+ ?concept rdfs:subClassOf+ edam:topic_0003 ;
+ rdfs:label ?term .
+ FILTER NOT EXISTS {
+ ?concept rdfs:seeAlso ?seealso .
+ FILTER (regex(str(?seealso), "wikipedia.org", "i"))
+ } .
+}
\ No newline at end of file
diff --git a/src/webapp/queries/start_space_annotation.rq b/src/webapp/queries/start_space_annotation.rq
new file mode 100644
index 0000000..90d2200
--- /dev/null
+++ b/src/webapp/queries/start_space_annotation.rq
@@ -0,0 +1,18 @@
+PREFIX obo:
+PREFIX owl:
+PREFIX rdfs:
+PREFIX rdf:
+PREFIX oboInOwl:
+PREFIX edam:
+PREFIX xsd:
+
+SELECT DISTINCT ?entity ?property ?value ?label WHERE {
+ ?entity ?property ?value .
+ ?entity rdfs:label ?label .
+ FILTER REGEX(str(?value), "^[\\s\r\n]+")
+ FILTER (!isBlank(?entity))
+
+}
+ORDER BY ?entity
+
+
diff --git a/src/webapp/queries/tab_in_annotation.rq b/src/webapp/queries/tab_in_annotation.rq
new file mode 100644
index 0000000..bc92a89
--- /dev/null
+++ b/src/webapp/queries/tab_in_annotation.rq
@@ -0,0 +1,20 @@
+PREFIX obo:
+PREFIX owl:
+PREFIX rdfs:
+PREFIX rdf:
+PREFIX oboInOwl:
+PREFIX edam:
+PREFIX xsd:
+
+SELECT DISTINCT ?entity ?property ?value ?label WHERE {
+ ?entity ?property ?value .
+ ?entity rdfs:label ?label .
+
+ FILTER regex(?value, "\t")
+ FILTER (!isBlank(?entity))
+
+}
+ORDER BY ?entity
+
+
+
diff --git a/src/webapp/static/img/data_cloud_dev.png b/src/webapp/static/img/data_cloud_dev.png
new file mode 100644
index 0000000..24e52e9
Binary files /dev/null and b/src/webapp/static/img/data_cloud_dev.png differ
diff --git a/src/webapp/static/img/data_tree_dev.png b/src/webapp/static/img/data_tree_dev.png
new file mode 100644
index 0000000..4861646
Binary files /dev/null and b/src/webapp/static/img/data_tree_dev.png differ
diff --git a/src/webapp/static/img/operation_cloud_dev.png b/src/webapp/static/img/operation_cloud_dev.png
new file mode 100644
index 0000000..1f24121
Binary files /dev/null and b/src/webapp/static/img/operation_cloud_dev.png differ
diff --git a/src/webapp/static/img/operation_tree_dev.png b/src/webapp/static/img/operation_tree_dev.png
new file mode 100644
index 0000000..79c358b
Binary files /dev/null and b/src/webapp/static/img/operation_tree_dev.png differ
diff --git a/src/webapp/static/img/topic_cloud_dev.png b/src/webapp/static/img/topic_cloud_dev.png
new file mode 100644
index 0000000..cec065e
Binary files /dev/null and b/src/webapp/static/img/topic_cloud_dev.png differ
diff --git a/src/webapp/static/img/topic_tree_dev.png b/src/webapp/static/img/topic_tree_dev.png
new file mode 100644
index 0000000..3ab1e93
Binary files /dev/null and b/src/webapp/static/img/topic_tree_dev.png differ
diff --git a/src/webapp/templates/current.html b/src/webapp/templates/current.html
new file mode 100644
index 0000000..52236fd
--- /dev/null
+++ b/src/webapp/templates/current.html
@@ -0,0 +1,103 @@
+{% extends "layout.html" %}
+
+{% block nav %}
+{% include 'nav.html' %}
+{% endblock %}
+
+{% block body %}
+
+
Current panorama of the EDAM ontology
+
+
+
Number of terms available
+
+
+
+
+
+
+
+
Overview of the topics
+
+
+
+
+
+
+
Overview of the operations
+
+
+
+
+
+
+
Overview of the data types
+
+
+
+
+
+{% endblock %}
\ No newline at end of file
diff --git a/src/webapp/templates/expert_curation.html b/src/webapp/templates/expert_curation.html
deleted file mode 100644
index 521af2c..0000000
--- a/src/webapp/templates/expert_curation.html
+++ /dev/null
@@ -1,12 +0,0 @@
-{% extends "layout.html" %}
-
-{% block nav %}
-{% include 'nav.html' %}
-{% endblock %}
-
-{% block body %}
-
-
Top priority curation needs in my field
-
In this demo, we focus on some specific topics and randomly pick 5 classes to be updated.
-
-{% endblock %}
\ No newline at end of file
diff --git a/src/webapp/templates/field_specific.html b/src/webapp/templates/field_specific.html
new file mode 100644
index 0000000..dde0e07
--- /dev/null
+++ b/src/webapp/templates/field_specific.html
@@ -0,0 +1,23 @@
+{% extends "layout.html" %}
+
+{% block nav %}
+{% include 'nav.html' %}
+{% endblock %}
+
+{% block body %}
+
+
Field specific contributions
+
+
+
{{ no_regex_identifier|length }} EDAM hybrid identifiers with missing regex.
+
+ {% for item in random.sample(no_regex_identifier, 8) %}
+ {{ item.term }}
+ {% endfor %}
+
+
+
+
+
+
+{% endblock %}
\ No newline at end of file
diff --git a/src/webapp/templates/high_priority.html b/src/webapp/templates/high_priority.html
new file mode 100644
index 0000000..802d938
--- /dev/null
+++ b/src/webapp/templates/high_priority.html
@@ -0,0 +1,43 @@
+{% extends "layout.html" %}
+
+{% block nav %}
+{% include 'nav.html' %}
+{% endblock %}
+
+{% block body %}
+
+
High priority
+
+
+
{{ mandatory_property_missing|length }} Concepts with missing mandatory properties.
+
+ {% if mandatory_property_missing|length < 8 %}
+ {% for item in mandatory_property_missing %}
+ {{ item.term }}
+ {% endfor %}
+ {% else %}
+ {% for item in random.sample(mandatory_property_missing, 8) %}
+ {{ item.term }}
+ {% endfor %}
+ {% endif %}
+
+
+
+
+
{{ mandatory_property_missing|length }} ID numbers that are not unique.
+
+ {% if id_unique|length < 8 %}
+ {% for item in id_unique %}
+ {{ item.term }}
+ {% endfor %}
+ {% else %}
+ {% for item in random.sample(id_unique, 8) %}
+ {{ item.term }}
+ {% endfor %}
+ {% endif %}
+
+
+
+
+
+{% endblock %}
\ No newline at end of file
diff --git a/src/webapp/templates/index.html b/src/webapp/templates/index.html
index 4d67451..a3fc184 100644
--- a/src/webapp/templates/index.html
+++ b/src/webapp/templates/index.html
@@ -6,67 +6,85 @@
{% block body %}
-