# %% Imports
import io
import json
import os
import shutil
import subprocess
import sys

import numpy as np
import pandas as pd
from Bio import Entrez, SearchIO, SeqIO
from Bio.Align import MultipleSeqAlignment
from Bio.Align.AlignInfo import SummaryInfo
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from ete3 import Tree
from mysql.connector import connection
from sshtunnel import SSHTunnelForwarder

# %% Read the connection settings
# db_curated_server_info.txt holds one ``key=value`` pair per line; blank
# lines and lines starting with "#" are ignored.
config = {}
with open("db_curated_server_info.txt", "r") as file:
    for raw_line in file:
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        key, separator, value = line.partition("=")
        if not separator:
            raise ValueError(
                f"Malformed line in db_curated_server_info.txt (expected key=value): {line!r}"
            )
        # Strip the key as well as the value so that "server_name = host"
        # is stored under "server_name" and not under "server_name ".
        config[key.strip()] = value.strip()

# Indexing (rather than .get) makes a missing setting fail with a KeyError
# that names the key.  The misspelled keys ("srever_port", "db_adress") are
# the spellings the settings file is read with, so they are kept as-is.
server_name = config["server_name"]
srever_port = int(config["srever_port"])
ssh_password = config["ssh_password"]
ssh_username = config["ssh_username"]
db_adress = config["db_adress"]
db_port = int(config["db_port"])

# %% Open the SSH tunnel to the database host
tunnel = SSHTunnelForwarder(
    (server_name, srever_port),
    ssh_password=ssh_password,
    ssh_username=ssh_username,
    remote_bind_address=(db_adress, db_port),
)
tunnel.start()
print(tunnel.local_bind_port)

# %% Connect to MySQL through the local end of the tunnel
# NOTE(review): user/password/database are literal placeholder-looking
# values here -- confirm the real ones are substituted before running.
conn = connection.MySQLConnection(
    user="db_user",
    password="db_password",
    host="localhost",
    port=tunnel.local_bind_port,
    database="db_name",
)
cursor = conn.cursor()

# %% List the tables
query = "SHOW TABLES;"
cursor.execute(query)
cursor.fetchall()

# %% Column names of the histone table
query = "SELECT * FROM histone"
cursor.execute(query)
cursor.fetchall()  # drain the result set before reading cursor.description
", ".join([i[0] for i in cursor.description])

# %% Column names of the sequence table
query = "SELECT * FROM sequence"
cursor.execute(query)
cursor.fetchall()  # drain the result set before reading cursor.description
", ".join([i[0] for i in cursor.description])
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlevelcount
0Archaealtype0
141H2A.Z.2_(Primates)variant0
139H2A.Z.2.s2_(Primates)variant0
137H2A.Z.2.s1_(Primates)variant0
135H2A.Z.1_(Primates)variant0
............
132H2A.Zvariant_group20
59cH3variant_group23
120H2A.Qvariant28
50cH2B_(Chlorophyta)variant35
149H2B.Svariant_group47
\n", + "

204 rows × 3 columns

# %% Number of curated sequences attached to every histone node
# LEFT JOIN keeps nodes that have no sequence at all (count == 0).
query = (
    "SELECT h.id, h.level, COUNT(accession) as count FROM histone h LEFT JOIN sequence s ON h.id = s.variant "
    "GROUP BY h.id "
)
cursor.execute(query)
count_df = pd.DataFrame(
    cursor.fetchall(),
    columns=[column[0] for column in cursor.description],
)
count_df = count_df.sort_values(by=["count"])
count_df

# %% Nodes without any sequence
count_df[count_df["count"] == 0]

# %% Sequences stored for one specific variant
query = "SELECT * FROM sequence WHERE variant='H2A.Z.2_(Primates)'"
cursor.execute(query)
pd.DataFrame(
    cursor.fetchall(),
    columns=[column[0] for column in cursor.description],
)
def get_tree_dict(cursor, paretnt_id):
    """Return the descendants of a histone node as nested dictionaries.

    Parameters
    ----------
    cursor
        Open database cursor; it is used to query the ``histone`` table.
    paretnt_id
        ``id`` of the node whose children are wanted (the parameter name is
        misspelled in the original signature and kept for compatibility).

    Returns
    -------
    dict or str
        ``{child_id: subtree}`` for every row whose ``parent`` equals
        ``paretnt_id``; the string ``"null"`` when there is no such row.
    """
    # The id is passed as a bound parameter instead of being formatted into
    # the SQL text, so ids containing quotes cannot break or alter the query.
    cursor.execute("SELECT id FROM histone WHERE parent = %s", (paretnt_id,))
    children = cursor.fetchall()
    if not children:
        return "null"
    return {child_id: get_tree_dict(cursor, child_id) for child_id, *_ in children}


def dict2tree(tree, d):
    """Attach the nested dictionary ``d`` below the node ``tree``.

    Every key of ``d`` becomes a child of ``tree`` (created with
    ``tree.add_child(name=key)``); when the value is itself a dictionary it
    is attached recursively below that child.  Any other value (for example
    the ``"null"`` marker produced by ``get_tree_dict``) marks a leaf.

    The function modifies ``tree`` in place and returns nothing.
    """
    for name, subtree in d.items():
        child = tree.add_child(name=name)
        if isinstance(subtree, dict):
            dict2tree(child, subtree)


def muscle_aln(sequences, options=None, debug=False, sort=True):
    """Align FASTA-formatted sequences with MUSCLE.

    Parameters
    ----------
    sequences : str
        Sequences in FASTA format; they are sent to MUSCLE on stdin.
    options : list of str, optional
        Extra command-line arguments for MUSCLE.
    debug : bool
        Print the input, MUSCLE's stderr and MUSCLE's stdout.
    sort : bool
        Reorder the aligned records to follow the order of ``sequences``.

    Returns
    -------
    MultipleSeqAlignment
        The alignment parsed from MUSCLE's stdout.  MUSCLE's exit status is
        not checked, so a failed run yields an empty alignment.
    """
    # Nothing to align: return the empty alignment directly instead of
    # spawning MUSCLE only to have it reject an empty input.
    if not sequences.strip():
        return MultipleSeqAlignment([])

    # The MUSCLE binary is looked up next to the running Python interpreter.
    muscle = os.path.join(os.path.dirname(sys.executable), "muscle")
    result = subprocess.run(
        [muscle] + list(options or []),
        input=sequences.encode("utf-8"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    aln_text = result.stdout.decode("utf-8")
    if debug:
        print(sequences)
        print()
        print("Stderr:")
        print(result.stderr.decode("utf-8"))
        print("Stdout:")
        print(aln_text)

    records = list(SeqIO.parse(io.StringIO(aln_text), "fasta"))
    if sort:
        # Rank of every input id, read from the header lines only: the id is
        # the first whitespace-delimited word after ">".  A dict makes each
        # lookup O(1) and keeps the first occurrence of a duplicated id.
        rank = {}
        for line in sequences.splitlines():
            if line.startswith(">"):
                words = line[1:].split(None, 1)
                rank.setdefault(words[0] if words else "", len(rank))
        # Records whose id is not among the input ids go last, in the order
        # MUSCLE returned them (list.sort is stable).
        records.sort(key=lambda record: rank.get(record.id, len(rank)))
    return MultipleSeqAlignment(records)


def muscle_p2p_aln(msa1, msa2, options=None, debug=False):
    """Align two alignments to each other with ``muscle -profile``.

    Parameters
    ----------
    msa1, msa2
        Alignments to merge; each is written to disk with
        ``format(msa, "fasta")``.
    options : list of str, optional
        Extra command-line arguments for MUSCLE.
    debug : bool
        Print MUSCLE's stderr and stdout.

    Returns
    -------
    MultipleSeqAlignment
        The alignment parsed from MUSCLE's stdout.  MUSCLE's exit status is
        not checked, so a failed run yields an empty alignment.
    """
    import tempfile  # function-scope import: not part of the notebook's import cell

    muscle = os.path.join(os.path.dirname(sys.executable), "muscle")
    # A private temporary directory replaces the fixed "tmp/" folder: it
    # cannot collide with an existing directory and it is removed on exit
    # from the ``with`` block whether or not an exception was raised.
    with tempfile.TemporaryDirectory(prefix="muscle_p2p_") as workdir:
        first_profile = os.path.join(workdir, "one.afa")
        second_profile = os.path.join(workdir, "two.afa")
        with open(first_profile, "w") as f:
            f.write(format(msa1, "fasta"))
        with open(second_profile, "w") as f:
            f.write(format(msa2, "fasta"))

        result = subprocess.run(
            [muscle]
            + list(options or [])
            + ["-profile", "-in1", first_profile, "-in2", second_profile],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

    aln_text = result.stdout.decode("utf-8")
    if debug:
        print("Stderr:")
        print(result.stderr.decode("utf-8"))
        print("Stdout:")
        print(aln_text)

    # Records are kept in the order MUSCLE printed them.
    return MultipleSeqAlignment(list(SeqIO.parse(io.StringIO(aln_text), "fasta")))


def get_fasta_seq(data):
    """Render the rows of ``data`` as one FASTA-formatted string.

    ``data`` must provide the columns ``sequence``, ``organism``,
    ``accession``, ``variant``, ``phylum`` and ``class``.  The record id is
    ``<first word of organism>|<accession>|<variant>``.  An empty ``data``
    gives an empty string.

    Raises ``IndexError`` when a row has an empty ``organism``.
    """
    # Each formatted record already ends with a newline, so joining with
    # "\n" leaves a blank line between consecutive records.
    return "\n".join(
        [
            SeqRecord(
                Seq(row["sequence"]),
                id=f"{row['organism'].split()[0]}|{row['accession']}|{row['variant']}",
                name=row["accession"],
                description=f"organism={row['organism']} phylum={row['phylum']} class={row['class']}",
            ).format("fasta")
            for _, row in data.iterrows()
        ]
    )


def _write_msa(msa, path):
    """Write the alignment ``msa`` to ``path`` in FASTA format."""
    with open(path, "w") as f:
        f.write(format(msa, "fasta"))


def generate_draft_seeds(hist_tree, sequence_data, save_directory):
    """Build one draft seed alignment per node of the classification tree.

    The tree is walked in post-order, so children are aligned before their
    parent:

    * leaf: align the rows whose ``type`` or ``variant`` equals the node name
      and write ``<name>.fasta``;
    * internal node: align the rows whose ``variant`` equals the node name
      (written as ``<name>_only.fasta``), then merge in the alignment of
      every child by profile-profile alignment and write ``<name>.fasta``.

    The root node is skipped.

    Parameters
    ----------
    hist_tree
        Tree providing ``traverse("postorder")`` and nodes with ``name``,
        ``is_root()``, ``is_leaf()`` and ``get_children()``.
    sequence_data : pandas.DataFrame
        Curated sequences; needs the ``type`` and ``variant`` columns plus
        the columns read by ``get_fasta_seq``.
    save_directory : str
        Output directory.  An existing directory is deleted and recreated.
    """
    # generate draft seeds - needs debugging
    draft_seeds_msa = {}

    # Always start from an empty output directory.
    try:
        os.makedirs(save_directory)
    except FileExistsError:
        shutil.rmtree(save_directory)
        os.makedirs(save_directory)

    for node in hist_tree.traverse("postorder"):
        print("Processing ", node.name)
        if node.is_root():
            continue

        name = node.name
        # Boolean masks are used instead of DataFrame.query strings so that
        # node names containing quotes or other special characters are safe.
        own_variant = sequence_data["variant"] == name

        if node.is_leaf():
            # Align every sequence annotated with this type or variant.
            selected = sequence_data[(sequence_data["type"] == name) | own_variant]
            draft_seeds_msa[name] = muscle_aln(get_fasta_seq(selected))
        else:
            print("\t Node is internal, progressive alignment:")
            # Sequences whose most specific annotation is this node itself.
            msa = muscle_aln(get_fasta_seq(sequence_data[own_variant]))
            draft_seeds_msa[name + "_only"] = msa
            print(f"\t\t For {name} aligned {len(msa)} sequences")

            # Progressively merge the children's alignments.
            for ch in node.get_children():
                child_msa = draft_seeds_msa[ch.name]
                if len(msa) == 0:
                    # Nothing accumulated yet: start from the child's alignment.
                    msa = child_msa
                elif len(child_msa) != 0:
                    msa = muscle_p2p_aln(msa, child_msa)
                else:
                    continue
                # Report the child that was merged (the message used to show
                # the parent's name here).
                print(
                    f"\t\t Adding child {ch.name} aligned {len(child_msa)} sequences"
                )
            draft_seeds_msa[name] = msa

        # len() of an alignment is its number of sequences.
        print(name, "Alignment length:", len(draft_seeds_msa[name]))
        _write_msa(draft_seeds_msa[name], os.path.join(save_directory, f"{name}.fasta"))
        if not node.is_leaf():
            _write_msa(
                draft_seeds_msa[name + "_only"],
                os.path.join(save_directory, f"{name}_only.fasta"),
            )
'generic_H1': 'null',\n", + " 'H1.0': {'H1.0_(Homo_sapiens)': 'null'},\n", + " 'H1.1': {'H1.1_(Homo_sapiens)': 'null'},\n", + " 'H1.10': {'H1.10_(Homo_sapiens)': 'null'},\n", + " 'H1.2': {'H1.2_(Homo_sapiens)': 'null'},\n", + " 'H1.3': {'H1.3_(Homo_sapiens)': 'null'},\n", + " 'H1.4': {'H1.4_(Homo_sapiens)': 'null'},\n", + " 'H1.5': {'H1.5_(Homo_sapiens)': 'null'},\n", + " 'OO_H1.8': {'H1.8_(Homo_sapiens)': 'null'},\n", + " 'scH1': 'null',\n", + " 'TS_H1.6': {'H1.6_(Homo_sapiens)': 'null'},\n", + " 'TS_H1.7': {'H1.7_(Homo_sapiens)': 'null'},\n", + " 'TS_H1.9': 'null'},\n", + " 'H2A': {'cH2A': {'cH2A_(Animals)': {'cH2A_(Vertebrata)': {'cH2A_(Mammalia)': {'cH2A_(Homo_sapiens)': {'cH2A.10_(Homo_sapiens)': 'null',\n", + " 'cH2A.11_(Homo_sapiens)': 'null',\n", + " 'cH2A.1_(Homo_sapiens)': 'null',\n", + " 'cH2A.2_(Homo_sapiens)': 'null',\n", + " 'cH2A.3_(Homo_sapiens)': 'null',\n", + " 'cH2A.4_(Homo_sapiens)': 'null',\n", + " 'cH2A.5_(Homo_sapiens)': 'null',\n", + " 'cH2A.6_(Homo_sapiens)': 'null',\n", + " 'cH2A.7_(Homo_sapiens)': 'null',\n", + " 'cH2A.8_(Homo_sapiens)': 'null',\n", + " 'cH2A.9_(Homo_sapiens)': 'null'},\n", + " 'cH2A_(Mus_musculus)': {'cH2A.1_(Mus_musculus)': 'null'}}}},\n", + " 'cH2A_(Fungi)': 'null',\n", + " 'cH2A_(Plants)': {'cH2A_(Chlorophyta)': 'null',\n", + " 'cH2A_(Embryophyta)': 'null'},\n", + " 'cH2A_(Protists)': 'null'},\n", + " 'gH2A': 'null',\n", + " 'H2A.J': {'H2A.J_(Homo_sapiens)': 'null'},\n", + " 'H2A.M': 'null',\n", + " 'H2A.R': 'null',\n", + " 'H2A.W': 'null',\n", + " 'H2A.X': {'H2A.X_(Animals)': {'H2A.X_(Vertebrata)': {'H2A.X_(Mammalia)': {'H2A.X_(Homo_sapiens)': 'null',\n", + " 'H2A.X_(Mus_musculus)': 'null'}}},\n", + " 'H2A.X_(Fungi)': 'null',\n", + " 'H2A.X_(Plants)': 'null',\n", + " 'H2A.X_(Protists)': 'null'},\n", + " 'H2A.Z': {'H2A.Z.1_(Chordata)': {'H2A.Z.1_(Primates)': {'H2A.Z.1_(Homo_sapiens)': 'null'}},\n", + " 'H2A.Z.2_(Chordata)': {'H2A.Z.2_(Primates)': {'H2A.Z.2.s1_(Primates)': {'H2A.Z.2.s1_(Homo_sapiens)': 'null'},\n", + " 
'H2A.Z.2.s2_(Primates)': {'H2A.Z.2.s2_(Homo_sapiens)': 'null'}}}},\n", + " 'macroH2A': {'macroH2A.1_(Mammalia)': {'macroH2A.1.s1_(Mammalia)': {'macroH2A.1.s1_(Homo_sapiens)': 'null'},\n", + " 'macroH2A.1.s2_(Mammalia)': {'macroH2A.1.s2_(Homo_sapiens)': 'null'}},\n", + " 'macroH2A.2_(Mammalia)': {'macroH2A.2_(Homo_sapiens)': 'null'}},\n", + " 'short_H2A': {'H2A.B': {'H2A.B_(Homo_sapiens)': {'H2A.B.1_(Homo_sapiens)': 'null',\n", + " 'H2A.B.2_(Homo_sapiens)': 'null'},\n", + " 'H2A.B_(Mus_musculus)': {'H2A.B.1_(Mus_musculus)': 'null',\n", + " 'H2A.B.2_(Mus_musculus)': 'null',\n", + " 'H2A.B.3_(Mus_musculus)': 'null'}},\n", + " 'H2A.L': {'H2A.L_(Homo_sapiens)': {'H2A.L.1_(Homo_sapiens)': 'null',\n", + " 'H2A.L.3_(Homo_sapiens)': 'null'},\n", + " 'H2A.L_(Mus_musculus)': {'H2A.L.1_(Mus_musculus)': 'null',\n", + " 'H2A.L.2_(Mus_musculus)': 'null',\n", + " 'H2A.L.3_(Mus_musculus)': 'null'}},\n", + " 'H2A.P': {'H2A.P_(Homo_sapiens)': 'null', 'H2A.P_(Mus_musculus)': 'null'},\n", + " 'H2A.Q': 'null'}},\n", + " 'H2B': {'cH2B': {'cH2B_(Animals)': {'cH2B_(Vertebrata)': {'cH2B_(Mammalia)': {'cH2B_(Homo_sapiens)': {'cH2B.10_(Homo_sapiens)': 'null',\n", + " 'cH2B.11_(Homo_sapiens)': 'null',\n", + " 'cH2B.12_(Homo_sapiens)': 'null',\n", + " 'cH2B.13_(Homo_sapiens)': 'null',\n", + " 'cH2B.14_(Homo_sapiens)': 'null',\n", + " 'cH2B.15_(Homo_sapiens)': 'null',\n", + " 'cH2B.1_(Homo_sapiens)': 'null',\n", + " 'cH2B.2_(Homo_sapiens)': 'null',\n", + " 'cH2B.3_(Homo_sapiens)': 'null',\n", + " 'cH2B.4_(Homo_sapiens)': 'null',\n", + " 'cH2B.5_(Homo_sapiens)': 'null',\n", + " 'cH2B.6_(Homo_sapiens)': 'null',\n", + " 'cH2B.7_(Homo_sapiens)': 'null',\n", + " 'cH2B.8_(Homo_sapiens)': 'null',\n", + " 'cH2B.9_(Homo_sapiens)': 'null'},\n", + " 'cH2B_(Mus_musculus)': {'cH2B.1_(Mus_musculus)': 'null',\n", + " 'cH2B.E_(Mus_musculus)': 'null'}}}},\n", + " 'cH2B_(Fungi)': 'null',\n", + " 'cH2B_(Plants)': {'cH2B_(Chlorophyta)': 'null',\n", + " 'cH2B_(Embryophyta)': 'null'},\n", + " 'cH2B_(Protists)': 
'null'},\n", + " 'CS_H2B_(Echinoidea)': 'null',\n", + " 'early_H2B_(Echinoidea)': 'null',\n", + " 'gH2B': 'null',\n", + " 'H2B.K': {'H2B.K_(Homo_sapiens)': 'null'},\n", + " 'H2B.L': 'null',\n", + " 'H2B.N': {'H2B.N_(Homo_sapiens)': 'null'},\n", + " 'H2B.O': 'null',\n", + " 'H2B.S': 'null',\n", + " 'H2B.V': 'null',\n", + " 'H2B.W': {'H2B.W_(Homo_sapiens)': {'H2B.W.1_(Homo_sapiens)': 'null',\n", + " 'H2B.W.2_(Homo_sapiens)': 'null'}},\n", + " 'H2B.Z': 'null',\n", + " 'late_H2B_(Echinoidea)': 'null',\n", + " 'sperm_H2B_(Echinoidea)': 'null'},\n", + " 'H3': {'cenH3_(Eukarya)': {'cenH3_(Animals)': {'cenH3_(Mammalia)': {'cenH3_(Homo_sapiens)': 'null'}},\n", + " 'cenH3_(Fungi)': 'null',\n", + " 'cenH3_(Plants)': 'null'},\n", + " 'cH3': {'cH3_(Animals)': {'cH3_(Vertebrata)': {'cH3_(Mammalia)': {'cH3.1_(Mammalia)': {'cH3.1_(Homo_sapiens)': 'null'},\n", + " 'cH3.2_(Mammalia)': {'cH3.2_(Homo_sapiens)': 'null'}}}},\n", + " 'cH3_(Fungi)': 'null',\n", + " 'cH3_(Plants)': {'cH3_(Chlorophyta)': 'null', 'cH3_(Embryophyta)': 'null'},\n", + " 'cH3_(Protists)': 'null'},\n", + " 'H3.1-like_(Plants)': 'null',\n", + " 'H3.3': {'H3.3_(Animals)': {'H3.3_(Homo_sapiens)': 'null'},\n", + " 'H3.3_(Fungi)': {'H3.3_(Ascomycota)': 'null'},\n", + " 'H3.3_(Plants)': 'null',\n", + " 'H3.3_(Protists)': 'null'},\n", + " 'H3.3-like?': {'H3.3-like_(Animals)': {'H3.X_(Primates?)': {'H3.X_(Homo_sapiens)': 'null'},\n", + " 'H3.Y_(Primates?)': {'H3.Y_(Homo_sapiens)': {'H3.Y.1_(Homo_sapiens)': 'null',\n", + " 'H3.Y.2_(Homo_sapiens)': 'null'}}},\n", + " 'H3.3-like_(Plants)': {'H3_(Lilly???)': 'null', 'TS H3.10': 'null'}},\n", + " 'H3.4_(Mammalia)': {'H3.4_(Homo_sapiens)': 'null'},\n", + " 'H3.5_(Hominidae)': {'H3.5_(Homo_sapiens)': 'null'},\n", + " 'H3.6_(Mammals?)?': 'null',\n", + " 'H3.7_(Mammals?)?': 'null',\n", + " 'H3.8_(Mammals?)?': 'null',\n", + " 'H3.B_(Giardia?)': 'null',\n", + " 'H3.P_(Moneuplotes?)': 'null',\n", + " 'H3.V_(Trypanosomatidae)': 'null'},\n", + " 'H4': {'cH4': {'cH4_(Homo_sapiens)': 
# %% Build the classification as nested dictionaries
# Top-level histone types are the rows without a parent.
query = "SELECT id FROM histone WHERE parent IS NULL"
cursor.execute(query)
types = cursor.fetchall()
variants_tree = {
    root_id: get_tree_dict(cursor, root_id) for root_id, *_ in types
}
variants_tree

# %% Convert the nested dictionaries into an ete3 tree and show it
hist_tree = Tree()
dict2tree(hist_tree, variants_tree)
print(hist_tree.get_ascii(show_internal=True))

# %% Load the curated sequences
# NOTE(review): absolute, user-specific path -- adjust when running elsewhere.
sequence_df = pd.read_csv(
    "/home/l_singh/_scratch/hdb/project_dir/histonedb/CURATED_SET/histones.csv"
).fillna("")
sequence_df.index = list(sequence_df["accession"])

# %% Sequences annotated with the variant "H3_(Lilly???)"
# regex=False matches the name literally.  As a regular expression the
# parentheses form a group (so the pattern can never match the literal
# brackets) and an unescaped "???" is rejected as "multiple repeat".
sequence_df[sequence_df["variant"].str.contains("H3_(Lilly???)", regex=False)]
kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlevel\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlevel\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m0\u001b[39m) \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 4439\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtarget\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 4440\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43meval\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexpr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4442\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 4443\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloc[res]\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/site-packages/pandas/core/frame.py:4566\u001b[0m, in \u001b[0;36mDataFrame.eval\u001b[0;34m(self, expr, inplace, **kwargs)\u001b[0m\n\u001b[1;32m 4563\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtarget\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\n\u001b[1;32m 4564\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresolvers\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtuple\u001b[39m(kwargs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresolvers\u001b[39m\u001b[38;5;124m\"\u001b[39m, ())) \u001b[38;5;241m+\u001b[39m resolvers\n\u001b[0;32m-> 4566\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_eval\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexpr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/site-packages/pandas/core/computation/eval.py:336\u001b[0m, in \u001b[0;36meval\u001b[0;34m(expr, parser, engine, local_dict, global_dict, resolvers, level, target, inplace)\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[38;5;66;03m# get our (possibly passed-in) scope\u001b[39;00m\n\u001b[1;32m 328\u001b[0m env \u001b[38;5;241m=\u001b[39m ensure_scope(\n\u001b[1;32m 329\u001b[0m level \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m,\n\u001b[1;32m 330\u001b[0m global_dict\u001b[38;5;241m=\u001b[39mglobal_dict,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 333\u001b[0m target\u001b[38;5;241m=\u001b[39mtarget,\n\u001b[1;32m 334\u001b[0m )\n\u001b[0;32m--> 336\u001b[0m parsed_expr \u001b[38;5;241m=\u001b[39m \u001b[43mExpr\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexpr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparser\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43menv\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menv\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 338\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m engine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnumexpr\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m (\n\u001b[1;32m 339\u001b[0m is_extension_array_dtype(parsed_expr\u001b[38;5;241m.\u001b[39mterms\u001b[38;5;241m.\u001b[39mreturn_type)\n\u001b[1;32m 340\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(parsed_expr\u001b[38;5;241m.\u001b[39mterms, 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moperand_types\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 344\u001b[0m )\n\u001b[1;32m 345\u001b[0m ):\n\u001b[1;32m 346\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 347\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEngine has switched to \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpython\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m because numexpr does not support \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 348\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mextension array dtypes. Please set your engine to python manually.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 349\u001b[0m \u001b[38;5;167;01mRuntimeWarning\u001b[39;00m,\n\u001b[1;32m 350\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39mfind_stack_level(),\n\u001b[1;32m 351\u001b[0m )\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/site-packages/pandas/core/computation/expr.py:809\u001b[0m, in \u001b[0;36mExpr.__init__\u001b[0;34m(self, expr, engine, parser, env, level)\u001b[0m\n\u001b[1;32m 807\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mparser \u001b[38;5;241m=\u001b[39m parser\n\u001b[1;32m 808\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_visitor \u001b[38;5;241m=\u001b[39m PARSERS[parser](\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menv, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mparser)\n\u001b[0;32m--> 809\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mterms \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File 
\u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/site-packages/pandas/core/computation/expr.py:828\u001b[0m, in \u001b[0;36mExpr.parse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 824\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mparse\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 825\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 826\u001b[0m \u001b[38;5;124;03m Parse an expression.\u001b[39;00m\n\u001b[1;32m 827\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 828\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_visitor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvisit\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexpr\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/site-packages/pandas/core/computation/expr.py:415\u001b[0m, in \u001b[0;36mBaseExprVisitor.visit\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 413\u001b[0m method \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvisit_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(node)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 414\u001b[0m visitor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, method)\n\u001b[0;32m--> 415\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mvisitor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/site-packages/pandas/core/computation/expr.py:421\u001b[0m, in 
\u001b[0;36mBaseExprVisitor.visit_Module\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 419\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mSyntaxError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124monly a single expression is allowed\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 420\u001b[0m expr \u001b[38;5;241m=\u001b[39m node\u001b[38;5;241m.\u001b[39mbody[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m--> 421\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvisit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexpr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/site-packages/pandas/core/computation/expr.py:415\u001b[0m, in \u001b[0;36mBaseExprVisitor.visit\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 413\u001b[0m method \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvisit_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(node)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 414\u001b[0m visitor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, method)\n\u001b[0;32m--> 415\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mvisitor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/site-packages/pandas/core/computation/expr.py:424\u001b[0m, in \u001b[0;36mBaseExprVisitor.visit_Expr\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 
423\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mvisit_Expr\u001b[39m(\u001b[38;5;28mself\u001b[39m, node, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 424\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvisit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/site-packages/pandas/core/computation/expr.py:415\u001b[0m, in \u001b[0;36mBaseExprVisitor.visit\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 413\u001b[0m method \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvisit_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(node)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 414\u001b[0m visitor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, method)\n\u001b[0;32m--> 415\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mvisitor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/site-packages/pandas/core/computation/expr.py:705\u001b[0m, in \u001b[0;36mBaseExprVisitor.visit_Call\u001b[0;34m(self, node, side, **kwargs)\u001b[0m\n\u001b[1;32m 702\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key\u001b[38;5;241m.\u001b[39marg:\n\u001b[1;32m 703\u001b[0m kwargs[key\u001b[38;5;241m.\u001b[39marg] \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvisit(key\u001b[38;5;241m.\u001b[39mvalue)(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menv)\n\u001b[0;32m--> 705\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menv\u001b[38;5;241m.\u001b[39madd_tmp(\u001b[43mres\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mnew_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 706\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mterm_type(name\u001b[38;5;241m=\u001b[39mname, env\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menv)\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/site-packages/pandas/core/strings/accessor.py:129\u001b[0m, in \u001b[0;36mforbid_nonstring_types.._forbid_nonstring_types..wrapper\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 124\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 125\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot use .str.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m with values of \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 126\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minferred dtype \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inferred_dtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 127\u001b[0m )\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 129\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/site-packages/pandas/core/strings/accessor.py:1281\u001b[0m, in \u001b[0;36mStringMethods.contains\u001b[0;34m(self, pat, case, flags, na, regex)\u001b[0m\n\u001b[1;32m 1154\u001b[0m \u001b[38;5;129m@forbid_nonstring_types\u001b[39m([\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbytes\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 1155\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcontains\u001b[39m(\n\u001b[1;32m 1156\u001b[0m \u001b[38;5;28mself\u001b[39m, pat, case: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m, flags: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m, na\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, regex: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 1157\u001b[0m ):\n\u001b[1;32m 1158\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1159\u001b[0m \u001b[38;5;124;03m Test if pattern or regex is contained within a string of a Series or Index.\u001b[39;00m\n\u001b[1;32m 1160\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1279\u001b[0m \u001b[38;5;124;03m dtype: bool\u001b[39;00m\n\u001b[1;32m 1280\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1281\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m regex \u001b[38;5;129;01mand\u001b[39;00m 
\u001b[43mre\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpat\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mgroups:\n\u001b[1;32m 1282\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 1283\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis pattern is interpreted as a regular expression, and has \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1284\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmatch groups. To actually get the groups, use str.extract.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1285\u001b[0m \u001b[38;5;167;01mUserWarning\u001b[39;00m,\n\u001b[1;32m 1286\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39mfind_stack_level(),\n\u001b[1;32m 1287\u001b[0m )\n\u001b[1;32m 1289\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_data\u001b[38;5;241m.\u001b[39marray\u001b[38;5;241m.\u001b[39m_str_contains(pat, case, flags, na, regex)\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/re.py:250\u001b[0m, in \u001b[0;36mcompile\u001b[0;34m(pattern, flags)\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompile\u001b[39m(pattern, flags\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m):\n\u001b[1;32m 249\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCompile a regular expression pattern, returning a Pattern object.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 250\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_compile\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/re.py:302\u001b[0m, in \u001b[0;36m_compile\u001b[0;34m(pattern, flags)\u001b[0m\n\u001b[1;32m 300\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
sre_compile\u001b[38;5;241m.\u001b[39misstring(pattern):\n\u001b[1;32m 301\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfirst argument must be string or compiled pattern\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 302\u001b[0m p \u001b[38;5;241m=\u001b[39m \u001b[43msre_compile\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (flags \u001b[38;5;241m&\u001b[39m DEBUG):\n\u001b[1;32m 304\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(_cache) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m _MAXCACHE:\n\u001b[1;32m 305\u001b[0m \u001b[38;5;66;03m# Drop the oldest item\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/sre_compile.py:764\u001b[0m, in \u001b[0;36mcompile\u001b[0;34m(p, flags)\u001b[0m\n\u001b[1;32m 762\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m isstring(p):\n\u001b[1;32m 763\u001b[0m pattern \u001b[38;5;241m=\u001b[39m p\n\u001b[0;32m--> 764\u001b[0m p \u001b[38;5;241m=\u001b[39m \u001b[43msre_parse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 765\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 766\u001b[0m pattern \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/sre_parse.py:948\u001b[0m, in \u001b[0;36mparse\u001b[0;34m(str, flags, state)\u001b[0m\n\u001b[1;32m 945\u001b[0m state\u001b[38;5;241m.\u001b[39mstr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m\n\u001b[1;32m 947\u001b[0m 
\u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 948\u001b[0m p \u001b[38;5;241m=\u001b[39m \u001b[43m_parse_sub\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m&\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mSRE_FLAG_VERBOSE\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 949\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m Verbose:\n\u001b[1;32m 950\u001b[0m \u001b[38;5;66;03m# the VERBOSE flag was switched on inside the pattern. to be\u001b[39;00m\n\u001b[1;32m 951\u001b[0m \u001b[38;5;66;03m# on the safe side, we'll parse the whole thing again...\u001b[39;00m\n\u001b[1;32m 952\u001b[0m state \u001b[38;5;241m=\u001b[39m State()\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/sre_parse.py:443\u001b[0m, in \u001b[0;36m_parse_sub\u001b[0;34m(source, state, verbose, nested)\u001b[0m\n\u001b[1;32m 441\u001b[0m start \u001b[38;5;241m=\u001b[39m source\u001b[38;5;241m.\u001b[39mtell()\n\u001b[1;32m 442\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 443\u001b[0m itemsappend(\u001b[43m_parse\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnested\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 444\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mnested\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mand\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m 
\u001b[49m\u001b[43mitems\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 445\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m sourcematch(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m|\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 446\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/sre_parse.py:834\u001b[0m, in \u001b[0;36m_parse\u001b[0;34m(source, state, verbose, nested, first)\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m source\u001b[38;5;241m.\u001b[39merror(err\u001b[38;5;241m.\u001b[39mmsg, \u001b[38;5;28mlen\u001b[39m(name) \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 832\u001b[0m sub_verbose \u001b[38;5;241m=\u001b[39m ((verbose \u001b[38;5;129;01mor\u001b[39;00m (add_flags \u001b[38;5;241m&\u001b[39m SRE_FLAG_VERBOSE)) \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 833\u001b[0m \u001b[38;5;129;01mnot\u001b[39;00m (del_flags \u001b[38;5;241m&\u001b[39m SRE_FLAG_VERBOSE))\n\u001b[0;32m--> 834\u001b[0m p \u001b[38;5;241m=\u001b[39m \u001b[43m_parse_sub\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msub_verbose\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnested\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 835\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m source\u001b[38;5;241m.\u001b[39mmatch(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 836\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m source\u001b[38;5;241m.\u001b[39merror(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmissing ), unterminated 
subpattern\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 837\u001b[0m source\u001b[38;5;241m.\u001b[39mtell() \u001b[38;5;241m-\u001b[39m start)\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/sre_parse.py:443\u001b[0m, in \u001b[0;36m_parse_sub\u001b[0;34m(source, state, verbose, nested)\u001b[0m\n\u001b[1;32m 441\u001b[0m start \u001b[38;5;241m=\u001b[39m source\u001b[38;5;241m.\u001b[39mtell()\n\u001b[1;32m 442\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 443\u001b[0m itemsappend(\u001b[43m_parse\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnested\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 444\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mnested\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mand\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mitems\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 445\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m sourcematch(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m|\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 446\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/histdb_env/lib/python3.8/sre_parse.py:671\u001b[0m, in \u001b[0;36m_parse\u001b[0;34m(source, state, verbose, nested, first)\u001b[0m\n\u001b[1;32m 668\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m source\u001b[38;5;241m.\u001b[39merror(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnothing to repeat\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 669\u001b[0m 
source\u001b[38;5;241m.\u001b[39mtell() \u001b[38;5;241m-\u001b[39m here \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlen\u001b[39m(this))\n\u001b[1;32m 670\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m item[\u001b[38;5;241m0\u001b[39m][\u001b[38;5;241m0\u001b[39m] \u001b[38;5;129;01min\u001b[39;00m _REPEATCODES:\n\u001b[0;32m--> 671\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m source\u001b[38;5;241m.\u001b[39merror(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmultiple repeat\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 672\u001b[0m source\u001b[38;5;241m.\u001b[39mtell() \u001b[38;5;241m-\u001b[39m here \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlen\u001b[39m(this))\n\u001b[1;32m 673\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m item[\u001b[38;5;241m0\u001b[39m][\u001b[38;5;241m0\u001b[39m] \u001b[38;5;129;01mis\u001b[39;00m SUBPATTERN:\n\u001b[1;32m 674\u001b[0m group, add_flags, del_flags, p \u001b[38;5;241m=\u001b[39m item[\u001b[38;5;241m0\u001b[39m][\u001b[38;5;241m1\u001b[39m]\n", + "\u001b[0;31merror\u001b[0m: multiple repeat at position 11" + ] + } + ], + "source": [ + "sequence_df.query(\n", + " f'type==\"H3_(Lilly???)\" | variant.str.contains(\"H3_(Lilly???)\")',\n", + " engine=\"python\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "a01e42cb-d68b-4941-a564-2d1a4a02da45", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
accessiontypevariant_groupvariantdoubletgincbi_gene_idhgnc_gene_nametaxonomy_idorganismphylumclasstaxonomy_groupinforeferencessequence
NP_734466.1NP_734466.1H2AcH2AcH2A.1_(Homo_sapiens)221613.0H2AC19606.0Homo sapiensChordataMammaliaMammalia2011515 24506885 7068607MSGRGKQGGKARAKSKSRSSRAGLQFPVGRIHRLLRKGNYAERIGA...
\n", + "
" + ], + "text/plain": [ + " accession type variant_group variant doublet gi \\\n", + "NP_734466.1 NP_734466.1 H2A cH2A cH2A.1_(Homo_sapiens) \n", + "\n", + " ncbi_gene_id hgnc_gene_name taxonomy_id organism phylum \\\n", + "NP_734466.1 221613.0 H2AC1 9606.0 Homo sapiens Chordata \n", + "\n", + " class taxonomy_group info references \\\n", + "NP_734466.1 Mammalia Mammalia 2011515 24506885 7068607 \n", + "\n", + " sequence \n", + "NP_734466.1 MSGRGKQGGKARAKSKSRSSRAGLQFPVGRIHRLLRKGNYAERIGA... " + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sequence_df[sequence_df[\"variant\"].str.contains(\"cH2A.1_\\(Homo_sapiens\\)\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "d2db8101-4205-4af6-9ce0-53245d6a289d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
accessiontypevariant_groupvariantdoubletgincbi_gene_idhgnc_gene_nametaxonomy_idorganismphylumclasstaxonomy_groupinforeferencessequence
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [accession, type, variant_group, variant, doublet, gi, ncbi_gene_id, hgnc_gene_name, taxonomy_id, organism, phylum, class, taxonomy_group, info, references, sequence]\n", + "Index: []" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sequence_df.query(\n", + " f'type==\"cH2A.1_(Homo_sapiens)\" | variant.str.contains(\"cH2A.1_(Homo_sapiens)\")',\n", + " engine=\"python\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "b09590fc-4dd0-44d2-9c99-e87ad5ff1660", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing Archaeal\n", + "Archaeal Alignment length: 0\n", + "Processing cH1\n", + "cH1 Alignment length: 0\n", + "Processing generic_H1\n", + "generic_H1 Alignment length: 13\n", + "Processing H1.0_(Homo_sapiens)\n", + "H1.0_(Homo_sapiens) Alignment length: 1\n", + "Processing H1.0\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H1.0 aligned 14 sequences\n", + "\t\t Adding child H1.0 aligned 1 sequences\n", + "H1.0 Alignment length: 15\n", + "Processing H1.1_(Homo_sapiens)\n", + "H1.1_(Homo_sapiens) Alignment length: 1\n", + "Processing H1.1\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H1.1 aligned 0 sequences\n", + "\t\t Adding child H1.1 aligned 1 sequences\n", + "H1.1 Alignment length: 1\n", + "Processing H1.10_(Homo_sapiens)\n", + "H1.10_(Homo_sapiens) Alignment length: 1\n", + "Processing H1.10\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H1.10 aligned 5 sequences\n", + "\t\t Adding child H1.10 aligned 1 sequences\n", + "H1.10 Alignment length: 6\n", + "Processing H1.2_(Homo_sapiens)\n", + "H1.2_(Homo_sapiens) Alignment length: 1\n", + "Processing H1.2\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H1.2 aligned 0 sequences\n", + "\t\t Adding child H1.2 aligned 
1 sequences\n", + "H1.2 Alignment length: 1\n", + "Processing H1.3_(Homo_sapiens)\n", + "H1.3_(Homo_sapiens) Alignment length: 1\n", + "Processing H1.3\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H1.3 aligned 0 sequences\n", + "\t\t Adding child H1.3 aligned 1 sequences\n", + "H1.3 Alignment length: 1\n", + "Processing H1.4_(Homo_sapiens)\n", + "H1.4_(Homo_sapiens) Alignment length: 1\n", + "Processing H1.4\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H1.4 aligned 0 sequences\n", + "\t\t Adding child H1.4 aligned 1 sequences\n", + "H1.4 Alignment length: 1\n", + "Processing H1.5_(Homo_sapiens)\n", + "H1.5_(Homo_sapiens) Alignment length: 1\n", + "Processing H1.5\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H1.5 aligned 0 sequences\n", + "\t\t Adding child H1.5 aligned 1 sequences\n", + "H1.5 Alignment length: 1\n", + "Processing H1.8_(Homo_sapiens)\n", + "H1.8_(Homo_sapiens) Alignment length: 2\n", + "Processing OO_H1.8\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For OO_H1.8 aligned 1 sequences\n", + "\t\t Adding child OO_H1.8 aligned 2 sequences\n", + "OO_H1.8 Alignment length: 3\n", + "Processing scH1\n", + "scH1 Alignment length: 2\n", + "Processing H1.6_(Homo_sapiens)\n", + "H1.6_(Homo_sapiens) Alignment length: 1\n", + "Processing TS_H1.6\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For TS_H1.6 aligned 7 sequences\n", + "\t\t Adding child TS_H1.6 aligned 1 sequences\n", + "TS_H1.6 Alignment length: 8\n", + "Processing H1.7_(Homo_sapiens)\n", + "H1.7_(Homo_sapiens) Alignment length: 1\n", + "Processing TS_H1.7\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For TS_H1.7 aligned 1 sequences\n", + "\t\t Adding child TS_H1.7 aligned 1 sequences\n", + "TS_H1.7 Alignment length: 2\n", + "Processing TS_H1.9\n", + "TS_H1.9 Alignment length: 3\n", + "Processing H1\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H1 aligned 0 
sequences\n", + "\t\t Adding child H1 aligned 0 sequences\n", + "\t\t Adding child H1 aligned 13 sequences\n", + "\t\t Adding child H1 aligned 15 sequences\n", + "\t\t Adding child H1 aligned 1 sequences\n", + "\t\t Adding child H1 aligned 6 sequences\n", + "\t\t Adding child H1 aligned 1 sequences\n", + "\t\t Adding child H1 aligned 1 sequences\n", + "\t\t Adding child H1 aligned 1 sequences\n", + "\t\t Adding child H1 aligned 1 sequences\n", + "\t\t Adding child H1 aligned 3 sequences\n", + "\t\t Adding child H1 aligned 2 sequences\n", + "\t\t Adding child H1 aligned 8 sequences\n", + "\t\t Adding child H1 aligned 2 sequences\n", + "\t\t Adding child H1 aligned 3 sequences\n", + "H1 Alignment length: 57\n", + "Processing cH2A.10_(Homo_sapiens)\n", + "cH2A.10_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2A.11_(Homo_sapiens)\n", + "cH2A.11_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2A.1_(Homo_sapiens)\n", + "cH2A.1_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2A.2_(Homo_sapiens)\n", + "cH2A.2_(Homo_sapiens) Alignment length: 2\n", + "Processing cH2A.3_(Homo_sapiens)\n", + "cH2A.3_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2A.4_(Homo_sapiens)\n", + "cH2A.4_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2A.5_(Homo_sapiens)\n", + "cH2A.5_(Homo_sapiens) Alignment length: 5\n", + "Processing cH2A.6_(Homo_sapiens)\n", + "cH2A.6_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2A.7_(Homo_sapiens)\n", + "cH2A.7_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2A.8_(Homo_sapiens)\n", + "cH2A.8_(Homo_sapiens) Alignment length: 2\n", + "Processing cH2A.9_(Homo_sapiens)\n", + "cH2A.9_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2A_(Homo_sapiens)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2A_(Homo_sapiens) aligned 0 sequences\n", + "\t\t Adding child cH2A_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2A_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child 
cH2A_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2A_(Homo_sapiens) aligned 2 sequences\n", + "\t\t Adding child cH2A_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2A_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2A_(Homo_sapiens) aligned 5 sequences\n", + "\t\t Adding child cH2A_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2A_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2A_(Homo_sapiens) aligned 2 sequences\n", + "\t\t Adding child cH2A_(Homo_sapiens) aligned 1 sequences\n", + "cH2A_(Homo_sapiens) Alignment length: 17\n", + "Processing cH2A.1_(Mus_musculus)\n", + "cH2A.1_(Mus_musculus) Alignment length: 1\n", + "Processing cH2A_(Mus_musculus)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2A_(Mus_musculus) aligned 0 sequences\n", + "\t\t Adding child cH2A_(Mus_musculus) aligned 1 sequences\n", + "cH2A_(Mus_musculus) Alignment length: 1\n", + "Processing cH2A_(Mammalia)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2A_(Mammalia) aligned 16 sequences\n", + "\t\t Adding child cH2A_(Mammalia) aligned 17 sequences\n", + "\t\t Adding child cH2A_(Mammalia) aligned 1 sequences\n", + "cH2A_(Mammalia) Alignment length: 34\n", + "Processing cH2A_(Vertebrata)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2A_(Vertebrata) aligned 3 sequences\n", + "\t\t Adding child cH2A_(Vertebrata) aligned 34 sequences\n", + "cH2A_(Vertebrata) Alignment length: 37\n", + "Processing cH2A_(Animals)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2A_(Animals) aligned 5 sequences\n", + "\t\t Adding child cH2A_(Animals) aligned 37 sequences\n", + "cH2A_(Animals) Alignment length: 42\n", + "Processing cH2A_(Fungi)\n", + "cH2A_(Fungi) Alignment length: 2\n", + "Processing cH2A_(Chlorophyta)\n", + "cH2A_(Chlorophyta) Alignment length: 1\n", + "Processing cH2A_(Embryophyta)\n", + "cH2A_(Embryophyta) Alignment length: 8\n", + 
"Processing cH2A_(Plants)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2A_(Plants) aligned 0 sequences\n", + "\t\t Adding child cH2A_(Plants) aligned 1 sequences\n", + "\t\t Adding child cH2A_(Plants) aligned 8 sequences\n", + "cH2A_(Plants) Alignment length: 9\n", + "Processing cH2A_(Protists)\n", + "cH2A_(Protists) Alignment length: 4\n", + "Processing cH2A\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2A aligned 0 sequences\n", + "\t\t Adding child cH2A aligned 42 sequences\n", + "\t\t Adding child cH2A aligned 2 sequences\n", + "\t\t Adding child cH2A aligned 9 sequences\n", + "\t\t Adding child cH2A aligned 4 sequences\n", + "cH2A Alignment length: 57\n", + "Processing gH2A\n", + "gH2A Alignment length: 1\n", + "Processing H2A.J_(Homo_sapiens)\n", + "H2A.J_(Homo_sapiens) Alignment length: 1\n", + "Processing H2A.J\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.J aligned 0 sequences\n", + "\t\t Adding child H2A.J aligned 1 sequences\n", + "H2A.J Alignment length: 1\n", + "Processing H2A.M\n", + "H2A.M Alignment length: 7\n", + "Processing H2A.R\n", + "H2A.R Alignment length: 5\n", + "Processing H2A.W\n", + "H2A.W Alignment length: 9\n", + "Processing H2A.X_(Homo_sapiens)\n", + "H2A.X_(Homo_sapiens) Alignment length: 1\n", + "Processing H2A.X_(Mus_musculus)\n", + "H2A.X_(Mus_musculus) Alignment length: 1\n", + "Processing H2A.X_(Mammalia)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.X_(Mammalia) aligned 3 sequences\n", + "\t\t Adding child H2A.X_(Mammalia) aligned 1 sequences\n", + "\t\t Adding child H2A.X_(Mammalia) aligned 1 sequences\n", + "H2A.X_(Mammalia) Alignment length: 5\n", + "Processing H2A.X_(Vertebrata)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.X_(Vertebrata) aligned 1 sequences\n", + "\t\t Adding child H2A.X_(Vertebrata) aligned 5 sequences\n", + "H2A.X_(Vertebrata) Alignment length: 6\n", + "Processing H2A.X_(Animals)\n", + 
"\t Node is internal, progressive alignment:\n", + "\t\t For H2A.X_(Animals) aligned 2 sequences\n", + "\t\t Adding child H2A.X_(Animals) aligned 6 sequences\n", + "H2A.X_(Animals) Alignment length: 8\n", + "Processing H2A.X_(Fungi)\n", + "H2A.X_(Fungi) Alignment length: 4\n", + "Processing H2A.X_(Plants)\n", + "H2A.X_(Plants) Alignment length: 5\n", + "Processing H2A.X_(Protists)\n", + "H2A.X_(Protists) Alignment length: 6\n", + "Processing H2A.X\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.X aligned 0 sequences\n", + "\t\t Adding child H2A.X aligned 8 sequences\n", + "\t\t Adding child H2A.X aligned 4 sequences\n", + "\t\t Adding child H2A.X aligned 5 sequences\n", + "\t\t Adding child H2A.X aligned 6 sequences\n", + "H2A.X Alignment length: 23\n", + "Processing H2A.Z.1_(Homo_sapiens)\n", + "H2A.Z.1_(Homo_sapiens) Alignment length: 1\n", + "Processing H2A.Z.1_(Primates)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.Z.1_(Primates) aligned 0 sequences\n", + "\t\t Adding child H2A.Z.1_(Primates) aligned 1 sequences\n", + "H2A.Z.1_(Primates) Alignment length: 1\n", + "Processing H2A.Z.1_(Chordata)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.Z.1_(Chordata) aligned 2 sequences\n", + "\t\t Adding child H2A.Z.1_(Chordata) aligned 1 sequences\n", + "H2A.Z.1_(Chordata) Alignment length: 3\n", + "Processing H2A.Z.2.s1_(Homo_sapiens)\n", + "H2A.Z.2.s1_(Homo_sapiens) Alignment length: 3\n", + "Processing H2A.Z.2.s1_(Primates)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.Z.2.s1_(Primates) aligned 0 sequences\n", + "\t\t Adding child H2A.Z.2.s1_(Primates) aligned 3 sequences\n", + "H2A.Z.2.s1_(Primates) Alignment length: 3\n", + "Processing H2A.Z.2.s2_(Homo_sapiens)\n", + "H2A.Z.2.s2_(Homo_sapiens) Alignment length: 2\n", + "Processing H2A.Z.2.s2_(Primates)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.Z.2.s2_(Primates) aligned 0 sequences\n", + "\t\t 
Adding child H2A.Z.2.s2_(Primates) aligned 2 sequences\n", + "H2A.Z.2.s2_(Primates) Alignment length: 2\n", + "Processing H2A.Z.2_(Primates)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.Z.2_(Primates) aligned 0 sequences\n", + "\t\t Adding child H2A.Z.2_(Primates) aligned 3 sequences\n", + "\t\t Adding child H2A.Z.2_(Primates) aligned 2 sequences\n", + "H2A.Z.2_(Primates) Alignment length: 5\n", + "Processing H2A.Z.2_(Chordata)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.Z.2_(Chordata) aligned 2 sequences\n", + "\t\t Adding child H2A.Z.2_(Chordata) aligned 5 sequences\n", + "H2A.Z.2_(Chordata) Alignment length: 7\n", + "Processing H2A.Z\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.Z aligned 20 sequences\n", + "\t\t Adding child H2A.Z aligned 3 sequences\n", + "\t\t Adding child H2A.Z aligned 7 sequences\n", + "H2A.Z Alignment length: 30\n", + "Processing macroH2A.1.s1_(Homo_sapiens)\n", + "macroH2A.1.s1_(Homo_sapiens) Alignment length: 1\n", + "Processing macroH2A.1.s1_(Mammalia)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For macroH2A.1.s1_(Mammalia) aligned 0 sequences\n", + "\t\t Adding child macroH2A.1.s1_(Mammalia) aligned 1 sequences\n", + "macroH2A.1.s1_(Mammalia) Alignment length: 1\n", + "Processing macroH2A.1.s2_(Homo_sapiens)\n", + "macroH2A.1.s2_(Homo_sapiens) Alignment length: 2\n", + "Processing macroH2A.1.s2_(Mammalia)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For macroH2A.1.s2_(Mammalia) aligned 0 sequences\n", + "\t\t Adding child macroH2A.1.s2_(Mammalia) aligned 2 sequences\n", + "macroH2A.1.s2_(Mammalia) Alignment length: 2\n", + "Processing macroH2A.1_(Mammalia)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For macroH2A.1_(Mammalia) aligned 0 sequences\n", + "\t\t Adding child macroH2A.1_(Mammalia) aligned 1 sequences\n", + "\t\t Adding child macroH2A.1_(Mammalia) aligned 2 sequences\n", + "macroH2A.1_(Mammalia) 
Alignment length: 3\n", + "Processing macroH2A.2_(Homo_sapiens)\n", + "macroH2A.2_(Homo_sapiens) Alignment length: 1\n", + "Processing macroH2A.2_(Mammalia)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For macroH2A.2_(Mammalia) aligned 0 sequences\n", + "\t\t Adding child macroH2A.2_(Mammalia) aligned 1 sequences\n", + "macroH2A.2_(Mammalia) Alignment length: 1\n", + "Processing macroH2A\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For macroH2A aligned 7 sequences\n", + "\t\t Adding child macroH2A aligned 3 sequences\n", + "\t\t Adding child macroH2A aligned 1 sequences\n", + "macroH2A Alignment length: 11\n", + "Processing H2A.B.1_(Homo_sapiens)\n", + "H2A.B.1_(Homo_sapiens) Alignment length: 1\n", + "Processing H2A.B.2_(Homo_sapiens)\n", + "H2A.B.2_(Homo_sapiens) Alignment length: 2\n", + "Processing H2A.B_(Homo_sapiens)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.B_(Homo_sapiens) aligned 0 sequences\n", + "\t\t Adding child H2A.B_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child H2A.B_(Homo_sapiens) aligned 2 sequences\n", + "H2A.B_(Homo_sapiens) Alignment length: 3\n", + "Processing H2A.B.1_(Mus_musculus)\n", + "H2A.B.1_(Mus_musculus) Alignment length: 1\n", + "Processing H2A.B.2_(Mus_musculus)\n", + "H2A.B.2_(Mus_musculus) Alignment length: 1\n", + "Processing H2A.B.3_(Mus_musculus)\n", + "H2A.B.3_(Mus_musculus) Alignment length: 1\n", + "Processing H2A.B_(Mus_musculus)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.B_(Mus_musculus) aligned 0 sequences\n", + "\t\t Adding child H2A.B_(Mus_musculus) aligned 1 sequences\n", + "\t\t Adding child H2A.B_(Mus_musculus) aligned 1 sequences\n", + "\t\t Adding child H2A.B_(Mus_musculus) aligned 1 sequences\n", + "H2A.B_(Mus_musculus) Alignment length: 3\n", + "Processing H2A.B\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.B aligned 11 sequences\n", + "\t\t Adding child H2A.B aligned 3 sequences\n", + 
"\t\t Adding child H2A.B aligned 3 sequences\n", + "H2A.B Alignment length: 17\n", + "Processing H2A.L.1_(Homo_sapiens)\n", + "H2A.L.1_(Homo_sapiens) Alignment length: 1\n", + "Processing H2A.L.3_(Homo_sapiens)\n", + "H2A.L.3_(Homo_sapiens) Alignment length: 1\n", + "Processing H2A.L_(Homo_sapiens)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.L_(Homo_sapiens) aligned 0 sequences\n", + "\t\t Adding child H2A.L_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child H2A.L_(Homo_sapiens) aligned 1 sequences\n", + "H2A.L_(Homo_sapiens) Alignment length: 2\n", + "Processing H2A.L.1_(Mus_musculus)\n", + "H2A.L.1_(Mus_musculus) Alignment length: 1\n", + "Processing H2A.L.2_(Mus_musculus)\n", + "H2A.L.2_(Mus_musculus) Alignment length: 1\n", + "Processing H2A.L.3_(Mus_musculus)\n", + "H2A.L.3_(Mus_musculus) Alignment length: 1\n", + "Processing H2A.L_(Mus_musculus)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.L_(Mus_musculus) aligned 0 sequences\n", + "\t\t Adding child H2A.L_(Mus_musculus) aligned 1 sequences\n", + "\t\t Adding child H2A.L_(Mus_musculus) aligned 1 sequences\n", + "\t\t Adding child H2A.L_(Mus_musculus) aligned 1 sequences\n", + "H2A.L_(Mus_musculus) Alignment length: 3\n", + "Processing H2A.L\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.L aligned 15 sequences\n", + "\t\t Adding child H2A.L aligned 2 sequences\n", + "\t\t Adding child H2A.L aligned 3 sequences\n", + "H2A.L Alignment length: 20\n", + "Processing H2A.P_(Homo_sapiens)\n", + "H2A.P_(Homo_sapiens) Alignment length: 1\n", + "Processing H2A.P_(Mus_musculus)\n", + "H2A.P_(Mus_musculus) Alignment length: 1\n", + "Processing H2A.P\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A.P aligned 8 sequences\n", + "\t\t Adding child H2A.P aligned 1 sequences\n", + "\t\t Adding child H2A.P aligned 1 sequences\n", + "H2A.P Alignment length: 10\n", + "Processing H2A.Q\n", + "H2A.Q Alignment length: 28\n", + 
"Processing short_H2A\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For short_H2A aligned 0 sequences\n", + "\t\t Adding child short_H2A aligned 17 sequences\n", + "\t\t Adding child short_H2A aligned 20 sequences\n", + "\t\t Adding child short_H2A aligned 10 sequences\n", + "\t\t Adding child short_H2A aligned 28 sequences\n", + "short_H2A Alignment length: 75\n", + "Processing H2A\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2A aligned 0 sequences\n", + "\t\t Adding child H2A aligned 57 sequences\n", + "\t\t Adding child H2A aligned 1 sequences\n", + "\t\t Adding child H2A aligned 1 sequences\n", + "\t\t Adding child H2A aligned 7 sequences\n", + "\t\t Adding child H2A aligned 5 sequences\n", + "\t\t Adding child H2A aligned 9 sequences\n", + "\t\t Adding child H2A aligned 23 sequences\n", + "\t\t Adding child H2A aligned 30 sequences\n", + "\t\t Adding child H2A aligned 11 sequences\n", + "\t\t Adding child H2A aligned 75 sequences\n", + "H2A Alignment length: 219\n", + "Processing cH2B.10_(Homo_sapiens)\n", + "cH2B.10_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2B.11_(Homo_sapiens)\n", + "cH2B.11_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2B.12_(Homo_sapiens)\n", + "cH2B.12_(Homo_sapiens) Alignment length: 2\n", + "Processing cH2B.13_(Homo_sapiens)\n", + "cH2B.13_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2B.14_(Homo_sapiens)\n", + "cH2B.14_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2B.15_(Homo_sapiens)\n", + "cH2B.15_(Homo_sapiens) Alignment length: 0\n", + "Processing cH2B.1_(Homo_sapiens)\n", + "cH2B.1_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2B.2_(Homo_sapiens)\n", + "cH2B.2_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2B.3_(Homo_sapiens)\n", + "cH2B.3_(Homo_sapiens) Alignment length: 5\n", + "Processing cH2B.4_(Homo_sapiens)\n", + "cH2B.4_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2B.5_(Homo_sapiens)\n", + "cH2B.5_(Homo_sapiens) 
Alignment length: 1\n", + "Processing cH2B.6_(Homo_sapiens)\n", + "cH2B.6_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2B.7_(Homo_sapiens)\n", + "cH2B.7_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2B.8_(Homo_sapiens)\n", + "cH2B.8_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2B.9_(Homo_sapiens)\n", + "cH2B.9_(Homo_sapiens) Alignment length: 1\n", + "Processing cH2B_(Homo_sapiens)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2B_(Homo_sapiens) aligned 0 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 2 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 5 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child cH2B_(Homo_sapiens) aligned 1 sequences\n", + "cH2B_(Homo_sapiens) Alignment length: 19\n", + "Processing cH2B.1_(Mus_musculus)\n", + "cH2B.1_(Mus_musculus) Alignment length: 2\n", + "Processing cH2B.E_(Mus_musculus)\n", + "cH2B.E_(Mus_musculus) Alignment length: 1\n", + "Processing cH2B_(Mus_musculus)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2B_(Mus_musculus) aligned 1 sequences\n", + "\t\t Adding child cH2B_(Mus_musculus) aligned 2 sequences\n", + "\t\t Adding child cH2B_(Mus_musculus) aligned 1 sequences\n", + "cH2B_(Mus_musculus) Alignment length: 4\n", + "Processing 
cH2B_(Mammalia)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2B_(Mammalia) aligned 16 sequences\n", + "\t\t Adding child cH2B_(Mammalia) aligned 19 sequences\n", + "\t\t Adding child cH2B_(Mammalia) aligned 4 sequences\n", + "cH2B_(Mammalia) Alignment length: 39\n", + "Processing cH2B_(Vertebrata)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2B_(Vertebrata) aligned 2 sequences\n", + "\t\t Adding child cH2B_(Vertebrata) aligned 39 sequences\n", + "cH2B_(Vertebrata) Alignment length: 41\n", + "Processing cH2B_(Animals)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2B_(Animals) aligned 4 sequences\n", + "\t\t Adding child cH2B_(Animals) aligned 41 sequences\n", + "cH2B_(Animals) Alignment length: 45\n", + "Processing cH2B_(Fungi)\n", + "cH2B_(Fungi) Alignment length: 2\n", + "Processing cH2B_(Chlorophyta)\n", + "cH2B_(Chlorophyta) Alignment length: 35\n", + "Processing cH2B_(Embryophyta)\n", + "cH2B_(Embryophyta) Alignment length: 4\n", + "Processing cH2B_(Plants)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2B_(Plants) aligned 0 sequences\n", + "\t\t Adding child cH2B_(Plants) aligned 35 sequences\n", + "\t\t Adding child cH2B_(Plants) aligned 4 sequences\n", + "cH2B_(Plants) Alignment length: 39\n", + "Processing cH2B_(Protists)\n", + "cH2B_(Protists) Alignment length: 1\n", + "Processing cH2B\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH2B aligned 0 sequences\n", + "\t\t Adding child cH2B aligned 45 sequences\n", + "\t\t Adding child cH2B aligned 2 sequences\n", + "\t\t Adding child cH2B aligned 39 sequences\n", + "\t\t Adding child cH2B aligned 1 sequences\n", + "cH2B Alignment length: 87\n", + "Processing CS_H2B_(Echinoidea)\n", + "CS_H2B_(Echinoidea) Alignment length: 1\n", + "Processing early_H2B_(Echinoidea)\n", + "early_H2B_(Echinoidea) Alignment length: 1\n", + "Processing gH2B\n", + "gH2B Alignment length: 8\n", + "Processing 
H2B.K_(Homo_sapiens)\n", + "H2B.K_(Homo_sapiens) Alignment length: 1\n", + "Processing H2B.K\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2B.K aligned 19 sequences\n", + "\t\t Adding child H2B.K aligned 1 sequences\n", + "H2B.K Alignment length: 20\n", + "Processing H2B.L\n", + "H2B.L Alignment length: 11\n", + "Processing H2B.N_(Homo_sapiens)\n", + "H2B.N_(Homo_sapiens) Alignment length: 1\n", + "Processing H2B.N\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2B.N aligned 13 sequences\n", + "\t\t Adding child H2B.N aligned 1 sequences\n", + "H2B.N Alignment length: 14\n", + "Processing H2B.O\n", + "H2B.O Alignment length: 3\n", + "Processing H2B.S\n", + "H2B.S Alignment length: 47\n", + "Processing H2B.V\n", + "H2B.V Alignment length: 1\n", + "Processing H2B.W.1_(Homo_sapiens)\n", + "H2B.W.1_(Homo_sapiens) Alignment length: 1\n", + "Processing H2B.W.2_(Homo_sapiens)\n", + "H2B.W.2_(Homo_sapiens) Alignment length: 1\n", + "Processing H2B.W_(Homo_sapiens)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2B.W_(Homo_sapiens) aligned 0 sequences\n", + "\t\t Adding child H2B.W_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child H2B.W_(Homo_sapiens) aligned 1 sequences\n", + "H2B.W_(Homo_sapiens) Alignment length: 2\n", + "Processing H2B.W\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2B.W aligned 5 sequences\n", + "\t\t Adding child H2B.W aligned 2 sequences\n", + "H2B.W Alignment length: 7\n", + "Processing H2B.Z\n", + "H2B.Z Alignment length: 9\n", + "Processing late_H2B_(Echinoidea)\n", + "late_H2B_(Echinoidea) Alignment length: 2\n", + "Processing sperm_H2B_(Echinoidea)\n", + "sperm_H2B_(Echinoidea) Alignment length: 7\n", + "Processing H2B\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H2B aligned 0 sequences\n", + "\t\t Adding child H2B aligned 87 sequences\n", + "\t\t Adding child H2B aligned 1 sequences\n", + "\t\t Adding child H2B aligned 1 
sequences\n", + "\t\t Adding child H2B aligned 8 sequences\n", + "\t\t Adding child H2B aligned 20 sequences\n", + "\t\t Adding child H2B aligned 11 sequences\n", + "\t\t Adding child H2B aligned 14 sequences\n", + "\t\t Adding child H2B aligned 3 sequences\n", + "\t\t Adding child H2B aligned 47 sequences\n", + "\t\t Adding child H2B aligned 1 sequences\n", + "\t\t Adding child H2B aligned 7 sequences\n", + "\t\t Adding child H2B aligned 9 sequences\n", + "\t\t Adding child H2B aligned 2 sequences\n", + "\t\t Adding child H2B aligned 7 sequences\n", + "H2B Alignment length: 218\n", + "Processing cenH3_(Homo_sapiens)\n", + "cenH3_(Homo_sapiens) Alignment length: 2\n", + "Processing cenH3_(Mammalia)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cenH3_(Mammalia) aligned 1 sequences\n", + "\t\t Adding child cenH3_(Mammalia) aligned 2 sequences\n", + "cenH3_(Mammalia) Alignment length: 3\n", + "Processing cenH3_(Animals)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cenH3_(Animals) aligned 2 sequences\n", + "\t\t Adding child cenH3_(Animals) aligned 3 sequences\n", + "cenH3_(Animals) Alignment length: 5\n", + "Processing cenH3_(Fungi)\n", + "cenH3_(Fungi) Alignment length: 2\n", + "Processing cenH3_(Plants)\n", + "cenH3_(Plants) Alignment length: 3\n", + "Processing cenH3_(Eukarya)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cenH3_(Eukarya) aligned 5 sequences\n", + "\t\t Adding child cenH3_(Eukarya) aligned 5 sequences\n", + "\t\t Adding child cenH3_(Eukarya) aligned 2 sequences\n", + "\t\t Adding child cenH3_(Eukarya) aligned 3 sequences\n", + "cenH3_(Eukarya) Alignment length: 15\n", + "Processing cH3.1_(Homo_sapiens)\n", + "cH3.1_(Homo_sapiens) Alignment length: 10\n", + "Processing cH3.1_(Mammalia)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH3.1_(Mammalia) aligned 0 sequences\n", + "\t\t Adding child cH3.1_(Mammalia) aligned 10 sequences\n", + "cH3.1_(Mammalia) Alignment 
length: 10\n", + "Processing cH3.2_(Homo_sapiens)\n", + "cH3.2_(Homo_sapiens) Alignment length: 3\n", + "Processing cH3.2_(Mammalia)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH3.2_(Mammalia) aligned 0 sequences\n", + "\t\t Adding child cH3.2_(Mammalia) aligned 3 sequences\n", + "cH3.2_(Mammalia) Alignment length: 3\n", + "Processing cH3_(Mammalia)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH3_(Mammalia) aligned 0 sequences\n", + "\t\t Adding child cH3_(Mammalia) aligned 10 sequences\n", + "\t\t Adding child cH3_(Mammalia) aligned 3 sequences\n", + "cH3_(Mammalia) Alignment length: 13\n", + "Processing cH3_(Vertebrata)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH3_(Vertebrata) aligned 0 sequences\n", + "\t\t Adding child cH3_(Vertebrata) aligned 13 sequences\n", + "cH3_(Vertebrata) Alignment length: 13\n", + "Processing cH3_(Animals)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH3_(Animals) aligned 0 sequences\n", + "\t\t Adding child cH3_(Animals) aligned 13 sequences\n", + "cH3_(Animals) Alignment length: 13\n", + "Processing cH3_(Fungi)\n", + "cH3_(Fungi) Alignment length: 0\n", + "Processing cH3_(Chlorophyta)\n", + "cH3_(Chlorophyta) Alignment length: 0\n", + "Processing cH3_(Embryophyta)\n", + "cH3_(Embryophyta) Alignment length: 0\n", + "Processing cH3_(Plants)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH3_(Plants) aligned 0 sequences\n", + "\t\t Adding child cH3_(Plants) aligned 0 sequences\n", + "\t\t Adding child cH3_(Plants) aligned 0 sequences\n", + "cH3_(Plants) Alignment length: 0\n", + "Processing cH3_(Protists)\n", + "cH3_(Protists) Alignment length: 0\n", + "Processing cH3\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH3 aligned 23 sequences\n", + "\t\t Adding child cH3 aligned 13 sequences\n", + "cH3 Alignment length: 36\n", + "Processing H3.1-like_(Plants)\n", + "H3.1-like_(Plants) Alignment length: 0\n", 
+ "Processing H3.3_(Homo_sapiens)\n", + "H3.3_(Homo_sapiens) Alignment length: 2\n", + "Processing H3.3_(Animals)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H3.3_(Animals) aligned 0 sequences\n", + "\t\t Adding child H3.3_(Animals) aligned 2 sequences\n", + "H3.3_(Animals) Alignment length: 2\n", + "Processing H3.3_(Ascomycota)\n", + "H3.3_(Ascomycota) Alignment length: 0\n", + "Processing H3.3_(Fungi)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H3.3_(Fungi) aligned 0 sequences\n", + "\t\t Adding child H3.3_(Fungi) aligned 0 sequences\n", + "H3.3_(Fungi) Alignment length: 0\n", + "Processing H3.3_(Plants)\n", + "H3.3_(Plants) Alignment length: 0\n", + "Processing H3.3_(Protists)\n", + "H3.3_(Protists) Alignment length: 0\n", + "Processing H3.3\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H3.3 aligned 18 sequences\n", + "\t\t Adding child H3.3 aligned 2 sequences\n", + "H3.3 Alignment length: 20\n", + "Processing H3.X_(Homo_sapiens)\n", + "H3.X_(Homo_sapiens) Alignment length: 0\n", + "Processing H3.X_(Primates?)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H3.X_(Primates?) aligned 0 sequences\n", + "\t\t Adding child H3.X_(Primates?) aligned 0 sequences\n", + "H3.X_(Primates?) Alignment length: 0\n", + "Processing H3.Y.1_(Homo_sapiens)\n", + "H3.Y.1_(Homo_sapiens) Alignment length: 1\n", + "Processing H3.Y.2_(Homo_sapiens)\n", + "H3.Y.2_(Homo_sapiens) Alignment length: 1\n", + "Processing H3.Y_(Homo_sapiens)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H3.Y_(Homo_sapiens) aligned 0 sequences\n", + "\t\t Adding child H3.Y_(Homo_sapiens) aligned 1 sequences\n", + "\t\t Adding child H3.Y_(Homo_sapiens) aligned 1 sequences\n", + "H3.Y_(Homo_sapiens) Alignment length: 2\n", + "Processing H3.Y_(Primates?)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H3.Y_(Primates?) aligned 4 sequences\n", + "\t\t Adding child H3.Y_(Primates?) 
aligned 2 sequences\n", + "H3.Y_(Primates?) Alignment length: 6\n", + "Processing H3.3-like_(Animals)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H3.3-like_(Animals) aligned 0 sequences\n", + "\t\t Adding child H3.3-like_(Animals) aligned 0 sequences\n", + "\t\t Adding child H3.3-like_(Animals) aligned 6 sequences\n", + "H3.3-like_(Animals) Alignment length: 6\n", + "Processing H3_(Lilly???)\n", + "H3_(Lilly???) Alignment length: 0\n", + "Processing TS H3.10\n", + "TS H3.10 Alignment length: 0\n", + "Processing H3.3-like_(Plants)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H3.3-like_(Plants) aligned 0 sequences\n", + "\t\t Adding child H3.3-like_(Plants) aligned 0 sequences\n", + "\t\t Adding child H3.3-like_(Plants) aligned 0 sequences\n", + "H3.3-like_(Plants) Alignment length: 0\n", + "Processing H3.3-like?\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H3.3-like? aligned 0 sequences\n", + "\t\t Adding child H3.3-like? aligned 6 sequences\n", + "H3.3-like? Alignment length: 6\n", + "Processing H3.4_(Homo_sapiens)\n", + "H3.4_(Homo_sapiens) Alignment length: 1\n", + "Processing H3.4_(Mammalia)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H3.4_(Mammalia) aligned 1 sequences\n", + "\t\t Adding child H3.4_(Mammalia) aligned 1 sequences\n", + "H3.4_(Mammalia) Alignment length: 2\n", + "Processing H3.5_(Homo_sapiens)\n", + "H3.5_(Homo_sapiens) Alignment length: 1\n", + "Processing H3.5_(Hominidae)\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H3.5_(Hominidae) aligned 1 sequences\n", + "\t\t Adding child H3.5_(Hominidae) aligned 1 sequences\n", + "H3.5_(Hominidae) Alignment length: 2\n", + "Processing H3.6_(Mammals?)?\n", + "H3.6_(Mammals?)? Alignment length: 0\n", + "Processing H3.7_(Mammals?)?\n", + "H3.7_(Mammals?)? Alignment length: 0\n", + "Processing H3.8_(Mammals?)?\n", + "H3.8_(Mammals?)? 
Alignment length: 0\n", + "Processing H3.B_(Giardia?)\n", + "H3.B_(Giardia?) Alignment length: 0\n", + "Processing H3.P_(Moneuplotes?)\n", + "H3.P_(Moneuplotes?) Alignment length: 0\n", + "Processing H3.V_(Trypanosomatidae)\n", + "H3.V_(Trypanosomatidae) Alignment length: 0\n", + "Processing H3\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H3 aligned 0 sequences\n", + "\t\t Adding child H3 aligned 15 sequences\n", + "\t\t Adding child H3 aligned 36 sequences\n", + "\t\t Adding child H3 aligned 20 sequences\n", + "\t\t Adding child H3 aligned 6 sequences\n", + "\t\t Adding child H3 aligned 2 sequences\n", + "\t\t Adding child H3 aligned 2 sequences\n", + "H3 Alignment length: 81\n", + "Processing cH4_(Homo_sapiens)\n", + "cH4_(Homo_sapiens) Alignment length: 14\n", + "Processing cH4\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For cH4 aligned 12 sequences\n", + "\t\t Adding child cH4 aligned 14 sequences\n", + "cH4 Alignment length: 26\n", + "Processing H4\n", + "\t Node is internal, progressive alignment:\n", + "\t\t For H4 aligned 0 sequences\n", + "\t\t Adding child H4 aligned 26 sequences\n", + "H4 Alignment length: 26\n", + "Processing Viral\n", + "Viral Alignment length: 0\n", + "Processing \n" + ] + } + ], + "source": [ + "generate_draft_seeds(\n", + " hist_tree,\n", + " sequence_df,\n", + " \"/home/l_singh/_scratch/hdb/project_dir/histonedb/CURATED_SET/draft_seeds\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "2569fada-becb-4419-8926-e0c131b4b7bf", + "metadata": {}, + "source": [ + "# Close connection" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "b7719968-002f-4229-846d-e4a8f180ec6b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cursor.close()\n", + "conn.close()\n", + "tunnel.stop()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "763540a8-72aa-4529-88dc-95ccee1dfdff", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + 
"metadata": { + "kernelspec": { + "display_name": ".conda-histdb_env", + "language": "python", + "name": "conda-env-.conda-histdb_env-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/CURATED_SET/curated_service/curatedDB/save_curatedDB_toFiles.ipynb b/CURATED_SET/curated_service/curatedDB/save_curatedDB_toFiles.ipynb index 9296509..063fff6 100644 --- a/CURATED_SET/curated_service/curatedDB/save_curatedDB_toFiles.ipynb +++ b/CURATED_SET/curated_service/curatedDB/save_curatedDB_toFiles.ipynb @@ -4661,7 +4661,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 15, "id": "b7719968-002f-4229-846d-e4a8f180ec6b", "metadata": { "tags": [] diff --git a/CURATED_SET/draft_seeds/Archaeal.fasta b/CURATED_SET/draft_seeds/Archaeal.fasta old mode 100755 new mode 100644 diff --git a/CURATED_SET/draft_seeds/CS_H2B_(Echinoidea).fasta b/CURATED_SET/draft_seeds/CS_H2B_(Echinoidea).fasta new file mode 100644 index 0000000..2996cb2 --- /dev/null +++ b/CURATED_SET/draft_seeds/CS_H2B_(Echinoidea).fasta @@ -0,0 +1,4 @@ +>Psammechinus|AAB48832.1|CS_H2B_(Echinoidea) organism=Psammechinus miliaris phylum=Echinodermata class=Echinoidea +MPAKGAATKGEKKQAVKSKAMASSRTGDKKRRRRRLESYNIYIYKVLKQVHPDTGISSKA +MSIMNSFVNDIFERIAAEASRLAQYNKKSTISSREVQTAVRLLLPGELAKHAVSEGTKAV +TKYTTSR diff --git a/CURATED_SET/draft_seeds/H1.0.fasta b/CURATED_SET/draft_seeds/H1.0.fasta new file mode 100644 index 0000000..7b62eec --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.0.fasta @@ -0,0 +1,75 @@ +>Thalassiosira|EED88841.1|H1.0 organism=Thalassiosira pseudonana CCMP1335 phylum=Bacillariophyta class=Coscinodiscophyceae +----------------------------------------MSYKAGIAKAITELKDRTGS 
+SSIAIKKHMQANLPADKKWMNATFLKALKDMVASGELVKTK-----ASYKLSA------- +-------VAKQKASSAGKPKKAPKKKA---------------------APKKTAPKKKAA +PKKKTATKKAATAKKPAAKKATTAK---KSTTKKTAKK- +>Esox|XP_010887142.1|H1.0 organism=Esox lucius phylum=Chordata class=Actinopteri +---MAETVAAPAP-------------KAKKAKAPKKPASHPKYSDMIKAAVQADKSRGGA +SRQSVQKYIKSHYKVGDN-ADSQIKLSLKRMVSGGLLRHTKGIGASGSFKLAKAEDTKKA +PKPKPVVKAKKSPVKAAKPKKVAKPKKVVKSPAKAKKAKVAVKKVKK-SPKKVAPKPKKV +VK-KVKAAKPAKAVKP--KKAKAAKPKPKAAAKKAAKKK +>Salmo|ACH70944.1|H1.0 organism=Salmo salar phylum=Chordata class=Actinopteri +---MAETAAAPAP-------------KAKKAKAPKKPASHPKYSDMIKAAVHADKSRGGA +SRQSVQKYIKSHYKVGDN-ADSQIKLSLKRMVSEGVLRHTKGIGASGSFKLAKAEDTKKA +PKVKAVVKAKKSPVKSAKPKKVAKPKKVAKSPAKAKKAKVAVKKVKK-SPKKAAPKPKKV +AK-KTKVAKPAKATKP--KKAKAAKPKPKAAAKKAAKKK +>Salmo|ACM08534.1|H1.0 organism=Salmo salar phylum=Chordata class=Actinopteri +---------------------------------------------MIKAAVHADKSRGGA +SRQSVQKYIKSHYKVGDN-ADSQIKLSLKRMVSEGVLRHTKGIGASGSFKLAKAEDTKKA +PKVKAVVKAKKSPVKSAKPKKVAKPKKVAKSPAKAKKAKVAVKKVKK-SPKKAAPKPKKV +VK-KTKVAKPAKATKP--KKAKAAKPKPKAAAKKAAKKK +>Salmo|ACM09660.1|H1.0 organism=Salmo salar phylum=Chordata class=Actinopteri +---------------------------------------------MIKAAVHADKSRGGA +SRQSVQKYIKSHYKVGDN-ADSQIKLSLKRMVSEGVLRHTKGIGASGSFKLAKAEDTKKA +PKVKAVVKAKKSPVKSAKPKKVAKPKKVAKSPAKAKKAKVAVKKVKK-SPKKAAPKPKKV +AK-KTKVAKPAKATKP--KKAKAAKPKPKAAAKKAAKKK +>Xenopus|NP_998836.1|H1.0 organism=Xenopus tropicalis phylum=Chordata class=Amphibia +--MTENSAAAPAG-------------KPKRSKASKKATDHPKYSDMILAAVQAEKSRSGS +SRQSIQKYIKNHYKVGEN-ADSQIKLSIKRLVTSGTLKQTKGVGASGSFRLAKADEGKKP +AK-----KPKKEIKKAASPKKAAKPKKAAKSPAKAKKPKVAEKKVKKPAKKKPAPSPKKA +KKTKTVKAKPVRASRV--KKAKPSKPKAKASPKKSGRKK +>Cairina|P06513.2|H1.0 organism=Cairina moschata phylum=Chordata class=Aves +--MTDSPIPAPAPAA-----------KPKRAKAPRKPASHPSYSEMIVAAIRAEKSRGGS +SRQSIQKYVKSHYKVGQH-ADLQIKLSIRRLLAAGVLKQTKGVGASGSYRLAKGDKAKKS +PAGRK--KKKKAARRSTSPRKAARPRK---ARSPAKKPKAA---ARK-ARKKSRASPKKA 
+KKPKTVKAKSLKTSKV--KKAKRSKPRAKSGARKSPKKK +>Gallus|NP_001038138.1|H1.0 organism=Gallus gallus phylum=Chordata class=Aves +--MTESLVLSPAPA------------KPKRVKASRRSASHPTYSEMIAAAIRAEKSRGGS +SRQSIQKYIKSHYKVGHN-ADLQIKLSIRRLLAAGVLKQTKGVGASGSFRLAKSDKAKRS +PG-----KKKKAVRRSTSPKKAARPRK---ARSPAKKPKAT---ARK-ARKKSRASPKKA +KKPKTVKAKSRKASKA--KKVKRSKPRAKSGARKSPKKK +>Taeniopygia|XP_004175972.1|H1.0 organism=Taeniopygia guttata phylum=Chordata class=Aves +--MTRLPSMCKASSSLMSCVHLCAFPPPKRARSARRPAAHPAYSDMVTAAVRADKSRGGA +SRQSIQKYVKSNYKVGQN-ADVQIRLAIRRLLAAGVLKQTKGVGASGSFRLAKAGKAKRS +PSR----KRKKAARRSTSPRKTARSRK---ARSPAKKPKSA---ARK-ARKKSRS-PKKA +KKPKTVKAKSLKASKP--KKARRSKSRAKSGARKSPKKK +>Bos|NP_001069955.1|H1.0 organism=Bos taurus phylum=Chordata class=Mammalia +--MTENSTSTPAA-------------KPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAGS +SRQSIQKYIKSHYKVGEN-ADSQIKLSIKRLVTTGVLKQTKGVGASGSFRLAKSDEPKRS +VAFK---KTKKEVKKVATPKKAAKPKKAA-SKAPSKKPKATP--VKK-AKKKPAATPKKT +KKPKTVKAKPVKASKP--KKTKPVKPKAKSSAKRTGKKK +>Mus|NP_032223.2|H1.0 organism=Mus musculus phylum=Chordata class=Mammalia +--MTENSTSAPAA-------------KPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAGS +SRQSIQKYIKSHYKVGEN-ADSQIKLSIKRLVTTGVLKQTKGVGASGSFRLAKGDEPKRS +VAFK---KTKKEVKKVATPKKAAKPKKAA-SKAPSKKPKATP--VKK-AKKKPAATPKKA +KKPKVVKVKPVKASKP--KKAKTVKPKAKSSAKRASKKK +>Pongo|NP_001127680.1|H1.0 organism=Pongo abelii phylum=Chordata class=Mammalia +--MTENSTSAPAA-------------KPKRAKASKKSTDHPKYSDMVVAAIQAEKNRAGS +SRQSIQKYIKSHYKVGEN-ADSQIKLSIKRLVTTGVLKQTKGVGASGSFRLAKSDEPKKS +VAFK---KTKKEIKKVATPKKASKPKKAA-SKAPTKKPKATP--VKK-AKKKLAATPKKA +KKPKTVKAKPVKASKP--KKAKPVKPKAKSSAKRAGKKK +>Rattus|NP_036710.1|H1.0 organism=Rattus norvegicus phylum=Chordata class=Mammalia +--MTENSTSTPAA-------------KPKRAKAAKKSTDHPKYSDMIVAAIQAEKNRAGS +SRQSIQKYIKSHYKVGEN-ADSQIKLSIKRLVTTGVLKQTKGVGASGSFRLAKGDEPKRS +VAFK---KTKKEVKKVATPKKAAKPKKAA-SKAPSKKPKATP--VKK-AKKKPAATPKKA +KKPKIVKVKPVKASKP--KKAKPVKPKAKSSAKRASKKK +>Strongylocentrotus|NP_999722.1|H1.0 organism=Strongylocentrotus purpuratus phylum=Echinodermata 
class=Echinoidea +MADTDAAPAAPAPSTPKKA-------AKKKASKPKTPASHPKYSDMIASALESLKEKKGS +SRQAILKYVKANFTVGDN-ANVHIKQALKRGVTSGQLRHVKGSGASGSFLLAEKTK---- +-------TPKKAAAKKATPKKKPAAKK---TKKPA---------AKK-ATKKPAKKP--A +AKKKVAKPAAKKAAKPVAKKATPKKKVVKKAAKGKGKKK +>Homo|NP_005309.1|H1.0_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +--MTENSTSAPAA-------------KPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAGS +SRQSIQKYIKSHYKVGEN-ADSQIKLSIKRLVTTGVLKQTKGVGASGSFRLAKSDEPKKS +VAFK---KTKKEIKKVATPKKASKPKKAA-SKAPTKKPKATP--VKK-AKKKLAATPKKA +KKPKTVKAKPVKASKP--KKAKPVKPKAKSSAKRAGKKK diff --git a/CURATED_SET/draft_seeds/H1.0_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H1.0_(Homo_sapiens).fasta new file mode 100644 index 0000000..82b91c4 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.0_(Homo_sapiens).fasta @@ -0,0 +1,5 @@ +>Homo|NP_005309.1|H1.0_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MTENSTSAPAAKPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAGSSRQSIQKYIKSHYKV +GENADSQIKLSIKRLVTTGVLKQTKGVGASGSFRLAKSDEPKKSVAFKKTKKEIKKVATP +KKASKPKKAASKAPTKKPKATPVKKAKKKLAATPKKAKKPKTVKAKPVKASKPKKAKPVK +PKAKSSAKRAGKKK diff --git a/CURATED_SET/draft_seeds/H1.0_only.fasta b/CURATED_SET/draft_seeds/H1.0_only.fasta new file mode 100644 index 0000000..5da7fb4 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.0_only.fasta @@ -0,0 +1,70 @@ +>Thalassiosira|EED88841.1|H1.0 organism=Thalassiosira pseudonana CCMP1335 phylum=Bacillariophyta class=Coscinodiscophyceae +----------------------------------------MSYKAGIAKAITELKDRTGS +SSIAIKKHMQANLPADKKWMNATFLKALKDMVASGELVKTK-----ASYKLSA------- +-------VAKQKASSAGKPKKAPKKKA---------------------APKKTAPKKKAA +PKKKTATKKAATAKKPAAKKATTAK---KSTTKKTAKK- +>Esox|XP_010887142.1|H1.0 organism=Esox lucius phylum=Chordata class=Actinopteri +---MAETVAAPAP-------------KAKKAKAPKKPASHPKYSDMIKAAVQADKSRGGA +SRQSVQKYIKSHYKVGDN-ADSQIKLSLKRMVSGGLLRHTKGIGASGSFKLAKAEDTKKA +PKPKPVVKAKKSPVKAAKPKKVAKPKKVVKSPAKAKKAKVAVKKVKK-SPKKVAPKPKKV +VK-KVKAAKPAKAVKP--KKAKAAKPKPKAAAKKAAKKK 
+>Salmo|ACH70944.1|H1.0 organism=Salmo salar phylum=Chordata class=Actinopteri +---MAETAAAPAP-------------KAKKAKAPKKPASHPKYSDMIKAAVHADKSRGGA +SRQSVQKYIKSHYKVGDN-ADSQIKLSLKRMVSEGVLRHTKGIGASGSFKLAKAEDTKKA +PKVKAVVKAKKSPVKSAKPKKVAKPKKVAKSPAKAKKAKVAVKKVKK-SPKKAAPKPKKV +AK-KTKVAKPAKATKP--KKAKAAKPKPKAAAKKAAKKK +>Salmo|ACM08534.1|H1.0 organism=Salmo salar phylum=Chordata class=Actinopteri +---------------------------------------------MIKAAVHADKSRGGA +SRQSVQKYIKSHYKVGDN-ADSQIKLSLKRMVSEGVLRHTKGIGASGSFKLAKAEDTKKA +PKVKAVVKAKKSPVKSAKPKKVAKPKKVAKSPAKAKKAKVAVKKVKK-SPKKAAPKPKKV +VK-KTKVAKPAKATKP--KKAKAAKPKPKAAAKKAAKKK +>Salmo|ACM09660.1|H1.0 organism=Salmo salar phylum=Chordata class=Actinopteri +---------------------------------------------MIKAAVHADKSRGGA +SRQSVQKYIKSHYKVGDN-ADSQIKLSLKRMVSEGVLRHTKGIGASGSFKLAKAEDTKKA +PKVKAVVKAKKSPVKSAKPKKVAKPKKVAKSPAKAKKAKVAVKKVKK-SPKKAAPKPKKV +AK-KTKVAKPAKATKP--KKAKAAKPKPKAAAKKAAKKK +>Xenopus|NP_998836.1|H1.0 organism=Xenopus tropicalis phylum=Chordata class=Amphibia +--MTENSAAAPAG-------------KPKRSKASKKATDHPKYSDMILAAVQAEKSRSGS +SRQSIQKYIKNHYKVGEN-ADSQIKLSIKRLVTSGTLKQTKGVGASGSFRLAKADEGKKP +AK-----KPKKEIKKAASPKKAAKPKKAAKSPAKAKKPKVAEKKVKKPAKKKPAPSPKKA +KKTKTVKAKPVRASRV--KKAKPSKPKAKASPKKSGRKK +>Cairina|P06513.2|H1.0 organism=Cairina moschata phylum=Chordata class=Aves +--MTDSPIPAPAPAA-----------KPKRAKAPRKPASHPSYSEMIVAAIRAEKSRGGS +SRQSIQKYVKSHYKVGQH-ADLQIKLSIRRLLAAGVLKQTKGVGASGSYRLAKGDKAKKS +PAGRK--KKKKAARRSTSPRKAARPRK---ARSPAKKPKAA---ARK-ARKKSRASPKKA +KKPKTVKAKSLKTSKV--KKAKRSKPRAKSGARKSPKKK +>Gallus|NP_001038138.1|H1.0 organism=Gallus gallus phylum=Chordata class=Aves +--MTESLVLSPAPA------------KPKRVKASRRSASHPTYSEMIAAAIRAEKSRGGS +SRQSIQKYIKSHYKVGHN-ADLQIKLSIRRLLAAGVLKQTKGVGASGSFRLAKSDKAKRS +PG-----KKKKAVRRSTSPKKAARPRK---ARSPAKKPKAT---ARK-ARKKSRASPKKA +KKPKTVKAKSRKASKA--KKVKRSKPRAKSGARKSPKKK +>Taeniopygia|XP_004175972.1|H1.0 organism=Taeniopygia guttata phylum=Chordata class=Aves +--MTRLPSMCKASSSLMSCVHLCAFPPPKRARSARRPAAHPAYSDMVTAAVRADKSRGGA 
+SRQSIQKYVKSNYKVGQN-ADVQIRLAIRRLLAAGVLKQTKGVGASGSFRLAKAGKAKRS +PSR----KRKKAARRSTSPRKTARSRK---ARSPAKKPKSA---ARK-ARKKSRS-PKKA +KKPKTVKAKSLKASKP--KKARRSKSRAKSGARKSPKKK +>Bos|NP_001069955.1|H1.0 organism=Bos taurus phylum=Chordata class=Mammalia +--MTENSTSTPAA-------------KPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAGS +SRQSIQKYIKSHYKVGEN-ADSQIKLSIKRLVTTGVLKQTKGVGASGSFRLAKSDEPKRS +VAFK---KTKKEVKKVATPKKAAKPKKAA-SKAPSKKPKATP--VKK-AKKKPAATPKKT +KKPKTVKAKPVKASKP--KKTKPVKPKAKSSAKRTGKKK +>Mus|NP_032223.2|H1.0 organism=Mus musculus phylum=Chordata class=Mammalia +--MTENSTSAPAA-------------KPKRAKASKKSTDHPKYSDMIVAAIQAEKNRAGS +SRQSIQKYIKSHYKVGEN-ADSQIKLSIKRLVTTGVLKQTKGVGASGSFRLAKGDEPKRS +VAFK---KTKKEVKKVATPKKAAKPKKAA-SKAPSKKPKATP--VKK-AKKKPAATPKKA +KKPKVVKVKPVKASKP--KKAKTVKPKAKSSAKRASKKK +>Pongo|NP_001127680.1|H1.0 organism=Pongo abelii phylum=Chordata class=Mammalia +--MTENSTSAPAA-------------KPKRAKASKKSTDHPKYSDMVVAAIQAEKNRAGS +SRQSIQKYIKSHYKVGEN-ADSQIKLSIKRLVTTGVLKQTKGVGASGSFRLAKSDEPKKS +VAFK---KTKKEIKKVATPKKASKPKKAA-SKAPTKKPKATP--VKK-AKKKLAATPKKA +KKPKTVKAKPVKASKP--KKAKPVKPKAKSSAKRAGKKK +>Rattus|NP_036710.1|H1.0 organism=Rattus norvegicus phylum=Chordata class=Mammalia +--MTENSTSTPAA-------------KPKRAKAAKKSTDHPKYSDMIVAAIQAEKNRAGS +SRQSIQKYIKSHYKVGEN-ADSQIKLSIKRLVTTGVLKQTKGVGASGSFRLAKGDEPKRS +VAFK---KTKKEVKKVATPKKAAKPKKAA-SKAPSKKPKATP--VKK-AKKKPAATPKKA +KKPKIVKVKPVKASKP--KKAKPVKPKAKSSAKRASKKK +>Strongylocentrotus|NP_999722.1|H1.0 organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +MADTDAAPAAPAPSTPKKA-------AKKKASKPKTPASHPKYSDMIASALESLKEKKGS +SRQAILKYVKANFTVGDN-ANVHIKQALKRGVTSGQLRHVKGSGASGSFLLAEKTK---- +-------TPKKAAAKKATPKKKPAAKK---TKKPA---------AKK-ATKKPAKKP--A +AKKKVAKPAAKKAAKPVAKKATPKKKVVKKAAKGKGKKK diff --git a/CURATED_SET/draft_seeds/H1.1.fasta b/CURATED_SET/draft_seeds/H1.1.fasta new file mode 100644 index 0000000..8130291 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.1.fasta @@ -0,0 +1,5 @@ +>Homo|NP_005316.1|H1.1_(Homo_sapiens) organism=Homo sapiens 
phylum=Chordata class=Mammalia +MSETVPPAPAASAAPEKPLAGKKAKKPAKAAAASKKKPAGPSVSELIVQAASSSKERGGV +SLAALKKALAAAGYDVEKNNSRIKLGIKSLVSKGTLVQTKGTGASGSFKLNKKASSVETK +PGASKVATKTKATGASKKLKKATGASKKSVKTPKKAKKPAATRKSSKNPKKPKTVKPKKV +AKSPAKAKAVKPKAAKARVTKPKTAKPKKAAPKKK diff --git a/CURATED_SET/draft_seeds/H1.10.fasta b/CURATED_SET/draft_seeds/H1.10.fasta new file mode 100644 index 0000000..b3c3d92 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.10.fasta @@ -0,0 +1,30 @@ +>Caligus|ACO10502.1|H1.10 organism=Caligus rogercresseyi phylum=Arthropoda class=Hexanauplia +MVKSEVEVTINAEEAPV--------ASSLKPAK---K---------KKNKKKKNKPGKYS +VLVLDAVKKLNERSGSSLVKIYNEAKKASWFDEQNGRTYLRYSIRALVLNNTLIQVKGMG +ANGSFRLNEDKFAKGVPKKTQS--KPAKNTTKTAKASTTKKATV-VKAKSSPKKAPDAKM +PAAKLKKLGVKKVSAAQ---K------NKKPKKASKPPAKS-PRKK-- +>Oncorhynchus|ACO07616.1|H1.10 organism=Oncorhynchus mykiss phylum=Chordata class=Actinopteri +MVKSEVDVTINAEEAPV--------ASGPKPAK---K---------KKKKKKKNKPGKYS +VLVLDAVKKLNERSGSSLVKIYNEAKKASWFDEQNGRTYLRYSIRALVLNNTLIQVKGMG +ANGSFRLNEDKFAKEVPKKTQS--KPAKTTTKTAKASTTKKATVKPKAKSSPKKAPDAKK +PAAKMKKLGVKKVIAAQ---K------NKKPKKASKPPAKS-PRKK-- +>Osmerus|ACO09903.1|H1.10 organism=Osmerus mordax phylum=Chordata class=Actinopteri +-MASDTEV-VPAAEAPVAAKSKKRTATKPKPKA---KPATVATSSAKKKKRKGKGPGKYS +VLVVDAIKQLGERNGSSLAKIYNKAREAIWFDQQHGRTYLRYSIRALVLNDTLIQVKGTG +ANGSFKLNKKKFETKAPKKAPTPVKAVKTKAPAKKAKAAIKTKAKPKASPKKKSTPK-KK +PAAKPKKLAAKKATPVKS--K------KPKPKKASKPAAKS-PRKK-- +>Salmo|ACM09455.1|H1.10 organism=Salmo salar phylum=Chordata class=Actinopteri +MVKSEVEVTINAEEAPV--------ASSLKPAK---K---------KKNKKKKNKPGKYS +VLVLDAVKKLNERSGSSLVKIYNEAKKASWFDEQNGRTYLRYSIRALVLNNTLIQVKGMG +ANGSFRLNEDKFAKGVPKKTQS--KPAKNTTKTAKASTTKKATV-VKAKSSPKKAPDAKM +PAAKLKKLGVKKVSAAQ---K------NKKPKKASKPPAKS-PRKK-- +>Xenopus|NP_001080265.1|H1.10 organism=Xenopus laevis phylum=Chordata class=Amphibia +-MALELEENLHSTEEEDEEEEEEEEGDEMRSRSTRNKGGAASSSGNKKKKKKKNQPGRYS +QLVVDTIRKLGERNGSSLAKIYSEAKKVSWFDQQNGRTYLKYSIKALVQNDTLLQVKGVG 
+ANGSFRLNKKKLE-GLPYDKKP--PPAKPSSSSSNKKQQQQGPSSSPSKSHKKAKPKAKA +EKEKPKTSSAKAKSPKKSAAK------GKKMKKGAKPSVRKAPKSKKA +>Homo|NP_006017.1|H1.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-MSVELEEALPVTTAEGMAKKVTKAG----------GSAALSPSKKRKNSKKKNQPGKYS +QLVVETIRRLGERNGSSLAKIYTEAKKVPWFDQQNGRTYLKYSIKALVQNDTLLQVKGTG +ANGSFKLNRKKLEGGGERRGAP--AAATAPAPTAH-KAKKAAPGAAGSRRADKKPARGQK +PEQRSHKKGAGAKKDKGGKAKKTAAAGGKKVKKAAKPSVPKVPKGRK- diff --git a/CURATED_SET/draft_seeds/H1.10_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H1.10_(Homo_sapiens).fasta new file mode 100644 index 0000000..b3ab6e1 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.10_(Homo_sapiens).fasta @@ -0,0 +1,5 @@ +>Homo|NP_006017.1|H1.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSVELEEALPVTTAEGMAKKVTKAGGSAALSPSKKRKNSKKKNQPGKYSQLVVETIRRLG +ERNGSSLAKIYTEAKKVPWFDQQNGRTYLKYSIKALVQNDTLLQVKGTGANGSFKLNRKK +LEGGGERRGAPAAATAPAPTAHKAKKAAPGAAGSRRADKKPARGQKPEQRSHKKGAGAKK +DKGGKAKKTAAAGGKKVKKAAKPSVPKVPKGRK diff --git a/CURATED_SET/draft_seeds/H1.10_only.fasta b/CURATED_SET/draft_seeds/H1.10_only.fasta new file mode 100644 index 0000000..f09763c --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.10_only.fasta @@ -0,0 +1,25 @@ +>Caligus|ACO10502.1|H1.10 organism=Caligus rogercresseyi phylum=Arthropoda class=Hexanauplia +MVKSEVEVTINAEEAPV--------ASSLKPAK---K---------KKNKKKKNKPGKYS +VLVLDAVKKLNERSGSSLVKIYNEAKKASWFDEQNGRTYLRYSIRALVLNNTLIQVKGMG +ANGSFRLNEDKFAKGVPKKTQS--KPAKNTTKTAKASTTKKATV-VKAKSSPKKAPDAKM +PAAKLKKLGVKKVSAAQ---KNKKPKKASKPPAKS-PRKK-- +>Oncorhynchus|ACO07616.1|H1.10 organism=Oncorhynchus mykiss phylum=Chordata class=Actinopteri +MVKSEVDVTINAEEAPV--------ASGPKPAK---K---------KKKKKKKNKPGKYS +VLVLDAVKKLNERSGSSLVKIYNEAKKASWFDEQNGRTYLRYSIRALVLNNTLIQVKGMG +ANGSFRLNEDKFAKEVPKKTQS--KPAKTTTKTAKASTTKKATVKPKAKSSPKKAPDAKK +PAAKMKKLGVKKVIAAQ---KNKKPKKASKPPAKS-PRKK-- +>Osmerus|ACO09903.1|H1.10 organism=Osmerus mordax phylum=Chordata class=Actinopteri 
+-MASDTEV-VPAAEAPVAAKSKKRTATKPKPKA---KPATVATSSAKKKKRKGKGPGKYS +VLVVDAIKQLGERNGSSLAKIYNKAREAIWFDQQHGRTYLRYSIRALVLNDTLIQVKGTG +ANGSFKLNKKKFETKAPKKAPTPVKAVKTKAPAKKAKAAIKTKAKPKASPKKKSTPK-KK +PAAKPKKLAAKKATPVKS--KKPKPKKASKPAAKS-PRKK-- +>Salmo|ACM09455.1|H1.10 organism=Salmo salar phylum=Chordata class=Actinopteri +MVKSEVEVTINAEEAPV--------ASSLKPAK---K---------KKNKKKKNKPGKYS +VLVLDAVKKLNERSGSSLVKIYNEAKKASWFDEQNGRTYLRYSIRALVLNNTLIQVKGMG +ANGSFRLNEDKFAKGVPKKTQS--KPAKNTTKTAKASTTKKATV-VKAKSSPKKAPDAKM +PAAKLKKLGVKKVSAAQ---KNKKPKKASKPPAKS-PRKK-- +>Xenopus|NP_001080265.1|H1.10 organism=Xenopus laevis phylum=Chordata class=Amphibia +-MALELEENLHSTEEEDEEEEEEEEGDEMRSRSTRNKGGAASSSGNKKKKKKKNQPGRYS +QLVVDTIRKLGERNGSSLAKIYSEAKKVSWFDQQNGRTYLKYSIKALVQNDTLLQVKGVG +ANGSFRLNKKKLE-GLPYDKKP--PPAKPSSSSSNKKQQQQGPSSSPSKSHKKAKPKAKA +EKEKPKTSSAKAKSPKKSAAKGKKMKKGAKPSVRKAPKSKKA diff --git a/CURATED_SET/draft_seeds/H1.1_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H1.1_(Homo_sapiens).fasta new file mode 100644 index 0000000..8130291 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.1_(Homo_sapiens).fasta @@ -0,0 +1,5 @@ +>Homo|NP_005316.1|H1.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSETVPPAPAASAAPEKPLAGKKAKKPAKAAAASKKKPAGPSVSELIVQAASSSKERGGV +SLAALKKALAAAGYDVEKNNSRIKLGIKSLVSKGTLVQTKGTGASGSFKLNKKASSVETK +PGASKVATKTKATGASKKLKKATGASKKSVKTPKKAKKPAATRKSSKNPKKPKTVKPKKV +AKSPAKAKAVKPKAAKARVTKPKTAKPKKAAPKKK diff --git a/CURATED_SET/draft_seeds/H1.1_only.fasta b/CURATED_SET/draft_seeds/H1.1_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H1.2.fasta b/CURATED_SET/draft_seeds/H1.2.fasta new file mode 100644 index 0000000..9a60c84 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.2.fasta @@ -0,0 +1,5 @@ +>Homo|NP_005310.1|H1.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSETAPAAPAAAPPAEKAPVKKKAAKKAGGTPRKASGPPVSELITKAVAASKERSGVSLA +ALKKALAAAGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEAKPKV 
+KKAGGTKPKKPVGAAKKPKKAAGGATPKKSAKKTPKKAKKPAAATVTKKVAKSPKKAKVA +KPKKAAKSAAKAVKPKAAKPKVVKPKKAAPKKK diff --git a/CURATED_SET/draft_seeds/H1.2_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H1.2_(Homo_sapiens).fasta new file mode 100644 index 0000000..9a60c84 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.2_(Homo_sapiens).fasta @@ -0,0 +1,5 @@ +>Homo|NP_005310.1|H1.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSETAPAAPAAAPPAEKAPVKKKAAKKAGGTPRKASGPPVSELITKAVAASKERSGVSLA +ALKKALAAAGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEAKPKV +KKAGGTKPKKPVGAAKKPKKAAGGATPKKSAKKTPKKAKKPAAATVTKKVAKSPKKAKVA +KPKKAAKSAAKAVKPKAAKPKVVKPKKAAPKKK diff --git a/CURATED_SET/draft_seeds/H1.2_only.fasta b/CURATED_SET/draft_seeds/H1.2_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H1.3.fasta b/CURATED_SET/draft_seeds/H1.3.fasta new file mode 100644 index 0000000..ad0d511 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.3.fasta @@ -0,0 +1,5 @@ +>Homo|NP_005311.1|H1.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSETAPLAPTIPAPAEKTPVKKKAKKAGATAGKRKASGPPVSELITKAVAASKERSGVSL +AALKKALAAAGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEGKPK +AKKAGAAKPRKPAGAAKKPKKVAGAATPKKSIKKTPKKVKKPATAAGTKKVAKSAKKVKT +PQPKKAAKSPAKAKAPKPKAAKPKSGKPKVTKAKKAAPKKK diff --git a/CURATED_SET/draft_seeds/H1.3_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H1.3_(Homo_sapiens).fasta new file mode 100644 index 0000000..ad0d511 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.3_(Homo_sapiens).fasta @@ -0,0 +1,5 @@ +>Homo|NP_005311.1|H1.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSETAPLAPTIPAPAEKTPVKKKAKKAGATAGKRKASGPPVSELITKAVAASKERSGVSL +AALKKALAAAGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEGKPK +AKKAGAAKPRKPAGAAKKPKKVAGAATPKKSIKKTPKKVKKPATAAGTKKVAKSAKKVKT +PQPKKAAKSPAKAKAPKPKAAKPKSGKPKVTKAKKAAPKKK diff --git a/CURATED_SET/draft_seeds/H1.3_only.fasta b/CURATED_SET/draft_seeds/H1.3_only.fasta new file mode 100644 
index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H1.4.fasta b/CURATED_SET/draft_seeds/H1.4.fasta new file mode 100644 index 0000000..4e34125 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.4.fasta @@ -0,0 +1,5 @@ +>Homo|NP_005312.1|H1.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSETAPAAPAAPAPAEKTPVKKKARKSAGAAKRKASGPPVSELITKAVAASKERSGVSLA +ALKKALAAAGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEAKPKA +KKAGAAKAKKPAGAAKKPKKATGAATPKKSAKKTPKKAKKPAAAAGAKKAKSPKKAKAAK +PKKAPKSPAKAKAVKPKAAKPKTAKPKAAKPKKAAAKKK diff --git a/CURATED_SET/draft_seeds/H1.4_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H1.4_(Homo_sapiens).fasta new file mode 100644 index 0000000..4e34125 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.4_(Homo_sapiens).fasta @@ -0,0 +1,5 @@ +>Homo|NP_005312.1|H1.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSETAPAAPAAPAPAEKTPVKKKARKSAGAAKRKASGPPVSELITKAVAASKERSGVSLA +ALKKALAAAGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEAKPKA +KKAGAAKAKKPAGAAKKPKKATGAATPKKSAKKTPKKAKKPAAAAGAKKAKSPKKAKAAK +PKKAPKSPAKAKAVKPKAAKPKTAKPKAAKPKKAAAKKK diff --git a/CURATED_SET/draft_seeds/H1.4_only.fasta b/CURATED_SET/draft_seeds/H1.4_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H1.5.fasta b/CURATED_SET/draft_seeds/H1.5.fasta new file mode 100644 index 0000000..2b5861d --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.5.fasta @@ -0,0 +1,5 @@ +>Homo|NP_005313.1|H1.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSETAPAETATPAPVEKSPAKKKATKKAAGAGAAKRKATGPPVSELITKAVAASKERNGL +SLAALKKALAAGGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEAK +PKAKKAGAAKAKKPAGATPKKAKKAAGAKKAVKKTPKKAKKPAAAGVKKVAKSPKKAKAA +AKPKKATKSPAKPKAVKPKAAKPKAAKPKAAKPKAAKAKKAAAKKK diff --git a/CURATED_SET/draft_seeds/H1.5_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H1.5_(Homo_sapiens).fasta new file mode 100644 index 0000000..2b5861d --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.5_(Homo_sapiens).fasta 
@@ -0,0 +1,5 @@ +>Homo|NP_005313.1|H1.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSETAPAETATPAPVEKSPAKKKATKKAAGAGAAKRKATGPPVSELITKAVAASKERNGL +SLAALKKALAAGGYDVEKNNSRIKLGLKSLVSKGTLVQTKGTGASGSFKLNKKAASGEAK +PKAKKAGAAKAKKPAGATPKKAKKAAGAKKAVKKTPKKAKKPAAAGVKKVAKSPKKAKAA +AKPKKATKSPAKPKAVKPKAAKPKAAKPKAAKPKAAKAKKAAAKKK diff --git a/CURATED_SET/draft_seeds/H1.5_only.fasta b/CURATED_SET/draft_seeds/H1.5_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H1.6_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H1.6_(Homo_sapiens).fasta new file mode 100644 index 0000000..3efb619 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.6_(Homo_sapiens).fasta @@ -0,0 +1,5 @@ +>Homo|NP_005314.2|H1.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSETVPAASASAGVAAMEKLPTKKRGRKPAGLISASRKVPNLSVSKLITEALSVSQERVG +MSLVALKKALAAAGYDVEKNNSRIKLSLKSLVNKGILVQTRGTGASGSFKLSKKVIPKST +RSKAKKSVSAKTKKLVLSRDSKSPKTAKTNKRAKKPRATTPKTVRSGRKAKGAKGKQQQK +SPVKARASKSKLTQHHEVNVRKATSKK diff --git a/CURATED_SET/draft_seeds/H1.7_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H1.7_(Homo_sapiens).fasta new file mode 100644 index 0000000..556a0df --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.7_(Homo_sapiens).fasta @@ -0,0 +1,6 @@ +>Homo|NP_861453.1|H1.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MEQALTGEAQSRWPRRGGSGAMAEAPGPSGESRGHSATQLPAEKTVGGPSRGCSSSVLRV +SQLVLQAISTHKGLTLAALKKELRNAGYEVRRKSGRHEAPRGQAKATLLRVSGSDAAGYF +RVWKVPKPRRKPGRARQEEGTRAPWRTPAAPRSSRRRRQPLRKAARKAREVWRRNARAKA +KANARARRTRRARPRAKEPPCARAKEEAGATAADEGRGQAVKEDTTPRSGKDKRRSSKPR +EEKQEPKKPAQRTIQ diff --git a/CURATED_SET/draft_seeds/H1.8_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H1.8_(Homo_sapiens).fasta new file mode 100644 index 0000000..dd77862 --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.8_(Homo_sapiens).fasta @@ -0,0 +1,14 @@ +>Homo|NP_001295191.1|H1.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+------------------------------------------------------------ +------------------------------------------------------------ +-------------------MAPATAPRRAGEAKGKGPKKPSEAKEDPPNVGKVKKAAKRP +AKVQKPPPKPGAATEKARKQGGAAKDTRAQSGEARKVPPKPDKAMRAPSSAGGLSRKAKA +KGSRSSQGDAEAYRKTKAESKSSKPTASKVKNGAASPTKKKVVAKAKAPKAGQGPNTKAA +APAKGSGSKVVPAHLSRKTEAPKGPRKAGLPIKASSSKVSSQRAEA +>Homo|NP_722575.1|H1.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAPGSVTSDISPSSTSTAGSSRSPESEKPGPSHGGVPPGGPSHSSLPVGRRHPPVLRMVL +EALQAGEQRRGTSVAAIKLYILHKYPTVDVLRFKYLLKQALATGMRRGLLARPLNSKARG +ATGSFKLVPKHKKKIQPRKMAPATAPRRAGEAKGKGPKKPSEAKEDPPNVGKVKKAAKRP +AKVQKPPPKPGAATEKARKQGGAAKDTRAQSGEARKVPPKPDKAMRAPSSAGGLSRKAKA +KGSRSSQGDAEAYRKTKAESKSSKPTASKVKNGAASPTKKKVVAKAKAPKAGQGPNTKAA +APAKGSGSKVVPAHLSRKTEAPKGPRKAGLPIKASSSKVSSQRAEA diff --git a/CURATED_SET/draft_seeds/H1.fasta b/CURATED_SET/draft_seeds/H1.fasta new file mode 100644 index 0000000..8427def --- /dev/null +++ b/CURATED_SET/draft_seeds/H1.fasta @@ -0,0 +1,570 @@ +>Drosophila|AAZ66580.1|generic_H1 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +---------MSDSAVATSASPVAAPPATV------------------EKKVVQKKASGSA +GTKAKKASAT--PSHPP------------------------TQQMVDASIKNLKERGGSS +LLAIKKYITATYK--CDAQKLAPFIKKYLKSAVVNGKLIQ---TK-GKGASGSFKLS--- +ASA---------KKEKDPKAKSKVLSAEKK-VQSKK-VASKKIGVSSKKTAVGAADKK-- +-----------------------------------PKAKKAVAT---------------- +----------------KKTAENKKTEKAKA--KD-------AKKTGIIKS---------- +-------KPAA---TKAKVT--------------AAKPKA-----------VVAKASKAK +PAVSA-----------KPKKTVKKASVSATAKKPKAKTTAAKK----------------- +------- +>Drosophila|NP_650383.2|generic_H1 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +MKLKPVERNDGSDDESEEEMPNDHPESEDSNMGEEEELPEEDEEEMEEDEEEDRQDGDEV +ETDNLGADRNPYPTPPPDDGSKLVPPDSDNPKSMVPKPKGTLISLALMAIGKLASRSGSS +VQAIMTYLKDNGQEWKDPKKTARLLHRALKLAEANGEVVM---VK-RS-----FKLTDKQ +KNS---------SKAVEKMKAKKQKEKEKK-AKVEK-VLKEKIQKKEAKAKMKEKKASKE 
+K----------------------------------SSKPTERKT---------------- +----------------KQAVKKKKPEDGTKDNPP-------ASKAASSAAAQAMLETSQT +AIPEAGKKPAK---TKVKLQ--------------ADSSEAGKTKKSRKSIGTLAQPKAAR +PKVKAVKKLVAGKGASTPDLSIMEAQATSTPQGATKAKRKRKV----------------- +------- +>Xenopus|NP_001081535.1|generic_H1 organism=Xenopus laevis phylum=Chordata class=Amphibia +---------------MAPKKAVAAPEGGNK-----------------ENAAVKGSSKVKV +KRKSIKLVKT--QSHPP------------------------TLSMVVEVLKKNTERKGTS +VQAIRTRILSAHPT-VDPLRLKFLLRTALNKGLEKGILIRPLNSS-ATGATGRFKLAKPV +KTT---------KAGKENVASENVDPNAEQ-ETQKK-APKKEKKAKTEKEPKGEKTKAVA +K----------------------------------KAKEDSDEK---------------- +----------------PKVAKSKKAKEAKEVDKA-------NKEAKEVDKANKEAKEVDK +ANKEV-KEVDK---APAKKP--------------KAKTEAAKAE-------GGGKAKKEP +PKAKA-----------KDVKAQKDSTDEGAPVKAGKKGKKVTN----------------- +------- +>Gallus|XP_425456.1|generic_H1 organism=Gallus gallus phylum=Chordata class=Aves +---------------MSETAPVAAPAVSAP---------------------GAKAAAKKP +KKAAGGAKARK-PAGPS------------------------VTELITKAVSASKERKGLS +LAALKKALAAGG---YDVEKNNSRIKLGLKSLVSKGTLVQ---TK-GTGASGSFKLN--- +KKP---------GETKEKATKKKPAAKPKK-PA----AKKPAAAAKKPKKAA-------- +-----------------------------------AVKKSPKKA---------------- +----------------KKPAAAA-TKKAAK--SP-------KKATKAGRP---------- +------KKTAK---SPAKAK--------------AVKPKA-------------AKSKAAK +PKAA------------KAKKAATKKK---------------------------------- +------- +>Bos|XP_010816821.1|generic_H1 organism=Bos taurus phylum=Chordata class=Mammalia +---------------MSEVAL-PAPAASTS-----------------PEKPSAGKKAKKP +AKAAAAAKKK--PAGPS------------------------VSELIVQAVSSSKERSGVS +LAALKKALAAAG---YDVEKNNSRIKLGLKSLVGKGTLVQ---TK-GTGASGSFKLN--- +KKV---------ASVDAKPTATKVAT-----------KTKVTSASKKPKKASGAAAAK-- +-----------------------------------KSVKTPKKA---------------- +----------------RKSVLTKKSSK-----SP-------KKP-KAVKP---------- +------KKVAK---SPAKAK--------------AVKPKG-------------AKVKVTK 
+PKTAA-----------KPKKAAPKKK---------------------------------- +------- +>Mus|NP_056601.1|generic_H1 organism=Mus musculus phylum=Chordata class=Mammalia +---------------MSEAAP-AAPAAAPP-----------------AEKAPAKKKAAKK +---PAGVRRK--ASGPP------------------------VSELITKAVAASKERSGVS +LAALKKALAAAG---YDVEKNNSRIKLGLKSLVSKGILVQ---TK-GTGASGSFKLN--- +KKA---------ASGEAKPQAKKAGA-----AK----AKKPAGAAKKPKKATGAATPK-- +K----------------------------------AAKKTPKKA---------------- +----------------KKPAAAAVTKKVAK--SP-------KKA-KVTKP---------- +------KKVKS---A----------------------SKA-------------VKPKAAK +PKVA------------KAKKVAAKKK---------------------------------- +------- +>Mus|NP_085112.1|generic_H1 organism=Mus musculus phylum=Chordata class=Mammalia +---------------MSETAP-VAQAASTA-----------------TEKPAAAKKTKKP +AK-AAAPRKK--PAGPS------------------------VSELIVQAVSSSKERSGVS +LAALKKSLAAAG---YDVEKNNSRIKLGLKSLVNKGTLVQ---TK-GTGAAGSFKLN--- +------------KKAESKAITTKVSV-----------KAKASGAAKKPKKTAGAAAKK-- +------------------------------------TVKTPKKP---------------- +----------------KKPAVSKKTSK-----SP-------KKP-KVVKA---------- +------KKVAK---SPAKAK--------------AVKPKA-------------SKAKVTK +PKTPA-----------KPKKAAPKKK---------------------------------- +------- +>Rattus|NP_579819.1|generic_H1 organism=Rattus norvegicus phylum=Chordata class=Mammalia +---------------MSETAP-AAPAAPAP-----------------AEKTPIKKKARKA +---AGGAKRK--ASGPP------------------------VSELITKAVAASKERSGVS +LAALKKALAAAG---YDVEKNNSRIKLGLKSLVSKGTLVQ---TK-GTGASGSFKLN--- +KKA---------ASGEAKPKAKKAGA-----AK----AKKPAGAAKKPKKATGTATPK-- +K----------------------------------STKKTPKKA---------------- +----------------KKPAAAAGAKKAK---SP-------KKA-KATKA---------- +------KKAPK---SPAKAR--------------AVKPKA-------------AKPKTSK +PKAA------------KPKKTAAKKK---------------------------------- +------- +>Strongylocentrotus|NP_999714.1|generic_H1 organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea 
+---------------MAEKNS------------------------------SKKVTTKKP +------------AAHPP------------------------AAEMVATAITELKDRNGSS +LQAIKKYIATNFD--VQMDRQLLFIKRALKSGVEKGKLVQ---TK-GKGASGSFKVN--- +VQA---------AKAQASEKAKKEKEKAKL-LAQRE-KAKEKGCSEEGETAEGSRPKKVK +A----------------------------------APKKAKKPV---------------- +----------------KKTTEKKEKKKTPKK-AP-------KKP-AAKKS---------- +-------TPKK---TPKKAA--------------AKKPKT-------------AKPKKPA +XKKAA-----------KSK----------------------------------------- +------- +>Strongylocentrotus|NP_999720.1|generic_H1 organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +---------------MSAAKP------------------------------KVAKKARVA +------------PAHPP------------------------SSQMVVAAVTALKERGGSS +TQAIKKYIAANYT--VDMTKQGPFIRRALVKGVASGALVQ---TK-GKGASGSFKL---- +GKK---------KEGKSDAQKARIAAKKAK-LAAKKKEQREKKALKTKARKEKVAAKKAA +K----------------------------------KATKKTKKV---------------- +----------------KKPAAKKAKKPAAKKPAA-------KKP-AAKKA---------- +------KKPAKKVAKPAKKA--------------AAKP---------------AKKAAKP +AKKAA-----------KPAKKAAKPAKK-------------------------------- +------- +>Caenorhabditis|O17536.3|generic_H1 organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +----MSDVAVAADTTETPAAPTKASKATKA-----------------SKATKASKATKAK +TTKVPMVKAD--AAHPP------------------------FINMVTEAISSIKDRKGPS +RAAILKYITTKYTLGDQANKINAHLRKALNKGLESNAFVQ---AS-GNGANGRFRLAE-- +KTA---------SVAKSPAAAKKDATGEKK-ATT---TVAKKAATGEKKATTTVAKKAAT +G----------------------------------EKKATTTVA---------------- +----------------KKAAAGDKAKK-----TE-------VKVKKVKSP---------- +------KKIAK---SPVNKV--------------TKSPVKKIA--------KSSSMKAAP +KKAAA-----------KPAKKAPAAAPEA------------------------------- +------- +>Arabidopsis|NP_172161.1|generic_H1 organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MSEVEIENAATIEGNTAADAPVTDAAVEKKPAAK-------------GRKTKNVKEVKEK +KTVAAAPKKRTVSSHPT------------------------YEEMIKDAIVTLKERTGSS 
+QYAIQKFIEEKRK--ELPPTFRKLLLLNLKRLVASGKLVK---VK-AS-----FKLPSAS +AKA---------SSPKAAAEKSAPAKKKPA-TVAVT-KAKRKVAAASKAKKTIAVKPKTA +A----------------------------------AKKVTAKAK---------------- +----------------AKPVPRATAAATKRKAVD-------AKPKAKARPAKA------- +------AKTAK-VTSPAKKA--------------VAATKKVA---------TVATKKKTP +VKKVV-----------KPKTVKSPAKRASSRVKK-------------------------- +------- +>Zea|P23444.2|generic_H1 organism=Zea mays phylum=Streptophyta class=Magnoliopsida +-MATDVTETPAPLVDAAPEAPADAPAAPAA-----------------DANAAKAKKATAP +KKRAS-------PTHLP------------------------YAEMVSEAITSLKERTGSS +SYAIAKFVEDKHKA-KLPPNFRKLLNVQLKKLVAGGKLTK---VK-NS-----YKLSSAT +KPN---------PKPKAAPKKPKTGAKKPK-AAAKP-KAKTPAKAKPATKPKPAAKPKAV +V----------------------------------KPKTPAKPK---------------- +----------------AKPAAKAKPKTAGAKPKP-------LAK-KAGRA---------- +-------KAAK---TSAKDT--------------PGK--------------KAPAKKAAP +SKKAA-----------TPVRKAPSRKAKK------------------------------- +------- +>Thalassiosira|EED88841.1|H1.0 organism=Thalassiosira pseudonana CCMP1335 phylum=Bacillariophyta class=Coscinodiscophyceae +------------------------------------------------------------ +---------------MS------------------------YKAGIAKAITELKDRTGSS +SIAIKKHMQANLP--ADKKWMNATFLKALKDMVASGELVK---TK------ASYKLSA-- +---------------------VAKQKASSA-GKPKK-APKKKA----------------- +--------------------------------------APKKTA---------------- +----------------PKKKAAPKKKT-----AT-------KKAATAKKP---------- +---------------------------------------------------AAKKATTAK +---KS-----------TTKKTAKK------------------------------------ +------- +>Esox|XP_010887142.1|H1.0 organism=Esox lucius phylum=Chordata class=Actinopteri +-------------MAETVAAPAP--------------------------KAKKAKAPKKP +------------ASHPK------------------------YSDMIKAAVQADKSRGGAS +RQSVQKYIKSHYK--VGDN-ADSQIKLSLKRMVSGGLLRH---TK-GIGASGSFKLAKAE +DTK---------KAPKPKPVVKAKKSPVKA-AKPKK-VAKPKKVVKSPAKAKKAKVAVKK +V----------------------------------KK-SPKKVA---------------- 
+----------------PKPKKVVK-KV-----KA-------AKPAKAVKP---------- +-----------------------------------------------------KKAKAAK +PKPKA-----------AAKKAAKKK----------------------------------- +------- +>Salmo|ACH70944.1|H1.0 organism=Salmo salar phylum=Chordata class=Actinopteri +-------------MAETAAAPAP--------------------------KAKKAKAPKKP +------------ASHPK------------------------YSDMIKAAVHADKSRGGAS +RQSVQKYIKSHYK--VGDN-ADSQIKLSLKRMVSEGVLRH---TK-GIGASGSFKLAKAE +DTK---------KAPKVKAVVKAKKSPVKS-AKPKK-VAKPKKVAKSPAKAKKAKVAVKK +V----------------------------------KK-SPKKAA---------------- +----------------PKPKKVAK-KT-----KV-------AKPAKATKP---------- +-----------------------------------------------------KKAKAAK +PKPKA-----------AAKKAAKKK----------------------------------- +------- +>Salmo|ACM08534.1|H1.0 organism=Salmo salar phylum=Chordata class=Actinopteri +------------------------------------------------------------ +--------------------------------------------MIKAAVHADKSRGGAS +RQSVQKYIKSHYK--VGDN-ADSQIKLSLKRMVSEGVLRH---TK-GIGASGSFKLAKAE +DTK---------KAPKVKAVVKAKKSPVKS-AKPKK-VAKPKKVAKSPAKAKKAKVAVKK +V----------------------------------KK-SPKKAA---------------- +----------------PKPKKVVK-KT-----KV-------AKPAKATKP---------- +-----------------------------------------------------KKAKAAK +PKPKA-----------AAKKAAKKK----------------------------------- +------- +>Salmo|ACM09660.1|H1.0 organism=Salmo salar phylum=Chordata class=Actinopteri +------------------------------------------------------------ +--------------------------------------------MIKAAVHADKSRGGAS +RQSVQKYIKSHYK--VGDN-ADSQIKLSLKRMVSEGVLRH---TK-GIGASGSFKLAKAE +DTK---------KAPKVKAVVKAKKSPVKS-AKPKK-VAKPKKVAKSPAKAKKAKVAVKK +V----------------------------------KK-SPKKAA---------------- +----------------PKPKKVAK-KT-----KV-------AKPAKATKP---------- +-----------------------------------------------------KKAKAAK +PKPKA-----------AAKKAAKKK----------------------------------- +------- +>Xenopus|NP_998836.1|H1.0 organism=Xenopus 
tropicalis phylum=Chordata class=Amphibia +------------MTENSAAAPAG--------------------------KPKRSKASKKA +------------TDHPK------------------------YSDMILAAVQAEKSRSGSS +RQSIQKYIKNHYK--VGEN-ADSQIKLSIKRLVTSGTLKQ---TK-GVGASGSFRLAKAD +EGK---------KPAK-----KPKKEIKKA-ASPKK-AAKPKKAAKSPAKAKKPKVAEKK +V----------------------------------KKPAKKKPA---------------- +----------------PSPKKAKKTKT-----VK-------AKPVRASRV---------- +-----------------------------------------------------KKAKPSK +PKAKA-----------SPKKSGRKK----------------------------------- +------- +>Cairina|P06513.2|H1.0 organism=Cairina moschata phylum=Chordata class=Aves +------------MTDSPIPAPAPAA------------------------KPKRAKAPRKP +------------ASHPS------------------------YSEMIVAAIRAEKSRGGSS +RQSIQKYVKSHYK--VGQH-ADLQIKLSIRRLLAAGVLKQ---TK-GVGASGSYRLAKGD +KAK---------KSPAGRK--KKKKAARRS-TSPRK-AARPRK---ARSPAKKPKAA--- +A----------------------------------RK-ARKKSR---------------- +----------------ASPKKAKKPKT-----VK-------AKSLKTSKV---------- +-----------------------------------------------------KKAKRSK +PRAKS-----------GARKSPKKK----------------------------------- +------- +>Gallus|NP_001038138.1|H1.0 organism=Gallus gallus phylum=Chordata class=Aves +------------MTESLVLSPAPA-------------------------KPKRVKASRRS +------------ASHPT------------------------YSEMIAAAIRAEKSRGGSS +RQSIQKYIKSHYK--VGHN-ADLQIKLSIRRLLAAGVLKQ---TK-GVGASGSFRLAKSD +KAK---------RSPG-----KKKKAVRRS-TSPKK-AARPRK---ARSPAKKPKAT--- +A----------------------------------RK-ARKKSR---------------- +----------------ASPKKAKKPKT-----VK-------AKSRKASKA---------- +-----------------------------------------------------KKVKRSK +PRAKS-----------GARKSPKKK----------------------------------- +------- +>Taeniopygia|XP_004175972.1|H1.0 organism=Taeniopygia guttata phylum=Chordata class=Aves +------------MTRLPSMCKASSSLMSCVHLCA-------------FPPPKRARSARRP +------------AAHPA------------------------YSDMVTAAVRADKSRGGAS +RQSIQKYVKSNYK--VGQN-ADVQIRLAIRRLLAAGVLKQ---TK-GVGASGSFRLAKAG 
+KAK---------RSPSR----KRKKAARRS-TSPRK-TARSRK---ARSPAKKPKSA--- +A----------------------------------RK-ARKKSR---------------- +----------------S-PKKAKKPKT-----VK-------AKSLKASKP---------- +-----------------------------------------------------KKARRSK +SRAKS-----------GARKSPKKK----------------------------------- +------- +>Bos|NP_001069955.1|H1.0 organism=Bos taurus phylum=Chordata class=Mammalia +------------MTENSTSTPAA--------------------------KPKRAKASKKS +------------TDHPK------------------------YSDMIVAAIQAEKNRAGSS +RQSIQKYIKSHYK--VGEN-ADSQIKLSIKRLVTTGVLKQ---TK-GVGASGSFRLAKSD +EPK---------RSVAFK---KTKKEVKKV-ATPKK-AAKPKKAA-SKAPSKKPKATP-- +V----------------------------------KK-AKKKPA---------------- +----------------ATPKKTKKPKT-----VK-------AKPVKASKP---------- +-----------------------------------------------------KKTKPVK +PKAKS-----------SAKRTGKKK----------------------------------- +------- +>Mus|NP_032223.2|H1.0 organism=Mus musculus phylum=Chordata class=Mammalia +------------MTENSTSAPAA--------------------------KPKRAKASKKS +------------TDHPK------------------------YSDMIVAAIQAEKNRAGSS +RQSIQKYIKSHYK--VGEN-ADSQIKLSIKRLVTTGVLKQ---TK-GVGASGSFRLAKGD +EPK---------RSVAFK---KTKKEVKKV-ATPKK-AAKPKKAA-SKAPSKKPKATP-- +V----------------------------------KK-AKKKPA---------------- +----------------ATPKKAKKPKV-----VK-------VKPVKASKP---------- +-----------------------------------------------------KKAKTVK +PKAKS-----------SAKRASKKK----------------------------------- +------- +>Pongo|NP_001127680.1|H1.0 organism=Pongo abelii phylum=Chordata class=Mammalia +------------MTENSTSAPAA--------------------------KPKRAKASKKS +------------TDHPK------------------------YSDMVVAAIQAEKNRAGSS +RQSIQKYIKSHYK--VGEN-ADSQIKLSIKRLVTTGVLKQ---TK-GVGASGSFRLAKSD +EPK---------KSVAFK---KTKKEIKKV-ATPKK-ASKPKKAA-SKAPTKKPKATP-- +V----------------------------------KK-AKKKLA---------------- +----------------ATPKKAKKPKT-----VK-------AKPVKASKP---------- +-----------------------------------------------------KKAKPVK 
+PKAKS-----------SAKRAGKKK----------------------------------- +------- +>Rattus|NP_036710.1|H1.0 organism=Rattus norvegicus phylum=Chordata class=Mammalia +------------MTENSTSTPAA--------------------------KPKRAKAAKKS +------------TDHPK------------------------YSDMIVAAIQAEKNRAGSS +RQSIQKYIKSHYK--VGEN-ADSQIKLSIKRLVTTGVLKQ---TK-GVGASGSFRLAKGD +EPK---------RSVAFK---KTKKEVKKV-ATPKK-AAKPKKAA-SKAPSKKPKATP-- +V----------------------------------KK-AKKKPA---------------- +----------------ATPKKAKKPKI-----VK-------VKPVKASKP---------- +-----------------------------------------------------KKAKPVK +PKAKS-----------SAKRASKKK----------------------------------- +------- +>Strongylocentrotus|NP_999722.1|H1.0 organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +----------MADTDAAPAAPAPSTPKKA--------------------AKKKASKPKTP +------------ASHPK------------------------YSDMIASALESLKEKKGSS +RQAILKYVKANFT--VGDN-ANVHIKQALKRGVTSGQLRH---VK-GSGASGSFLLAEKT +K--------------------TPKKAAAKK-ATPKK-KPAAKK---TKKPA--------- +A----------------------------------KK-ATKKPA---------------- +----------------KKP--AAKKKV-----AK-------PAAKKAAKP---------- +---------------------------------------------------VAKKATPKK +KVVKK-----------AAKGKGKKK----------------------------------- +------- +>Homo|NP_005309.1|H1.0_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------MTENSTSAPAA--------------------------KPKRAKASKKS +------------TDHPK------------------------YSDMIVAAIQAEKNRAGSS +RQSIQKYIKSHYK--VGEN-ADSQIKLSIKRLVTTGVLKQ---TK-GVGASGSFRLAKSD +EPK---------KSVAFK---KTKKEIKKV-ATPKK-ASKPKKAA-SKAPTKKPKATP-- +V----------------------------------KK-AKKKLA---------------- +----------------ATPKKAKKPKT-----VK-------AKPVKASKP---------- +-----------------------------------------------------KKAKPVK +PKAKS-----------SAKRAGKKK----------------------------------- +------- +>Homo|NP_005316.1|H1.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+------------MSETVPPAPAASAA---------------------PEKPLAGKKAKKP +AKAAAASKKK--PAGPS------------------------VSELIVQAASSSKERGGVS +LAALKKALAAAG---YDVEKNNSRIKLGIKSLVSKGTLVQ---TK-GTGASGSFKLN--- +KKA---------SSVETKPGASKVAT-----------KTKATGASKKLKKATGASKKSV- +--------------------------------------KTPKKA---------------- +----------------KKPAATRKSSKNPKKPKT-------VKPKKVAKS---------- +---------------PAKAK--------------AVKPKA-------------AKARVTK +PKTA------------KPKKAAPKKK---------------------------------- +------- +>Caligus|ACO10502.1|H1.10 organism=Caligus rogercresseyi phylum=Arthropoda class=Hexanauplia +----------MVKSEVEVTINAEEAPV--------AS----------SLKPAK---K--- +------KKNKKKKNKPGK-----------------------YSVLVLDAVKKLNERSGSS +LVKI--YNEAKKASWFDEQNGRTYLRYSIRALVLNNTLIQ---VK-GMGANGSFRLNEDK +FAK---------GVPKKTQS--KPAKNTTK-TAKAS-TTKKATV-VKAKSSPKKAPDAKM +P----------------------------------AAKLKKLGV---------------- +----------------KKVSAAQ---K------N-------KKPKKASKP---------- +------------------------------------------------------PAKS-P +RKK--------------------------------------------------------- +------- +>Oncorhynchus|ACO07616.1|H1.10 organism=Oncorhynchus mykiss phylum=Chordata class=Actinopteri +----------MVKSEVDVTINAEEAPV--------AS----------GPKPAK---K--- +------KKKKKKKNKPGK-----------------------YSVLVLDAVKKLNERSGSS +LVKI--YNEAKKASWFDEQNGRTYLRYSIRALVLNNTLIQ---VK-GMGANGSFRLNEDK +FAK---------EVPKKTQS--KPAKTTTK-TAKAS-TTKKATVKPKAKSSPKKAPDAKK +P----------------------------------AAKMKKLGV---------------- +----------------KKVIAAQ---K------N-------KKPKKASKP---------- +------------------------------------------------------PAKS-P +RKK--------------------------------------------------------- +------- +>Osmerus|ACO09903.1|H1.10 organism=Osmerus mordax phylum=Chordata class=Actinopteri +-----------MASDTEV-VPAAEAPVAAKSKKRTAT----------KPKPKA---KPAT +VATSSAKKKKRKGKGPGK-----------------------YSVLVVDAIKQLGERNGSS +LAKI--YNKAREAIWFDQQHGRTYLRYSIRALVLNDTLIQ---VK-GTGANGSFKLNKKK 
+FET---------KAPKKAPTPVKAVKTKAP-AKKAK-AAIKTKAKPKASPKKKSTPK-KK +P----------------------------------AAKPKKLAA---------------- +----------------KKATPVKS--K------K-------PKPKKASKP---------- +------------------------------------------------------AAKS-P +RKK--------------------------------------------------------- +------- +>Salmo|ACM09455.1|H1.10 organism=Salmo salar phylum=Chordata class=Actinopteri +----------MVKSEVEVTINAEEAPV--------AS----------SLKPAK---K--- +------KKNKKKKNKPGK-----------------------YSVLVLDAVKKLNERSGSS +LVKI--YNEAKKASWFDEQNGRTYLRYSIRALVLNNTLIQ---VK-GMGANGSFRLNEDK +FAK---------GVPKKTQS--KPAKNTTK-TAKAS-TTKKATV-VKAKSSPKKAPDAKM +P----------------------------------AAKLKKLGV---------------- +----------------KKVSAAQ---K------N-------KKPKKASKP---------- +------------------------------------------------------PAKS-P +RKK--------------------------------------------------------- +------- +>Xenopus|NP_001080265.1|H1.10 organism=Xenopus laevis phylum=Chordata class=Amphibia +-----------MALELEENLHSTEEEDEEEEEEEEGD----------EMRSRSTRNKGGA +ASSSGNKKKKKKKNQPGR-----------------------YSQLVVDTIRKLGERNGSS +LAKI--YSEAKKVSWFDQQNGRTYLKYSIKALVQNDTLLQ---VK-GVGANGSFRLNKKK +LE----------GLPYDKKP--PPAKPSSS-SSNKK-QQQQGPSSSPSKSHKKAKPKAKA +E----------------------------------KEKPKTSSA---------------- +----------------KAKSPKKSAAK------G-------KKMKKGAKP---------- +------------------------------------------------------SVRKAP +KSKKA------------------------------------------------------- +------- +>Homo|NP_006017.1|H1.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-----------MSVELEEALPVTTAEGMAKKVTKAG--------------------GSAA +LSPSKKRKNSKKKNQPGK-----------------------YSQLVVETIRRLGERNGSS +LAKI--YTEAKKVPWFDQQNGRTYLKYSIKALVQNDTLLQ---VK-GTGANGSFKLNRKK +LEG---------GGERRGAP--AAATAPAP-TAH-K-AKKAAPGAAGSRRADKKPARGQK +P----------------------------------EQRSHKKGA---------------- +----------------GAKKDKGGKAKKTAAAGG-------KKVKKAAKP---------- 
+------------------------------------------------------SVPKVP +KGRK-------------------------------------------------------- +------- +>Homo|NP_005310.1|H1.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------MSETAPAAPAAAPPA--------------------EKAPVKKKAAKKA +GGTPRK------ASGPP------------------------VSELITKAVAASKERSGVS +LAALKKALAAAG---YDVEKNNSRIKLGLKSLVSKGTLVQ---TK-GTGASGSFKLN--- +KKA---------ASGEAKPKVKKAGGTKPK-KPVGA-AKKPKKAAGGATPKKSAKKTPKK +A----------------------------------KKPAAATVT---------------- +----------------KKVAKSPKKAK------V-------AKPKKAAKS---------- +-------------------------------------------------AAKAVKPKAAK +PKVV------------KPKKAAPKKK---------------------------------- +------- +>Homo|NP_005311.1|H1.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------MSETAPLAPTIPAP---------------------AEKTPVKKKAKKA +GATAGKRK----ASGPP------------------------VSELITKAVAASKERSGVS +LAALKKALAAAG---YDVEKNNSRIKLGLKSLVSKGTLVQ---TK-GTGASGSFKLN--- +KKA---------ASGEGKPKAKKAGAAKPR-KPAGA-AKKPKKVAGAATPKKSIKKTPKK +V----------------------------------KKPATAAGT---------------- +----------------KKVAKSAKKVK------T-------PQPKKAAKS---------- +---------------PAKAK--------------APKPKA-------------AKPKSGK +PKVT------------KAKKAAPKKK---------------------------------- +------- +>Homo|NP_005312.1|H1.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------MSETAPAAPAAPAPA--------------------EKTPVKKKARKSA +GAAKRK------ASGPP------------------------VSELITKAVAASKERSGVS +LAALKKALAAAG---YDVEKNNSRIKLGLKSLVSKGTLVQ---TK-GTGASGSFKLN--- +KKA---------ASGEAKPKAKKAGAAK---------AKKPAGAAKKPKKATGAATPKKS +A----------------------------------KKTPKKAKK---------------- +----------------PAAAAGAKKAKSPKKAKA-------AKPKKAPKS---------- +---------------PAKAK--------------AVKPKA-------------AKPKTAK +PKAA------------KPKKAAAKKK---------------------------------- +------- +>Homo|NP_005313.1|H1.5_(Homo_sapiens) organism=Homo sapiens 
phylum=Chordata class=Mammalia +------------MSETAPAETATPAPV--------------------EKSPAKKKATKKA +AGAGAAKRK---ATGPP------------------------VSELITKAVAASKERNGLS +LAALKKALAAGG---YDVEKNNSRIKLGLKSLVSKGTLVQ---TK-GTGASGSFKLN--- +KKA---------ASGEAKPKAKKAGAAKAK-KPAGATPKKAKKAAGAKKAVKKTPKKAKK +P----------------------------------AAAGVKKVA---------------- +----------------KSPKKAKAAAKPKKATKS-------PAKPKAVKP---------- +-------------------K--------------AAKPKA-------------AKPKAAK +PKAA------------KAKKAAAKKK---------------------------------- +------- +>Mus|NP_612184.1|OO_H1.8 organism=Mus musculus phylum=Chordata class=Mammalia +MAPGSVSSVSSSSFPSRDTSPSGSCGLP-------------------GADKP-------- +--GPSCRRIQAGQRNPT------------------------MLHMVLEALKAREARQGTS +VVAIKVYIQHKYPT-VDTTRFKYLLKQALETGVRRGLLTRPAHSK-AKGATGSFKLVPKP +KTK----KACAPKAGRGAAGAKETGSKKSG-LLKKDQVGKATMEKGQKRRAY-----PCK +A----------------------------------ATLEMAPKK---------------- +----------------------AKAKPKEVRKAP-------LKQDKAAGAPLTANG-GQK +VKRSGSRQ---EANAHGKTK--------------GE--KSKPLASKVQNSVASLAKRKMA +DMAHTVTVVQGAETVQETKVPTPSQDIGHKVQPIPRVRKAKTP----------------- +--ENTQA +>Homo|NP_001295191.1|H1.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------MAPATAPRRAGEAKGKGPKKPS-EAKEDPPNVGKVKKAAKRPAKVQKPPPKP +G----------------------------------AATEKARKQ---------------- +----------------GGAAKDTRAQSGEARKVP-------PKPDKAMRAPSSAGGLSRK +AKAKGSRSSQGDAEAYRKTK--------------AESKSSKPTASKVKNGAASPTKKKVV +AKAKAPKAGQGP----NTKAAAPAKGSGSKVVPAHLSRKTEAPKGPRKAGLPIKASSSKV +SSQRAEA +>Homo|NP_722575.1|H1.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAPGSVTSDISPS----STSTAGSSRSP-------------------ESEKPGPSHGGVP +PGGPSHSSLPVGRRHPP------------------------VLRMVLEALQAGEQRRGTS +VAAIKLYILHKYPT-VDVLRFKYLLKQALATGMRRGLLARPLNSK-ARGATGSFKLVPKH 
+KKKIQPRKMAPATAPRRAGEAKGKGPKKPS-EAKEDPPNVGKVKKAAKRPAKVQKPPPKP +G----------------------------------AATEKARKQ---------------- +----------------GGAAKDTRAQSGEARKVP-------PKPDKAMRAPSSAGGLSRK +AKAKGSRSSQGDAEAYRKTK--------------AESKSSKPTASKVKNGAASPTKKKVV +AKAKAPKAGQGP----NTKAAAPAKGSGSKVVPAHLSRKTEAPKGPRKAGLPIKASSSKV +SSQRAEA +>Saccharomyces|XP_011105792.1|scH1 organism=Saccharomyces arboricola H-6 phylum=Ascomycota class=Saccharomycetes +---------------MAPKKTSTKTTTTNK-----------------GKKPVTSKGKDKP +VIKTAVKKNAAKKEEPSSKS---------------------YKELIVEGLAALKERKGSS +RPALKKFIKENYPLVGSTSNFDLYFNNAIKKGVETGDFEQ------PKGPAGTLKLAKKK +SPE------LK--KETSPKPKQAAAATTTT-TTTTPTSLKAKAKTASKKQAPKKVVKKKV +P----------------------------------AVAVIPKKT---------------- +----------------SSPSALTYKEMILKSMPE-------LNDGKGSSR---------- +------IVLKKYVKDTFSSK--------------LKTSSNFDYLFNSAIKKCVENGELVQ +PKGPS-----------GIIKINKKKAKLST------------------------------ +------- +>Saccharomyces|NP_015198.1|scH1 organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +---------------MAPKKSTTKTTS--K-----------------GKKPATSKGKEKS +TSKAAIKKTTAKKEEASSKS---------------------YRELIIEGLTALKERKGSS +RPALKKFIKENYPIVGSASNFDLYFNNAIKKGVEAGDFEQ------PKGPAGAVKLAKKK +SPE------VKKEKEVSPKPKQAATSVSAT-AS--------KAKAASTKLAPKKVVKKKS +P----------------------------------TVTA--KKA---------------- +----------------SSPSSLTYKEMILKSMPQ-------LNDGKGSSR---------- +------IVLKKYVKDTFSSK--------------LKTSSNFDYLFNSAIKKCVENGELVQ +PKGPS-----------GIIKLNKKKVKLST------------------------------ +------- +>Salmo|ACM09522.1|TS_H1.6 organism=Salmo salar phylum=Chordata class=Actinopteri +----------------MSGVIAIPLATPA-------------------TTPKKRSKPKK- +-------------TGPT------------------------VSDRILKVVSASSGRSGVS +LAALKKSLAASG---YDVVKNNARLKLAVRRLVAKGYLLQ---PK-GTGASGSFKINKNK +--------------AVAKKKRPTKNKVKK---VGAKKVRRA--SPKKAAGAKKSPKKTKR +---------------------------------------KSPKK---------------- 
+----------------AKRPAAAKKPKS-------------PRKTKRRVA---------- +-----------------------------------------------------KSTR--- +-----------------AKTAPKKK----------------------------------- +------- +>Xenopus|NP_001087957.1|TS_H1.6 organism=Xenopus laevis phylum=Chordata class=Amphibia +------------MAATTESAPVAPPAEPA------------------AAKKTKKQQPKKV +AG-GAKAKK---PSGPS------------------------ASELIVKAVSASKERSGVS +LAALKKALAAGG---YDVDKNNSRLKLALKALVTKGTLTQ---VK-GSGASGSFKLNKKQ +-PE---------TKDKAAKKKPAAPKAKKP-AAGAKKAPKSPKKPKKVSAAAKSPKKVKK +P----------------------------------AKAAKSPKK---------------- +----------------PKAVKAKKVAKSPAKKATKPKTAKSPAKAKVAKP---------- +-----------------------------------------------------KAAK--- +-----------------AKKPAPKK----------------------------------- +------- +>Canis|XP_005640152.1|TS_H1.6 organism=Canis lupus familiaris phylum=Chordata class=Mammalia +------------MSETVPAVAAGTALASM-------------------ENPSAKKRGRKP +GGIPEAAPK---APGLS------------------------VSKLIMEALSVSQERAGMS +LAALKKALAAAG---YDVEKNNSRIKLCLKSLVSKGTLVQ---TK-GTGASGSFKLNKKA +LLP---------TPAKSRVKRPPSTKTKR---LVLSRDSKSPKAAKTNK-AKKP--GGAG +A----------------------------------QKAACSGRK---------------- +----------------AKGAKDKQPRKS-------------PGKAPTGKP---------- +-----------------------------------------------------KAAKPRL +NQQKV-----------NPRKAVSKK----------------------------------- +------- +>Equus|NP_001243880.1|TS_H1.6 organism=Equus caballus phylum=Chordata class=Mammalia +------------MSETAPAAPAEPVLSSM-------------------EKPPAKKRGKKP +VGLTGGSRK---VPGSS------------------------VSKLITEALSVSQERAGMS +LAALKKALAAAG---YDVEKNNRRIKLGLKSLVSKGTLVQ---TR-GTGASGSFKLSKKA +-TP---------EPAKGRVKKGASANAKK---LVLPKGSKSPKSAKTNKRTSKA--RTPA +A----------------------------------QPSARGGRK---------------- +----------------SKGAKGKQQLKS-------------PGKGRTGKP---------- +-----------------------------------------------------KTGKPKL +TQQRT-----------NPRKTASKK----------------------------------- +------- 
+>Macaca|NP_001074230.1|TS_H1.6 organism=Macaca mulatta phylum=Chordata class=Mammalia +------------MSETVPAASAGAVPAVM-------------------EKPLTKKRGKKP +AGLTSASRK---APNLS------------------------VSKLITEALSVSQERVGMS +LAALKKALAAAG---YDVEKNNSRIKLSLKSLVNKGILVQ---TR-GTGASGSFKLSKKV +-LP---------KSTRRKANKSASAKTKK---LVLSRDSKSPKTAKTNKRAKKP--RATA +P----------------------------------KKAVRSGRK---------------- +----------------AKGAKGKQQQKS-------------PVKARATKP---------- +-----------------------------------------------------KLTQ--- +-HHKA-----------NIRKATSRK----------------------------------- +------- +>Mus|NP_034507.2|TS_H1.6 organism=Mus musculus phylum=Chordata class=Mammalia +------------MSETAPAASSTLVPAPV------------------EEKPSSKRRGKKP +G--LAPARK---PRGFS------------------------VSKLIPEALSTSQERAGMS +LAALKKALAAAG---YDVEKNNSRIKLALKRLVNKGVLVQ---TK-GTGASGSFKLSKKA +-AS---------GNDKGKGKKSASAKAKK---MGLPRASRSPKSSKTKA-VKKP--KAT- +P----------------------------------TKASGSGRK---------------- +----------------TKGAKGVQQRKS-------------PAKARAANP---------- +-----------------------------------------------------NSGKAKM +VMQKT-----------DLRKAAGRK----------------------------------- +------- +>Rattus|NP_036711.1|TS_H1.6 organism=Rattus norvegicus phylum=Chordata class=Mammalia +------------MSETAPAASSTLVPAPV-------------------EKPATKRRGKKP +G--MATARK---PRGFS------------------------VSKLIPEALSMSQERAGMS +LAALKKALAAAG---YDVEKNNSRIKLALKRLVNKGVLVQ---TK-GTGASGSFKLSKKA +-AS---------GNDKGKGKKSASAKAKK---LGLSRASRSPKSSKTKV-VKKP--KAT- +P----------------------------------TKGSGSRRK---------------- +----------------TKGAKGLQQRKS-------------PAKARATNS---------- +-----------------------------------------------------NSGKSKM +VMQKT-----------DLRKAAGRK----------------------------------- +------- +>Homo|NP_005314.2|H1.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------MSETVPAASASAGVAAM-------------------EKLPTKKRGRKP +AGLISASRK---VPNLS------------------------VSKLITEALSVSQERVGMS 
+LVALKKALAAAG---YDVEKNNSRIKLSLKSLVNKGILVQ---TR-GTGASGSFKLSKKV +-IP---------KSTRSKAKKSVSAKTKK---LVLSRDSKSPKTAKTNKRAKKP--RATT +P-----------------------------------KTVRSGRK---------------- +----------------AKGAKGKQQQKS-------------PVKARASKS---------- +-----------------------------------------------------KLTQ--- +-HHEV-----------NVRKATSKK----------------------------------- +------- +>Mus|NP_081580.2|TS_H1.7 organism=Mus musculus phylum=Chordata class=Mammalia +---------------------MAEAVQPS------------------GESQGAELTIQIQ +QPAERALRTPAKRGTQS------------------------VLRVSQLLLRAIAGHQHLT +LDALKKELGNAG---YEVRREISSHHEGKSTRLEKGTLLR---VS-GSDAAGYFRVWKIS +KPR--------EKAGQSRLTLGSHSSGKTVLKSPRPLRPRSR--RKAAKKAREVWRRKAR +ALKARSRRVRTRSTSGARSRTRSRASSRATSRATSRARSRARSRAQSSARSSARSSAKSS +AKSSTRSSAKSWARSKARSRARSRAKDLVRSKAREQAQAREQARARAREQAHARARTQDW +VRAKAQEFVSAKEQQYVRAKEQERAKAREQVRIGARDEARIKAKDYNRVRPTKEDTSPRP +AEEKSSNSKLREEKGQEPERPVKQTIQKPALDNAPSIQGKACTKSFTKSGQPGDTESP-- +------- +>Homo|NP_861453.1|H1.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MEQALTGEAQSRWPRRGGSGAMAEAPGPS------------------GESRGHSAT---Q +LPAEKTVGGPSRGCSSS------------------------VLRVSQLVLQAISTHKGLT +LAALKKELRNAG---YEVRRKSGRHEAPRGQ--AKATLLR---VS-GSDAAGYFRVWKVP +KPR--------RKPGRARQEEGTRAPWRTP-AAPRSSRRRRQPLRKAARKAREVWRRNAR +A----------------------------------KAKANARAR---------------- +---------------------RTR-----------------RARPRAKEPPCAR------ +----------------------------------AKEEAGATAADEGRGQAVKEDTTPRS +GKDKRRSSKPREEK-QEPKKPAQRTIQ--------------------------------- +------- +>Mus|NP_061262.1|TS_H1.9 organism=Mus musculus phylum=Chordata class=Mammalia +----------------------MAQMVAG------------------DQDAGTLWVPSQS +ESQTESDISTQSLRKPT------------------------MSYVILKTLADKRVHNCVS +LATLKKAVSITG---YNMTHNTWRFKRVLQNLLDKGMIMH---VTCCKGASGSLCLCKER +ALK---------SNHRAKRCQDRQKSQKPQ-KPGQRESEPCQLLLSSKKKNDQLFKGVRR +V----------------------------------------------------------- 
+-----------------------------------------AKGNRHCHY---------- +------------------------------------------------------------ +------------------------------------------------------------ +------- +>Peromyscus|ACI22865.1|TS_H1.9 organism=Peromyscus californicus insignis phylum=Chordata class=Mammalia +---------MQRDTLLVSPSAAPNSAVAV------------------DQDASTSDDPSKS +E--TGPYTCPQTMRKPS------------------------MSKVILRAVTDKGLHRRVS +LAALKKAVSTTG---YNMAHNSWRFKRVVKNLVKKGMLKQ---VT-GKGASGSFRLGKKQ +AFK---------SKRKARR---RQRRQQRQ-KPRQRRSGPRQSLLGSGRSLKGLF----- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------- +>Rattus|NP_001103035.1|TS_H1.9 organism=Rattus norvegicus phylum=Chordata class=Mammalia +-------------MSLVSPSPDSNAVMAG------------------DQDASTSQVPSQS +ESKIGPNVATQTLRKPT------------------------MSKVILRTVADKGVHSRVS +LAALKKAVSITG---YNMAQNTWRFKRVLQNLVKKGMLKQ---VT-GKGASGSFRLGKKQ +AFK---------SKCKAKR---RQRRQK----PGQRRTGSRRSLLGSKKSNNRLFKGVRR +V----------------------------------------------------------- +-----------------------------------------AKGRRH------------- +------------------------------------------------------------ +------------------------------------------------------------ +------- diff --git a/CURATED_SET/draft_seeds/H1_only.fasta b/CURATED_SET/draft_seeds/H1_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H2A.B.1_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2A.B.1_(Homo_sapiens).fasta new file mode 100644 index 0000000..62d1f91 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.B.1_(Homo_sapiens).fasta @@ -0,0 +1,3 @@ +>Homo|NP_001017990.1|H2A.B.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPRRRRRRGSSGAGGRGRTCSRTVRAELSFSVSQVERSLREGHYAQRLSRTAPVYLAAVI 
+EYLTAKVPELAGNEAQNSGERNITPLLLDMVVHNDRLLSTLFNTTTISQVAPGED diff --git a/CURATED_SET/draft_seeds/H2A.B.1_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/H2A.B.1_(Mus_musculus).fasta new file mode 100644 index 0000000..cc949ab --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.B.1_(Mus_musculus).fasta @@ -0,0 +1,3 @@ +>Mus|NP_001268460.1|H2A.B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPRNRENCLRESSGRRHRRSRTSRAELIFAVSLVEQHLREVSRARRLSDTVPIFLAAILE +SLTRRLLELAGNEAQRRGTERRITPELLDLAVYSNMELSDVFQFITISQVAPAHR diff --git a/CURATED_SET/draft_seeds/H2A.B.2_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2A.B.2_(Homo_sapiens).fasta new file mode 100644 index 0000000..e27f7a2 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.B.2_(Homo_sapiens).fasta @@ -0,0 +1,6 @@ +>Homo|NP_001017991.1|H2A.B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPRRRRRRGSSGAGGRGRTCSRTVRAELSFSVSQVERSLREGHYAQRLSRTAPVYLAAVI +EYLTAKVLELAGNEAQNSGERNITPLLLDMVVHNDRLLSTLFNTTTISQVAPGED +>Homo|NP_542451.1|H2A.B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPRRRRRRGSSGAGGRGRTCSRTVRAELSFSVSQVERSLREGHYAQRLSRTAPVYLAAVI +EYLTAKVLELAGNEAQNSGERNITPLLLDMVVHNDRLLSTLFNTTTISQVAPGED diff --git a/CURATED_SET/draft_seeds/H2A.B.2_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/H2A.B.2_(Mus_musculus).fasta new file mode 100644 index 0000000..bfc4633 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.B.2_(Mus_musculus).fasta @@ -0,0 +1,3 @@ +>Mus|NP_001268459.1|H2A.B.2_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPRNTENCLQRSSGHRQHHSRTSRGELIFAVSLVEQHLREVSRARRLSDMVPVSLVAILE +FLTSRLLELAGNEAQRRGTQRLITPQPLDLEVYSSMELSDVFQFITISQVAPAHR diff --git a/CURATED_SET/draft_seeds/H2A.B.3_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/H2A.B.3_(Mus_musculus).fasta new file mode 100644 index 0000000..13a6349 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.B.3_(Mus_musculus).fasta @@ -0,0 +1,3 @@ +>Mus|NP_001096135.1|H2A.B.3_(Mus_musculus) organism=Mus 
musculus phylum=Chordata class=Mammalia +MPRNRENCLRESSGRRHRRSRTSRAELIFAVSLVEQHLREISRAWRLSDMVPIFLAAILE +SLTRRLLELAGNEAQRRGTERRITPELLDLAVYSNMELSDVFQFITISQVAPAHR diff --git a/CURATED_SET/draft_seeds/H2A.B.fasta b/CURATED_SET/draft_seeds/H2A.B.fasta old mode 100755 new mode 100644 index 6466724..6b0e18a --- a/CURATED_SET/draft_seeds/H2A.B.fasta +++ b/CURATED_SET/draft_seeds/H2A.B.fasta @@ -1,64 +1,68 @@ ->H2A.B_Heterocephalus_glaber_EHB05905.1 EHB05905.1 histone: H2A variant: H2A.B organism: Heterocephalus glaber -MPRQA--LALT-------------------NERPPQGRAEPIFSVSQVERALCDGRYAQR -LSCSASVFLAATLQFLSATVLELADREARYRS-RRRITRELLDVATLKDALLCTLLGTTT -ISRVAPARP------- ->H2A.B_Sus_scrofa_XP_003135571.1 XP_003135571.1 histone: H2A variant: H2A.B organism: Sus scrofa -MPGKR--SRRKSPG--RQG-----------RTCARTTRAGLSASVSHMERLLREGPYAQC -LSSSARVFLAATIEYLTARVLELAGDEAQIVG-RRCITPELVAMAVHNNALLSAFFGTLA -ISQVAPTQE------- ->H2A.B_Bos_taurus_NP_001069373.1 NP_001069373.1 histone: H2A variant: H2A.B organism: Bos taurus -MPKKR--GHQRSSG-----------------IRSRTAQSELSFSVSHMEHLLRKGHYAQR +>Ailuropoda|XP_011215272.1|H2A.B organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +MPGDR--SRRGSSSG--QR-----------RTRSRTARAELSFSVSHVERLLREGHYAQR +LGSSAPVFLAAVIQYLTAKVLELAGNEAQNSG-GRRITPQLVDMAVHNHALLSGFFGTTT +ISQVAPAWN------- +>Bos|NP_001069373.1|H2A.B organism=Bos taurus phylum=Chordata class=Mammalia +MPKKR--GHQRSSGI-----------------RSRTAQSELSFSVSHMEHLLRKGHYAQR LSSSAPVFLAAVIQDLTSKVLELAGNEAQKNG-EKRITPKLVDMAIHNNALLSSIFGMTT ISLVAPGPH------- ->H2A.B_Loxodonta_africana_XP_003421752.1 XP_003421752.1 histone: H2A variant: H2A.B organism: Loxodonta africana +>Callithrix|XP_002763866.2|H2A.B organism=Callithrix jacchus phylum=Chordata class=Mammalia +MSERR--SRRGSSAAGRRG-----------HTRSRTARAELIFSVSKMERGLWEGHYAQR +LSDNAPVYLAAVIQYLTAKILELAAKGADNRG-ERIITPRLLDMAVHNDGLLSTLFHAIT +ISQVGPGPN------- +>Cricetulus|XP_003514308.1|H2A.B organism=Cricetulus griseus phylum=Chordata class=Mammalia 
+MPRTRQSSRRGSSSR-----------------RSRTDRAELTFSVSLVEHHLRESGHARR +LSETVPILVTAILEFLTRRLLELASNEAQRLGAQRLITPEILDLTIYNNALLSEMFQFTT +ISQTAPAGPRRRRRQI +>Cricetulus|XP_003515491.1|H2A.B organism=Cricetulus griseus phylum=Chordata class=Mammalia +MPRTRQSSLRGSSSR-----------------RSRTDRAELTFSVSLVEHHLRESGHAPR +LSETVPILLTAILEFLTRRLLELASNEAQRLGAQRLITPEILDLTVYNNTLLSQLLQFTT +ISQTAPAGRRRRRRQT +>Heterocephalus|EHB05905.1|H2A.B organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MPRQA----------LALT-----------NERPPQGRAEPIFSVSQVERALCDGRYAQR +LSCSASVFLAATLQFLSATVLELADREARYRS-RRRITRELLDVATLKDALLCTLLGTTT +ISRVAPARP------- +>Loxodonta|XP_003421752.1|H2A.B organism=Loxodonta africana phylum=Chordata class=Mammalia MAGKR--SRRGGGGGGGGGGGGGGGGGSSRRQRRTRSRTELIFSASHVAHLLREGHYAQR LSSSAPVFLAAILKCLTAKILELAGNEAQNSG-RRLVTPELVDMAVHNNALLSGFFLTTT ISQVAPAR-------- ->H2A.B_Ailuropoda_melanoleuca_XP_011215272.1 XP_011215272.1 histone: H2A variant: H2A.B organism: Ailuropoda melanoleuca -MPGDR--SRRGSSS--GQR-----------RTRSRTARAELSFSVSHVERLLREGHYAQR -LGSSAPVFLAAVIQYLTAKVLELAGNEAQNSG-GRRITPQLVDMAVHNHALLSGFFGTTT -ISQVAPAWN------- ->H2A.B_Callithrix_jacchus_XP_002763866.2 XP_002763866.2 histone: H2A variant: H2A.B organism: Callithrix jacchus -MSERR--SRRGSSAAGRRG-----------HTRSRTARAELIFSVSKMERGLWEGHYAQR -LSDNAPVYLAAVIQYLTAKILELAAKEADNRG-ERIITPRLLDMAVHNDGLLSTLFHAIT -ISQVGPGPN------- ->H2A.B_Macaca_mulatta_NP_001180843.1 NP_001180843.1 histone: H2A variant: H2A.B organism: Macaca mulatta +>Macaca|NP_001180843.1|H2A.B organism=Macaca mulatta phylum=Chordata class=Mammalia MSERR--SHRRSSRAGGRG-----------RTRSRTVRAELSFSVSQVERGLREGHYAQR LSPTAPVYLAAVIEYLTAKVLELAGNEAQNNG-ERNITPLLLDMAVHNNRLLSTLFDTTT ISQVAPGGD------- ->H2A.B_Nomascus_leucogenys_XP_003282204.1 XP_003282204.1 histone: H2A variant: H2A.B organism: Nomascus leucogenys +>Nomascus|XP_003282204.1|H2A.B organism=Nomascus leucogenys phylum=Chordata class=Mammalia MPRRR--SHRGSSGAGGRG-----------RTCSRTVRAELSFSVSQVERGLREGHYAQR 
LSRTAPVYLAAVIEYLTAKVLELAGNEAQNNG-ERNITPLLLDMVVHNNRLLSTLFHTTT ISRVAPGGD------- ->H2A.B_Pan_troglodytes_XP_001145032.1 XP_001145032.1 histone: H2A variant: H2A.B organism: Pan troglodytes +>Pan|XP_001145032.1|H2A.B organism=Pan troglodytes phylum=Chordata class=Mammalia MPRRR--RHRGSSGAGGRG-----------RTCSRTVRAELSFSVSQVERSLREGQYAQR LSRTAPVYLAAVIEYLTAKVLELAGNEAQNSG-ARNITPLLLDMVVHNDRLLSTLFNTTT ISQVAPGED------- ->H2A.B_Homo_sapiens_NP_001017990.1 NP_001017990.1 histone: H2A variant: H2A.B organism: Homo sapiens +>Sus|XP_003135571.1|H2A.B organism=Sus scrofa phylum=Chordata class=Mammalia +MPGKR--SRRKSPGR--QG-----------RTCARTTRAGLSASVSHMERLLREGPYAQC +LSSSARVFLAATIEYLTARVLELAGDEAQIVG-RRCITPELVAMAVHNNALLSAFFGTLA +ISQVAPTQE------- +>Homo|NP_001017990.1|H2A.B.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MPRRR--RRRGSSGAGGRG-----------RTCSRTVRAELSFSVSQVERSLREGHYAQR LSRTAPVYLAAVIEYLTAKVPELAGNEAQNSG-ERNITPLLLDMVVHNDRLLSTLFNTTT ISQVAPGED------- ->H2A.B_Homo_sapiens_NP_001017991.1 NP_001017991.1 histone: H2A variant: H2A.B organism: Homo sapiens +>Homo|NP_001017991.1|H2A.B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MPRRR--RRRGSSGAGGRG-----------RTCSRTVRAELSFSVSQVERSLREGHYAQR LSRTAPVYLAAVIEYLTAKVLELAGNEAQNSG-ERNITPLLLDMVVHNDRLLSTLFNTTT ISQVAPGED------- ->H2A.B_Homo_sapiens_NP_542451.1 NP_542451.1 histone: H2A variant: H2A.B organism: Homo sapiens +>Homo|NP_542451.1|H2A.B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MPRRR--RRRGSSGAGGRG-----------RTCSRTVRAELSFSVSQVERSLREGHYAQR LSRTAPVYLAAVIEYLTAKVLELAGNEAQNSG-ERNITPLLLDMVVHNDRLLSTLFNTTT ISQVAPGED------- ->H2A.B_Cricetulus_griseus_XP_003514308.1 XP_003514308.1 histone: H2A variant: H2A.B organism: Cricetulus griseus -MPRTRQSSRRGSS-----------------SRRSRTDRAELTFSVSLVEHHLRESGHARR -LSETVPILVTAILEFLTRRLLELASNEAQRLGAQRLITPEILDLTIYNNALLSEMFQFTT -ISQTAPAGPRRRRRQI ->H2A.B_Cricetulus_griseus_XP_003515491.1 XP_003515491.1 histone: H2A variant: H2A.B organism: Cricetulus 
griseus -MPRTRQSSLRGSS-----------------SRRSRTDRAELTFSVSLVEHHLRESGHAPR -LSETVPILLTAILEFLTRRLLELASNEAQRLGAQRLITPEILDLTVYNNTLLSQLLQFTT -ISQTAPAGRRRRRRQT ->H2A.B_Mus_musculus_NP_001268459.1 NP_001268459.1 histone: H2A variant: H2A.B organism: Mus musculus -MPRNTENCLQRSSG---HR-----------QHHSRTSRGELIFAVSLVEQHLREVSRARR +>Mus|NP_001268460.1|H2A.B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPRNRENCLRESSGRR--------------HRRSRTSRAELIFAVSLVEQHLREVSRARR +LSDTVPIFLAAILESLTRRLLELAGNEAQRRGTERRITPELLDLAVYSNMELSDVFQFIT +ISQVAPAHR------- +>Mus|NP_001268459.1|H2A.B.2_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPRNTENCLQRSSGHR--------------QHHSRTSRGELIFAVSLVEQHLREVSRARR LSDMVPVSLVAILEFLTSRLLELAGNEAQRRGTQRLITPQPLDLEVYSSMELSDVFQFIT ISQVAPAHR------- ->H2A.B_Mus_musculus_NP_001268460.1 NP_001268460.1 histone: H2A variant: H2A.B organism: Mus musculus -MPRNRENCLRESSG---RR-----------HRRSRTSRAELIFAVSLVEQHLREVSRARR -LSDTVPIFLAAILESLTRRLLELAGNEAQRRGTERRITPELLDLAVYSNMELSDVFQFIT +>Mus|NP_001096135.1|H2A.B.3_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPRNRENCLRESSGRR--------------HRRSRTSRAELIFAVSLVEQHLREISRAWR +LSDMVPIFLAAILESLTRRLLELAGNEAQRRGTERRITPELLDLAVYSNMELSDVFQFIT ISQVAPAHR------- diff --git a/CURATED_SET/draft_seeds/H2A.B_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2A.B_(Homo_sapiens).fasta new file mode 100644 index 0000000..eb1e856 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.B_(Homo_sapiens).fasta @@ -0,0 +1,9 @@ +>Homo|NP_001017990.1|H2A.B.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPRRRRRRGSSGAGGRGRTCSRTVRAELSFSVSQVERSLREGHYAQRLSRTAPVYLAAVI +EYLTAKVPELAGNEAQNSGERNITPLLLDMVVHNDRLLSTLFNTTTISQVAPGED +>Homo|NP_001017991.1|H2A.B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPRRRRRRGSSGAGGRGRTCSRTVRAELSFSVSQVERSLREGHYAQRLSRTAPVYLAAVI +EYLTAKVLELAGNEAQNSGERNITPLLLDMVVHNDRLLSTLFNTTTISQVAPGED +>Homo|NP_542451.1|H2A.B.2_(Homo_sapiens) organism=Homo sapiens 
phylum=Chordata class=Mammalia +MPRRRRRRGSSGAGGRGRTCSRTVRAELSFSVSQVERSLREGHYAQRLSRTAPVYLAAVI +EYLTAKVLELAGNEAQNSGERNITPLLLDMVVHNDRLLSTLFNTTTISQVAPGED diff --git a/CURATED_SET/draft_seeds/H2A.B_(Homo_sapiens)_only.fasta b/CURATED_SET/draft_seeds/H2A.B_(Homo_sapiens)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H2A.B_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/H2A.B_(Mus_musculus).fasta new file mode 100644 index 0000000..1bbb11f --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.B_(Mus_musculus).fasta @@ -0,0 +1,9 @@ +>Mus|NP_001268460.1|H2A.B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPRNRENCLRESSGRRHRRSRTSRAELIFAVSLVEQHLREVSRARRLSDTVPIFLAAILE +SLTRRLLELAGNEAQRRGTERRITPELLDLAVYSNMELSDVFQFITISQVAPAHR +>Mus|NP_001268459.1|H2A.B.2_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPRNTENCLQRSSGHRQHHSRTSRGELIFAVSLVEQHLREVSRARRLSDMVPVSLVAILE +FLTSRLLELAGNEAQRRGTQRLITPQPLDLEVYSSMELSDVFQFITISQVAPAHR +>Mus|NP_001096135.1|H2A.B.3_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPRNRENCLRESSGRRHRRSRTSRAELIFAVSLVEQHLREISRAWRLSDMVPIFLAAILE +SLTRRLLELAGNEAQRRGTERRITPELLDLAVYSNMELSDVFQFITISQVAPAHR diff --git a/CURATED_SET/draft_seeds/H2A.B_(Mus_musculus)_only.fasta b/CURATED_SET/draft_seeds/H2A.B_(Mus_musculus)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H2A.B_only.fasta b/CURATED_SET/draft_seeds/H2A.B_only.fasta new file mode 100644 index 0000000..ad20664 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.B_only.fasta @@ -0,0 +1,44 @@ +>Ailuropoda|XP_011215272.1|H2A.B organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +MPGDR--SRRGSSSG--QR-----------RTRSRTARAELSFSVSHVERLLREGHYAQR +LGSSAPVFLAAVIQYLTAKVLELAGNEAQNSG-GRRITPQLVDMAVHNHALLSGFFGTTT +ISQVAPAWN------- +>Bos|NP_001069373.1|H2A.B organism=Bos taurus phylum=Chordata class=Mammalia +MPKKR--GHQRSSGI-----------------RSRTAQSELSFSVSHMEHLLRKGHYAQR 
+LSSSAPVFLAAVIQDLTSKVLELAGNEAQKNG-EKRITPKLVDMAIHNNALLSSIFGMTT +ISLVAPGPH------- +>Callithrix|XP_002763866.2|H2A.B organism=Callithrix jacchus phylum=Chordata class=Mammalia +MSERR--SRRGSSAAGRRG-----------HTRSRTARAELIFSVSKMERGLWEGHYAQR +LSDNAPVYLAAVIQYLTAKILELAAKGADNRG-ERIITPRLLDMAVHNDGLLSTLFHAIT +ISQVGPGPN------- +>Cricetulus|XP_003514308.1|H2A.B organism=Cricetulus griseus phylum=Chordata class=Mammalia +MPRTRQSSRRGSSSR-----------------RSRTDRAELTFSVSLVEHHLRESGHARR +LSETVPILVTAILEFLTRRLLELASNEAQRLGAQRLITPEILDLTIYNNALLSEMFQFTT +ISQTAPAGPRRRRRQI +>Cricetulus|XP_003515491.1|H2A.B organism=Cricetulus griseus phylum=Chordata class=Mammalia +MPRTRQSSLRGSSSR-----------------RSRTDRAELTFSVSLVEHHLRESGHAPR +LSETVPILLTAILEFLTRRLLELASNEAQRLGAQRLITPEILDLTVYNNTLLSQLLQFTT +ISQTAPAGRRRRRRQT +>Heterocephalus|EHB05905.1|H2A.B organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MPRQA----------LALT-----------NERPPQGRAEPIFSVSQVERALCDGRYAQR +LSCSASVFLAATLQFLSATVLELADREARYRS-RRRITRELLDVATLKDALLCTLLGTTT +ISRVAPARP------- +>Loxodonta|XP_003421752.1|H2A.B organism=Loxodonta africana phylum=Chordata class=Mammalia +MAGKR--SRRGGGGGGGGGGGGGGGGGSSRRQRRTRSRTELIFSASHVAHLLREGHYAQR +LSSSAPVFLAAILKCLTAKILELAGNEAQNSG-RRLVTPELVDMAVHNNALLSGFFLTTT +ISQVAPAR-------- +>Macaca|NP_001180843.1|H2A.B organism=Macaca mulatta phylum=Chordata class=Mammalia +MSERR--SHRRSSRAGGRG-----------RTRSRTVRAELSFSVSQVERGLREGHYAQR +LSPTAPVYLAAVIEYLTAKVLELAGNEAQNNG-ERNITPLLLDMAVHNNRLLSTLFDTTT +ISQVAPGGD------- +>Nomascus|XP_003282204.1|H2A.B organism=Nomascus leucogenys phylum=Chordata class=Mammalia +MPRRR--SHRGSSGAGGRG-----------RTCSRTVRAELSFSVSQVERGLREGHYAQR +LSRTAPVYLAAVIEYLTAKVLELAGNEAQNNG-ERNITPLLLDMVVHNNRLLSTLFHTTT +ISRVAPGGD------- +>Pan|XP_001145032.1|H2A.B organism=Pan troglodytes phylum=Chordata class=Mammalia +MPRRR--RHRGSSGAGGRG-----------RTCSRTVRAELSFSVSQVERSLREGQYAQR +LSRTAPVYLAAVIEYLTAKVLELAGNEAQNSG-ARNITPLLLDMVVHNDRLLSTLFNTTT +ISQVAPGED------- +>Sus|XP_003135571.1|H2A.B organism=Sus scrofa 
phylum=Chordata class=Mammalia +MPGKR--SRRKSPGR--QG-----------RTCARTTRAGLSASVSHMERLLREGPYAQC +LSSSARVFLAATIEYLTARVLELAGDEAQIVG-RRCITPELVAMAVHNNALLSAFFGTLA +ISQVAPTQE------- diff --git a/CURATED_SET/draft_seeds/H2A.J.fasta b/CURATED_SET/draft_seeds/H2A.J.fasta new file mode 100644 index 0000000..d6dd6d7 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.J.fasta @@ -0,0 +1,4 @@ +>Homo|NP_808760.1|H2A.J_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKVRAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TESQKTKSK diff --git a/CURATED_SET/draft_seeds/H2A.J_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2A.J_(Homo_sapiens).fasta new file mode 100644 index 0000000..d6dd6d7 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.J_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_808760.1|H2A.J_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKVRAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TESQKTKSK diff --git a/CURATED_SET/draft_seeds/H2A.J_only.fasta b/CURATED_SET/draft_seeds/H2A.J_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H2A.L.1_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2A.L.1_(Homo_sapiens).fasta new file mode 100644 index 0000000..8a37ebb --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.L.1_(Homo_sapiens).fasta @@ -0,0 +1,5 @@ +>Homo|HISTDB_H2A_L_0|H2A.L.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGNKHSRSSCKPRRQCLSRSRRAELQFPVSHMERCLREGQYARHLSSTTPVFLAAVLEY +LTANILEQAGKEAQNSHRVCITPEHLKRALQKNEQLRWILEEEDDIHSQEEEMPQPEEEE +EEDERMEEEEEEKKEEEEKEEEEDERMEEEEEEKKEEEEKKEEKEKEEEKEKKKKKGGFL +SFRAVQDFISNLFQLLKFP diff --git a/CURATED_SET/draft_seeds/H2A.L.1_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/H2A.L.1_(Mus_musculus).fasta new file mode 100644 index 0000000..996c31d --- /dev/null +++ 
b/CURATED_SET/draft_seeds/H2A.L.1_(Mus_musculus).fasta @@ -0,0 +1,3 @@ +>Mus|NP_083864.1|H2A.L.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MAKKMQRRRRQKRTRSQRGELPLSLVDRFLREEFHSSRLSSSALSFLTSVLEYLTSNILE +LAGEVAHTTGRKRVTPEDVRLVVQNNEQLRQLFKPGGTSVNEDDN diff --git a/CURATED_SET/draft_seeds/H2A.L.2_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/H2A.L.2_(Mus_musculus).fasta new file mode 100644 index 0000000..34e332c --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.L.2_(Mus_musculus).fasta @@ -0,0 +1,3 @@ +>Mus|NP_080903.1|H2A.L.2_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MARKRQRRRRRKVTRSQRAELQFPVSRVDRFLREGNYSRRLSSSAPVFLAGVLEYLTSNI +LELAGEVAHTTGRKRIAPEHVCRVVQNNEQLHQLFKQGGTSVFEPPEPDDN diff --git a/CURATED_SET/draft_seeds/H2A.L.3_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2A.L.3_(Homo_sapiens).fasta new file mode 100644 index 0000000..62256b5 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.L.3_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|HISTDB_H2A_L_1|H2A.L.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGNKMFCRPRRQRLSHSRRAELQFPVSHLERCLRESQHARHLSSTTPVFLAGVLEYLTA +NILEKVGKEVKNSCRLCITPEHVKRALQKDEQLRWILELEDDTHSQVEEMPQSEEEEEEE +EEKEEEMVVLVVMGGRRRRRRRRRRKDS diff --git a/CURATED_SET/draft_seeds/H2A.L.3_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/H2A.L.3_(Mus_musculus).fasta new file mode 100644 index 0000000..561dcfe --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.L.3_(Mus_musculus).fasta @@ -0,0 +1,4 @@ +>Mus|Q9D4U4|H2A.L.3_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MEDKRQKDSVAPSSGAKLQFPVSEAEHLLQERNLSKCLNSSTPVLFTDMLNYVTSSILEL +TVKDRDSHTSCNKLIAPEQKSKPTDNIDELCQLFKDSQYMADETPGCYKTPRSNKITGLY +EAPRPGPK diff --git a/CURATED_SET/draft_seeds/H2A.L.fasta b/CURATED_SET/draft_seeds/H2A.L.fasta new file mode 100644 index 0000000..7116456 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.L.fasta @@ -0,0 +1,120 @@ +>Ailuropoda|XP_002927211.2|H2A.L organism=Ailuropoda melanoleuca 
phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRRSPWHSHQHKRHGLSRSMRAELQFPVSRVDRLLREGCYAQRLSSSTPVF +LTGVLEYLTANILELA--GQEARNHHKMRITPEHVQRALVNNQHLSCLF--EDITS---- +-------------PP---------AKGTPQLRKC-------------------------- +------------------------------------------------------ +>Bos|NP_001071426.1|H2A.L organism=Bos taurus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSPGRHLWNCRRSRRRSLSRSTRAELQFPVSRVDRLLREGQFANRLSSATPVF +LTGILEYLIANILDLA--GKEACTNHRVRISPEHVQTALVNNENLRCLF--QPGAF---- +-------------SQ---------PAASPPAPEN-------------------------- +------------------------------------------------------ +>Bos|XP_875023.2|H2A.L organism=Bos taurus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSPRRHLWNCRRSRRHSLSRSTRAELQFPVSRVDRLLREGQGAYRLSSATPVF +LTAVLEYLIANILDLA--GKEACTNHRVRISPEHVQTALINNENLRRLF--QPGAF---- +-------------SQ---------PTASPHLPEN-------------------------- +------------------------------------------------------ +>Canis|XP_548938.1|H2A.L organism=Canis lupus familiaris phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGERGPGHSRRPRRHGLSRSRRAELQFPVSRVDRLLREGHYAHRLSSSTPVF +LAGILEYLTSNILELA--GQEARNSHKMRITPEHLQKALGNNQYLSQLF--EENTY---- +-------------SQ---------GDGMVQARKWSGPGTGADSRI--------------- +------------------------------------------------------ +>Canis|XP_548947.1|H2A.L organism=Canis lupus familiaris phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGERGPGHSRRPRRHGLSRSRRAELQFPVSRVDRLLREGCYAHRLSSSTPVF +LTGILEYLTSNILELV--GQEACNSHKMRITPEHMQKALGNNQYLSQLF--EENTY---- +-------------SQ---------GDGMVQARKWSGPGTGADSRI--------------- +------------------------------------------------------ +>Cavia|XP_003469395.1|H2A.L organism=Cavia porcellus phylum=Chordata class=Mammalia 
+------------------------------------------------------------ +-------MAAKKCKGISSKPRKHPVSRSTRAQLQFPVSRVERYLRENGYL-RLSACTPVF +LAGILEYLTASALHLA--ARVAHRRHKKRISPEHLARALEKSEQLRQVF--GDSTK---- +-------------AL---------LDEIIQAKKK-------------------------- +------------------------------------------------------ +>Cavia|XP_003469399.1|H2A.L organism=Cavia porcellus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MDGKKSNEKSSQLRKHPVSCSRRAELQLPVSRMERYLRENSYAPHLPFSTPVF +LEGVLEYLTASILDLA--RKEARGKRKKHILPQHLETAAENNQQLGLRF--GDSRK---- +-------------SM---------LDEMTQNKKK-------------------------- +------------------------------------------------------ +>Cricetulus|XP_003508207.1|H2A.L organism=Cricetulus griseus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MEEKK--------RKETISRITRGQLQFSLDRIERFFRDGNFSQRLSASAPVF +LAGVLEFLTSNILDLA--GREAHANGTRLITPEHVTQVVQNNDQLREVF--KEHED---- +-------------PV---------VSETPEPEKN-------------------------- +------------------------------------------------------ +>Heterocephalus|EHB04253.1|H2A.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGKKRRQNSSKPRKQAVSRSTRAELQFPVSRVEHYLREGGYAHRLASSTPVF +LAGVLEYLRANILDLA--GKEAQGKRKKCITPQHLETAMENNQHLRPLF--QDDPK---- +-------------SL---------LDETSQPNPRRSDEAWVPEHGKTFSTSSSSPQNTHR +------------------------------------------------------ +>Heterocephalus|EHB10563.1|H2A.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGKKRRQNSSKPRKQAVSRSTRAELQFPVSRVEHYLREGGYAHRLGSSTPVF +LAGVLKYLRANILDLA--GKEAQGKRKKCIAPQHLETAMENNQHLRPLF--QDGPK---- +-------------SL---------LDETSQPKKK-------------------------- +------------------------------------------------------ +>Heterocephalus|EHB17227.1|H2A.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia 
+MDIHRQRPNQSPVDTNDITKPRLSQRQLLTSEKHRETSGWAQALLFGEAQARREIKLGQL +KPNTASIMAGKKRRQNSSKPRKQAVSRSTRAELQFPVSRVERYLREGGYAQRLGSSTPVF +LAGVLEYLTANILDLA--GKEAEGNCKKRITPQHLETVMENNQQLRALF--QGDTK---- +-------------SL---------LDETSQLKKKC------------------------- +------------------------------------------------------ +>Oryctolagus|XP_002719866.1|H2A.L organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MRGKKPSKKAGKRRKPNVSRSTRAELQFPVSRVDRHLHHDRYAQRLSSSTPVF +LAGVLEYLTSNILELA--GEEAHKNSRIRITPEHMRKAIESSEHLRDLL--EEDPK---- +-------------PR---------DEDVAQPEEKE------------------------- +------------------------------------------------------ +>Oryctolagus|XP_002720052.1|H2A.L organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MRGKKPSKKAGKRRKPNVSRSTRAELQFPVSRVDRHLHHDRYAQRLSSSTPVF +LAGVLEYLTSNILELA--GEEAHKNSRVRITPEHMRKAIESSEHLRDLL--EEDPK---- +-------------PR---------DEDVAQPEEKE------------------------- +------------------------------------------------------ +>Sus|XP_003360303.2|H2A.L organism=Sus scrofa phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSRKRNLPQCNRRKKHALSRSSRAELQFPVSRVDRYLREGRYAQRLSSQAPVF +LAGVLEYLTANILELA--ASEARSNNKMRIAPEHVQRAASHNQTLSSLF--QASSV---- +-------------SRGAEE-----GAEEPLPEAGR------------------------- +------------------------------------------------------ +>Sus|XP_003484147.1|H2A.L organism=Sus scrofa phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSRKRNLPQCSHRKKHALSCSSRAELQFPMSSLDCVLPEGQYAQRLSSYTPVF +LAGVLEHLMAHILELA--AREARSSRKVRITPEHVQRALNNNETLSRLF--QASSV---- +-------------SRGAEEGAEEPGAEEPLPEAGQ------------------------- +------------------------------------------------------ +>Homo|HISTDB_H2A_L_0|H2A.L.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+------------------------------------------------------------ +-------MAGNKHSRSSCKPRRQCLSRSRRAELQFPVSHMERCLREGQYARHLSSTTPVF +LAAVLEYLTANILEQA--GKEAQNSHRVCITPEHLKRALQKNEQLRWILEEEDDIH---- +-------------SQ---------EEEMPQPEEEEEEDERMEEEEEEKKEEEEKEEEEDE +RMEEEEEEKKEEEEKKEEKEKEEEKEKKKKKGGFLSFRAVQDFISNLFQLLKFP +>Homo|HISTDB_H2A_L_1|H2A.L.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGNK---MFCRPRRQRLSHSRRAELQFPVSHLERCLRESQHARHLSSTTPVF +LAGVLEYLTANILEKV--GKEVKNSCRLCITPEHVKRALQKDEQLRWILELEDDTH---- +-------------SQ---------VEEMPQSEEEEE-------------EEEEKEEEMVV +LVVMGGRRRR--------------RRRRRRKDS--------------------- +>Mus|NP_083864.1|H2A.L.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAKKM-----QRRRRQKRTRSQRGEL--PLSLVDRFLREEFHSSRLSSSALSF +LTSVLEYLTSNILELA--GEVAHTTGRKRVTPEDVRLVVQNNEQLRQLF--K-------- +-PGG---------TS---------VNE----DDN-------------------------- +------------------------------------------------------ +>Mus|NP_080903.1|H2A.L.2_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MARKR-----QRRRRRKVTRSQRAELQFPVSRVDRFLREGNYSRRLSSSAPVF +LAGVLEYLTSNILELA--GEVAHTTGRKRIAPEHVCRVVQNNEQLHQLF--K-------- +-QGG---------TS---------VFEPPEPDDN-------------------------- +------------------------------------------------------ +>Mus|Q9D4U4|H2A.L.3_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------ME--------DKRQKDSVAPSSGAKLQFPVSEAEHLLQERNLSKCLNSSTPVL +FTDMLNYVTSSILELTVKDRDSHTSCNKLIAPEQKSKPTDNIDELCQLF--KDSQYMADE +TPGCYKTPRSNKITG---------LYEAPRPGPK-------------------------- +------------------------------------------------------ diff --git a/CURATED_SET/draft_seeds/H2A.L_(Homo_sapiens).fasta 
b/CURATED_SET/draft_seeds/H2A.L_(Homo_sapiens).fasta new file mode 100644 index 0000000..18a3f2a --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.L_(Homo_sapiens).fasta @@ -0,0 +1,10 @@ +>Homo|HISTDB_H2A_L_0|H2A.L.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGNKHSRSSCKPRRQCLSRSRRAELQFPVSHMERCLREGQYARHLSSTTPVFLAAVLEY +LTANILEQAGKEAQNSHRVCITPEHLKRALQKNEQLRWILEEEDDIHSQEEEMPQPEEEE +EEDERMEEEEEEKKEEEEKEEEEDERMEEEEEEKKEEEEKKEEKEKEEEKEKKKKKGGFL +SFRAVQDFISNLFQLLKFP +>Homo|HISTDB_H2A_L_1|H2A.L.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGNK---MFCRPRRQRLSHSRRAELQFPVSHLERCLRESQHARHLSSTTPVFLAGVLEY +LTANILEKVGKEVKNSCRLCITPEHVKRALQKDEQLRWILELEDDTHSQVEEMPQSEEEE +E-------------EEEEKEEEMVVLVVMGGRRRR--------------RRRRRRKDS-- +------------------- diff --git a/CURATED_SET/draft_seeds/H2A.L_(Homo_sapiens)_only.fasta b/CURATED_SET/draft_seeds/H2A.L_(Homo_sapiens)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H2A.L_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/H2A.L_(Mus_musculus).fasta new file mode 100644 index 0000000..1959aef --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.L_(Mus_musculus).fasta @@ -0,0 +1,12 @@ +>Mus|NP_083864.1|H2A.L.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MAKKMQRRRRQKRTRSQRGEL--PLSLVDRFLREEFHSSRLSSSALSFLTSVLEYLTSNI +LELA--GEVAHTTGRKRVTPEDVRLVVQNNEQLRQLFK---------PGG---------T +SVNE----DDN +>Mus|NP_080903.1|H2A.L.2_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MARKRQRRRRRKVTRSQRAELQFPVSRVDRFLREGNYSRRLSSSAPVFLAGVLEYLTSNI +LELA--GEVAHTTGRKRIAPEHVCRVVQNNEQLHQLFK---------QGG---------T +SVFEPPEPDDN +>Mus|Q9D4U4|H2A.L.3_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +ME---DKRQKDSVAPSSGAKLQFPVSEAEHLLQERNLSKCLNSSTPVLFTDMLNYVTSSI +LELTVKDRDSHTSCNKLIAPEQKSKPTDNIDELCQLFKDSQYMADETPGCYKTPRSNKIT +GLYEAPRPGPK diff --git a/CURATED_SET/draft_seeds/H2A.L_(Mus_musculus)_only.fasta 
b/CURATED_SET/draft_seeds/H2A.L_(Mus_musculus)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H2A.L_only.fasta b/CURATED_SET/draft_seeds/H2A.L_only.fasta new file mode 100644 index 0000000..76f7003 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.L_only.fasta @@ -0,0 +1,75 @@ +>Ailuropoda|XP_002927211.2|H2A.L organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRRSPWHSHQHKRHGLSRSMRAELQFPVSRVDRLLREGCYAQRLSSSTPVF +LTGVLEYLTANILELAGQEARNHHKMRITPEHVQRALVNNQHLSCLFEDITSPP------ +---AKGTPQLRKC-------------------------- +>Bos|NP_001071426.1|H2A.L organism=Bos taurus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSPGRHLWNCRRSRRRSLSRSTRAELQFPVSRVDRLLREGQFANRLSSATPVF +LTGILEYLIANILDLAGKEACTNHRVRISPEHVQTALVNNENLRCLFQPGAFSQ------ +---PAASPPAPEN-------------------------- +>Bos|XP_875023.2|H2A.L organism=Bos taurus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSPRRHLWNCRRSRRHSLSRSTRAELQFPVSRVDRLLREGQGAYRLSSATPVF +LTAVLEYLIANILDLAGKEACTNHRVRISPEHVQTALINNENLRRLFQPGAFSQ------ +---PTASPHLPEN-------------------------- +>Canis|XP_548938.1|H2A.L organism=Canis lupus familiaris phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGERGPGHSRRPRRHGLSRSRRAELQFPVSRVDRLLREGHYAHRLSSSTPVF +LAGILEYLTSNILELAGQEARNSHKMRITPEHLQKALGNNQYLSQLFEENTYSQ------ +---GDGMVQARKWSGPGTGADSRI--------------- +>Canis|XP_548947.1|H2A.L organism=Canis lupus familiaris phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGERGPGHSRRPRRHGLSRSRRAELQFPVSRVDRLLREGCYAHRLSSSTPVF +LTGILEYLTSNILELVGQEACNSHKMRITPEHMQKALGNNQYLSQLFEENTYSQ------ +---GDGMVQARKWSGPGTGADSRI--------------- +>Cavia|XP_003469395.1|H2A.L organism=Cavia porcellus phylum=Chordata class=Mammalia 
+------------------------------------------------------------ +-------MAAKKCKGISSKPRKHPVSRSTRAQLQFPVSRVERYLRENGYL-RLSACTPVF +LAGILEYLTASALHLAARVAHRRHKKRISPEHLARALEKSEQLRQVFGDSTKAL------ +---LDEIIQAKKK-------------------------- +>Cavia|XP_003469399.1|H2A.L organism=Cavia porcellus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MDGKKSNEKSSQLRKHPVSCSRRAELQLPVSRMERYLRENSYAPHLPFSTPVF +LEGVLEYLTASILDLARKEARGKRKKHILPQHLETAAENNQQLGLRFGDSRKSM------ +---LDEMTQNKKK-------------------------- +>Cricetulus|XP_003508207.1|H2A.L organism=Cricetulus griseus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MEEKK--------RKETISRITRGQLQFSLDRIERFFRDGNFSQRLSASAPVF +LAGVLEFLTSNILDLAGREAHANGTRLITPEHVTQVVQNNDQLREVFKEHEDPV------ +---VSETPEPEKN-------------------------- +>Heterocephalus|EHB04253.1|H2A.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGKKRRQNSSKPRKQAVSRSTRAELQFPVSRVEHYLREGGYAHRLASSTPVF +LAGVLEYLRANILDLAGKEAQGKRKKCITPQHLETAMENNQHLRPLFQDDPKSL------ +---LDETSQPNPRRSDEAWVPEHGKTFSTSSSSPQNTHR +>Heterocephalus|EHB10563.1|H2A.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGKKRRQNSSKPRKQAVSRSTRAELQFPVSRVEHYLREGGYAHRLGSSTPVF +LAGVLKYLRANILDLAGKEAQGKRKKCIAPQHLETAMENNQHLRPLFQDGPKSL------ +---LDETSQPKKK-------------------------- +>Heterocephalus|EHB17227.1|H2A.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MDIHRQRPNQSPVDTNDITKPRLSQRQLLTSEKHRETSGWAQALLFGEAQARREIKLGQL +KPNTASIMAGKKRRQNSSKPRKQAVSRSTRAELQFPVSRVERYLREGGYAQRLGSSTPVF +LAGVLEYLTANILDLAGKEAEGNCKKRITPQHLETVMENNQQLRALFQGDTKSL------ +---LDETSQLKKKC------------------------- +>Oryctolagus|XP_002719866.1|H2A.L organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +------------------------------------------------------------ 
+-------MRGKKPSKKAGKRRKPNVSRSTRAELQFPVSRVDRHLHHDRYAQRLSSSTPVF +LAGVLEYLTSNILELAGEEAHKNSRIRITPEHMRKAIESSEHLRDLLEEDPKPR------ +---DEDVAQPEEKE------------------------- +>Oryctolagus|XP_002720052.1|H2A.L organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MRGKKPSKKAGKRRKPNVSRSTRAELQFPVSRVDRHLHHDRYAQRLSSSTPVF +LAGVLEYLTSNILELAGEEAHKNSRVRITPEHMRKAIESSEHLRDLLEEDPKPR------ +---DEDVAQPEEKE------------------------- +>Sus|XP_003360303.2|H2A.L organism=Sus scrofa phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSRKRNLPQCNRRKKHALSRSSRAELQFPVSRVDRYLREGRYAQRLSSQAPVF +LAGVLEYLTANILELAASEARSNNKMRIAPEHVQRAASHNQTLSSLFQASSVSRGAEE-- +---GAEEPLPEAGR------------------------- +>Sus|XP_003484147.1|H2A.L organism=Sus scrofa phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSRKRNLPQCSHRKKHALSCSSRAELQFPMSSLDCVLPEGQYAQRLSSYTPVF +LAGVLEHLMAHILELAAREARSSRKVRITPEHVQRALNNNETLSRLFQASSVSRGAEEGA +EEPGAEEPLPEAGQ------------------------- diff --git a/CURATED_SET/draft_seeds/H2A.M.fasta b/CURATED_SET/draft_seeds/H2A.M.fasta new file mode 100644 index 0000000..c99fa14 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.M.fasta @@ -0,0 +1,28 @@ +>Physcomitrium|XP_024376580.1|H2A.M organism=Physcomitrium patens phylum=Streptophyta class=Bryopsida +--MSGRGKGAGAAARKKSVSRSAKAGLQFPVGRLGRYLKKGRYARRVGSGAPVYLAAVLE +YLAAEVLELAGNASRDNKKSRIIPRHIQLAIRNDEELGKLLSGVTIAYGGVLPNIHSVLL +PKKTAGGAG-ADKSEKPEKEKKTK--------------VEKASKG +>Physcomitrium|XP_024376581.1|H2A.M organism=Physcomitrium patens phylum=Streptophyta class=Bryopsida +--MSGRGKGAGAAARKKSVTKSAKAGLQFPVGRLGRYLKKGRYAQRVGSGAPVYLAAVLE +YLAAEVLELAGNASRDNKKSRIIPRHIQLAIRNDEELGKLLSGVTIAYGGVLPNIHSVLL +PKKTAGGTG-GEKPEKPEKEKKEK--------------KGKADKE +>Physcomitrium|XP_024377711.1|H2A.M organism=Physcomitrium patens phylum=Streptophyta class=Bryopsida 
+--MSGRGKGAGAAARKKSVTKSAKAGLQFPVGRLGRYLKKGRYAQRVGSGAPVYLAAVLE +YLAAEVLELAGNASRDNKKSRIIPRHIQLAIRNDEELGKLLSGVTIAYGGVLPNIHSVLL +PKKTAGGTG-TEKPAKPEKEKKVK--------------GEKASKE +>Selaginella|HISTDB_H2A_M_1|H2A.M organism=Selaginella moellendorffii phylum=Streptophyta class=Lycopodiopsida +--MVVQG-G-GRKGKKKSVSKSARAGLQFPVGRLARYLKNGRYAKRVGSGAPVYLAAVLE +YLAAEVLELAGNAARDNKKTRIIPRHIQLAVRNDDELGKLLQGVTIAHGGVIPHIHGVLL +PKKSSSGAGSAEKSPKPEKS------------------------- +>Marchantia|HISTDB_H2A_M_2|H2A.M organism=Marchantia polymorpha phylum=Streptophyta class=Marchantiopsida +--MSGRGH--SAKAKRKAISKSARAGLQFPVGRLARYLKNGRYAKRVGAGAPVYLAAVLE +YLAAEVLELAGNACRDNGKTRIIPRHIQLAIRNDEELGKLLASVTIAHGGVLPNIHQLLL +PKKTAAKLE-KEEKSKAEKSSTKS--------------DSKSTEK +>Marchantia|OAE20401.1|H2A.M organism=Marchantia polymorpha subsp. ruderalis phylum=Streptophyta class=Marchantiopsida +--MSARS-G-TTAVKKKPVSKSQKAGLQFPVGRMARFLKNGRYAKRIGAGAPVYLAAVLE +YLAAELLELAGNACRDNKKTRIIPRHIQLAVRNDEELSKLLAEVTISRGGVLPNINPSLL +PKKTASKAE-KEVAEADDKSTKDKAKEKETVTSSKRSPKAKSEKK +>Picea|HISTDB_H2A_M_0|H2A.M organism=Picea abies phylum=Streptophyta class=Pinopsida +MEPATQGSG-GRGGKKKPVSKSERAGLQFPVGRLARYLKKGRYAKRVGTGAPIYLAAVLE +YLAAEILELSGNAAKDNKKSRIIPRHILLAVKNDDELNKLLANVTIAYGGVVPNIHQVLL +PKKTAEKAKAKESSEI----------------------------- diff --git a/CURATED_SET/draft_seeds/H2A.P.fasta b/CURATED_SET/draft_seeds/H2A.P.fasta new file mode 100644 index 0000000..fd8434d --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.P.fasta @@ -0,0 +1,40 @@ +>Ailuropoda|XP_002913536.1|H2A.P organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +--------MSGKKSHESSY----QTQAHLITTELQVPVSYVDRLLQENQYNHPLSSSTTD +FLLTMLDYLTDYILDVVGTEA-NNSNMPTAPQDVERAVDSSGEPYHRSKDTAFTLFDEMP +GSRRNG +>Canis|XP_005641297.1|H2A.P organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MEPNPANIMSGNKNHESSN----QTQAHLVTTELQFPVSYVDRLLQEDQRTHCLSSTSTE +FLLAMLDSLTDYILERVGTEA-NNNNMQTAPQDVERAVGSNREPQQCLKDTAFTLFDEMP +RSRRNG +>Cricetulus|XP_003508203.1|H2A.P organism=Cricetulus griseus 
phylum=Chordata class=Mammalia +--------MSGKKNQGKSCSDNKKMEDPSSKPEVQIPVNYVYHLLQEEQYTPCLGSTTSD +FLLAMLDYITDYILEVVGSEA-NINSQQDIPQDRERQGDNDHDHSHAFKNAPFSLFDEMP +GPRRNG +>Heterocephalus|EHB10562.1|H2A.P organism=Heterocephalus glaber phylum=Chordata class=Mammalia +-------MMCEQKSQYGSYKDNNQQEDPASRPEQQLPVSDIYCILHEE-YNPYF-SSTSD +LLLAMLESLTDYILTLVGSEG-NNVGMPTNPQDGEREMDNNHEHPPIIPDVSFSFSDEMP +GSRKKG +>Macaca|EHH30639.1|H2A.P organism=Macaca mulatta phylum=Chordata class=Mammalia +--------MSEKNNRKNSSANNNQIQDR-SRNELRVPMSFVDRVVQDEQDAQSQSSSTIN +ILLTLLDCLADYIMEQVGLEAINNGRMRNTSQDGEREGDNHHEPHRTESDGTRFVFDEMP +KSGKND +>Pan|XP_003317470.2|H2A.P organism=Pan troglodytes phylum=Chordata class=Mammalia +--------MSEKKNCKNSSTNNNQTQDP-SRNELQVPMSFVDRVVQDEQDVQSQSSSTIN +TLLTLLDCLADYIMERVGLEASNNGSMRNTSQDREREVDNNREPHSAESDVTRFLFDEMP +KSRKND +>Rattus|NP_001128070.1|H2A.P organism=Rattus norvegicus phylum=Chordata class=Mammalia +--------MSGKKSQEKACSDNKQTEDPSSRPEVQVPVNYVYRLLQEEQYTPCLGSTTSD +FLLAMLDYLTDYILEVVGSEA-NINSQQNISQDRERQRENDREPPQAFKNAPFSLFDEMP +GPRRNG +>Sus|XP_003135058.1|H2A.P organism=Sus scrofa phylum=Chordata class=Mammalia +--------MSGKQSPEGSY----EAPTHLTTTEPQVPVSFVDHLLQEDQYVHTLSSSTTH +FLFSVLEYLTDYILDLVDTKA-NTGRMQMTPQDVERAVDSNAEPHRQVKDTAFALFDEMP +GSRRNG +>Homo|NP_036406.1|H2A.P_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +--------MSEKKNCKNSSTNNNQTQDP-SRNELQVPRSFVDRVVQDERDVQSQSSSTIN +TLLTLLDCLADYIMERVGLEASNNGSMRNTSQDREREVDNNREPHSAESDVTRFLFDEMP +KSRKND +>Mus|NP_080372.1|H2A.P_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +--------MSEKKSQEKPCSDNNQIEDPSSRPEVQVPVNYVYRILQEEQYTPCIGSTTSD +FLLAMLDYLTDYILEVVGSEA-NINNQQNISQDRERQRDNDREPSRGFKNAPFSLFDEMP +GPRRNG diff --git a/CURATED_SET/draft_seeds/H2A.P_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2A.P_(Homo_sapiens).fasta new file mode 100644 index 0000000..64e0cc9 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.P_(Homo_sapiens).fasta @@ -0,0 +1,3 @@ +>Homo|NP_036406.1|H2A.P_(Homo_sapiens) organism=Homo 
sapiens phylum=Chordata class=Mammalia +MSEKKNCKNSSTNNNQTQDPSRNELQVPRSFVDRVVQDERDVQSQSSSTINTLLTLLDCL +ADYIMERVGLEASNNGSMRNTSQDREREVDNNREPHSAESDVTRFLFDEMPKSRKND diff --git a/CURATED_SET/draft_seeds/H2A.P_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/H2A.P_(Mus_musculus).fasta new file mode 100644 index 0000000..c5c7a2d --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.P_(Mus_musculus).fasta @@ -0,0 +1,3 @@ +>Mus|NP_080372.1|H2A.P_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MSEKKSQEKPCSDNNQIEDPSSRPEVQVPVNYVYRILQEEQYTPCIGSTTSDFLLAMLDY +LTDYILEVVGSEANINNQQNISQDRERQRDNDREPSRGFKNAPFSLFDEMPGPRRNG diff --git a/CURATED_SET/draft_seeds/H2A.P_only.fasta b/CURATED_SET/draft_seeds/H2A.P_only.fasta new file mode 100644 index 0000000..0140acf --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.P_only.fasta @@ -0,0 +1,32 @@ +>Ailuropoda|XP_002913536.1|H2A.P organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +--------MSGKKSHESSY----QTQAHLITTELQVPVSYVDRLLQENQYNHPLSSSTTD +FLLTMLDYLTDYILDVVGTEA-NNSNMPTAPQDVERAVDSSGEPYHRSKDTAFTLFDEMP +GSRRNG +>Canis|XP_005641297.1|H2A.P organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MEPNPANIMSGNKNHESSN----QTQAHLVTTELQFPVSYVDRLLQEDQRTHCLSSTSTE +FLLAMLDSLTDYILERVGTEA-NNNNMQTAPQDVERAVGSNREPQQCLKDTAFTLFDEMP +RSRRNG +>Cricetulus|XP_003508203.1|H2A.P organism=Cricetulus griseus phylum=Chordata class=Mammalia +--------MSGKKNQGKSCSDNKKMEDPSSKPEVQIPVNYVYHLLQEEQYTPCLGSTTSD +FLLAMLDYITDYILEVVGSEA-NINSQQDIPQDRERQGDNDHDHSHAFKNAPFSLFDEMP +GPRRNG +>Heterocephalus|EHB10562.1|H2A.P organism=Heterocephalus glaber phylum=Chordata class=Mammalia +-------MMCEQKSQYGSYKDNNQQEDPASRPEQQLPVSDIYCILHEE-YNPYF-SSTSD +LLLAMLESLTDYILTLVGSEG-NNVGMPTNPQDGEREMDNNHEHPPIIPDVSFSFSDEMP +GSRKKG +>Macaca|EHH30639.1|H2A.P organism=Macaca mulatta phylum=Chordata class=Mammalia +--------MSEKNNRKNSSANNNQIQDR-SRNELRVPMSFVDRVVQDEQDAQSQSSSTIN +ILLTLLDCLADYIMEQVGLEAINNGRMRNTSQDGEREGDNHHEPHRTESDGTRFVFDEMP +KSGKND +>Pan|XP_003317470.2|H2A.P organism=Pan 
troglodytes phylum=Chordata class=Mammalia +--------MSEKKNCKNSSTNNNQTQDP-SRNELQVPMSFVDRVVQDEQDVQSQSSSTIN +TLLTLLDCLADYIMERVGLEASNNGSMRNTSQDREREVDNNREPHSAESDVTRFLFDEMP +KSRKND +>Rattus|NP_001128070.1|H2A.P organism=Rattus norvegicus phylum=Chordata class=Mammalia +--------MSGKKSQEKACSDNKQTEDPSSRPEVQVPVNYVYRLLQEEQYTPCLGSTTSD +FLLAMLDYLTDYILEVVGSEA-NINSQQNISQDRERQRENDREPPQAFKNAPFSLFDEMP +GPRRNG +>Sus|XP_003135058.1|H2A.P organism=Sus scrofa phylum=Chordata class=Mammalia +--------MSGKQSPEGSY----EAPTHLTTTEPQVPVSFVDHLLQEDQYVHTLSSSTTH +FLFSVLEYLTDYILDLVDTKA-NTGRMQMTPQDVERAVDSNAEPHRQVKDTAFALFDEMP +GSRRNG diff --git a/CURATED_SET/draft_seeds/H2A.Q.fasta b/CURATED_SET/draft_seeds/H2A.Q.fasta new file mode 100644 index 0000000..a6534f3 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.Q.fasta @@ -0,0 +1,84 @@ +>Ammotragus|HISTDB_H2A_Q_2|H2A.Q organism=Ammotragus lervia phylum=Chordata class=Mammalia +MSGNRSYHSSDGFRKHILPCTMKTKLQFSKSHVDHHLQENQDAQNQNLTNQIYLSAILKY +LSSNALKLVSNETQSDCRIH-------KAMDNHPQLHHAFDKDAKSQVHEM- +>Bison|HISTDB_H2A_Q_3|H2A.Q organism=Bison bison bison phylum=Chordata class=Mammalia +MSGNRSYHSSDGLRKRTVPCTMKTKLQFSKSHVDHHLQGNQDAHNQNLTTQIYLSAILKN +VSSNVLKLVSDETQSNCKIH-------RAKHNHPQLQHVFDKDAKSQVHEMF +>Bos|HISTDB_H2A_Q_14|H2A.Q organism=Bos mutus phylum=Chordata class=Mammalia +MSGNRSYHSSDGLRKRTVPCTMKTKLQFSKSHVDHHLQGNQDAHNQNLTTQIYLSAILKN +VSSNVLKLVSDETQSNCKIH-------RAKHNHPQLQHVFDKDAKSQVHEMF +>Bos|HISTDB_H2A_Q_16|H2A.Q organism=Bos taurus phylum=Chordata class=Mammalia +MSGNRSYHSSDGLKKRTVPCTMKTKLQFSKSHVDHHLQGNQDAHNQNLTTQIYLSAILKN +VSSNVLKLVSDETQSNCKIH-------RAKHNHPQLQHVFDKDAKSQVHEMF +>Camelus|XP_010950849.1|H2A.Q organism=Camelus bactrianus phylum=Chordata class=Mammalia +MSGNRSCQSSCWLRKQTFSCSTKNKPLFPKSHADHLLQENHFAQQLNLPTQVFLSAILKY +VTSNVLEVVGNKTHSNCRIQ-------KAVDNDLQLSHLFEEDTNSQARETF +>Camelus|XP_010995375.1|H2A.Q organism=Camelus dromedarius phylum=Chordata class=Mammalia +MSGNRSCQSSCWLRKQTFSCSTKNKPLFPKSHADHLLQENHFAQQLNLPTQVFLSAILKY 
+VTSNVLEVVGNKTHSNCRIQ-------KAVDNDLQLSHLFEEDTNSQARETF +>Canis|XP_013966888.1|H2A.Q organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MSGKRSSQNSCRLGKQTLSSSTKTKLKFSVSHEDHLLQENHPAQHLRFSSQVCLSAILKY +VATNILELVGNEAHNDCRVQ-------RAVNNNMQSSHLFEDDTTSQVSEMF +>Ceratotherium|HISTDB_H2A_Q_13|H2A.Q organism=Ceratotherium simum phylum=Chordata class=Mammalia +MSEKRSCQNSSRLKKQTFSCSTKAKLHFPVSHMDRHQQENHSAQQLSLSTPVFLSAIRKY +VTNNILELVGNESHNNRRIR-------RAVDNAEQLSHLFEDDNQFSG---- +>Cercocebus|HISTDB_H2A_Q_11|H2A.Q organism=Cercocebus atys phylum=Chordata class=Mammalia +MFWKRRQRRSYRCRNQTFSYSIKAKQQFPLSCVHCLLWKNHCPRPE-------LVHYFQI +LGGNILELMGNKVHKNYRMHITPKYVERAVDNNSLISHLFEGDTN------- +>Cercopithecus|HISTDB_H2A_Q_0|H2A.Q organism=Cercopithecus aethiops phylum=Chordata class=Mammalia +MFWKRRQRRSYRCRNQTFSYSIKAKQQFPSSCVHCLLWKNHCPRPE-------LVHYFQI +LGGNILELMGNKVHKTYRMHITPKYVERAVDNNSLISHLFEGDTN------- +>Equus|HISTDB_H2A_Q_17|H2A.Q organism=Equus caballus phylum=Chordata class=Mammalia +MSGKRSCQN-YKLMKQTFSCSAKTKLQFPVSHVDCLQQENHSAQHLSLSTQVFLPAILKY +VTNNILEWVGNEAHNSCRIR-------KAVANNQQLSHLFEDDTDSQVNEMF +>Giraffa|HISTDB_H2A_Q_7|H2A.Q organism=Giraffa camelopardalis tippelskirchi phylum=Chordata class=Mammalia +MSGNRSYHSSDGLKKRTLPCTMKTKLQFSKSHVDHLLQENQAAQNQNLTTQIFLSAILRH +MSSNILKLMSNETQSNCRIH-------RAMDNPTQLHHVFDNDAKSQVHEMF +>Mandrillus|HISTDB_H2A_Q_5|H2A.Q organism=Mandrillus leucophaeus phylum=Chordata class=Mammalia +MFWKRRQRRSYRCRNQTFSYSIKAKQQFPLSCVHCLLWKNHCPRPE-------LVHYFQI +LGGNILELMGNKVHKNYRMHITPKYVERAVDNNSLISHLFEGDTN------- +>Mustela|HISTDB_H2A_Q_6|H2A.Q organism=Mustela putorius furo phylum=Chordata class=Mammalia +MSGKRSSQTSYRLGKQTSSCSTKTKLQVPVSHVEPILQEHQPVQNLSFSSQVCLSAILKY +VATNILELVGNEAQHNCRVQ-------RAMNNNMQNSHLFEDDTTSQL---- +>Nasalis|HISTDB_H2A_Q_21|H2A.Q organism=Nasalis larvatus phylum=Chordata class=Mammalia +MFWKRRQRRSYRCRNQTFSYSIKAKQQFPLSCVHCLLWKNHCPRPE-------LVHYFQI +LGGNILELMGNKVHKNYRMHITPKYVERAVDNNSLISHLFEGDTNSG----- +>Nomascus|HISTDB_H2A_Q_22|H2A.Q 
organism=Nomascus leucogenys phylum=Chordata class=Mammalia +MFWKRRQRSSYRCRNQTFSYSIRAKQQFPLSCVHCLLWKNHCPRPE-------LVHYFQI +LGGNILELMGNKVHKNYRMHITPKYVERVVDNNLLLSHLFEGDTNSG----- +>Odobenus|HISTDB_H2A_Q_12|H2A.Q organism=Odobenus rosmarus divergens phylum=Chordata class=Mammalia +MSGKRSGRSSYRLGKQTSSCSTKTKLQFSMSHEEPLLQENHPAQKLSFSRQVCLSAILKY +VATNILELVGNKAHHNCRVQ-------TAMDHDMQISHLFEDDTTSQVSEMF +>Okapia|HISTDB_H2A_Q_9|H2A.Q organism=Okapia johnstoni phylum=Chordata class=Mammalia +MSGNRSYHSSDGLKKHTLPCTMKTKLQFSKSHVDHLLRENQAAQNQNLTTQIFLSAILKY +TSSNILKLVSNETQSNCRIH-------RAMDNPTQLHHVFDNGAKSQVHEIF +>Ovis|HISTDB_H2A_Q_8|H2A.Q organism=Ovis aries musimon phylum=Chordata class=Mammalia +MSGNRSYHSSDGFRKHILPCTMKTKLQFSKSHVDHHLQENQDAQNQNLTNQIYLSAILKY +LSSNVLKLVSNETQSDCRIH-------KAMDNHPQLHHAFDKDAKSQVHEM- +>Pan|HISTDB_H2A_Q_4|H2A.Q organism=Pan paniscus phylum=Chordata class=Mammalia +MFWKRRQRSSYRCRNQTFSYSIRAKQQFLLSCVHCLLWKNHCPRPE-------LVHYFQI +LGGNILELMGNKVHKNYRMHITPKYVERVVDNNPLLSHLFEGDTNSG----- +>Pan|HISTDB_H2A_Q_15|H2A.Q organism=Pan troglodytes phylum=Chordata class=Mammalia +MFWKRRQRSSYRCRNQTFSYSIRAKQQFLLSCVHCLLWKNHCPRPE-------LVHYFQI +LGGNILELMGNKVHKNYRMHITPKYVERVVDNNPLLSHLFEGDRNSG----- +>Pantholops|HISTDB_H2A_Q_1|H2A.Q organism=Pantholops hodgsonii phylum=Chordata class=Mammalia +MSGNRSYHSSDGFRKHILPCTMKTKLQFSKSHVDHHLQENQDAQNQNLTNQIYLSAILKY +LSSNVLKLVSNETQSNCRIH-------KAMDNHPQLHHVFDKDAKSQVHEM- +>Papio|HISTDB_H2A_Q_18|H2A.Q organism=Papio anubis phylum=Chordata class=Mammalia +MFWKRRQRRSYRCRNQTFSYSIKAKQQFPLSCVHCLLWKNHCPRQE-------LVHYFQI +LGGNILELMGNKVHKNYRMHITPKYVERAVDNNSLISHLFEGDTN------- +>Pongo|HISTDB_H2A_Q_19|H2A.Q organism=Pongo pygmaeus abelii phylum=Chordata class=Mammalia +MFWKRRQRSSYRCRNQTFSYSIRAKQQFPLCCVHCLLWKDHCPRPE-------LVHYFQI +LGGNILELMGNKVHKNYRMHITPKYVERVVDNNPLLSHLL------------ +>Pteropus|ELK02218.1|H2A.Q organism=Pteropus alecto phylum=Chordata class=Mammalia +MSGKRSCLN-YKLKKQTLSCTTKTKLHLPVSHEDSLLQGNNSARNLRFSTQVFLSVILKY 
+VTTNILELVDNEAHNNCCVQ-------RAVDKNPQLGQHFKNGNNSQVDEMF +>Sus|HISTDB_H2A_Q_20|H2A.Q organism=Sus scrofa phylum=Chordata class=Mammalia +MSANRSCQNSCGLRKHTLSRSTKTKLQFPESQGGPLLQENHSAQHLNLSTRVFLSAILKY +VTANVLELVGNETPSNCRIQ-------RTVGNNPQLSQLFENETDPQVREMF +>Ursus|HISTDB_H2A_Q_10|H2A.Q organism=Ursus maritimus phylum=Chordata class=Mammalia +MSGKRSGPSSYRLGKQTSSCFPKSKLQFPVSHEEPLLQEDHPAQQLSLSSQVCLSAILKY +VATNILELVGNEAQHNCRVQ-------RAVNNNMQSGHLFKDDAASQVSEMF +>Vicugna|XP_015107649.1|H2A.Q organism=Vicugna pacos phylum=Chordata class=Mammalia +MSGNRSCQSSCWLRKQTFSCSTKNKPLFPKSHADHLLQENHFAQQLNLSTQVFLSAILKY +VTSNVLEVVGNKSHSNCRIQ-------KAADNDLQLSHLFEEDTNSQARETF diff --git a/CURATED_SET/draft_seeds/H2A.R.fasta b/CURATED_SET/draft_seeds/H2A.R.fasta new file mode 100644 index 0000000..1f2074b --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.R.fasta @@ -0,0 +1,20 @@ +>Monodelphis|XP_001380078.1|H2A.R organism=Monodelphis domestica phylum=Chordata class=Mammalia +-----MSEKRSHPGPSHPRSRTRSRSSRAQLQFPVSRVDRFLRQGHYAQRLASGAPVFLA +AVLEYLTAEILELAGNAARDNQKSRIAPRHVQLAVRNDAELNQLFGDVTISQGGVLPRIH +SELLQSVNKAQSSRNLGGNSFIQTVKTK +>Monodelphis|XP_003341803.1|H2A.R organism=Monodelphis domestica phylum=Chordata class=Mammalia +-----MPEKRSHHGSLNTHNQIRSRSSRAQLQFPVSRVDRFLRQGHYAQRLASSAPVFLA +AVLEYLTAEILELAGNAARDNKKTRIAPCHVQLAVRNDVELNQLFGHVTISQGGVLPRIH +PELVQPATGGRSSQSHVGRNHNYSVKAK +>Ornithorhynchus|XP_001519563.1|H2A.R organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +MAARVPSAEGSPSGPRRSGPR-RSRSSRAQLRFSVSLVDRFLRRGRYSRRVAEGTPVFLA +AVLEYLTAELLELAGHTAGAHRRQRIAPVHLRQAVRDDPELDRLFGDIVSSPGAGLPRLH +SALLKPWT-VRTWRGRTVSFGEDPGHPR +>Sarcophilus|XP_003762607.1|H2A.R organism=Sarcophilus harrisii phylum=Chordata class=Mammalia +-----MSEKRSHPGPSHPRARTRSRSSRAQLQFPVSRVDRFLRQGHYAQRLASGAPVFLA +AVLEYLTAEILELAGNAARDNQKSRIAPRHVQLAVRNDAELNQLFGDVTISQGGVLPRIH +SELLQSVNKAQGSRNRGGNSCIQSVKTK +>Sarcophilus|XP_012399201.1|H2A.R organism=Sarcophilus harrisii phylum=Chordata class=Mammalia 
+-----MPEKRCHQGPLPPRARTRSRSSRAQLQFPVSRVDRFLRQGHYAQRLASGAPVFLA +AVLEYLTAEILELAGNAARDNQKTRIAPCHVQLAVRNDAELNQLFGHVTISQGAVLPRIH +SELLQPTSKARSSQCHVGQSHTYSMKAK diff --git a/CURATED_SET/draft_seeds/H2A.W.fasta b/CURATED_SET/draft_seeds/H2A.W.fasta new file mode 100644 index 0000000..fc455f4 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.W.fasta @@ -0,0 +1,36 @@ +>Arabidopsis|NP_198119.1|H2A.W organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MESSQ--ATTK-PTRGAGGRK--GGDRKKSVSKSVKAGLQFPVGRIARYLKKGRYALRYG +SGAPVYLAAVLEYLAAEVLELAGNAARDNKKNRINPRHLCLAIRNDEELGRLLHGVTIAS +GGVLPNINPVLLPKKSTASSS--------QAEKASATKSPKKA------- +>Arabidopsis|NP_200795.1|H2A.W organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MESTG-----K-VKKAFGGRKPPGAPKTKSVSKSMKAGLQFPVGRITRFLKKGRYAQRLG +GGAPVYMAAVLEYLAAEVLELAGNAARDNKKSRIIPRHLLLAIRNDEELGKLLSGVTIAH +GGVLPNINSVLLPKKSATKPA-----EEKATKS--PVKSPKKA------- +>Zea|NP_001105357.1|H2A.W organism=Zea mays phylum=Streptophyta class=Magnoliopsida +MDSTGTGAGGK-GKKGAAGRKV-GGPRKKSVSRSVKAGLQFPVGRIGRYLKKGRYAQXVG +TGAPVYLAAVLEYLAAEVLELAGNAARDNKKTRIIPRHVLLAIRNDEELGKLLGGVTIAH +GGVLPNINPVLLPKKTAEKASSGGSKEAKSPKK--AAKSPKKA------- +>Zea|NP_001141182.1|H2A.W organism=Zea mays phylum=Streptophyta class=Magnoliopsida +MDVSG--AGGK-AKKGAAGRKA-GGPTKKSVSRSSRAGLQFPVSRVGRYLKKGRYAQRVG +TGAPVYLAAVLEYLAAEVLELAGNAARDNKKTRIIPRHVLLAIRNDEELGKLLAGVTIAH +GGVLPNIHTVLLPKKVAEKAA-------KEPKK--AAKSPKKA------- +>Zea|NP_001183143.1|H2A.W organism=Zea mays phylum=Streptophyta class=Magnoliopsida +MDASG--AGSK-AKKGAAGRKA-GGPRKKSVSRSVKAGLQFPVGRIGRYLKKGRYAQRVG +TGAPVYLAAVLEYLAAEVLELAGNAAKDNKKTRIVPRHVLLAIRNDVELGKLLAGVTIAH +GGVLPNINPVLLPKKVAEKASSGGSKESKSPKK--AAKSPKKAAKSPKKA +>Zea|NP_001183439.1|H2A.W organism=Zea mays phylum=Streptophyta class=Magnoliopsida +MDASG--AGSK-GKKGAAGRKA-GGPRKKSVTRSVKAGLQFPVGRIGRYLKKGRYAQRVG +TGAPVYLAAVLEYLAAEVLELAGNAAKDNKKTRIIPRHVLLAIRNDEELGKLLAGVTIAH +GGVLPNIHSVLLPKKAAEKAASGGSKEPKSPKK--GAKSPKKA------- +>Zea|NP_001183510.1|H2A.W organism=Zea 
mays phylum=Streptophyta class=Magnoliopsida +MDASAAGAGGK-AKKGAAGRKA-GGPRKKSVTRSVKAGLQFPVGRIGRYLKKGRYAQRVG +TGAPVYLAAVLEYLAAEVLELAGNAAKDNKKTRIIPRHVLLAIRNDEELGKLLSGVTIAH +GGVLPNINPVLLPKKTAEKAA---AKEAKSPKK--AAKSPKKA------- +>Zea|NP_001278724.1|H2A.W organism=Zea mays phylum=Streptophyta class=Magnoliopsida +MD-----AGAKVVKKAAAGRRGGGGPKKKPVSRSVKAGLQFPVGRIGRYLKQGRYSQRVG +TGAPVYLAAVLEYLAAELLELAGNAARDNKKNRIIPRHVLLAIRNDEELGKLLAGVTIAH +GGVLPNINPVLLPKKTAVAAAKEG-KEKKSPKKAAAAKSPKKVAAS---- +>Zea|NP_001281214.1|H2A.W organism=Zea mays phylum=Streptophyta class=Magnoliopsida +MDATGTGAGGK-AKKGAAGRKA-GGPRKKSVTRSVKAGLQFPVGRIGRYLKKGRYAQRVG +SGAPVYLAAVLEYLAAEVLELAGNAAKDNKKTRIVPRHVLLAIRNDEELGKLLTGVTIAH +GGVLPNINPVLLPKKTAEKASSGGSKEAKSPKK--AAKSPKKA------- diff --git a/CURATED_SET/draft_seeds/H2A.X.fasta b/CURATED_SET/draft_seeds/H2A.X.fasta new file mode 100644 index 0000000..3bc278e --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.X.fasta @@ -0,0 +1,92 @@ +>Apis|XP_624700.1|H2A.X_(Animals) organism=Apis mellifera phylum=Arthropoda class=Insecta +-MSGRGK----GGKAK-------------------GKAKTRSSRAGLQFPVGRIHRLLRK +GNYAERVGAGAPVYLAAVMEYLAAEVLELAGNAARDNKKTRIIPRHLQLAIRNDEELNKL +LSGVTIAQGGVLPNIQAVLLPKKTGT--G-------GSGKGDKASQEY +>Trichoplax|XP_002116274.1|H2A.X_(Animals) organism=Trichoplax adhaerens phylum=Placozoa class=Uniplacotomia +-MSGRGK----GGKAR-------------------NKAKSRSTRAGLQFPVGRVHRMLRK +GNYAERVGAGAPVYLAAVMEYLAAEILELAGNAARDNKKQRIVPRHLQLAIRNDEELNKL +LSGVTIAQGGVLPNIQAVLLPKKSKVPIA-------GSKKGSSQSQEY +>Danio|NP_957367.1|H2A.X_(Vertebrata) organism=Danio rerio phylum=Chordata class=Actinopteri +-MSGRGKT---GGKAR-------------------AKAKTRSSRAGLQFPVGRVHRLLRK +GNYAERVGAGAPVYLAAVLEYLTAEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKL +LGGVTIAQGGVLPNIQAVLLPKKTGQAAASSGK---SGKKGSSQSQEY +>Bos|NP_001073248.1|H2A.X_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +-MSGRGKT---GGKAR-------------------AKAKSRSSRAGLQFPVGRVHRLLRK +GHYAERVGAGAPVYLAAVLEYLTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKL 
+LGGVTIAQGGVLPNIQAVLLPKKTSATVGPKAP--AGGKKATQASQEY +>Monodelphis|XP_001370540.1|H2A.X_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +-MSGRGKT---GGKAR-------------------AKAKSRSSRAGLQFPVGRVHRLLRK +GHYAERVGAGAPVYLAAVLEYLTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKL +LGGVTIAQGGVLPNIQAVLLPKKSGAITGPKAPGSGGSKKSTQASQEY +>Rattus|NP_001102761.1|H2A.X_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +-MSGRGKT---GGKAR-------------------AKAKSRSSRAGLQFPVGRVHRLLRK +GHYAERVGAGAPVYLAAVLEYLTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKL +LGGVTIAQGGVLPNIQAVLLPKKTSATVGPKAP--AGGKKASQASQEY +>Homo|NP_002096.1|H2A.X_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-MSGRGKT---GGKAR-------------------AKAKSRSSRAGLQFPVGRVHRLLRK +GHYAERVGAGAPVYLAAVLEYLTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKL +LGGVTIAQGGVLPNIQAVLLPKKTSATVGPKAP--SGGKKATQASQEY +>Mus|NP_034566.1|H2A.X_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +-MSGRGKT---GGKAR-------------------AKAKSRSSRAGLQFPVGRVHRLLRK +GHYAERVGAGAPVYLAAVLEYLTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKL +LGGVTIAQGGVLPNIQAVLLPKKSSATVGPKAP--AVGKKASQASQEY +>Saccharomyces|NP_009552.1|H2A.X_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +--MSGGK----GGKAGS---------------A-AKASQSRSAKAGLTFPVGRVHRLLRR +GNYAQRIGSGAPVYLTAVLEYLAAEILELAGNAARDNKKTRIIPRHLQLAIRNDDELNKL +LGNVTIAQGGVLPNIHQNLLPKKS------------AKTAKA--SQEL +>Saccharomyces|NP_010511.3|H2A.X_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +--MSGGK----GGKAGS---------------A-AKASQSRSAKAGLTFPVGRVHRLLRR +GNYAQRIGSGAPVYLTAVLEYLAAEILELAGNAARDNKKTRIIPRHLQLAIRNDDELNKL +LGNVTIAQGGVLPNIHQNLLPKKS------------AKATKA--SQEL +>Schizosaccharomyces|NP_594421.1|H2A.X_(Fungi) organism=Schizosaccharomyces pombe phylum=Ascomycota class=Schizosaccharomycetes +--MSGGKS---GGKAA----------------V-AKSAQSRSAKAGLAFPVGRVHRLLRK +GNYAQRVGAGAPVYLAAVLEYLAAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKL 
+LGHVTIAQGGVVPNINAHLLPKQS------------GKGKP---SQEL +>Ustilago|XP_011387544.1|H2A.X_(Fungi) organism=Ustilago maydis 521 phylum=Basidiomycota class=Ustilaginomycetes +-MSSGGKS---GGKAGD---------------A-SSKAQSRSAKAGLQFPVGRIHRLLRK +GNYAQRVGAGAPVYLAAVLEYLAAEILELAGNAARDNKKSRIIPRHLQLAIRNDEELNKL +LGGVTISQGGVLPFIQSELLPAKS------------GKPKKAGGSQDI +>Arabidopsis|NP_172363.1|H2A.X_(Plants) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MSTGAGSGTTKGGRGKP---------------K-ATKSVSRSSKAGLQFPVGRIARFLKS +GKYAERVGAGAPVYLSAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSKL +LGSVTIANGGVLPNIHQTLLPSKVG-K---------NKGDIGSASQEF +>Arabidopsis|NP_175868.1|H2A.X_(Plants) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MSSGAGSGTTKGGRGKP---------------K-ATKSVSRSSKAGLQFPVGRIARFLKA +GKYAERVGAGAPVYLSAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSKL +LGSVTIANGGVLPNIHQTLLPSKVG-K---------NKGDIGSASQEF +>Oryza|NP_001066920.1|H2A.X_(Plants) organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +-MSSAGGG---GGRGKS---------------K-GSKSVSRSSKAGLQFPVGRIARYLKA +GKYAERVGAGAPVYLSAVLEYLAAEVLELAGNAARDNKKNRIVPRHIQLAVRNDEELSRL +LGAVTIAAGGVLPNIHQTLLPKKGG-K---------DKADIGSASQEF +>Zea|XP_008644553.1|H2A.X_(Plants) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +-MSSTG-G---GGRGKA---------------KPATKSVSRSSKAGLQFPVGRIARYLKA +GKYAERVGAGAPVYLSAVLEYLAAEVLELAGNAARDNKKNRIVPRHIQLAVRNDEELSKL +LGTVTIAAGGVMPNIHQTLLPKKAG-----------QKGDIGSASQEF +>Zea|XP_008662624.1|H2A.X_(Plants) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +-MSS---G---GGRGKP---------------K-GSKALSRSTKAGLQFPVGRIARYLKA +GKYAERVGGGAPVYLSAVLEYLAAEVLELAGNAARDNKKNRIVPRHIQLAVRNDEELSKL +LGAVTIAAGGVLPNIHQTLLPKKAGGK---------GKADIGSASQEF +>Cryptosporidium|XP_627120.1|H2A.X_(Protists) organism=Cryptosporidium parvum Iowa II phylum=Apicomplexa class=Conoidasida +-MSGKVT----SSGGRGGGK------------KTTRKTMSNSAKAGLQFPVGRVARYLKK +GRYAKRIGAAAPVYLAAVLEYLCAELLELAGNAARDAKKTRITPRQIQLAVRNDEELSKF 
+LGNVTIASGGVLPNIPTVLLPKKSK-----------SK--QG-NSQEF +>Toxoplasma|XP_002365290.1|H2A.X_(Protists) organism=Toxoplasma gondii ME49 phylum=Apicomplexa class=Conoidasida +-MSAKGA----GGRKK----------------TSSGKKVSRSAKAGLQFPVSRIGRYLKK +GRYAKRVGVGAPVYLAAVLEYLCAEILELAGNAARDHKKTRIIPRHIQLAVRNDEELSKF +LGGVTIANGGVMPHVHAVLLPKHSK-----------SKGKHG-VSQEF +>Tetrahymena|AAC37291.1|H2A.X_(Protists) organism=Tetrahymena thermophila phylum=Ciliophora class=Oligohymenophorea +-MSTTGK----GGKAK--GK------------TASSKQVSRSARAGLQFPVGRISRFLKH +GRYSERVGTGAPVYLAAVLEYLAAEVLELAGNAAKDNKKTRIVPRHILLAIRNDEELNKL +MANTTIADGGVLPNINPMLLPSKSK-----------KTESRGQASQDL +>Dictyostelium|XP_641587.1|H2A.X_(Protists) organism=Dictyostelium discoideum AX4 phylum=Evosea class=Eumycetozoa +MSETKPA----SSKPAAAAKPKKVIPRVSRTGEPKSKPESRSARAGITFPVSRVDRLLRE +GRFAPRVESTAPVYLAAVLEYLVFEILELAHNTCSISKKTRITPQHINWAVGNDLELNSL +FQHVTIAYGGVLPTPQQSTGEKKKKPS---------KKAAEG-SSQIY +>Giardia|XP_001704715.1|H2A.X_(Protists) organism=Giardia intestinalis phylum=Fornicata class= +-MSTKPV-------------------------KDNSKMKSRSARAGISFPIGRIHRHLRE +GRYAERISSDAPVYLAAVLENVVAEVFREACNHRDKKSQKRIVPNHILTALRKDKELATI +FANVTIREGGVARSAK-----EGRE-----------GKGSH--RSQDL +>Perkinsus|XP_002784006.1|H2A.X_(Protists) organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +-MSGKGK----GGRGKA-GK------------KSGSGAKSRSAKAGLQFPVGRIARYLKK +GRYAKRVGSGAPVYLAAVLEYLVAEILELAGNAARDHKKTRIIPRHIQLAVRNDEELNKF +LAGVTLASGGVLPNIHTTLLPKKSK-----------GKSFT--ASQEI diff --git a/CURATED_SET/draft_seeds/H2A.X_(Animals).fasta b/CURATED_SET/draft_seeds/H2A.X_(Animals).fasta new file mode 100644 index 0000000..08491fd --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.X_(Animals).fasta @@ -0,0 +1,32 @@ +>Apis|XP_624700.1|H2A.X_(Animals) organism=Apis mellifera phylum=Arthropoda class=Insecta +MSGRGK-GGKAKGKAKTRSSRAGLQFPVGRIHRLLRKGNYAERVGAGAPVYLAAVMEYLA +AEVLELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLSGVTIAQGGVLPNIQAVLLPKK +TGT--G-------GSGKGDKASQEY +>Trichoplax|XP_002116274.1|H2A.X_(Animals) 
organism=Trichoplax adhaerens phylum=Placozoa class=Uniplacotomia +MSGRGK-GGKARNKAKSRSTRAGLQFPVGRVHRMLRKGNYAERVGAGAPVYLAAVMEYLA +AEILELAGNAARDNKKQRIVPRHLQLAIRNDEELNKLLSGVTIAQGGVLPNIQAVLLPKK +SKVPIA-------GSKKGSSQSQEY +>Danio|NP_957367.1|H2A.X_(Vertebrata) organism=Danio rerio phylum=Chordata class=Actinopteri +MSGRGKTGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TGQAAASSGK---SGKKGSSQSQEY +>Bos|NP_001073248.1|H2A.X_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TSATVGPKAP--AGGKKATQASQEY +>Monodelphis|XP_001370540.1|H2A.X_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +SGAITGPKAPGSGGSKKSTQASQEY +>Rattus|NP_001102761.1|H2A.X_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TSATVGPKAP--AGGKKASQASQEY +>Homo|NP_002096.1|H2A.X_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TSATVGPKAP--SGGKKATQASQEY +>Mus|NP_034566.1|H2A.X_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +SSATVGPKAP--AVGKKASQASQEY diff --git a/CURATED_SET/draft_seeds/H2A.X_(Animals)_only.fasta b/CURATED_SET/draft_seeds/H2A.X_(Animals)_only.fasta new file mode 100644 index 0000000..ebdcc53 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.X_(Animals)_only.fasta @@ -0,0 +1,8 @@ +>Apis|XP_624700.1|H2A.X_(Animals) organism=Apis 
mellifera phylum=Arthropoda class=Insecta +MSGRGKGGKAKGKAKTRSSRAGLQFPVGRIHRLLRKGNYAERVGAGAPVYLAAVMEYLAA +EVLELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLSGVTIAQGGVLPNIQAVLLPKKT +GT--GGSGKGDKASQEY +>Trichoplax|XP_002116274.1|H2A.X_(Animals) organism=Trichoplax adhaerens phylum=Placozoa class=Uniplacotomia +MSGRGKGGKARNKAKSRSTRAGLQFPVGRVHRMLRKGNYAERVGAGAPVYLAAVMEYLAA +EILELAGNAARDNKKQRIVPRHLQLAIRNDEELNKLLSGVTIAQGGVLPNIQAVLLPKKS +KVPIAGSKKGSSQSQEY diff --git a/CURATED_SET/draft_seeds/H2A.X_(Fungi).fasta b/CURATED_SET/draft_seeds/H2A.X_(Fungi).fasta new file mode 100644 index 0000000..c6c5494 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.X_(Fungi).fasta @@ -0,0 +1,16 @@ +>Saccharomyces|NP_009552.1|H2A.X_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +-MSGGK-GGKAGSAAKASQSRSAKAGLTFPVGRVHRLLRRGNYAQRIGSGAPVYLTAVLE +YLAAEILELAGNAARDNKKTRIIPRHLQLAIRNDDELNKLLGNVTIAQGGVLPNIHQNLL +PKKSAKTAKA--SQEL +>Saccharomyces|NP_010511.3|H2A.X_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +-MSGGK-GGKAGSAAKASQSRSAKAGLTFPVGRVHRLLRRGNYAQRIGSGAPVYLTAVLE +YLAAEILELAGNAARDNKKTRIIPRHLQLAIRNDDELNKLLGNVTIAQGGVLPNIHQNLL +PKKSAKATKA--SQEL +>Schizosaccharomyces|NP_594421.1|H2A.X_(Fungi) organism=Schizosaccharomyces pombe phylum=Ascomycota class=Schizosaccharomycetes +-MSGGKSGGKAA-VAKSAQSRSAKAGLAFPVGRVHRLLRKGNYAQRVGAGAPVYLAAVLE +YLAAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGHVTIAQGGVVPNINAHLL +PKQSGKGKP---SQEL +>Ustilago|XP_011387544.1|H2A.X_(Fungi) organism=Ustilago maydis 521 phylum=Basidiomycota class=Ustilaginomycetes +MSSGGKSGGKAGDASSKAQSRSAKAGLQFPVGRIHRLLRKGNYAQRVGAGAPVYLAAVLE +YLAAEILELAGNAARDNKKSRIIPRHLQLAIRNDEELNKLLGGVTISQGGVLPFIQSELL +PAKSGKPKKAGGSQDI diff --git a/CURATED_SET/draft_seeds/H2A.X_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2A.X_(Homo_sapiens).fasta new file mode 100644 index 0000000..ba77414 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.X_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ 
+>Homo|NP_002096.1|H2A.X_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TSATVGPKAPSGGKKATQASQEY diff --git a/CURATED_SET/draft_seeds/H2A.X_(Mammalia).fasta b/CURATED_SET/draft_seeds/H2A.X_(Mammalia).fasta new file mode 100644 index 0000000..31f4308 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.X_(Mammalia).fasta @@ -0,0 +1,20 @@ +>Bos|NP_001073248.1|H2A.X_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TSATVGPKAP--AGGKKATQASQEY +>Monodelphis|XP_001370540.1|H2A.X_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +SGAITGPKAPGSGGSKKSTQASQEY +>Rattus|NP_001102761.1|H2A.X_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TSATVGPKAP--AGGKKASQASQEY +>Homo|NP_002096.1|H2A.X_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TSATVGPKAP--SGGKKATQASQEY +>Mus|NP_034566.1|H2A.X_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +SSATVGPKAP--AVGKKASQASQEY diff --git a/CURATED_SET/draft_seeds/H2A.X_(Mammalia)_only.fasta b/CURATED_SET/draft_seeds/H2A.X_(Mammalia)_only.fasta new file mode 100644 index 0000000..e488cbb --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.X_(Mammalia)_only.fasta @@ -0,0 +1,12 @@ 
+>Bos|NP_001073248.1|H2A.X_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TSATVGPKAP--AGGKKATQASQEY +>Monodelphis|XP_001370540.1|H2A.X_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +SGAITGPKAPGSGGSKKSTQASQEY +>Rattus|NP_001102761.1|H2A.X_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TSATVGPKAP--AGGKKASQASQEY diff --git a/CURATED_SET/draft_seeds/H2A.X_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/H2A.X_(Mus_musculus).fasta new file mode 100644 index 0000000..49e3d81 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.X_(Mus_musculus).fasta @@ -0,0 +1,4 @@ +>Mus|NP_034566.1|H2A.X_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +SSATVGPKAPAVGKKASQASQEY diff --git a/CURATED_SET/draft_seeds/H2A.X_(Plants).fasta b/CURATED_SET/draft_seeds/H2A.X_(Plants).fasta new file mode 100644 index 0000000..9614530 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.X_(Plants).fasta @@ -0,0 +1,20 @@ +>Arabidopsis|NP_172363.1|H2A.X_(Plants) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MSTGAGSGTTKGGRGKPK-ATKSVSRSSKAGLQFPVGRIARFLKSGKYAERVGAGAPVYL +SAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSKLLGSVTIANGGVLPNI +HQTLLPSKVG-KNKGDIGSASQEF +>Arabidopsis|NP_175868.1|H2A.X_(Plants) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MSSGAGSGTTKGGRGKPK-ATKSVSRSSKAGLQFPVGRIARFLKAGKYAERVGAGAPVYL +SAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSKLLGSVTIANGGVLPNI +HQTLLPSKVG-KNKGDIGSASQEF 
+>Oryza|NP_001066920.1|H2A.X_(Plants) organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +-MSSAGGG---GGRGKSK-GSKSVSRSSKAGLQFPVGRIARYLKAGKYAERVGAGAPVYL +SAVLEYLAAEVLELAGNAARDNKKNRIVPRHIQLAVRNDEELSRLLGAVTIAAGGVLPNI +HQTLLPKKGG-KDKADIGSASQEF +>Zea|XP_008644553.1|H2A.X_(Plants) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +-MSSTG-G---GGRGKAKPATKSVSRSSKAGLQFPVGRIARYLKAGKYAERVGAGAPVYL +SAVLEYLAAEVLELAGNAARDNKKNRIVPRHIQLAVRNDEELSKLLGTVTIAAGGVMPNI +HQTLLPKKAG--QKGDIGSASQEF +>Zea|XP_008662624.1|H2A.X_(Plants) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +-MSS---G---GGRGKPK-GSKALSRSTKAGLQFPVGRIARYLKAGKYAERVGGGAPVYL +SAVLEYLAAEVLELAGNAARDNKKNRIVPRHIQLAVRNDEELSKLLGAVTIAAGGVLPNI +HQTLLPKKAGGKGKADIGSASQEF diff --git a/CURATED_SET/draft_seeds/H2A.X_(Protists).fasta b/CURATED_SET/draft_seeds/H2A.X_(Protists).fasta new file mode 100644 index 0000000..f6da881 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.X_(Protists).fasta @@ -0,0 +1,24 @@ +>Cryptosporidium|XP_627120.1|H2A.X_(Protists) organism=Cryptosporidium parvum Iowa II phylum=Apicomplexa class=Conoidasida +-MSGKVTSSGGRGGGK------------KTTRKTMSNSAKAGLQFPVGRVARYLKKGRYA +KRIGAAAPVYLAAVLEYLCAELLELAGNAARDAKKTRITPRQIQLAVRNDEELSKFLGNV +TIASGGVLPNIPTVLLPKKSK--SK--QG-NSQEF +>Toxoplasma|XP_002365290.1|H2A.X_(Protists) organism=Toxoplasma gondii ME49 phylum=Apicomplexa class=Conoidasida +-MSAKGAGGRKK----------------TSSGKKVSRSAKAGLQFPVSRIGRYLKKGRYA +KRVGVGAPVYLAAVLEYLCAEILELAGNAARDHKKTRIIPRHIQLAVRNDEELSKFLGGV +TIANGGVMPHVHAVLLPKHSK--SKGKHG-VSQEF +>Tetrahymena|AAC37291.1|H2A.X_(Protists) organism=Tetrahymena thermophila phylum=Ciliophora class=Oligohymenophorea +-MSTTGKGGKAK--GK------------TASSKQVSRSARAGLQFPVGRISRFLKHGRYS +ERVGTGAPVYLAAVLEYLAAEVLELAGNAAKDNKKTRIVPRHILLAIRNDEELNKLMANT +TIADGGVLPNINPMLLPSKSK--KTESRGQASQDL +>Dictyostelium|XP_641587.1|H2A.X_(Protists) organism=Dictyostelium discoideum AX4 phylum=Evosea class=Eumycetozoa 
+MSETKPASSKPAAAAKPKKVIPRVSRTGEPKSKPESRSARAGITFPVSRVDRLLREGRFA +PRVESTAPVYLAAVLEYLVFEILELAHNTCSISKKTRITPQHINWAVGNDLELNSLFQHV +TIAYGGVLPTPQQSTGEKKKKPSKKAAEG-SSQIY +>Giardia|XP_001704715.1|H2A.X_(Protists) organism=Giardia intestinalis phylum=Fornicata class= +-MSTKPV---------------------KDNSKMKSRSARAGISFPIGRIHRHLREGRYA +ERISSDAPVYLAAVLENVVAEVFREACNHRDKKSQKRIVPNHILTALRKDKELATIFANV +TIREGGVARSAK-----EGRE--GKGSH--RSQDL +>Perkinsus|XP_002784006.1|H2A.X_(Protists) organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +-MSGKGKGGRGKA-GK------------KSGSGAKSRSAKAGLQFPVGRIARYLKKGRYA +KRVGSGAPVYLAAVLEYLVAEILELAGNAARDHKKTRIIPRHIQLAVRNDEELNKFLAGV +TLASGGVLPNIHTTLLPKKSK--GKSFT--ASQEI diff --git a/CURATED_SET/draft_seeds/H2A.X_(Vertebrata).fasta b/CURATED_SET/draft_seeds/H2A.X_(Vertebrata).fasta new file mode 100644 index 0000000..dfdcdb6 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.X_(Vertebrata).fasta @@ -0,0 +1,24 @@ +>Danio|NP_957367.1|H2A.X_(Vertebrata) organism=Danio rerio phylum=Chordata class=Actinopteri +MSGRGKTGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TGQAAASSGK---SGKKGSSQSQEY +>Bos|NP_001073248.1|H2A.X_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TSATVGPKAP--AGGKKATQASQEY +>Monodelphis|XP_001370540.1|H2A.X_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +SGAITGPKAPGSGGSKKSTQASQEY +>Rattus|NP_001102761.1|H2A.X_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TSATVGPKAP--AGGKKASQASQEY +>Homo|NP_002096.1|H2A.X_(Homo_sapiens) organism=Homo sapiens 
phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TSATVGPKAP--SGGKKATQASQEY +>Mus|NP_034566.1|H2A.X_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MSGRGKTGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGHYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +SSATVGPKAP--AVGKKASQASQEY diff --git a/CURATED_SET/draft_seeds/H2A.X_(Vertebrata)_only.fasta b/CURATED_SET/draft_seeds/H2A.X_(Vertebrata)_only.fasta new file mode 100644 index 0000000..cfe6c53 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.X_(Vertebrata)_only.fasta @@ -0,0 +1,4 @@ +>Danio|NP_957367.1|H2A.X_(Vertebrata) organism=Danio rerio phylum=Chordata class=Actinopteri +MSGRGKTGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TGQAAASSGKSGKKGSSQSQEY diff --git a/CURATED_SET/draft_seeds/H2A.X_only.fasta b/CURATED_SET/draft_seeds/H2A.X_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H2A.Z.1_(Chordata).fasta b/CURATED_SET/draft_seeds/H2A.Z.1_(Chordata).fasta new file mode 100644 index 0000000..9562c54 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.Z.1_(Chordata).fasta @@ -0,0 +1,12 @@ +>Danio|NP_001036788.1|H2A.Z.1_(Chordata) organism=Danio rerio phylum=Chordata class=Actinopteri +MAGGKAGKDSGKAKTKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTV +>Gallus|NP_001026545.1|H2A.Z.1_(Chordata) organism=Gallus gallus phylum=Chordata class=Aves +MAGGKAGKDSGKTKTKAVSRSQRAGLQFPVGRIHRHLKSRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTV +>Homo|NP_002097.1|H2A.Z.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKTKAVSRSQRAGLQFPVGRIHRHLKSRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTV diff 
--git a/CURATED_SET/draft_seeds/H2A.Z.1_(Chordata)_only.fasta b/CURATED_SET/draft_seeds/H2A.Z.1_(Chordata)_only.fasta new file mode 100644 index 0000000..2fe8e51 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.Z.1_(Chordata)_only.fasta @@ -0,0 +1,8 @@ +>Danio|NP_001036788.1|H2A.Z.1_(Chordata) organism=Danio rerio phylum=Chordata class=Actinopteri +MAGGKAGKDSGKAKTKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTV +>Gallus|NP_001026545.1|H2A.Z.1_(Chordata) organism=Gallus gallus phylum=Chordata class=Aves +MAGGKAGKDSGKTKTKAVSRSQRAGLQFPVGRIHRHLKSRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTV diff --git a/CURATED_SET/draft_seeds/H2A.Z.1_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2A.Z.1_(Homo_sapiens).fasta new file mode 100644 index 0000000..90be2b3 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.Z.1_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_002097.1|H2A.Z.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKTKAVSRSQRAGLQFPVGRIHRHLKSRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTV diff --git a/CURATED_SET/draft_seeds/H2A.Z.1_(Primates).fasta b/CURATED_SET/draft_seeds/H2A.Z.1_(Primates).fasta new file mode 100644 index 0000000..90be2b3 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.Z.1_(Primates).fasta @@ -0,0 +1,4 @@ +>Homo|NP_002097.1|H2A.Z.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKTKAVSRSQRAGLQFPVGRIHRHLKSRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTV diff --git a/CURATED_SET/draft_seeds/H2A.Z.1_(Primates)_only.fasta b/CURATED_SET/draft_seeds/H2A.Z.1_(Primates)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H2A.Z.2.s1_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2A.Z.2.s1_(Homo_sapiens).fasta new file mode 100644 index 
0000000..8357f00 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.Z.2.s1_(Homo_sapiens).fasta @@ -0,0 +1,12 @@ +>Homo|NP_036544.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA +>Homo|NP_958844.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +--------------------------MFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA +>Homo|NP_958924.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEV------------------------------------------------------ +-------- diff --git a/CURATED_SET/draft_seeds/H2A.Z.2.s1_(Primates).fasta b/CURATED_SET/draft_seeds/H2A.Z.2.s1_(Primates).fasta new file mode 100644 index 0000000..8357f00 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.Z.2.s1_(Primates).fasta @@ -0,0 +1,12 @@ +>Homo|NP_036544.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA +>Homo|NP_958844.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +--------------------------MFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA +>Homo|NP_958924.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEV------------------------------------------------------ +-------- diff --git a/CURATED_SET/draft_seeds/H2A.Z.2.s1_(Primates)_only.fasta b/CURATED_SET/draft_seeds/H2A.Z.2.s1_(Primates)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H2A.Z.2.s2_(Homo_sapiens).fasta 
b/CURATED_SET/draft_seeds/H2A.Z.2.s2_(Homo_sapiens).fasta new file mode 100644 index 0000000..5958e32 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.Z.2.s2_(Homo_sapiens).fasta @@ -0,0 +1,8 @@ +>Homo|NP_619541.1|H2A.Z.2.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGG------------ +--EKRRCS +>Homo|NP_958925.1|H2A.Z.2.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQ--------------------------------- +-----VLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA diff --git a/CURATED_SET/draft_seeds/H2A.Z.2.s2_(Primates).fasta b/CURATED_SET/draft_seeds/H2A.Z.2.s2_(Primates).fasta new file mode 100644 index 0000000..5958e32 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.Z.2.s2_(Primates).fasta @@ -0,0 +1,8 @@ +>Homo|NP_619541.1|H2A.Z.2.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGG------------ +--EKRRCS +>Homo|NP_958925.1|H2A.Z.2.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQ--------------------------------- +-----VLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA diff --git a/CURATED_SET/draft_seeds/H2A.Z.2.s2_(Primates)_only.fasta b/CURATED_SET/draft_seeds/H2A.Z.2.s2_(Primates)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H2A.Z.2_(Chordata).fasta b/CURATED_SET/draft_seeds/H2A.Z.2_(Chordata).fasta new file mode 100644 index 0000000..a8339c3 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.Z.2_(Chordata).fasta @@ -0,0 +1,28 @@ +>Danio|NP_705930.1|H2A.Z.2_(Chordata) organism=Danio rerio phylum=Chordata class=Actinopteri +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE 
+YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA +>Gallus|P02272.2|H2A.Z.2_(Chordata) organism=Gallus gallus phylum=Chordata class=Aves +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA +>Homo|NP_036544.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA +>Homo|NP_958844.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +--------------------------MFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA +>Homo|NP_958924.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEV------------------------------------------------------ +-------- +>Homo|NP_619541.1|H2A.Z.2.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGG------------ +--EKRRCS +>Homo|NP_958925.1|H2A.Z.2.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQ--------------------------------- +-----VLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA diff --git a/CURATED_SET/draft_seeds/H2A.Z.2_(Chordata)_only.fasta b/CURATED_SET/draft_seeds/H2A.Z.2_(Chordata)_only.fasta new file mode 100644 index 0000000..095efc5 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.Z.2_(Chordata)_only.fasta @@ -0,0 +1,8 @@ +>Danio|NP_705930.1|H2A.Z.2_(Chordata) organism=Danio rerio phylum=Chordata class=Actinopteri +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA +>Gallus|P02272.2|H2A.Z.2_(Chordata) organism=Gallus gallus 
phylum=Chordata class=Aves +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA diff --git a/CURATED_SET/draft_seeds/H2A.Z.2_(Primates).fasta b/CURATED_SET/draft_seeds/H2A.Z.2_(Primates).fasta new file mode 100644 index 0000000..6c2d7fe --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.Z.2_(Primates).fasta @@ -0,0 +1,20 @@ +>Homo|NP_036544.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA +>Homo|NP_958844.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +--------------------------MFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA +>Homo|NP_958924.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEV------------------------------------------------------ +-------- +>Homo|NP_619541.1|H2A.Z.2.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVYSAAILE +YLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGG------------ +--EKRRCS +>Homo|NP_958925.1|H2A.Z.2.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGGKAGKDSGKAKAKAVSRSQRAGLQ--------------------------------- +-----VLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHIHKSLIG +KKGQQKTA diff --git a/CURATED_SET/draft_seeds/H2A.Z.2_(Primates)_only.fasta b/CURATED_SET/draft_seeds/H2A.Z.2_(Primates)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H2A.Z.fasta b/CURATED_SET/draft_seeds/H2A.Z.fasta old mode 100755 new mode 100644 index f2b21d2..d3cd29a --- a/CURATED_SET/draft_seeds/H2A.Z.fasta +++ b/CURATED_SET/draft_seeds/H2A.Z.fasta @@ -1,145 +1,150 @@ 
->H2A.Z_Trypanosoma_brucei_brucei_TREU927_XP_846259.1 XP_846259.1 histone: H2A variant: H2A.Z organism: Trypanosoma brucei brucei TREU927 ------------------MSLTGDDAVPQAPLVGGVAMSPEQASALTGGKLGGKAVGPAHG -KGKGKGKGKRGGKTGGKAGRRDKMTRAARADLNFPVGRIHSRLKDGLNRKQRCGASAAIY -CAALLEYLTSEVIELAGAAAKAQKTERIKPRHLLLAIRGDEELNQIVNATIARGGVVPFV -HKS-----LEKKIIKKSKRGS----------- ->H2A.Z_Saccharomyces_cerevisiae_S288C_NP_014631.1 NP_014631.1 histone: H2A variant: H2A.Z organism: Saccharomyces cerevisiae S288C ---------------------------------------------MSGKAHGGKGKSG--- --------A----KDSGSL---RSQSSSARAGLQFPVGRIKRYLKRHATGRTRVGSKAAIY -LTAVLEYLTAEVLELAGNAAKDLKVKRITPRHLQLAIRGDDELDSLIRATIASGGVLPHI -NKA-----LLLKVEKKGSKK------------ ->H2A.Z_Perkinsus_marinus_ATCC_50983_XP_002776752.1 XP_002776752.1 histone: H2A variant: H2A.Z organism: Perkinsus marinus ATCC 50983 -MSTRKQTDVSLPVPVVTQ-------------------LAGGKGIKGAGLQGGKGGKGGKG -AKVHGGKG----KIGGKG---SSITRSARAGLQFPVGRVRRYLKDRATANCRVGSTAAVY -TAAILEYLTAEVLELAGNAAKDQKVKRITPRHLHLAIRGDDELDVLIRATIAGGGVVPYI -HQS-----LTVKTPYHKKKRVM---------- ->H2A.Z_Perkinsus_marinus_ATCC_50983_XP_002784054.1 XP_002784054.1 histone: H2A variant: H2A.Z organism: Perkinsus marinus ATCC 50983 -MSDLKKTDINKSVPVVTQ-------------------LAGGKGIKGAGLQGGKGGKGGKG -AKVHGGKG----KMGGKG---ASMTRSARAGLQFPVGRVRRYLKDRATANCRVGSTAAVY -TAAILEYLTAEVLELAGNAAKDQKVKRITPRHLHLAIRGDDELDVLIRATIAGGGVVPYI -HQS-----LTAKAPYHKKKRVM---------- ->H2A.Z_Perkinsus_marinus_ATCC_50983_XP_002784055.1 XP_002784055.1 histone: H2A variant: H2A.Z organism: Perkinsus marinus ATCC 50983 -MSDLKKTDINKSVPVVTQLGRLLDVASLSTPSSSLFCLAGGKGIKGAGLQGGKGGKGGKG -AKVHGGKG----KIGGKG---TSMTRSARAGLQFPVGRVRRYLKDRATANCRVGSTAAVY -TAAILEYLTAEVLELAGNAAKDQKVKRITPRHLHLAIRGDDELDVLIRATIAGGGVVPYI -HQS-----LTAKAPYHKKKRVM---------- ->H2A.Z_Schizosaccharomyces_pombe_NP_595630.3 NP_595630.3 histone: H2A variant: H2A.Z organism: Schizosaccharomyces pombe ------------------------------------------------MSGGGKGKHVG-- 
--------G----KGGSKIGERGQMSHSARAGLQFPVGRVRRFLKAKTQNNMRVGAKSAVY -SAAVLEYLTAEVLELAGNAAKDLKVKRITPRHLQLAIRGDEELDTLIRATIAGGGVLPHI -NKQ-----LLIRTKEKYPEEEEII-------- ->H2A.Z_Arabidopsis_thaliana_NP_193093.1 NP_193093.1 histone: H2A variant: H2A.Z organism: Arabidopsis thaliana -------------------------------------------------MVCNTN--IL-- --------K----DVSTKISAFENVRMIMVEGEMFQVARIHKQLKNRVSAHSSVGATDVVY -MTSILEYLTTEVLQLAENTSKDLKVKRITPRHLQLAIRGDEELDTLIKGTIIGGSVIPHI -H------------------------------- ->H2A.Z_Tetrahymena_thermophila_CAA33554.1 CAA33554.1 histone: H2A variant: H2A.Z organism: Tetrahymena thermophila -------------------------------------------------MAGGKGGKGGKG -GKG----G----KVGGAKNKKTPQSRSYKAGLQFPVGRIHRFLKGRVSAKNRVGATAAVY -AAAILEYLTAEVLELAGNASKDFKVRRITPRHLLLAIRGDEELDILIKATIAGGGVIPHI -HKA-----LLGKHSTKNRSSAKTAEPR----- ->H2A.Z_Toxoplasma_gondii_ME49_XP_002371743.1 XP_002371743.1 histone: H2A variant: H2A.Z organism: Toxoplasma gondii ME49 --------------------------------------------MDGAGKVGGKVGGKVGG -KVGGMGKGGKGKSGSGKG-KKAPLSRAARAGLQFPVGRVHRMLKSRISSEGRVGSTAAVY -ASAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDTLIKATIAGGGVIPHI -HKS-----LMTKGPSTQPMKKAKK-------- ->H2A.Z_Cryptosporidium_parvum_Iowa_II_XP_626045.1 XP_626045.1 histone: H2A variant: H2A.Z organism: Cryptosporidium parvum Iowa II --------------------------------------------------MDGATSSGKIG -GKVGGKVGGKGKAGSGKGSKKQPTSRAARAGLQFPVGRIQRMLKHRIPGDCRVGSTASVY -AAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHI -EKSLMGKALIGKKGKKGNMSP----------- ->H2A.Z_Zea_mays_NP_001141633.1 NP_001141633.1 histone: H2A variant: H2A.Z organism: Zea mays --------------------------------------------------MAGKGGKGLLA -AKTTAAKS----TDKDKDRKKAPVSRSSRAGLQFPVGRIHRQLKSRASAHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDTLIKGTIAGGGVIPHI -HKS-----LINKTAKE---------------- ->H2A.Z_Oryza_sativa_Japonica_Group_NP_001051232.1 NP_001051232.1 histone: H2A variant: H2A.Z organism: Oryza sativa Japonica Group 
--------------------------------------------------MAGKGGKGLLA -AKTTAAKS----AEKDKG-KKAPVSRSSRAGLQFPVGRIHRQLKQRTQANGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDTLIKGTIAGGGVIPHI -HKS-----LINKSSKE---------------- ->H2A.Z_Zea_mays_NP_001136523.1 NP_001136523.1 histone: H2A variant: H2A.Z organism: Zea mays --------------------------------------------------MAGKGGKGLLA -AKTTAAKS----AEKDKG-KKAPISRSSRAGLQFPVGRIHRQLKQRTQANGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDTLIKGTIAGGGVIPHI -HKS-----LINKSSKE---------------- ->H2A.Z_Arabidopsis_thaliana_NP_181415.1 NP_181415.1 histone: H2A variant: H2A.Z organism: Arabidopsis thaliana --------------------------------------------------MAGKGGKGLLA -AKTTAA-A----ANKDSV-KKKSISRSSRAGIQFPVGRIHRQLKQRVSAHGRVGATAAVY -TASILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDTLIKGTIAGGGVIPHI -HKS-----LVNKVTKD---------------- ->H2A.Z_Arabidopsis_thaliana_NP_191019.1 NP_191019.1 histone: H2A variant: H2A.Z organism: Arabidopsis thaliana --------------------------------------------------MAGKGGKGLVA -AKTMAA-N----KDKDKD-KKKPISRSARAGIQFPVGRIHRQLKTRVSAHGRVGATAAVY -TASILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDTLIKGTIAGGGVIPHI -HKS-----LINKTTKE---------------- ->H2A.Z_Arabidopsis_thaliana_NP_175683.1 NP_175683.1 histone: H2A variant: H2A.Z organism: Arabidopsis thaliana --------------------------------------------------MSGKGAKGLIM -GKPS---G----SDKDKD-KKKPITRSSRAGLQFPVGRVHRLLKTRSTAHGRVGATAAVY -TAAILEYLTAEVLELAGNASKDLKVKRISPRHLQLAIRGDEELDTLIKGTIAGGGVIPHI -HKS-----LINKSAKE---------------- ->H2A.Z_Nematostella_vectensis_EDO46289.1 EDO46289.1 histone: H2A variant: H2A.Z organism: Nematostella vectensis -------------------------------------------------MAGGKA------ --------G----KDS-KA-KAKAVSRSARAGLQFPVGRIHRHLKNRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHI -HKS-----LIGKKGANKPT------------- ->H2A.Z_Trichoplax_adhaerens_XP_002111498.1 XP_002111498.1 histone: H2A variant: H2A.Z organism: Trichoplax adhaerens 
-------------------------------------------------MAGGKA------ --------G----KDSKT--KAKAVSRSARAGLQFPVGRIHRHLKNRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHI -HKS-----LIGKKGANKPN------------- ->H2A.Z_Drosophila_melanogaster_NP_524519.1 NP_524519.1 histone: H2A variant: H2A.Z organism: Drosophila melanogaster -------------------------------------------------MAGGKA------ --------G----KDSGKA-KAKAVSRSARAGLQFPVGRIHRHLKSRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHI -HKS-----LIGKKEETVQDPQRKGNVILSQAY ->H2A.Z_Gallus_gallus_NP_001026545.1 NP_001026545.1 histone: H2A variant: H2A.Z organism: Gallus gallus -------------------------------------------------MAGGKA------ --------G----KDSGKT-KTKAVSRSQRAGLQFPVGRIHRHLKSRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHI -HKS-----LIGKKGQQKTV------------- ->H2A.Z_Danio_rerio_NP_001036788.1 NP_001036788.1 histone: H2A variant: H2A.Z organism: Danio rerio -------------------------------------------------MAGGKA------ --------G----KDSGKA-KTKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHI -HKS-----LIGKKGQQKTV------------- ->H2A.Z_Homo_sapiens_NP_002097.1 NP_002097.1 histone: H2A variant: H2A.Z organism: Homo sapiens -------------------------------------------------MAGGKA------ --------G----KDSGKA-KTKAVSRSQRAGLQFPVGRIHRHLKSRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHI -HKS-----LIGKKGQQKTV------------- ->H2A.Z_Homo_sapiens_NP_958844.1 NP_958844.1 histone: H2A variant: H2A.Z organism: Homo sapiens +>Cryptosporidium|XP_626045.1|H2A.Z organism=Cryptosporidium parvum Iowa II phylum=Apicomplexa class=Conoidasida +--------------------------------------------MDGATSSGKIGG---- +----KV-GGKVGGKGKAGSGKGSKKQPTSRAARAGLQFPVGRIQRMLKHRIPGDCRVGST +ASVYAAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIEKSL--MGKALIGKKGKKGNMSP------ +>Toxoplasma|XP_002371743.1|H2A.Z 
organism=Toxoplasma gondii ME49 phylum=Apicomplexa class=Conoidasida +---------------------------------------MDGAGKVGGKVGGKVGG---- +----KVGGMGKGGKGKSGSGKG-KKAPLSRAARAGLQFPVGRVHRMLKSRISSEGRVGST +AAVYASAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDTLIK--A +TIAGGGVIPHIHKSL--MTKGPSTQPMKKAKK-------- +>Drosophila|NP_524519.1|H2A.Z organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +------------------------------------------------MAGGKAGK---- +-----------------DSGKA-KAKAVSRSARAGLQFPVGRIHRHLKSRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKEETVQDPQRKGNVILSQAY +>Saccharomyces|NP_014631.1|H2A.Z organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +--------------------------------------------MSGKAHGGKGKS---- +----GA------------KDSG-SLRSQSSSARAGLQFPVGRIKRYLKRHATGRTRVGSK +AAIYLTAVLEYLTAEVLELA----GNAAKDL-KVKRITPRHLQLAIRGDDELDSLIR--A +TIASGGVLPHINKAL--LLKVEKKGSKK------------ +>Schizosaccharomyces|NP_595630.3|H2A.Z organism=Schizosaccharomyces pombe phylum=Ascomycota class=Schizosaccharomycetes +------------------------------------------------MSGGGKGK---- +----HV----GGKGGSKIGERG----QMSHSARAGLQFPVGRVRRFLKAKTQNNMRVGAK +SAVYSAAVLEYLTAEVLELA----GNAAKDL-KVKRITPRHLQLAIRGDEELDTLIR--A +TIAGGGVLPHINKQL--LIRTKEKYPEEEEII-------- +>Tetrahymena|CAA33554.1|H2A.Z organism=Tetrahymena thermophila phylum=Ciliophora class=Oligohymenophorea +------------------------------------------------MAGGKGGK---- +-------GGKGGKGGKVGGAKN-KKTPQSRSYKAGLQFPVGRIHRFLKGRVSAKNRVGAT +AAVYAAAILEYLTAEVLELA----GNASKDF-KVRRITPRHLLLAIRGDEELDILIK--A +TIAGGGVIPHIHKAL--LGKHSTKNRSSAKTAEPR----- +>Nematostella|EDO46289.1|H2A.Z organism=Nematostella vectensis phylum=Cnidaria class=Anthozoa +------------------------------------------------MAGGKAGK---- +------------------DSKA-KAKAVSRSARAGLQFPVGRIHRHLKNRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKGANKPT------------- 
+>Trypanosoma|XP_846259.1|H2A.Z organism=Trypanosoma brucei brucei TREU927 phylum=Euglenozoa class=Kinetoplastea +---------------------MSLTGDDAVPQAPLVGGVAMSPEQASALTGGKLGGKAVG +PAHGKGKGKGKGKRGGKTGGKAGRRDKMTRAARADLNFPVGRIHSRLKDGLNRKQRCGAS +AAIYCAALLEYLTSEVIELA----GAAAKAQ-KTERIKPRHLLLAIRGDEELNQIVN--A +TIARGGVVPFVHKSL--EKKIIKKSKRGS----------- +>Dictyostelium|XP_637656.1|H2A.Z organism=Dictyostelium discoideum AX4 phylum=Evosea class=Eumycetozoa +-------------------------------------------MTESETTSKKVNK---- +-----------------------RVKPVPKSTKAGLIFPVGRIHRMLKNKVPLK-RVSIL +SSVYLAAILEYLASEVLELTISQVSIQSKEYHNVRRISPRHLLLAIKTDEELDNLIRVST +TIAGGGVIPYIHEVLKKVEQKPTHPQQKQTIKSI------ +>Perkinsus|XP_002776752.1|H2A.Z organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +MSTRKQTDVSLPVPVVTQ-------------------LAGGKGIKGAGLQGGKGGK---- +----GGKGAKVHGGKGKIGGKG---SSITRSARAGLQFPVGRVRRYLKDRATANCRVGST +AAVYTAAILEYLTAEVLELA----GNAAKDQ-KVKRITPRHLHLAIRGDDELDVLIR--A +TIAGGGVVPYIHQSL--TVKTPYHKKKRVM---------- +>Perkinsus|XP_002784054.1|H2A.Z organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +MSDLKKTDINKSVPVVTQ-------------------LAGGKGIKGAGLQGGKGGK---- +----GGKGAKVHGGKGKMGGKG---ASMTRSARAGLQFPVGRVRRYLKDRATANCRVGST +AAVYTAAILEYLTAEVLELA----GNAAKDQ-KVKRITPRHLHLAIRGDDELDVLIR--A +TIAGGGVVPYIHQSL--TAKAPYHKKKRVM---------- +>Perkinsus|XP_002784055.1|H2A.Z organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +MSDLKKTDINKSVPVVTQLGRLLDVASLSTPSSSLFCLAGGKGIKGAGLQGGKGGK---- +----GGKGAKVHGGKGKIGGKG---TSMTRSARAGLQFPVGRVRRYLKDRATANCRVGST +AAVYTAAILEYLTAEVLELA----GNAAKDQ-KVKRITPRHLHLAIRGDDELDVLIR--A +TIAGGGVVPYIHQSL--TAKAPYHKKKRVM---------- +>Trichoplax|XP_002111498.1|H2A.Z organism=Trichoplax adhaerens phylum=Placozoa class=Uniplacotomia +------------------------------------------------MAGGKAGK---- +------------------DSKT-KAKAVSRSARAGLQFPVGRIHRHLKNRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKGANKPN------------- 
+>Arabidopsis|NP_175683.1|H2A.Z organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-------------------------------------------------MSGKGAK---- +----GLIMGKPSGS---DKDKD-KKKPITRSSRAGLQFPVGRVHRLLKTRSTAHGRVGAT +AAVYTAAILEYLTAEVLELA----GNASKDL-KVKRISPRHLQLAIRGDEELDTLIK--G +TIAGGGVIPHIHKSL--INKSAKE---------------- +>Arabidopsis|NP_181415.1|H2A.Z organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-------------------------------------------------MAGKGGK---- +----GLLAAKTTAA-AANKDSV-KKKSISRSSRAGIQFPVGRIHRQLKQRVSAHGRVGAT +AAVYTASILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDTLIK--G +TIAGGGVIPHIHKSL--VNKVTKD---------------- +>Arabidopsis|NP_191019.1|H2A.Z organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-------------------------------------------------MAGKGGK---- +----GLVAAKTMAA-NKDKDKD-KKKPISRSARAGIQFPVGRIHRQLKTRVSAHGRVGAT +AAVYTASILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDTLIK--G +TIAGGGVIPHIHKSL--INKTTKE---------------- +>Arabidopsis|NP_193093.1|H2A.Z organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-----------------------------------------------MVCNTNILK---- +----DV------------STKISAFENVRMIMVEGEMFQVARIHKQLKNRVSAHSSVGAT +DVVYMTSILEYLTTEVLQLA----ENTSKDL-KVKRITPRHLQLAIRGDEELDTLIK--G +TIIGGSVIPHIH---------------------------- +>Oryza|NP_001051232.1|H2A.Z organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +-------------------------------------------------MAGKGGK---- +----GLLAAKTTAAKSAEKDKG-KKAPVSRSSRAGLQFPVGRIHRQLKQRTQANGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDTLIK--G +TIAGGGVIPHIHKSL--INKSSKE---------------- +>Zea|NP_001136523.1|H2A.Z organism=Zea mays phylum=Streptophyta class=Magnoliopsida +-------------------------------------------------MAGKGGK---- +----GLLAAKTTAAKSAEKDKG-KKAPISRSSRAGLQFPVGRIHRQLKQRTQANGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDTLIK--G +TIAGGGVIPHIHKSL--INKSSKE---------------- +>Zea|NP_001141633.1|H2A.Z 
organism=Zea mays phylum=Streptophyta class=Magnoliopsida +-------------------------------------------------MAGKGGK---- +----GLLAAKTTAAKSTDKDKDRKKAPVSRSSRAGLQFPVGRIHRQLKSRASAHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDTLIK--G +TIAGGGVIPHIHKSL--INKTAKE---------------- +>Danio|NP_001036788.1|H2A.Z.1_(Chordata) organism=Danio rerio phylum=Chordata class=Actinopteri +------------------------------------------------MAGGKAGK---- +-----------------DSGKA-KTKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKGQQKTV------------- +>Gallus|NP_001026545.1|H2A.Z.1_(Chordata) organism=Gallus gallus phylum=Chordata class=Aves +------------------------------------------------MAGGKAGK---- +-----------------DSGKT-KTKAVSRSQRAGLQFPVGRIHRHLKSRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKGQQKTV------------- +>Homo|NP_002097.1|H2A.Z.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------MAGGKAGK---- +-----------------DSGKA-KTKAVSRSQRAGLQFPVGRIHRHLKSRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKGQQKTV------------- +>Danio|NP_705930.1|H2A.Z.2_(Chordata) organism=Danio rerio phylum=Chordata class=Actinopteri +------------------------------------------------MAGGKAGK---- +-----------------DSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKGQQKTA------------- +>Gallus|P02272.2|H2A.Z.2_(Chordata) organism=Gallus gallus phylum=Chordata class=Aves +------------------------------------------------MAGGKAGK---- +-----------------DSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKGQQKTA------------- +>Homo|NP_036544.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+------------------------------------------------MAGGKAGK---- +-----------------DSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKGQQKTA------------- +>Homo|NP_958844.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia ------------------------------------------------------------ ---------------------------------MFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHI -HKS-----LIGKKGQQKTA------------- ->H2A.Z_Homo_sapiens_NP_619541.1 NP_619541.1 histone: H2A variant: H2A.Z organism: Homo sapiens -------------------------------------------------MAGGKA------ --------G----KDSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGG------ --------------EKRRCS------------- ->H2A.Z_Homo_sapiens_NP_958925.1 NP_958925.1 histone: H2A variant: H2A.Z organism: Homo sapiens -------------------------------------------------MAGGKA------ --------G----KDSGKA-KAKAVSRSQRAGLQ--------------------------- ------------VLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHI -HKS-----LIGKKGQQKTA------------- ->H2A.Z_Danio_rerio_NP_705930.1 NP_705930.1 histone: H2A variant: H2A.Z organism: Danio rerio -------------------------------------------------MAGGKA------ --------G----KDSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHI -HKS-----LIGKKGQQKTA------------- ->H2A.Z_Gallus_gallus_P02272.2 P02272.2 histone: H2A variant: H2A.Z organism: Gallus gallus -------------------------------------------------MAGGKA------ --------G----KDSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHI -HKS-----LIGKKGQQKTA------------- ->H2A.Z_Homo_sapiens_NP_036544.1 NP_036544.1 histone: H2A variant: H2A.Z organism: Homo sapiens -------------------------------------------------MAGGKA------ 
--------G----KDSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKATIAGGGVIPHI -HKS-----LIGKKGQQKTA------------- ->H2A.Z_Homo_sapiens_NP_958924.1 NP_958924.1 histone: H2A variant: H2A.Z organism: Homo sapiens -------------------------------------------------MAGGKA------ --------G----KDSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEV------------------------------------------------ --------------------------------- +------------------------------------MFPVGRIHRHLKTRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKGQQKTA------------- +>Homo|NP_958924.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------MAGGKAGK---- +-----------------DSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGAT +AAVYSAAILEYLTAEV-------------------------------------------- +---------------------------------------- +>Homo|NP_619541.1|H2A.Z.2.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------MAGGKAGK---- +-----------------DSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGG----------------EKRRCS------------- +>Homo|NP_958925.1|H2A.Z.2.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------MAGGKAGK---- +-----------------DSGKA-KAKAVSRSQRAGLQ----------------------- +---------------VLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKGQQKTA------------- diff --git a/CURATED_SET/draft_seeds/H2A.Z_only.fasta b/CURATED_SET/draft_seeds/H2A.Z_only.fasta new file mode 100644 index 0000000..afbf324 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2A.Z_only.fasta @@ -0,0 +1,100 @@ +>Cryptosporidium|XP_626045.1|H2A.Z organism=Cryptosporidium parvum Iowa II phylum=Apicomplexa class=Conoidasida 
+--------------------------------------------MDGATSSGKIGG---- +----KV-GGKVGGKGKAGSGKGSKKQPTSRAARAGLQFPVGRIQRMLKHRIPGDCRVGST +ASVYAAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIEKSL--MGKALIGKKGKKGNMSP------ +>Toxoplasma|XP_002371743.1|H2A.Z organism=Toxoplasma gondii ME49 phylum=Apicomplexa class=Conoidasida +---------------------------------------MDGAGKVGGKVGGKVGG---- +----KVGGMGKGGKGKSGSGKG-KKAPLSRAARAGLQFPVGRVHRMLKSRISSEGRVGST +AAVYASAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDTLIK--A +TIAGGGVIPHIHKSL--MTKGPSTQPMKKAKK-------- +>Drosophila|NP_524519.1|H2A.Z organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +------------------------------------------------MAGGKAGK---- +-----------------DSGKA-KAKAVSRSARAGLQFPVGRIHRHLKSRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKEETVQDPQRKGNVILSQAY +>Saccharomyces|NP_014631.1|H2A.Z organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +--------------------------------------------MSGKAHGGKGKS---- +----GA------------KDSG-SLRSQSSSARAGLQFPVGRIKRYLKRHATGRTRVGSK +AAIYLTAVLEYLTAEVLELA----GNAAKDL-KVKRITPRHLQLAIRGDDELDSLIR--A +TIASGGVLPHINKAL--LLKVEKKGSKK------------ +>Schizosaccharomyces|NP_595630.3|H2A.Z organism=Schizosaccharomyces pombe phylum=Ascomycota class=Schizosaccharomycetes +------------------------------------------------MSGGGKGK---- +----HV----GGKGGSKIGERG----QMSHSARAGLQFPVGRVRRFLKAKTQNNMRVGAK +SAVYSAAVLEYLTAEVLELA----GNAAKDL-KVKRITPRHLQLAIRGDEELDTLIR--A +TIAGGGVLPHINKQL--LIRTKEKYPEEEEII-------- +>Tetrahymena|CAA33554.1|H2A.Z organism=Tetrahymena thermophila phylum=Ciliophora class=Oligohymenophorea +------------------------------------------------MAGGKGGK---- +-------GGKGGKGGKVGGAKN-KKTPQSRSYKAGLQFPVGRIHRFLKGRVSAKNRVGAT +AAVYAAAILEYLTAEVLELA----GNASKDF-KVRRITPRHLLLAIRGDEELDILIK--A +TIAGGGVIPHIHKAL--LGKHSTKNRSSAKTAEPR----- +>Nematostella|EDO46289.1|H2A.Z organism=Nematostella vectensis phylum=Cnidaria 
class=Anthozoa +------------------------------------------------MAGGKAGK---- +------------------DSKA-KAKAVSRSARAGLQFPVGRIHRHLKNRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKGANKPT------------- +>Trypanosoma|XP_846259.1|H2A.Z organism=Trypanosoma brucei brucei TREU927 phylum=Euglenozoa class=Kinetoplastea +---------------------MSLTGDDAVPQAPLVGGVAMSPEQASALTGGKLGGKAVG +PAHGKGKGKGKGKRGGKTGGKAGRRDKMTRAARADLNFPVGRIHSRLKDGLNRKQRCGAS +AAIYCAALLEYLTSEVIELA----GAAAKAQ-KTERIKPRHLLLAIRGDEELNQIVN--A +TIARGGVVPFVHKSL--EKKIIKKSKRGS----------- +>Dictyostelium|XP_637656.1|H2A.Z organism=Dictyostelium discoideum AX4 phylum=Evosea class=Eumycetozoa +-------------------------------------------MTESETTSKKVNK---- +-----------------------RVKPVPKSTKAGLIFPVGRIHRMLKNKVPLK-RVSIL +SSVYLAAILEYLASEVLELTISQVSIQSKEYHNVRRISPRHLLLAIKTDEELDNLIRVST +TIAGGGVIPYIHEVLKKVEQKPTHPQQKQTIKSI------ +>Perkinsus|XP_002776752.1|H2A.Z organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +MSTRKQTDVSLPVPVVTQ-------------------LAGGKGIKGAGLQGGKGGK---- +----GGKGAKVHGGKGKIGGKG---SSITRSARAGLQFPVGRVRRYLKDRATANCRVGST +AAVYTAAILEYLTAEVLELA----GNAAKDQ-KVKRITPRHLHLAIRGDDELDVLIR--A +TIAGGGVVPYIHQSL--TVKTPYHKKKRVM---------- +>Perkinsus|XP_002784054.1|H2A.Z organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +MSDLKKTDINKSVPVVTQ-------------------LAGGKGIKGAGLQGGKGGK---- +----GGKGAKVHGGKGKMGGKG---ASMTRSARAGLQFPVGRVRRYLKDRATANCRVGST +AAVYTAAILEYLTAEVLELA----GNAAKDQ-KVKRITPRHLHLAIRGDDELDVLIR--A +TIAGGGVVPYIHQSL--TAKAPYHKKKRVM---------- +>Perkinsus|XP_002784055.1|H2A.Z organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +MSDLKKTDINKSVPVVTQLGRLLDVASLSTPSSSLFCLAGGKGIKGAGLQGGKGGK---- +----GGKGAKVHGGKGKIGGKG---TSMTRSARAGLQFPVGRVRRYLKDRATANCRVGST +AAVYTAAILEYLTAEVLELA----GNAAKDQ-KVKRITPRHLHLAIRGDDELDVLIR--A +TIAGGGVVPYIHQSL--TAKAPYHKKKRVM---------- +>Trichoplax|XP_002111498.1|H2A.Z organism=Trichoplax adhaerens phylum=Placozoa class=Uniplacotomia 
+------------------------------------------------MAGGKAGK---- +------------------DSKT-KAKAVSRSARAGLQFPVGRIHRHLKNRTTSHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDSLIK--A +TIAGGGVIPHIHKSL--IGKKGANKPN------------- +>Arabidopsis|NP_175683.1|H2A.Z organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-------------------------------------------------MSGKGAK---- +----GLIMGKPSGS---DKDKD-KKKPITRSSRAGLQFPVGRVHRLLKTRSTAHGRVGAT +AAVYTAAILEYLTAEVLELA----GNASKDL-KVKRISPRHLQLAIRGDEELDTLIK--G +TIAGGGVIPHIHKSL--INKSAKE---------------- +>Arabidopsis|NP_181415.1|H2A.Z organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-------------------------------------------------MAGKGGK---- +----GLLAAKTTAA-AANKDSV-KKKSISRSSRAGIQFPVGRIHRQLKQRVSAHGRVGAT +AAVYTASILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDTLIK--G +TIAGGGVIPHIHKSL--VNKVTKD---------------- +>Arabidopsis|NP_191019.1|H2A.Z organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-------------------------------------------------MAGKGGK---- +----GLVAAKTMAA-NKDKDKD-KKKPISRSARAGIQFPVGRIHRQLKTRVSAHGRVGAT +AAVYTASILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDTLIK--G +TIAGGGVIPHIHKSL--INKTTKE---------------- +>Arabidopsis|NP_193093.1|H2A.Z organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-----------------------------------------------MVCNTNILK---- +----DV------------STKISAFENVRMIMVEGEMFQVARIHKQLKNRVSAHSSVGAT +DVVYMTSILEYLTTEVLQLA----ENTSKDL-KVKRITPRHLQLAIRGDEELDTLIK--G +TIIGGSVIPHIH---------------------------- +>Oryza|NP_001051232.1|H2A.Z organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +-------------------------------------------------MAGKGGK---- +----GLLAAKTTAAKSAEKDKG-KKAPVSRSSRAGLQFPVGRIHRQLKQRTQANGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDTLIK--G +TIAGGGVIPHIHKSL--INKSSKE---------------- +>Zea|NP_001136523.1|H2A.Z organism=Zea mays phylum=Streptophyta class=Magnoliopsida 
+-------------------------------------------------MAGKGGK---- +----GLLAAKTTAAKSAEKDKG-KKAPISRSSRAGLQFPVGRIHRQLKQRTQANGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDTLIK--G +TIAGGGVIPHIHKSL--INKSSKE---------------- +>Zea|NP_001141633.1|H2A.Z organism=Zea mays phylum=Streptophyta class=Magnoliopsida +-------------------------------------------------MAGKGGK---- +----GLLAAKTTAAKSTDKDKDRKKAPVSRSSRAGLQFPVGRIHRQLKSRASAHGRVGAT +AAVYSAAILEYLTAEVLELA----GNASKDL-KVKRITPRHLQLAIRGDEELDTLIK--G +TIAGGGVIPHIHKSL--INKTAKE---------------- diff --git a/CURATED_SET/draft_seeds/H2A.fasta b/CURATED_SET/draft_seeds/H2A.fasta old mode 100755 new mode 100644 index 257bf7e..ed965ae --- a/CURATED_SET/draft_seeds/H2A.fasta +++ b/CURATED_SET/draft_seeds/H2A.fasta @@ -1,513 +1,2409 @@ ->H2A.Z_Trypanosoma_brucei_brucei_TREU927_XP_846259.1 XP_846259.1 histone: H2A variant: H2A.Z organism: Trypanosoma brucei brucei TREU927 ------------------MSLTGDDAVPQAPLVGGVAMSPEQASALTGGKLGGKAVGPAHG -KGKGKGKGKRGGKTGGKAGRRDKMTRAARADLNFPVGRIHSRLKDGLNRKQRCGASAAIY -CAALLEYLTSEVIELAGAAAKAQK-TERIKPRHLLLAIRGDEELNQIVNA-TIARGGVVP -FVHKS-----LEKKIIKKSKRGS------------------------------------- +>Ixodes|EEC09557.1|cH2A_(Animals) organism=Ixodes scapularis phylum=Arthropoda class=Arachnida ------------------------------------------------------------ +-------MSGRGK-----------------------GGK------------V-------- +---------K----G---------------KSK---TRSSRAGLQFPVGRIHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--S-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------K-----KS--------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Saccharomyces_cerevisiae_S288C_NP_014631.1 NP_014631.1 histone: H2A variant: H2A.Z organism: Saccharomyces cerevisiae S288C ---------------------------------------------MSGKAHGGKGKSG--- 
--------A----KDSGSL---RSQSSSARAGLQFPVGRIKRYLKRHATGRTRVGSKAAIY -LTAVLEYLTAEVLELAGNAAKDLK-VKRITPRHLQLAIRGDDELDSLIRA-TIASGGVLP -HINKA-----LLLKVEKKGSKK-------------------------------------- ------------------------------------------------------------ +--------------------- +>Apis|XP_001119899.1|cH2A_(Animals) organism=Apis mellifera phylum=Arthropoda class=Insecta ------------------------------------------------------------ +-------MSGRGK-----------------------GGK------------A-------- +---------K----A---------------KAK---SRSNRAGLQFPVGRIHRLLRKGNY +AE-RVGAGAPVYLAAVMEYLAAEVLELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--S-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------K-----KA--------------------------------------- ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Perkinsus_marinus_ATCC_50983_XP_002776752.1 XP_002776752.1 histone: H2A variant: H2A.Z organism: Perkinsus marinus ATCC 50983 -MSTRKQTDVSLPVPVVTQ-------------------LAGGKGIKGAGLQGGKGGKGGKG -AKVHGGKG----KIGGKG---SSITRSARAGLQFPVGRVRRYLKDRATANCRVGSTAAVY -TAAILEYLTAEVLELAGNAAKDQK-VKRITPRHLHLAIRGDDELDVLIRA-TIAGGGVVP -YIHQS-----LTVKTPYHKKKRVM------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ +--------------------- +>Drosophila|NP_724343.1|cH2A_(Animals) organism=Drosophila melanogaster phylum=Arthropoda class=Insecta ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Perkinsus_marinus_ATCC_50983_XP_002784054.1 XP_002784054.1 histone: H2A variant: H2A.Z organism: Perkinsus marinus ATCC 50983 -MSDLKKTDINKSVPVVTQ-------------------LAGGKGIKGAGLQGGKGGKGGKG -AKVHGGKG----KMGGKG---ASMTRSARAGLQFPVGRVRRYLKDRATANCRVGSTAAVY -TAAILEYLTAEVLELAGNAAKDQK-VKRITPRHLHLAIRGDDELDVLIRA-TIAGGGVVP -YIHQS-----LTAKAPYHKKKRVM------------------------------------ 
+-------MSGRGK-----------------------GGK------------V-------- +---------K----G---------------KAK---SRSNRAGLQFPVGRIHRLLRKGNY +AE-RVGAGAPVYLAAVMEYLAAEVLELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--S-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------K-----KA--------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Perkinsus_marinus_ATCC_50983_XP_002784055.1 XP_002784055.1 histone: H2A variant: H2A.Z organism: Perkinsus marinus ATCC 50983 -MSDLKKTDINKSVPVVTQLGRLLDVASLSTPSSSLFCLAGGKGIKGAGLQGGKGGKGGKG -AKVHGGKG----KIGGKG---TSMTRSARAGLQFPVGRVRRYLKDRATANCRVGSTAAVY -TAAILEYLTAEVLELAGNAAKDQK-VKRITPRHLHLAIRGDDELDVLIRA-TIAGGGVVP -YIHQS-----LTAKAPYHKKKRVM------------------------------------ +--------------------- +>Nematostella|EDO48405.1|cH2A_(Animals) organism=Nematostella vectensis phylum=Cnidaria class=Anthozoa ------------------------------------------------------------ +-------MSGRGK-----------------------GKA------------K-------- +---------G----T---------------KSK---TRSSRAGLQFPVGRIHRHLRKGNY +AE-RVGAGAPVYMAAVLEYLSAEILELA----GNAA--RDN-K-KTRIIPRHLQLAVRND +EELNRLL--H-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------K-----KAKA------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Schizosaccharomyces_pombe_NP_595630.3 NP_595630.3 histone: H2A variant: H2A.Z organism: Schizosaccharomyces pombe ------------------------------------------------MSGGGKGKHVG-- --------G----KGGSKIGERGQMSHSARAGLQFPVGRVRRFLKAKTQNNMRVGAKSAVY -SAAVLEYLTAEVLELAGNAAKDLK-VKRITPRHLQLAIRGDEELDTLIRA-TIAGGGVLP -HINKQ-----LLIRTKEKYPEEEEII---------------------------------- 
------------------------------------------------------------ +--------------------- +>Caenorhabditis|NP_505463.1|cH2A_(Animals) organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea ------------------------------------------------------------ +-------MSGRGK-----------------------GGK------------A-------- +---------K--TGG---------------KAK---SRSSRAGLQFPVGRLHRILRKGNY +AQ-RVGAGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KTRIAPRHLQLAVRND +EELNKLL--A-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TA-----------G-----DKE-------------------------------------- ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Arabidopsis_thaliana_NP_193093.1 NP_193093.1 histone: H2A variant: H2A.Z organism: Arabidopsis thaliana -------------------------------------------------MVCNTN--IL-- --------K----DVSTKISAFENVRMIMVEGEMFQVARIHKQLKNRVSAHSSVGATDVVY -MTSILEYLTTEVLQLAENTSKDLK-VKRITPRHLQLAIRGDEELDTLIKG-TIIGGSVIP -HIH--------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ +--------------------- +>Danio|XP_009296490.1|cH2A_(Vertebrata) organism=Danio rerio phylum=Chordata class=Actinopteri ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Tetrahymena_thermophila_CAA33554.1 CAA33554.1 histone: H2A variant: H2A.Z organism: Tetrahymena thermophila -------------------------------------------------MAGGKGGKGGKG -GKG----G----KVGGAKNKKTPQSRSYKAGLQFPVGRIHRFLKGRVSAKNRVGATAAVY -AAAILEYLTAEVLELAGNASKDFK-VRRITPRHLLLAIRGDEELDILIKA-TIAGGGVIP -HIHKA-----LLGKHSTKNRSSAKTAEPR------------------------------- +-------MSGRGK--------------------T--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAVRND +EELNKLL--G-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK 
+TE--------------KAAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Toxoplasma_gondii_ME49_XP_002371743.1 XP_002371743.1 histone: H2A variant: H2A.Z organism: Toxoplasma gondii ME49 --------------------------------------------MDGAGKVGGKVGGKVGG -KVGGMGKGGKGKSGSGKG-KKAPLSRAARAGLQFPVGRVHRMLKSRISSEGRVGSTAAVY -ASAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDTLIKA-TIAGGGVIP -HIHKS-----LMTKGPSTQPMKKAKK---------------------------------- +--------------------- +>Xenopus|NP_001087948.1|cH2A_(Vertebrata) organism=Xenopus laevis phylum=Chordata class=Amphibia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------T-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAVRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQSVL--LPKK +TE-----------S-SKSAKSK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Cryptosporidium_parvum_Iowa_II_XP_626045.1 XP_626045.1 histone: H2A variant: H2A.Z organism: Cryptosporidium parvum Iowa II --------------------------------------------------MDGATSSGKIG -GKVGGKVGGKGKAGSGKGSKKQPTSRAARAGLQFPVGRIQRMLKHRIPGDCRVGSTASVY -AAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDSLIKA-TIAGGGVIP -HIEKSLMGKALIGKKGKKGNMSP------------------------------------- ------------------------------------------------------------ +--------------------- +>Gallus|NP_001072943.1|cH2A_(Vertebrata) organism=Gallus gallus phylum=Chordata class=Aves ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- 
+---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TD-------------SHKAKAK-------------------------------------- ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Zea_mays_NP_001141633.1 NP_001141633.1 histone: H2A variant: H2A.Z organism: Zea mays --------------------------------------------------MAGKGGKGLLA -AKTTAAKS----TDKDKDRKKAPVSRSSRAGLQFPVGRIHRQLKSRASAHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDTLIKG-TIAGGGVIP -HIHKS-----LINKTAKE------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ +--------------------- +>Ailuropoda|XP_011215280.1|cH2A_(Mammalia) organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Oryza_sativa_Japonica_Group_NP_001051232.1 NP_001051232.1 histone: H2A variant: H2A.Z organism: Oryza sativa Japonica Group --------------------------------------------------MAGKGGKGLLA -AKTTAAKS----AEKDKG-KKAPVSRSSRAGLQFPVGRIHRQLKQRTQANGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDTLIKG-TIAGGGVIP -HIHKS-----LINKSSKE------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Zea_mays_NP_001136523.1 NP_001136523.1 
histone: H2A variant: H2A.Z organism: Zea mays --------------------------------------------------MAGKGGKGLLA -AKTTAAKS----AEKDKG-KKAPISRSSRAGLQFPVGRIHRQLKQRTQANGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDTLIKG-TIAGGGVIP -HIHKS-----LINKSSKE------------------------------------------ +--------------------- +>Bos|NP_001192525.1|cH2A_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Arabidopsis_thaliana_NP_181415.1 NP_181415.1 histone: H2A variant: H2A.Z organism: Arabidopsis thaliana --------------------------------------------------MAGKGGKGLLA -AKTTAA-A----ANKDSV-KKKSISRSSRAGIQFPVGRIHRQLKQRVSAHGRVGATAAVY -TASILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDTLIKG-TIAGGGVIP -HIHKS-----LVNKVTKD------------------------------------------ ------------------------------------------------------------ +--------------------- +>Callithrix|XP_008992112.1|cH2A_(Mammalia) organism=Callithrix jacchus phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------V-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRIHRLLRKGKY +AD-RIGAGAPVYLAAVLEYLTAEILELA----GNAS--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------SHHHKSQSK-------------------------------------- ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Arabidopsis_thaliana_NP_191019.1 
NP_191019.1 histone: H2A variant: H2A.Z organism: Arabidopsis thaliana --------------------------------------------------MAGKGGKGLVA -AKTMAA-N----KDKDKD-KKKPISRSARAGIQFPVGRIHRQLKTRVSAHGRVGATAAVY -TASILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDTLIKG-TIAGGGVIP -HIHKS-----LINKTTKE------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ +--------------------- +>Canis|XP_005640150.1|cH2A_(Mammalia) organism=Canis lupus familiaris phylum=Chordata class=Mammalia ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Arabidopsis_thaliana_NP_175683.1 NP_175683.1 histone: H2A variant: H2A.Z organism: Arabidopsis thaliana --------------------------------------------------MSGKGAKGLIM -GKPS---G----SDKDKD-KKKPITRSSRAGLQFPVGRVHRLLKTRSTAHGRVGATAAVY -TAAILEYLTAEVLELAGNASKDLK-VKRISPRHLQLAIRGDEELDTLIKG-TIAGGGVIP -HIHKS-----LINKSAKE------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Nematostella_vectensis_EDO46289.1 EDO46289.1 histone: H2A variant: H2A.Z organism: Nematostella vectensis -------------------------------------------------MAGGKA------ --------G----KDS-KA-KAKAVSRSARAGLQFPVGRIHRHLKNRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDSLIKA-TIAGGGVIP -HIHKS-----LIGKKGANKPT--------------------------------------- +--------------------- +>Cavia|XP_003478913.1|cH2A_(Mammalia) organism=Cavia 
porcellus phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYMAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Trichoplax_adhaerens_XP_002111498.1 XP_002111498.1 histone: H2A variant: H2A.Z organism: Trichoplax adhaerens -------------------------------------------------MAGGKA------ --------G----KDSKT--KAKAVSRSARAGLQFPVGRIHRHLKNRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDSLIKA-TIAGGGVIP -HIHKS-----LIGKKGANKPN--------------------------------------- ------------------------------------------------------------ +--------------------- +>Cricetulus|XP_007634672.1|cH2A_(Mammalia) organism=Cricetulus griseus phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +SE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Drosophila_melanogaster_NP_524519.1 NP_524519.1 histone: H2A variant: H2A.Z organism: Drosophila melanogaster -------------------------------------------------MAGGKA------ --------G----KDSGKA-KAKAVSRSARAGLQFPVGRIHRHLKSRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDSLIKA-TIAGGGVIP -HIHKS-----LIGKKEETVQDPQRKGNVILSQAY-------------------------- 
------------------------------------------------------------ ------------------------------------------------------------ +--------------------- +>Heterocephalus|XP_004847876.1|cH2A_(Mammalia) organism=Heterocephalus glaber phylum=Chordata class=Mammalia ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Gallus_gallus_NP_001026545.1 NP_001026545.1 histone: H2A variant: H2A.Z organism: Gallus gallus -------------------------------------------------MAGGKA------ --------G----KDSGKT-KTKAVSRSQRAGLQFPVGRIHRHLKSRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDSLIKA-TIAGGGVIP -HIHKS-----LIGKKGQQKTV--------------------------------------- +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Danio_rerio_NP_001036788.1 NP_001036788.1 histone: H2A variant: H2A.Z organism: Danio rerio -------------------------------------------------MAGGKA------ --------G----KDSGKA-KTKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDSLIKA-TIAGGGVIP -HIHKS-----LIGKKGQQKTV--------------------------------------- +--------------------- +>Loxodonta|XP_003422330.1|cH2A_(Mammalia) organism=Loxodonta africana phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND 
+EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Homo_sapiens_NP_002097.1 NP_002097.1 histone: H2A variant: H2A.Z organism: Homo sapiens -------------------------------------------------MAGGKA------ --------G----KDSGKA-KTKAVSRSQRAGLQFPVGRIHRHLKSRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDSLIKA-TIAGGGVIP -HIHKS-----LIGKKGQQKTV--------------------------------------- ------------------------------------------------------------ +--------------------- +>Macaca|NP_001180653.1|cH2A_(Mammalia) organism=Macaca mulatta phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +SE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Homo_sapiens_NP_958844.1 NP_958844.1 histone: H2A variant: H2A.Z organism: Homo sapiens ------------------------------------------------------------ ---------------------------------MFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDSLIKA-TIAGGGVIP -HIHKS-----LIGKKGQQKTA--------------------------------------- ------------------------------------------------------------ +--------------------- +>Monodelphis|XP_007485414.1|cH2A_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY 
+AE-RVGAGAPVYMAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKTKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Homo_sapiens_NP_619541.1 NP_619541.1 histone: H2A variant: H2A.Z organism: Homo sapiens -------------------------------------------------MAGGKA------ --------G----KDSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDSLIKA-TIAGG---- ----------------EKRRCS--------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ +--------------------- +>Mus|NP_835489.1|cH2A_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Homo_sapiens_NP_958925.1 NP_958925.1 histone: H2A variant: H2A.Z organism: Homo sapiens -------------------------------------------------MAGGKA------ --------G----KDSGKA-KAKAVSRSQRAGLQ--------------------------- ------------VLELAGNASKDLK-VKRITPRHLQLAIRGDEELDSLIKA-TIAGGGVIP -HIHKS-----LIGKKGQQKTA--------------------------------------- +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +SE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Danio_rerio_NP_705930.1 NP_705930.1 histone: H2A variant: H2A.Z organism: Danio rerio -------------------------------------------------MAGGKA------ 
--------G----KDSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDSLIKA-TIAGGGVIP -HIHKS-----LIGKKGQQKTA--------------------------------------- +--------------------- +>Nomascus|XP_004088806.1|cH2A_(Mammalia) organism=Nomascus leucogenys phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Gallus_gallus_P02272.2 P02272.2 histone: H2A variant: H2A.Z organism: Gallus gallus -------------------------------------------------MAGGKA------ --------G----KDSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDSLIKA-TIAGGGVIP -HIHKS-----LIGKKGQQKTA--------------------------------------- ------------------------------------------------------------ +--------------------- +>Oryctolagus|XP_008246690.1|cH2A_(Mammalia) organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Homo_sapiens_NP_036544.1 NP_036544.1 histone: H2A variant: H2A.Z organism: Homo sapiens 
-------------------------------------------------MAGGKA------ --------G----KDSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEVLELAGNASKDLK-VKRITPRHLQLAIRGDEELDSLIKA-TIAGGGVIP -HIHKS-----LIGKKGQQKTA--------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ +--------------------- +>Pan|XP_009448979.1|cH2A_(Mammalia) organism=Pan troglodytes phylum=Chordata class=Mammalia ------------------------------------------------------------ ------------------------------------- ->H2A.Z_Homo_sapiens_NP_958924.1 NP_958924.1 histone: H2A variant: H2A.Z organism: Homo sapiens -------------------------------------------------MAGGKA------ --------G----KDSGKA-KAKAVSRSQRAGLQFPVGRIHRHLKTRTTSHGRVGATAAVY -SAAILEYLTAEV------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGKA------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ +--------------------- +>Rattus|NP_068612.2|cH2A_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia ------------------------------------------------------------ ------------------------------------- ->H2A.B_Heterocephalus_glaber_EHB05905.1 EHB05905.1 histone: H2A variant: H2A.B organism: Heterocephalus glaber --------------------------------------------------MPRQA--LALT ----------------------NERPPQGRAEPIFSVSQVERALCDGRYAQ-RLSCSASVF -LAATLQFLSATVLELADREARYRS-RRRITRELLDVATLKDALLCTLLGT-TTIS----- -----------RVAPARP------------------------------------------- +-------MSGRGK--------------------Q--GGK------------A-------- 
+---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +SE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.B_Sus_scrofa_XP_003135571.1 XP_003135571.1 histone: H2A variant: H2A.B organism: Sus scrofa --------------------------------------------------MPGKR--SRRK -SPG--RQG-------------RTCARTTRAGLSASVSHMERLLREGPYAQ-CLSSSARVF -LAATIEYLTARVLELAGDEAQIVG-RRCITPELVAMAVHNNALLSAFFGT-LAIS----- -----------QVAPTQE------------------------------------------- +--------------------- +>Sus|XP_003356618.1|cH2A_(Mammalia) organism=Sus scrofa phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.B_Bos_taurus_NP_001069373.1 NP_001069373.1 histone: H2A variant: H2A.B organism: Bos taurus --------------------------------------------------MPKKR--GHQR -SSG-------------------IRSRTAQSELSFSVSHMEHLLRKGHYAQ-RLSSSAPVF -LAAVIQDLTSKVLELAGNEAQKNG-EKRITPKLVDMAIHNNALLSSIFGM-TTIS----- -----------LVAPGPH------------------------------------------- ------------------------------------------------------------ +--------------------- +>Homo|NP_778235.1|cH2A.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAVRND +EELNKLL--G-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HKPGKNK-------------------------------------- ------------------------------------------------------------ ------------------------------------- ->H2A.B_Loxodonta_africana_XP_003421752.1 XP_003421752.1 histone: H2A variant: H2A.B organism: Loxodonta africana --------------------------------------------------MAGKR--SRRG -GGGGGGGG--GGGGGGGGSSRRQRRTRSRTELIFSASHVAHLLREGHYAQ-RLSSSAPVF -LAAILKCLTAKILELAGNEAQNSG-RRLVTPELVDMAVHNNALLSGFFLT-TTIS----- -----------QVAPAR-------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ +--------------------- +>Homo|NP_254280.1|cH2A.11_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia ------------------------------------------------------------ ------------------------------------- ->H2A.B_Ailuropoda_melanoleuca_XP_011215272.1 XP_011215272.1 histone: H2A variant: H2A.B organism: Ailuropoda melanoleuca --------------------------------------------------MPGDR--SRRG -SSS--GQR-------------RTRSRTARAELSFSVSHVERLLREGHYAQ-RLGSSAPVF -LAAVIQYLTAKVLELAGNEAQNSG-GRRITPQLVDMAVHNHALLSGFFGT-TTIS----- -----------QVAPAWN------------------------------------------- +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +SE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ 
------------------------------------------------------------ ------------------------------------- ->H2A.B_Callithrix_jacchus_XP_002763866.2 XP_002763866.2 histone: H2A variant: H2A.B organism: Callithrix jacchus --------------------------------------------------MSERR--SRRG -SSAAGRRG-------------HTRSRTARAELIFSVSKMERGLWEGHYAQ-RLSDNAPVY -LAAVIQYLTAKILELAAKEADNRG-ERIITPRLLDMAVHNDGLLSTLFHA-ITIS----- -----------QVGPGPN------------------------------------------- +--------------------- +>Homo|NP_734466.1|cH2A.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KSK---SRSSRAGLQFPVGRIHRLLRKGNY +AE-RIGAGAPVYLAAVLEYLTAEILELA----GNAS--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------SHHHKAQSK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.B_Macaca_mulatta_NP_001180843.1 NP_001180843.1 histone: H2A variant: H2A.B organism: Macaca mulatta --------------------------------------------------MSERR--SHRR -SSRAGGRG-------------RTRSRTVRAELSFSVSQVERGLREGHYAQ-RLSPTAPVY -LAAVIEYLTAKVLELAGNEAQNNG-ERNITPLLLDMAVHNNRLLSTLFDT-TTIS----- -----------QVAPGGD------------------------------------------- ------------------------------------------------------------ +--------------------- +>Homo|NP_003504.2|cH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +SE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQAVL--LPKK 
+TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------- ->H2A.B_Nomascus_leucogenys_XP_003282204.1 XP_003282204.1 histone: H2A variant: H2A.B organism: Nomascus leucogenys --------------------------------------------------MPRRR--SHRG -SSGAGGRG-------------RTCSRTVRAELSFSVSQVERGLREGHYAQ-RLSRTAPVY -LAAVIEYLTAKVLELAGNEAQNNG-ERNITPLLLDMVVHNNRLLSTLFHT-TTIS----- -----------RVAPGGD------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ +--------------------- +>Homo|NP_066390.1|cH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia ------------------------------------------------------------ ------------------------------------- ->H2A.B_Pan_troglodytes_XP_001145032.1 XP_001145032.1 histone: H2A variant: H2A.B organism: Pan troglodytes --------------------------------------------------MPRRR--RHRG -SSGAGGRG-------------RTCSRTVRAELSFSVSQVERSLREGQYAQ-RLSRTAPVY -LAAVIEYLTAKVLELAGNEAQNSG-ARNITPLLLDMVVHNDRLLSTLFNT-TTIS----- -----------QVAPGED------------------------------------------- +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +SE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.B_Homo_sapiens_NP_001017990.1 NP_001017990.1 histone: H2A variant: H2A.B organism: Homo sapiens --------------------------------------------------MPRRR--RRRG -SSGAGGRG-------------RTCSRTVRAELSFSVSQVERSLREGHYAQ-RLSRTAPVY 
-LAAVIEYLTAKVPELAGNEAQNSG-ERNITPLLLDMVVHNDRLLSTLFNT-TTIS----- -----------QVAPGED------------------------------------------- +--------------------- +>Homo|NP_003503.1|cH2A.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- ->H2A.B_Homo_sapiens_NP_001017991.1 NP_001017991.1 histone: H2A variant: H2A.B organism: Homo sapiens --------------------------------------------------MPRRR--RRRG -SSGAGGRG-------------RTCSRTVRAELSFSVSQVERSLREGHYAQ-RLSRTAPVY -LAAVIEYLTAKVLELAGNEAQNSG-ERNITPLLLDMVVHNDRLLSTLFNT-TTIS----- -----------QVAPGED------------------------------------------- ------------------------------------------------------------ +--------------------- +>Homo|NP_066409.1|cH2A.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia ------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +SE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- ------------------------------------------------------------ ------------------------------------- ->H2A.B_Homo_sapiens_NP_542451.1 NP_542451.1 histone: H2A variant: H2A.B organism: Homo sapiens --------------------------------------------------MPRRR--RRRG -SSGAGGRG-------------RTCSRTVRAELSFSVSQVERSLREGHYAQ-RLSRTAPVY 
-LAAVIEYLTAKVLELAGNEAQNSG-ERNITPLLLDMVVHNDRLLSTLFNT-TTIS----- -----------QVAPGED------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------- ->H2A.B_Cricetulus_griseus_XP_003514308.1 XP_003514308.1 histone: H2A variant: H2A.B organism: Cricetulus griseus --------------------------------------------------MPRTRQSSRRG -SS-------------------SRRSRTDRAELTFSVSLVEHHLRESGHAR-RLSETVPIL -VTAILEFLTRRLLELASNEAQRLGAQRLITPEILDLTIYNNALLSEMFQF-TTIS----- -----------QTAPAGPRRRRRQI------------------------------------ ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------- ->H2A.B_Cricetulus_griseus_XP_003515491.1 XP_003515491.1 histone: H2A variant: H2A.B organism: Cricetulus griseus --------------------------------------------------MPRTRQSSLRG -SS-------------------SRRSRTDRAELTFSVSLVEHHLRESGHAP-RLSETVPIL -LTAILEFLTRRLLELASNEAQRLGAQRLITPEILDLTVYNNTLLSQLLQF-TTIS----- -----------QTAPAGRRRRRRQT------------------------------------ ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------- ->H2A.B_Mus_musculus_NP_001268459.1 NP_001268459.1 histone: H2A variant: H2A.B organism: Mus musculus --------------------------------------------------MPRNTENCLQR -SSG---HR-------------QHHSRTSRGELIFAVSLVEQHLREVSRAR-RLSDMVPVS -LVAILEFLTSRLLELAGNEAQRRGTQRLITPQPLDLEVYSSMELSDVFQF-ITIS----- -----------QVAPAHR------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- 
------------------------------------------------------------- ------------------------------------- ->H2A.B_Mus_musculus_NP_001268460.1 NP_001268460.1 histone: H2A variant: H2A.B organism: Mus musculus --------------------------------------------------MPRNRENCLRE -SSG---RR-------------HRRSRTSRAELIFAVSLVEQHLREVSRAR-RLSDTVPIF -LAAILESLTRRLLELAGNEAQRRGTERRITPELLDLAVYSNMELSDVFQF-ITIS----- -----------QVAPAHR------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------- ->macroH2A_Ixodes_scapularis_XP_002403551.1 XP_002403551.1 histone: H2A variant: macroH2A organism: Ixodes scapularis --------------------------------------------------MSARGGK--KR -A--------------------KVVSKSTKAGVLFPVGRMRRYLKKGTHHF-RIGAGAPVY -MAAVIEYLSGEAISCQDSSPRDPL-LSALTKRKCCSGRPN-LFFLQLLKGVTIASGGVLP -RILPE-----LLARRKGGRFKTVALAKKPVAAAIAKEKAVPPKEKAKLAKGKVCRKSSHC -HVPLGDRGIILSLWPTAPQGEHFTLVKHKTGLSLSVQLTVIQGDMASV---TADAAIHPT -NASLSLSGEVGQVLEKAGGKEFVQEVKDLFSAHGPLESAGAVICPGHQFPAKFVIHCNVP -SGSSEP------LEKCVRNCLALADEKNIRVLAVPPLATHSVASQKQQAAQTILKAISNY -FVNVMSSSLKQIYFVLSDMESIGIYTSELAKLDS-- ->macroH2A_Trichoplax_adhaerens_XP_002111582.1 XP_002111582.1 histone: H2A variant: macroH2A organism: Trichoplax adhaerens --------------------------------------------------MSGRGGK--AR -K--------------------KPSSRSARAGLQFPVGRMHRRLKSSTHHL-RIGSGAPVY -LAACIEYLTAEILELAGNAARDNK-KLRIIPRHIQLAIGNDEELHKLLSDVTIASGGVLP -HVHTE-----LLSKKAKGGGASVAAAAAP--KKSKVRVSRVGKST--------PAKSNFS -KKSG--------SSTKAFKNSEVTILSEKQ-LFLGQKLIVTKGDITKI---STDGIVHPT -SSNFSHAGMIGGALSSAGGKQYMDGVAKVEQETGSLPVAGVTGSPAANLSAQEVIHVHSP -SWGSTD--CQGNLEKAVRNILDYADKKGMKSVAIPSIGSGSNNFPKLTAAQIILRSIAKY -FVGVMSSSLKEVYFVLWDEESINIYTSELNKLDVSG ->macroH2A_Nematostella_vectensis_XP_001637578.1 XP_001637578.1 histone: H2A variant: macroH2A organism: Nematostella vectensis 
--------------------------------------------------MSARGGKAAKR -A--------------------KAVSRSAKAGLQFPVSRVHRYLRKCTHHY-RISAAAPVY -QAAVMEYLTAEILELAGNAARDNK-KTRIIPRHILLAVANDEELHKLLKGVTIASGGVLP -NIHPE-----LLKKRKGGK------LVSP--EELKSKKPKPAPPPS-------PKKPVSS -KKGR--------GKADKGPGDGFSVLSEKT-LFLGQKLTVVQGDIAAI---DADAVVLPT -NAKFKLEGEVGEALKKAGGKEFKDEIKKLSEDNGDLALLDAAICDGHNFPAAYVISLHSP -VYSEDSTTASDDLEKAVKNVLTIADEKNLKILAIPSIGTGSNKYPKDLAAQVTLKAISNY -FVSAMASSLKQIYFVLSDPENIGMYTMELARLDS-- ->macroH2A_Gallus_gallus_NP_990338.1 NP_990338.1 histone: H2A variant: macroH2A organism: Gallus gallus --------------------------------------------------MSSRGGK--KK -S--------------------TKTSRSAKAGVIFPVGRMLRYIKKGHPKY-RIGVGAPVY -MAAVLEYLTAEILELAGNAARDNK-KGRVTPRHILLAVANDEELNQLLKGVTIASGGVLP -NIHPE-----LLAKKRGSKGKLEAIITPP--PAKKAKSPSQKKTVSKKTGGKKGARKSKK -KQGEVSKSASADSTTEGTPADGFTVLSTKS-LFLGQKLNLIHSEISNLAGFEVEAIINPT -NADIDLKDDLGSTLEKKGGKEFVEAVIELRKKNGPLDIAGAVVSAGHGLPAKFVIHCNSP -GWGSDK--CEELLEKTVKNCLALADEKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSY -FVSTMSSSIKTVYFVLFDSESIGIYVQEMAKLDAN- ->macroH2A_Homo_sapiens_NP_001035248.1 NP_001035248.1 histone: H2A variant: macroH2A organism: Homo sapiens --------------------------------------------------MSSRGGK--KK -S--------------------TKTSRSAKAGVIFPVGRMLRYIKKGHPKY-RIGVGAPVY -MAAVLEYLTAEILELAGNAARDNK-KGRVTPRHILLAVANDEELNQLLKGVTIASGGVLP -NIHPE-----LLAKKRGSKGKLEAIITPP--PAKKAKSPSQKKPVSKKAGGKKGARKSK- -KQGEVSKAASADSTTEGTPADGFTVLSTKS-LFLGQKLNLIHSEISNLAGFEVEAIINPT -NADIDLKDDLGNTLEKKGGKEFVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSP -VWGADK--CEELLEKTVKNCLALADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSY -FVSTMSSSIKTVYFVLFDSESIGIYVQEMAKLDAN- ->macroH2A_Homo_sapiens_NP_613258.2 NP_613258.2 histone: H2A variant: macroH2A organism: Homo sapiens --------------------------------------------------MSSRGGK--KK -S--------------------TKTSRSAKAGVIFPVGRMLRYIKKGHPKY-RIGVGAPVY -MAAVLEYLTAEILELAGNAARDNK-KGRVTPRHILLAVANDEELNQLLKGVTIASGGVLP -NIHPE-----LLAKKRGSKGKLEAIITPP--PAKKAKSPSQKKPVSKKAGGKKGARKSKK 
-KQGEVSKAASADSTTEGTPADGFTVLSTKS-LFLGQKLNLIHSEISNLAGFEVEAIINPT -NADIDLKDDLGNTLEKKGGKEFVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSP -VWGADK--CEELLEKTVKNCLALADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSY -FVSTMSSSIKTVYFVLFDSESIGIYVQEMAKLDAN- ->macroH2A_Gallus_gallus_AAC28846.1 AAC28846.1 histone: H2A variant: macroH2A organism: Gallus gallus --------------------------------------------------MSSRGGK--KK -S--------------------TKTSRSAKAGVIFPVGRMLRYIKKGHPKY-RIGVGAPVY -MAAVLEYLTAEILELAGNAARDNK-KGRVTPRHILLAVANDEELNQLLKGVTIASGGVLP -NIHPE-----LLAKKRGSKGKLEAIITPP--PAKKAKSPSQKKTVSKKTGGKKGARKSKK -KQGEVSKSASADSTTEGTPADGFTVLSTKS-LFLGQKLQVVQADIATI---DSDAVVHPT -NSDFYTGGEVGSTLEKKGGKEFVEAVIELRKKNGPLDIAGAVVSAGHGLPAKFVIHCNSP -GWGSDK--CEELLEKTVKNCLALADEKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSY -FVSTMSSSIKTVYFVLFDSESIGIYVQEMAKLDAN- ->macroH2A_Homo_sapiens_NP_613075.1 NP_613075.1 histone: H2A variant: macroH2A organism: Homo sapiens --------------------------------------------------MSSRGGK--KK -S--------------------TKTSRSAKAGVIFPVGRMLRYIKKGHPKY-RIGVGAPVY -MAAVLEYLTAEILELAGNAARDNK-KGRVTPRHILLAVANDEELNQLLKGVTIASGGVLP -NIHPE-----LLAKKRGSKGKLEAIITPP--PAKKAKSPSQKKPVSKKAGGKKGARKSKK -KQGEVSKAASADSTTEGTPADGFTVLSTKS-LFLGQKLQVVQADIASI---DSDAVVHPT -NTDFYIGGEVGNTLEKKGGKEFVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSP -VWGADK--CEELLEKTVKNCLALADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSY -FVSTMSSSIKTVYFVLFDSESIGIYVQEMAKLDAN- ->macroH2A_Homo_sapiens_XP_005272189.1 XP_005272189.1 histone: H2A variant: macroH2A organism: Homo sapiens --------------------------------------------------MSSRGGK--KK -S--------------------TKTSRSAKAGVIFPVGRMLRYIKKGHPKY-RIGVGAPVY -MAAVLEYLTAEILELAGNAARDNK-KGRVTPRHILLAVANDEELNQLLKGVTIASGGVLP -NIHPE-----LLAKKRGSKGKLEAIITPP--PAKKAKSPSQKKPVSKKAGGKKGARKSK- -KQGEVSKAASADSTTEGTPADGFTVLSTKS-LFLGQKLQVVQADIASI---DSDAVVHPT -NTDFYIGGEVGNTLEKKGGKEFVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSP -VWGADK--CEELLEKTVKNCLALADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSY -FVSTMSSSIKTVYFVLFDSESIGIYVQEMAKLDAN- ->macroH2A_Danio_rerio_NP_001020673.1 
NP_001020673.1 histone: H2A variant: macroH2A organism: Danio rerio --------------------------------------------------MSARGGK--KK -I--------------------TKLSRSARAGVIFPVGRMMRYLRTGTHKY-RIGMGAPVY -MAAVIEYLAAEILELAGNAARDNK-KGRITPRHIKLAVANDEELNQLLRGVTISNGGVLP -RIHPE-----LLSKKRGGKVKVESQVAVP--EKSAKRKPV--KKPYKKSKGKPGRKPKKS -TENDKEADA---NAMEDGPGEGFTILSAKS-LFLGQKLSLTESEISKIGTIKVEGIINPT -NAEIDLKEGIGNALEKTGGKDFLETVKELRKSQGPLEVASVAVSQANGMAARFIIHCHVP -QWGSDK--CEDQLEKTVKNCLSAAEEKKLKSVAFPSLPAGRNGFPKQTAAQLILKAISNH -FVSATTSSLKNIYFVLFDSESIGIYLQEMAKMDAK- ->macroH2A_Gallus_gallus_NP_001264267.1 NP_001264267.1 histone: H2A variant: macroH2A organism: Gallus gallus --------------------------------------------------MSGRSGK--KK -M--------------------SKLSRSSRAGVIFPVGRMMRYLKKGTYKY-RIGVGAPVY -MAAVIEYLAAEILELAGNAARDNK-KGRIAPRHILLAVANDEELNQLLKGVTIASGGVLP -RIQPE-----LLAKKRGAKGKSETILSPA--PEKKGRKSMVSKKSGKKAKSNKARTPKKN -KQKDSEKEGASNSTSEDGPGDGFTILSSKS-LVPGQKLSLTQSDISHIGSMKVEGIVHPT -TAEIDLKEEIGKALEKAGGKEFLETVKELRKSQGPLEVAEAALTQSSGLAAKFVIHCHIP -QWGSDK--CEEQLEETIKNCLTAAEDKKLKSVAFPPFPSGRNCFPKQTAAQVTLRAISTH -FDGTSSSSLKNIYFLLFDSESIGIYVQEMAKLDTK- ->macroH2A_Homo_sapiens_NP_061119.1 NP_061119.1 histone: H2A variant: macroH2A organism: Homo sapiens --------------------------------------------------MSGRSGK--KK -M--------------------SKLSRSARAGVIFPVGRLMRYLKKGTFKY-RISVGAPVY -MAAVIEYLAAEILELAGNAARDNK-KARIAPRHILLAVANDEELNQLLKGVTIASGGVLP -RIHPE-----LLAKKRGTKGKSETILSPP--PEKRGRKATSGKKGGKKSKAAKPRTSKKS -KPKDSDKEGTSNSTSEDGPGDGFTILSSKS-LVLGQKLSLTQSDISHIGSMRVEGIVHPT -TAEIDLKEDIGKALEKAGGKEFLETVKELRKSQGPLEVAEAAVSQSSGLAAKFVIHCHIP -QWGSDK--CEEQLEETIKNCLSAAEDKKLKSVAFPPFPSGRNCFPKQTAAQVTLKAISAH -FDDSSASSLKNVYFLLFDSESIGIYVQEMAKLDAK- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_003500.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ 
+-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_003501.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_003502.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_003505.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_066408.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_542163.1|cH2A.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAK---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_066544.1|cH2A.7_(Homo_sapiens) organism=Homo sapiens 
phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKTK---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_001035807.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYMAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_003507.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYMAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S-HHKAKGK-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- 
+>Homo|NP_003508.1|cH2A.9_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------A-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYMAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S--HKAKSK-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Mus|NP_783589.1|cH2A.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGPTK--------------------R--GGK------------A-------- +---------R----A---------------KVK---SRSSRAGLQFPVGRVHRLLRQGNY +AQ-RIGAGAPVYLAAVLEYLTAEVLELA----GNAA--RDN-K-KTRITPRHLQLAIRND +EELNKLL--G-RVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------S--HKSQTK-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Encephalitozoon|NP_584598.1|cH2A_(Fungi) organism=Encephalitozoon cuniculi GB-M1 phylum=Microsporidia class= +------------------------------------------------------------ +-------MVVIQG--------------------K--GGK------------A-------- +---------D--PRV---------------IGK---DEEHQKSI-VKLSQIKKIMKDRTR +M--RISKDALVAVSACVMYLISEITDGA----KNVA--STD-G-KKKVMPKHINNAICND +TELHFVG--H-DWLI-----------------KN---------GG----MKSYI--APGD +FA-----------VSSKKGSSRD------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +--------------------- +>Encephalitozoon|HISTDB_cH2A_0|cH2A_(Fungi) organism=Encephalitozoon intestinalis phylum=Microsporidia class= +------------------------------------------------------------ +-------MAMVQG--------------------K--GGK------------A-------- +---------D--PRV---------------MGK---DEEHQKSI-VKLSQIKKIMKDRTR +M--RISKDALIGVSACVMYLISEITDGA----KNVA--NTD-G-KKKVIPKHINHAICND +TELHFVG--H-DWLI-----------------KN---------GG----MKSYI--SPGD +FS-----------VSSKKGGSRD------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Ostreococcus|XP_003080758.2|cH2A_(Chlorophyta) organism=Ostreococcus tauri phylum=Chlorophyta class=Mamiellophyceae +------------------------------------------------------------ +-------MSGRGK------------------------GK------------T-------- +---------G----K---------------KAM---SRSAKAGLQFPVGRVARYLKQGKY +AT-RVGAGAPVYLAAVLEYLAAEVLELA----GNAS--RDN-K-KSRIVPRHIQLAIRND +EELSKLL--G-TVTI-----------------AS---------GGVLPNIHSVL--LPKK +SK-----------K---------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Arabidopsis|NP_001190852.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MAGRGKQ-------------------L--GSG------------A-------- +---------A----K---------------KST---SRSSKAGLQFPVGRIARFLKAGKY +AE-RVGAGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KTRIVPRHIQLAVRND +EELSKLL--G-DVTI-----------------AN---------GGVMPNIHNLL--LPKK +AG-----------SSKPTEED--------------------------------------- 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Arabidopsis|NP_175517.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MAGRGKT-------------------L--GSG------------S-------- +---------A----K---------------KAT---TRSSKAGLQFPVGRIARFLKKGKY +AE-RVGAGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KTRIVPRHIQLAVRND +EELSKLL--G-DVTI-----------------AN---------GGVMPNIHNLL--LPKK +TG-----------ASKPSAEDD-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Arabidopsis|NP_188703.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MAGRGKT-------------------L--GSG------------V-------- +---------A----K---------------KST---SRSSKAGLQFPVGRIARFLKNGKY +AT-RVGAGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KTRIVPRHIQLAVRND +EELSKLL--G-DVTI-----------------AN---------GGVMPNIHSLL--LPKK +AG-----------ASKPSADED-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Arabidopsis|NP_200275.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MAGRGKT-------------------L--GSG------------G-------- +---------A----K---------------KAT---SRSSKAGLQFPVGRIARFLKAGKY +AE-RVGAGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KTRIVPRHIQLAVRND 
+EELSKLL--G-DVTI-----------------AN---------GGVMPNIHNLL--LPKK +AG-----------ASKPQED---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Beta|XP_010685819.1|cH2A_(Embryophyta) organism=Beta vulgaris subsp. vulgaris phylum=Streptophyta class=Magnoliopsida +----------------------------------------------------------MD +STAGGKAKKGAGGR-------------------K--GGG------------P-------- +---------K----K---------------KPV---SRSVKAGLQFPVGRIGRYLKKGRY +AQ-RVGTGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KNRIIPRHVLLAVRND +DELGKLL--S-GVTI-----------------AH---------GGVLPNINPVL--LPKK +AG-----------GDKATKEPKSPSKATKSPKKA-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Oryza|NP_001066688.1|cH2A_(Embryophyta) organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MAGRGKA-------------------I--GAG------------A-------- +---------A----K---------------KAT---SRSSKAGLQFPVGRIARFLKAGKY +AE-RVGAGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KTRIVPRHIQLAVRND +EELTKLL--G-GATI-----------------AS---------GGVMPNIHQHL--LPKK +AG-----------SSKASHADDDDN----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Zea|NP_001132837.1|cH2A_(Embryophyta) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MAGRGKA-------------------I--GSG------------A-------- 
+---------A----K---------------KAT---SRSSKAGLQFPVGRIARFLKAGKY +AE-RVGAGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KTRIVPRHIQLAVRND +EELSRLL--G-TVTI-----------------AS---------GGVMPNIHNLL--LPKK +AG-----------GGSAKAAAGDED----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Zea|NP_001141988.1|cH2A_(Embryophyta) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MAGRGKA-------------------I--GAG------------A-------- +---------A----K---------------KAT---SRSSKAGLQFPVGRIARFLKAGKY +AE-RVGAGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KTRIVPRHIQLAVRND +EELTKLL--G-GATI-----------------AS---------GGVMPNIHQHL--LPKK +AA-----------SSKASVDDDDN------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Toxoplasma|XP_002365268.1|cH2A_(Protists) organism=Toxoplasma gondii ME49 phylum=Apicomplexa class=Conoidasida +------------------------------------------------------------ +-------MSAKGK-----------------------GGR------------A-------- +---------K--KSG---------------KSS---SKSAKAGLQFPVGRIGRYLKKGRY +AK-RVGAGAPVYMAAVLEYLCAEILELA----GNAA--RDH-K-KTRIIPRHIQLAVRND +EELSKFL--G-GVTI-----------------AS---------GGVMPNVHSVL--LPKK +SK-----------GKKSQ------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Tetrahymena|AAC37292.1|cH2A_(Protists) organism=Tetrahymena thermophila phylum=Ciliophora class=Oligohymenophorea 
+------------------------------------------------------------ +-------MSTTGK-----------------------GGK------------A-------- +---------K--GKT---------------ASSKQVSRSARAGLQFPVGRISRFLKNGRY +SE-RIGTGAPVYLAAVLEYLAAEVLELA----GNAA--KDN-K-KTRIVPRHILLAIRND +EELNKLM--A-NTTI-----------------AD---------GGVLPNINPML--LPSK +TK-----------KSTEPEH---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Trypanosoma|XP_845905.1|cH2A_(Protists) organism=Trypanosoma brucei brucei TREU927 phylum=Euglenozoa class=Kinetoplastea +------------------------------------------------------------ +-------MATPKQ-----------------------AVK------------K-------- +---------A--SKG---------------GS----SRSVKAGLIFPVGRVGTLLRRGQY +AR-RIGASGAVYMAAVLEYLTAELLELS----VKAAAQQTK-K-TKRLTPRTVTLAVRHD +DDLGALL--R-NVTM-----------------SR---------GGVMPSLNKAL--AKKQ +KS-----------GKHAKATPSV------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Perkinsus|EER16127.1|cH2A_(Protists) organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +------------------------------------------------------------ +-------MSGKGK-----------------------GAV------------L-------- +---------E--GMH---------------KDKK--TRSAKAGLQFPVGRIARYMKHGRY +AK-RVGAGAPVYLAAVLEYLVAEILELA----GNAA--RDH-K-KTRINPRHIQLAVRND +DELNEFL--S-NVTI-----------------AS---------GGVLPNIHTSL--LPKK +ST-----------KKSMEY----------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Lilium|BAE47493.1|gH2A 
organism=Lilium longiflorum phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MISSANN-------------------K--GAG------------T-------- +---------S--RRK---------------------LRSEKAALQFSVSRVEYSLKKGRY +CR-RLGATAPVYLAAVLENLVAEVLEMA----ANVT--EKH-K-RIVIKPRHIMLAVRND +VEVNKLF--H-GVTI-----------------SA---------SGVVPKTRKEL--DRRK +RR-----------STSQAD----------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_808760.1|H2A.J_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRGK--------------------Q--GGK------------V-------- +---------R----A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-KVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TE-----------SQKTKSK---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Physcomitrium|XP_024376580.1|H2A.M organism=Physcomitrium patens phylum=Streptophyta class=Bryopsida +------------------------------------------------------------ +-------MSGRGK-----------------------GAG------------A-------- +---------A--ARK---------------KSV---SRSAKAGLQFPVGRLGRYLKKGRY +AR-RVGSGAPVYLAAVLEYLAAEVLELA----GNAS--RDN-K-KSRIIPRHIQLAIRND +EELGKLL--S-GVTI-----------------AY---------GGVLPNIHSVL--LPKK +TA-----------GGAG-ADKSEKPEKEKKTK--------------VEKASKG------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+--------------------- +>Physcomitrium|XP_024376581.1|H2A.M organism=Physcomitrium patens phylum=Streptophyta class=Bryopsida +------------------------------------------------------------ +-------MSGRGK-----------------------GAG------------A-------- +---------A--ARK---------------KSV---TKSAKAGLQFPVGRLGRYLKKGRY +AQ-RVGSGAPVYLAAVLEYLAAEVLELA----GNAS--RDN-K-KSRIIPRHIQLAIRND +EELGKLL--S-GVTI-----------------AY---------GGVLPNIHSVL--LPKK +TA-----------GGTG-GEKPEKPEKEKKEK--------------KGKADKE------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Physcomitrium|XP_024377711.1|H2A.M organism=Physcomitrium patens phylum=Streptophyta class=Bryopsida +------------------------------------------------------------ +-------MSGRGK-----------------------GAG------------A-------- +---------A--ARK---------------KSV---TKSAKAGLQFPVGRLGRYLKKGRY +AQ-RVGSGAPVYLAAVLEYLAAEVLELA----GNAS--RDN-K-KSRIIPRHIQLAIRND +EELGKLL--S-GVTI-----------------AY---------GGVLPNIHSVL--LPKK +TA-----------GGTG-TEKPAKPEKEKKVK--------------GEKASKE------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Selaginella|HISTDB_H2A_M_1|H2A.M organism=Selaginella moellendorffii phylum=Streptophyta class=Lycopodiopsida +------------------------------------------------------------ +-------MVVQG------------------------G-G------------R-------- +---------K--GKK---------------KSV---SKSARAGLQFPVGRLARYLKNGRY +AK-RVGSGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KTRIIPRHIQLAVRND +DELGKLL--Q-GVTI-----------------AH---------GGVIPHIHGVL--LPKK +SS-----------SGAGSAEKSPKPEKS-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +--------------------- +>Marchantia|HISTDB_H2A_M_2|H2A.M organism=Marchantia polymorpha phylum=Streptophyta class=Marchantiopsida +------------------------------------------------------------ +-------MSGRGH-------------------------S------------A-------- +---------K--AKR---------------KAI---SKSARAGLQFPVGRLARYLKNGRY +AK-RVGAGAPVYLAAVLEYLAAEVLELA----GNAC--RDN-G-KTRIIPRHIQLAIRND +EELGKLL--A-SVTI-----------------AH---------GGVLPNIHQLL--LPKK +TA-----------AKLE-KEEKSKAEKSSTKS--------------DSKSTEK------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Marchantia|OAE20401.1|H2A.M organism=Marchantia polymorpha subsp. ruderalis phylum=Streptophyta class=Marchantiopsida +------------------------------------------------------------ +-------MSARS------------------------G-T------------T-------- +---------A--VKK---------------KPV---SKSQKAGLQFPVGRMARFLKNGRY +AK-RIGAGAPVYLAAVLEYLAAELLELA----GNAC--RDN-K-KTRIIPRHIQLAVRND +EELSKLL--A-EVTI-----------------SR---------GGVLPNINPSL--LPKK +TA-----------SKAE-KEVAEADDKSTKDKAKEKETVTSSKRSPKAKSEKK------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Picea|HISTDB_H2A_M_0|H2A.M organism=Picea abies phylum=Streptophyta class=Pinopsida +------------------------------------------------------------ +-----MEPATQGS-----------------------G-G------------R-------- +---------G--GKK---------------KPV---SKSERAGLQFPVGRLARYLKKGRY +AK-RVGTGAPIYLAAVLEYLAAEILELS----GNAA--KDN-K-KSRIIPRHILLAVKND +DELNKLL--A-NVTI-----------------AY---------GGVVPNIHQVL--LPKK +TA-----------EKAKAKESSEI------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Monodelphis|XP_001380078.1|H2A.R organism=Monodelphis domestica phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSEKRS--------------------H--PGP------------S-------- +---------H--PRS---------------RTR---SRSSRAQLQFPVSRVDRFLRQGHY +AQ-RLASGAPVFLAAVLEYLTAEILELA----GNAA--RDN-Q-KSRIAPRHVQLAVRND +AELNQLF--G-DVTI-----------------SQ---------GGVLPRIHSEL--LQSV +NK-----------AQSSRNLGGNSFIQTVKTK---------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Monodelphis|XP_003341803.1|H2A.R organism=Monodelphis domestica phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPEKRS--------------------H--HGS------------L-------- +---------N--THN---------------QIR---SRSSRAQLQFPVSRVDRFLRQGHY +AQ-RLASSAPVFLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIAPCHVQLAVRND +VELNQLF--G-HVTI-----------------SQ---------GGVLPRIHPEL--VQPA +TG-----------GRSSQSHVGRNHNYSVKAK---------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Ornithorhynchus|XP_001519563.1|H2A.R organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +------------------------------------------------------------ +--MAARVPSAEGS--------------------P--SGP------------R-------- +---------R--SGP---------------R-R---SRSSRAQLRFSVSLVDRFLRRGRY +SR-RVAEGTPVFLAAVLEYLTAELLELA----GHTA--GAH-R-RQRIAPVHLRQAVRDD +PELDRLF--G-DIVS-----------------SP---------GAGLPRLHSAL--LKPW +T------------VRTWRGRTVSFGEDPGHPR---------------------------- 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Sarcophilus|XP_003762607.1|H2A.R organism=Sarcophilus harrisii phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSEKRS--------------------H--PGP------------S-------- +---------H--PRA---------------RTR---SRSSRAQLQFPVSRVDRFLRQGHY +AQ-RLASGAPVFLAAVLEYLTAEILELA----GNAA--RDN-Q-KSRIAPRHVQLAVRND +AELNQLF--G-DVTI-----------------SQ---------GGVLPRIHSEL--LQSV +NK-----------AQGSRNRGGNSCIQSVKTK---------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Sarcophilus|XP_012399201.1|H2A.R organism=Sarcophilus harrisii phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPEKRC--------------------H--QGP------------L-------- +---------P--PRA---------------RTR---SRSSRAQLQFPVSRVDRFLRQGHY +AQ-RLASGAPVFLAAVLEYLTAEILELA----GNAA--RDN-Q-KTRIAPCHVQLAVRND +AELNQLF--G-HVTI-----------------SQ---------GAVLPRIHSEL--LQPT +SK-----------ARSSQCHVGQSHTYSMKAK---------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Arabidopsis|NP_198119.1|H2A.W organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MESSQ---------------------A--TTK-PTRGAGGRK--G-------- +---------G--DRK---------------KSV---SKSVKAGLQFPVGRIARYLKKGRY +AL-RYGSGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KNRINPRHLCLAIRND +EELGRLL--H-GVTI-----------------AS---------GGVLPNINPVL--LPKK 
+ST-----------ASSS--------QAEKASATKSPKKA--------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Arabidopsis|NP_200795.1|H2A.W organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MESTG--------------------------K-VKKAFGGRKPPG-------- +---------A--PKT---------------KSV---SKSMKAGLQFPVGRITRFLKKGRY +AQ-RLGGGAPVYMAAVLEYLAAEVLELA----GNAA--RDN-K-KSRIIPRHLLLAIRND +EELGKLL--S-GVTI-----------------AH---------GGVLPNINSVL--LPKK +SA-----------TKPA-----EEKATKS--PVKSPKKA--------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Zea|NP_001105357.1|H2A.W organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MDSTGTG-------------------A--GGK-GKKGAAGRKV-G-------- +---------G--PRK---------------KSV---SRSVKAGLQFPVGRIGRYLKKGRY +AQ-XVGTGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KTRIIPRHVLLAIRND +EELGKLL--G-GVTI-----------------AH---------GGVLPNINPVL--LPKK +TA-----------EKASSGGSKEAKSPKK--AAKSPKKA--------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Zea|NP_001141182.1|H2A.W organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MDVSG---------------------A--GGK-AKKGAAGRKA-G-------- +---------G--PTK---------------KSV---SRSSRAGLQFPVSRVGRYLKKGRY +AQ-RVGTGAPVYLAAVLEYLAAEVLELA----GNAA--RDN-K-KTRIIPRHVLLAIRND 
+EELGKLL--A-GVTI-----------------AH---------GGVLPNIHTVL--LPKK +VA-----------EKAA-------KEPKK--AAKSPKKA--------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Zea|NP_001183143.1|H2A.W organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MDASG---------------------A--GSK-AKKGAAGRKA-G-------- +---------G--PRK---------------KSV---SRSVKAGLQFPVGRIGRYLKKGRY +AQ-RVGTGAPVYLAAVLEYLAAEVLELA----GNAA--KDN-K-KTRIVPRHVLLAIRND +VELGKLL--A-GVTI-----------------AH---------GGVLPNINPVL--LPKK +VA-----------EKASSGGSKESKSPKK--AAKSPKKAAKSPKKA-------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Zea|NP_001183439.1|H2A.W organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MDASG---------------------A--GSK-GKKGAAGRKA-G-------- +---------G--PRK---------------KSV---TRSVKAGLQFPVGRIGRYLKKGRY +AQ-RVGTGAPVYLAAVLEYLAAEVLELA----GNAA--KDN-K-KTRIIPRHVLLAIRND +EELGKLL--A-GVTI-----------------AH---------GGVLPNIHSVL--LPKK +AA-----------EKAASGGSKEPKSPKK--GAKSPKKA--------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Zea|NP_001183510.1|H2A.W organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MDASAAG-------------------A--GGK-AKKGAAGRKA-G-------- +---------G--PRK---------------KSV---TRSVKAGLQFPVGRIGRYLKKGRY +AQ-RVGTGAPVYLAAVLEYLAAEVLELA----GNAA--KDN-K-KTRIIPRHVLLAIRND 
+EELGKLL--S-GVTI-----------------AH---------GGVLPNINPVL--LPKK +TA-----------EKAA---AKEAKSPKK--AAKSPKKA--------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Zea|NP_001278724.1|H2A.W organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MD------------------------A--GAKVVKKAAAGRRGGG-------- +---------G--PKK---------------KPV---SRSVKAGLQFPVGRIGRYLKQGRY +SQ-RVGTGAPVYLAAVLEYLAAELLELA----GNAA--RDN-K-KNRIIPRHVLLAIRND +EELGKLL--A-GVTI-----------------AH---------GGVLPNINPVL--LPKK +TA-----------VAAAKEG-KEKKSPKKAAAAKSPKKVAAS------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Zea|NP_001281214.1|H2A.W organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MDATGTG-------------------A--GGK-AKKGAAGRKA-G-------- +---------G--PRK---------------KSV---TRSVKAGLQFPVGRIGRYLKKGRY +AQ-RVGSGAPVYLAAVLEYLAAEVLELA----GNAA--KDN-K-KTRIVPRHVLLAIRND +EELGKLL--T-GVTI-----------------AH---------GGVLPNINPVL--LPKK +TA-----------EKASSGGSKEAKSPKK--AAKSPKKA--------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Apis|XP_624700.1|H2A.X_(Animals) organism=Apis mellifera phylum=Arthropoda class=Insecta +------------------------------------------------------------ +-------MSGRGK-----------------------GGK------------AK------- +--------------G---------------KAK---TRSSRAGLQFPVGRIHRLLRKGNY 
+AE-RVGAGAPVYLAAVMEYLAAEVLELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--S-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TGT--G-------GSGKGDKASQEY----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Trichoplax|XP_002116274.1|H2A.X_(Animals) organism=Trichoplax adhaerens phylum=Placozoa class=Uniplacotomia +------------------------------------------------------------ +-------MSGRGK-----------------------GGK------------AR------- +--------------N---------------KAK---SRSTRAGLQFPVGRVHRMLRKGNY +AE-RVGAGAPVYLAAVMEYLAAEILELA----GNAA--RDN-K-KQRIVPRHLQLAIRND +EELNKLL--S-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +SKVPIA-------GSKKGSSQSQEY----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Danio|NP_957367.1|H2A.X_(Vertebrata) organism=Danio rerio phylum=Chordata class=Actinopteri +------------------------------------------------------------ +-------MSGRGKT----------------------GGK------------AR------- +--------------A---------------KAK---TRSSRAGLQFPVGRVHRLLRKGNY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAVRND +EELNKLL--G-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TGQAAASSGK---SGKKGSSQSQEY----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Bos|NP_001073248.1|H2A.X_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRGKT----------------------GGK------------AR------- 
+--------------A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGHY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TSATVGPKAP--AGGKKATQASQEY----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Monodelphis|XP_001370540.1|H2A.X_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRGKT----------------------GGK------------AR------- +--------------A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGHY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +SGAITGPKAPGSGGSKKSTQASQEY----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Rattus|NP_001102761.1|H2A.X_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRGKT----------------------GGK------------AR------- +--------------A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGHY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TSATVGPKAP--AGGKKASQASQEY----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_002096.1|H2A.X_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ 
+-------MSGRGKT----------------------GGK------------AR------- +--------------A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGHY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +TSATVGPKAP--SGGKKATQASQEY----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Mus|NP_034566.1|H2A.X_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRGKT----------------------GGK------------AR------- +--------------A---------------KAK---SRSSRAGLQFPVGRVHRLLRKGHY +AE-RVGAGAPVYLAAVLEYLTAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-GVTI-----------------AQ---------GGVLPNIQAVL--LPKK +SSATVGPKAP--AVGKKASQASQEY----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Saccharomyces|NP_009552.1|H2A.X_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +------------------------------------------------------------ +--------MSGGK-----------------------GGK------------AGS------ +---------A---AK---------------ASQ---SRSAKAGLTFPVGRVHRLLRRGNY +AQ-RIGSGAPVYLTAVLEYLAAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +DELNKLL--G-NVTI-----------------AQ---------GGVLPNIHQNL--LPKK +S------------AKTAKA--SQEL----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Saccharomyces|NP_010511.3|H2A.X_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota 
class=Saccharomycetes +------------------------------------------------------------ +--------MSGGK-----------------------GGK------------AGS------ +---------A---AK---------------ASQ---SRSAKAGLTFPVGRVHRLLRRGNY +AQ-RIGSGAPVYLTAVLEYLAAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +DELNKLL--G-NVTI-----------------AQ---------GGVLPNIHQNL--LPKK +S------------AKATKA--SQEL----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Schizosaccharomyces|NP_594421.1|H2A.X_(Fungi) organism=Schizosaccharomyces pombe phylum=Ascomycota class=Schizosaccharomycetes +------------------------------------------------------------ +--------MSGGKS----------------------GGK------------AA------- +---------V---AK---------------SAQ---SRSAKAGLAFPVGRVHRLLRKGNY +AQ-RVGAGAPVYLAAVLEYLAAEILELA----GNAA--RDN-K-KTRIIPRHLQLAIRND +EELNKLL--G-HVTI-----------------AQ---------GGVVPNINAHL--LPKQ +S------------GKGKP---SQEL----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Ustilago|XP_011387544.1|H2A.X_(Fungi) organism=Ustilago maydis 521 phylum=Basidiomycota class=Ustilaginomycetes +------------------------------------------------------------ +-------MSSGGKS----------------------GGK------------AGD------ +---------A---SS---------------KAQ---SRSAKAGLQFPVGRIHRLLRKGNY +AQ-RVGAGAPVYLAAVLEYLAAEILELA----GNAA--RDN-K-KSRIIPRHLQLAIRND +EELNKLL--G-GVTI-----------------SQ---------GGVLPFIQSEL--LPAK +S------------GKPKKAGGSQDI----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+--------------------- +>Arabidopsis|NP_172363.1|H2A.X_(Plants) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +------MSTGAGSG-------------------TTKGGR------------GKP------ +---------K---AT---------------KSV---SRSSKAGLQFPVGRIARFLKSGKY +AE-RVGAGAPVYLSAVLEYLAAEVLELA----GNAA--RDN-K-KTRIVPRHIQLAVRND +EELSKLL--G-SVTI-----------------AN---------GGVLPNIHQTL--LPSK +VG-K---------NKGDIGSASQEF----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Arabidopsis|NP_175868.1|H2A.X_(Plants) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +------MSSGAGSG-------------------TTKGGR------------GKP------ +---------K---AT---------------KSV---SRSSKAGLQFPVGRIARFLKAGKY +AE-RVGAGAPVYLSAVLEYLAAEVLELA----GNAA--RDN-K-KTRIVPRHIQLAVRND +EELSKLL--G-SVTI-----------------AN---------GGVLPNIHQTL--LPSK +VG-K---------NKGDIGSASQEF----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Oryza|NP_001066920.1|H2A.X_(Plants) organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MSSAGGG----------------------GGR------------GKS------ +---------K---GS---------------KSV---SRSSKAGLQFPVGRIARYLKAGKY +AE-RVGAGAPVYLSAVLEYLAAEVLELA----GNAA--RDN-K-KNRIVPRHIQLAVRND +EELSRLL--G-AVTI-----------------AA---------GGVLPNIHQTL--LPKK +GG-K---------DKADIGSASQEF----------------------------------- +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Zea|XP_008644553.1|H2A.X_(Plants) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MSSTG-G----------------------GGR------------GKA------ +---------K--PAT---------------KSV---SRSSKAGLQFPVGRIARYLKAGKY +AE-RVGAGAPVYLSAVLEYLAAEVLELA----GNAA--RDN-K-KNRIVPRHIQLAVRND +EELSKLL--G-TVTI-----------------AA---------GGVMPNIHQTL--LPKK +AG-----------QKGDIGSASQEF----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Zea|XP_008662624.1|H2A.X_(Plants) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MSS---G----------------------GGR------------GKP------ +---------K---GS---------------KAL---SRSTKAGLQFPVGRIARYLKAGKY +AE-RVGGGAPVYLSAVLEYLAAEVLELA----GNAA--RDN-K-KNRIVPRHIQLAVRND +EELSKLL--G-AVTI-----------------AA---------GGVLPNIHQTL--LPKK +AGGK---------GKADIGSASQEF----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Cryptosporidium|XP_627120.1|H2A.X_(Protists) organism=Cryptosporidium parvum Iowa II phylum=Apicomplexa class=Conoidasida +------------------------------------------------------------ +-------MSGKVT-----------------------SSG------------GRGGGK--- +---------K--TTR---------------KTM---SNSAKAGLQFPVGRVARYLKKGRY +AK-RIGAAAPVYLAAVLEYLCAELLELA----GNAA--RDA-K-KTRITPRQIQLAVRND +EELSKFL--G-NVTI-----------------AS---------GGVLPNIPTVL--LPKK +SK-----------SK--QG-NSQEF----------------------------------- 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Toxoplasma|XP_002365290.1|H2A.X_(Protists) organism=Toxoplasma gondii ME49 phylum=Apicomplexa class=Conoidasida +------------------------------------------------------------ +-------MSAKGA-----------------------GGR------------KK------- +---------T--SSG---------------KKV---SRSAKAGLQFPVSRIGRYLKKGRY +AK-RVGVGAPVYLAAVLEYLCAEILELA----GNAA--RDH-K-KTRIIPRHIQLAVRND +EELSKFL--G-GVTI-----------------AN---------GGVMPHVHAVL--LPKH +SK-----------SKGKHG-VSQEF----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Tetrahymena|AAC37291.1|H2A.X_(Protists) organism=Tetrahymena thermophila phylum=Ciliophora class=Oligohymenophorea +------------------------------------------------------------ +-------MSTTGK-----------------------GGK------------AK--GK--- +---------T--ASS---------------KQV---SRSARAGLQFPVGRISRFLKHGRY +SE-RVGTGAPVYLAAVLEYLAAEVLELA----GNAA--KDN-K-KTRIVPRHILLAIRND +EELNKLM--A-NTTI-----------------AD---------GGVLPNINPML--LPSK +SK-----------KTESRGQASQDL----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Dictyostelium|XP_641587.1|H2A.X_(Protists) organism=Dictyostelium discoideum AX4 phylum=Evosea class=Eumycetozoa +------------------------------------------------------------ +------MSETKPA-----------------------SSK------------PAAAAKPKK +VIPRVSRTGE--PKS---------------KPE---SRSARAGITFPVSRVDRLLREGRF +AP-RVESTAPVYLAAVLEYLVFEILELA----HNTC--SIS-K-KTRITPQHINWAVGND 
+LELNSLF--Q-HVTI-----------------AY---------GGVLPTPQQST--GEKK +KKPS---------KKAAEG-SSQIY----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Giardia|XP_001704715.1|H2A.X_(Protists) organism=Giardia intestinalis phylum=Fornicata class= +------------------------------------------------------------ +-------MSTKPV----------------------------------------------- +---------K--DNS---------------KMK---SRSARAGISFPIGRIHRHLREGRY +AE-RISSDAPVYLAAVLENVVAEVFREA----CNHR--DKK-S-QKRIVPNHILTALRKD +KELATIF--A-NVTI-----------------RE---------GGVARSAK-------EG +RE-----------GKGSH--RSQDL----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Perkinsus|XP_002784006.1|H2A.X_(Protists) organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +------------------------------------------------------------ +-------MSGKGK-----------------------GGR------------GKA-GK--- +---------K--SGS---------------GAK---SRSAKAGLQFPVGRIARYLKKGRY +AK-RVGSGAPVYLAAVLEYLVAEILELA----GNAA--RDH-K-KTRIIPRHIQLAVRND +EELNKFL--A-GVTL-----------------AS---------GGVLPNIHTTL--LPKK +SK-----------GKSFT--ASQEI----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Cryptosporidium|XP_626045.1|H2A.Z organism=Cryptosporidium parvum Iowa II phylum=Apicomplexa class=Conoidasida +------------------------------------------------------------ +--MDGATSSGKIGG--------KV-GGKVGGKGKAGSGK--------------------- +---------G--SKK---------------QPT---SRAARAGLQFPVGRIQRMLKHRIP 
+GDCRVGSTASVYAAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDSLI--K--ATI-----------------AG---------GGVIPHIEKSL--MGKA +LI-----------GKKGKKGNMSP------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Toxoplasma|XP_002371743.1|H2A.Z organism=Toxoplasma gondii ME49 phylum=Apicomplexa class=Conoidasida +---------------------------------------------------------MDG +AGKVGGKVGGKVGG--------KVGGMGKGGKGKSGSGK--------------------- +---------G---KK---------------APL---SRAARAGLQFPVGRVHRMLKSRIS +SEGRVGSTAAVYASAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDTLI--K--ATI-----------------AG---------GGVIPHIHKSL--MTKG +PS-----------TQPMKKAKK-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Drosophila|NP_524519.1|H2A.Z organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +------------------------------------------------------------ +------MAGGKAGK---------------------DSGK--------------------- +---------A---KA---------------KAV---SRSARAGLQFPVGRIHRHLKSRTT +SHGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDSLI--K--ATI-----------------AG---------GGVIPHIHKSL--IGKK +EE-----------TVQDPQRKGNVILSQAY------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Saccharomyces|NP_014631.1|H2A.Z organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +------------------------------------------------------------ +--MSGKAHGGKGKS--------GA------------KDS--------------------- 
+---------G---SL---------------RSQ---SSSARAGLQFPVGRIKRYLKRHAT +GRTRVGSKAAIYLTAVLEYLTAEVLELA----GNAA--KDL-K-VKRITPRHLQLAIRGD +DELDSLI--R--ATI-----------------AS---------GGVLPHINKAL--LLKV +EK-----------KGSKK------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Schizosaccharomyces|NP_595630.3|H2A.Z organism=Schizosaccharomyces pombe phylum=Ascomycota class=Schizosaccharomycetes +------------------------------------------------------------ +------MSGGGKGK--------HV----GGKGGSKIGER--------------------- +---------G---------------------QM---SHSARAGLQFPVGRVRRFLKAKTQ +NNMRVGAKSAVYSAAVLEYLTAEVLELA----GNAA--KDL-K-VKRITPRHLQLAIRGD +EELDTLI--R--ATI-----------------AG---------GGVLPHINKQL--LIRT +KE-----------KYPEEEEII-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Tetrahymena|CAA33554.1|H2A.Z organism=Tetrahymena thermophila phylum=Ciliophora class=Oligohymenophorea +------------------------------------------------------------ +------MAGGKGGK-----------GGKGGKGGKVGGAK--------------------- +---------N---KK---------------TPQ---SRSYKAGLQFPVGRIHRFLKGRVS +AKNRVGATAAVYAAAILEYLTAEVLELA----GNAS--KDF-K-VRRITPRHLLLAIRGD +EELDILI--K--ATI-----------------AG---------GGVIPHIHKAL--LGKH +ST-----------KNRSSAKTAEPR----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Nematostella|EDO46289.1|H2A.Z organism=Nematostella vectensis phylum=Cnidaria class=Anthozoa 
+------------------------------------------------------------ +------MAGGKAGK----------------------DSK--------------------- +---------A---KA---------------KAV---SRSARAGLQFPVGRIHRHLKNRTT +SHGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDSLI--K--ATI-----------------AG---------GGVIPHIHKSL--IGKK +GA-----------NKPT------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Trypanosoma|XP_846259.1|H2A.Z organism=Trypanosoma brucei brucei TREU927 phylum=Euglenozoa class=Kinetoplastea +---------------------------------------MSLTGDDAVPQAPLVGGVAMS +PEQASALTGGKLGGKAVGPAHGKGKGKGKGKRGGKTGGK--------------------- +---------A--GRR---------------DKM---TRAARADLNFPVGRIHSRLKDGLN +RKQRCGASAAIYCAALLEYLTSEVIELA----GAAA--KAQ-K-TERIKPRHLLLAIRGD +EELNQIV--N--ATI-----------------AR---------GGVVPFVHKSL--EKKI +IK-----------KSKRGS----------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Dictyostelium|XP_637656.1|H2A.Z organism=Dictyostelium discoideum AX4 phylum=Evosea class=Eumycetozoa +------------------------------------------------------------ +-MTESETTSKKVNK---------------------------------------------- +-------------RV---------------KPV---PKSTKAGLIFPVGRIHRMLKNKVP +LK-RVSILSSVYLAAILEYLASEVLELTISQVSIQS--KEYHN-VRRISPRHLLLAIKTD +EELDNLI--RVSTTI-----------------AG---------GGVIPYIHEVLKKVEQK +PT-----------HPQQKQTIKSI------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Perkinsus|XP_002776752.1|H2A.Z 
organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +------------------MSTRKQTDVSLPVPVVTQ-------------------LAGGK +GIKGAGLQGGKGGK--------GGKGAKVHGGKGKIGGK--------------------- +---------G--------------------SSI---TRSARAGLQFPVGRVRRYLKDRAT +ANCRVGSTAAVYTAAILEYLTAEVLELA----GNAA--KDQ-K-VKRITPRHLHLAIRGD +DELDVLI--R--ATI-----------------AG---------GGVVPYIHQSL--TVKT +PY-----------HKKKRVM---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Perkinsus|XP_002784054.1|H2A.Z organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +------------------MSDLKKTDINKSVPVVTQ-------------------LAGGK +GIKGAGLQGGKGGK--------GGKGAKVHGGKGKMGGK--------------------- +---------G--------------------ASM---TRSARAGLQFPVGRVRRYLKDRAT +ANCRVGSTAAVYTAAILEYLTAEVLELA----GNAA--KDQ-K-VKRITPRHLHLAIRGD +DELDVLI--R--ATI-----------------AG---------GGVVPYIHQSL--TAKA +PY-----------HKKKRVM---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Perkinsus|XP_002784055.1|H2A.Z organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +------------------MSDLKKTDINKSVPVVTQLGRLLDVASLSTPSSSLFCLAGGK +GIKGAGLQGGKGGK--------GGKGAKVHGGKGKIGGK--------------------- +---------G--------------------TSM---TRSARAGLQFPVGRVRRYLKDRAT +ANCRVGSTAAVYTAAILEYLTAEVLELA----GNAA--KDQ-K-VKRITPRHLHLAIRGD +DELDVLI--R--ATI-----------------AG---------GGVVPYIHQSL--TAKA +PY-----------HKKKRVM---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+--------------------- +>Trichoplax|XP_002111498.1|H2A.Z organism=Trichoplax adhaerens phylum=Placozoa class=Uniplacotomia +------------------------------------------------------------ +------MAGGKAGK----------------------DSK--------------------- +---------T---KA---------------KAV---SRSARAGLQFPVGRIHRHLKNRTT +SHGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDSLI--K--ATI-----------------AG---------GGVIPHIHKSL--IGKK +GA-----------NKPN------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Arabidopsis|NP_175683.1|H2A.Z organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MSGKGAK--------GLIMGKPSGS---DKDK--------------------- +---------D---KK---------------KPI---TRSSRAGLQFPVGRVHRLLKTRST +AHGRVGATAAVYTAAILEYLTAEVLELA----GNAS--KDL-K-VKRISPRHLQLAIRGD +EELDTLI--K--GTI-----------------AG---------GGVIPHIHKSL--INKS +AK-----------E---------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Arabidopsis|NP_181415.1|H2A.Z organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MAGKGGK--------GLLAAKTTAA-AANKDS--------------------- +---------V---KK---------------KSI---SRSSRAGIQFPVGRIHRQLKQRVS +AHGRVGATAAVYTASILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDTLI--K--GTI-----------------AG---------GGVIPHIHKSL--VNKV +TK-----------D---------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +--------------------- +>Arabidopsis|NP_191019.1|H2A.Z organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MAGKGGK--------GLVAAKTMAA-NKDKDK--------------------- +---------D---KK---------------KPI---SRSARAGIQFPVGRIHRQLKTRVS +AHGRVGATAAVYTASILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDTLI--K--GTI-----------------AG---------GGVIPHIHKSL--INKT +TK-----------E---------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Arabidopsis|NP_193093.1|H2A.Z organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-----MVCNTNILK--------DV------------STK--------------------- +---------I--SAF---------------ENV---RMIMVEGEMFQVARIHKQLKNRVS +AHSSVGATDVVYMTSILEYLTTEVLQLA----ENTS--KDL-K-VKRITPRHLQLAIRGD +EELDTLI--K--GTI-----------------IG---------GSVIPHIH--------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Oryza|NP_001051232.1|H2A.Z organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MAGKGGK--------GLLAAKTTAAKSAEKDK--------------------- +---------G---KK---------------APV---SRSSRAGLQFPVGRIHRQLKQRTQ +ANGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDTLI--K--GTI-----------------AG---------GGVIPHIHKSL--INKS +SK-----------E---------------------------------------------- +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Zea|NP_001136523.1|H2A.Z organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MAGKGGK--------GLLAAKTTAAKSAEKDK--------------------- +---------G---KK---------------API---SRSSRAGLQFPVGRIHRQLKQRTQ +ANGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDTLI--K--GTI-----------------AG---------GGVIPHIHKSL--INKS +SK-----------E---------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Zea|NP_001141633.1|H2A.Z organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +-------MAGKGGK--------GLLAAKTTAAKSTDKDK--------------------- +---------D--RKK---------------APV---SRSSRAGLQFPVGRIHRQLKSRAS +AHGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDTLI--K--GTI-----------------AG---------GGVIPHIHKSL--INKT +AK-----------E---------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Danio|NP_001036788.1|H2A.Z.1_(Chordata) organism=Danio rerio phylum=Chordata class=Actinopteri +------------------------------------------------------------ +------MAGGKAGK---------------------DSGK--------------------- +---------A---KT---------------KAV---SRSQRAGLQFPVGRIHRHLKTRTT +SHGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDSLI--K--ATI-----------------AG---------GGVIPHIHKSL--IGKK +GQ-----------QKTV------------------------------------------- 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Gallus|NP_001026545.1|H2A.Z.1_(Chordata) organism=Gallus gallus phylum=Chordata class=Aves +------------------------------------------------------------ +------MAGGKAGK---------------------DSGK--------------------- +---------T---KT---------------KAV---SRSQRAGLQFPVGRIHRHLKSRTT +SHGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDSLI--K--ATI-----------------AG---------GGVIPHIHKSL--IGKK +GQ-----------QKTV------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_002097.1|H2A.Z.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------MAGGKAGK---------------------DSGK--------------------- +---------A---KT---------------KAV---SRSQRAGLQFPVGRIHRHLKSRTT +SHGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDSLI--K--ATI-----------------AG---------GGVIPHIHKSL--IGKK +GQ-----------QKTV------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Danio|NP_705930.1|H2A.Z.2_(Chordata) organism=Danio rerio phylum=Chordata class=Actinopteri +------------------------------------------------------------ +------MAGGKAGK---------------------DSGK--------------------- +---------A---KA---------------KAV---SRSQRAGLQFPVGRIHRHLKTRTT +SHGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDSLI--K--ATI-----------------AG---------GGVIPHIHKSL--IGKK 
+GQ-----------QKTA------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Gallus|P02272.2|H2A.Z.2_(Chordata) organism=Gallus gallus phylum=Chordata class=Aves +------------------------------------------------------------ +------MAGGKAGK---------------------DSGK--------------------- +---------A---KA---------------KAV---SRSQRAGLQFPVGRIHRHLKTRTT +SHGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDSLI--K--ATI-----------------AG---------GGVIPHIHKSL--IGKK +GQ-----------QKTA------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_036544.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------MAGGKAGK---------------------DSGK--------------------- +---------A---KA---------------KAV---SRSQRAGLQFPVGRIHRHLKTRTT +SHGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDSLI--K--ATI-----------------AG---------GGVIPHIHKSL--IGKK +GQ-----------QKTA------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_958844.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------------MFPVGRIHRHLKTRTT +SHGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD 
+EELDSLI--K--ATI-----------------AG---------GGVIPHIHKSL--IGKK +GQ-----------QKTA------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_958924.1|H2A.Z.2.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------MAGGKAGK---------------------DSGK--------------------- +---------A---KA---------------KAV---SRSQRAGLQFPVGRIHRHLKTRTT +SHGRVGATAAVYSAAILEYLTAEV------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_619541.1|H2A.Z.2.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------MAGGKAGK---------------------DSGK--------------------- +---------A---KA---------------KAV---SRSQRAGLQFPVGRIHRHLKTRTT +SHGRVGATAAVYSAAILEYLTAEVLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDSLI--K--ATI-----------------AG---------G---------------- +EK-----------RRCS------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_958925.1|H2A.Z.2.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------MAGGKAGK---------------------DSGK--------------------- +---------A---KA---------------KAV---SRSQRAGLQ--------------- 
+-----------------------VLELA----GNAS--KDL-K-VKRITPRHLQLAIRGD +EELDSLI--K--ATI-----------------AG---------GGVIPHIHKSL--IGKK +GQ-----------QKTA------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Ixodes|XP_002403551.1|macroH2A organism=Ixodes scapularis phylum=Arthropoda class=Arachnida +------------------------------------------------------------ +-------MSARGG----------------------------------------------- +---------K--KRA---------------KVV---SKSTKAGVLFPVGRMRRYLKKGTH +HF-RIGAGAPVYMAAVIEYLSGEAISCQ----DSSP--RDP-L-LSALTKRKCCSGRPN- +LFFLQLL--K-GVTI-----------------AS---------GGVLPRILPEL--LARR +KGGRFKTVALAKKPVAAAIAKEKAVPPKEKAKLAKGKVCRKSSHCHVPLGDRGIILSLWP +TAPQGEHFTLVKHKTGLSLSVQLTVIQGDMASV---TADAAIHPTNASLSLSGEVGQVLE +KAGGKEFVQEVKDLFSAHGPLESAGAVICPGHQFPAKFVIHCNVPSGSSEP------LEK +CVRNCLALADEKNIRVLAVPPLATHSVASQKQQAAQTILKAISNYFVNVMSSSLKQIYFV +LSDMESIGIYTSELAKLDS-- +>Danio|NP_001020673.1|macroH2A organism=Danio rerio phylum=Chordata class=Actinopteri +------------------------------------------------------------ +-------MSARGG----------------------------------------------- +---------K--KKI---------------TKL---SRSARAGVIFPVGRMMRYLRTGTH +KY-RIGMGAPVYMAAVIEYLAAEILELA----GNAA--RDN-K-KGRITPRHIKLAVAND +EELNQLL--R-GVTI-----------------SN---------GGVLPRIHPEL--LSKK +RGGKVKVESQVAVPEKSAKRKPV----KKPYKKSKGKPGRKPKKSTENDKEADANA---M +EDGPGEGFTILSAKS-LFLGQKLSLTESEISKIGTIKVEGIINPTNAEIDLKEGIGNALE +KTGGKDFLETVKELRKSQGPLEVASVAVSQANGMAARFIIHCHVPQWGSDK--CEDQLEK +TVKNCLSAAEEKKLKSVAFPSLPAGRNGFPKQTAAQLILKAISNHFVSATTSSLKNIYFV +LFDSESIGIYLQEMAKMDAK- +>Gallus|AAC28846.1|macroH2A organism=Gallus gallus phylum=Chordata class=Aves +------------------------------------------------------------ +-------MSSRGG----------------------------------------------- 
+---------K--KKS---------------TKT---SRSAKAGVIFPVGRMLRYIKKGHP +KY-RIGVGAPVYMAAVLEYLTAEILELA----GNAA--RDN-K-KGRVTPRHILLAVAND +EELNQLL--K-GVTI-----------------AS---------GGVLPNIHPEL--LAKK +RGSKGKLEAIITPPPAKKAKSPS--QKKTVSKKTGGKKGARKSKKKQGEVSKSASADSTT +EGTPADGFTVLSTKS-LFLGQKLQVVQADIATI---DSDAVVHPTNSDFYTGGEVGSTLE +KKGGKEFVEAVIELRKKNGPLDIAGAVVSAGHGLPAKFVIHCNSPGWGSDK--CEELLEK +TVKNCLALADEKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFV +LFDSESIGIYVQEMAKLDAN- +>Gallus|NP_001264267.1|macroH2A organism=Gallus gallus phylum=Chordata class=Aves +------------------------------------------------------------ +-------MSGRSG----------------------------------------------- +---------K--KKM---------------SKL---SRSSRAGVIFPVGRMMRYLKKGTY +KY-RIGVGAPVYMAAVIEYLAAEILELA----GNAA--RDN-K-KGRIAPRHILLAVAND +EELNQLL--K-GVTI-----------------AS---------GGVLPRIQPEL--LAKK +RGAKGKSETILSPAPEKKGRKSM--VSKKSGKKAKSNKARTPKKNKQKDSEKEGASNSTS +EDGPGDGFTILSSKS-LVPGQKLSLTQSDISHIGSMKVEGIVHPTTAEIDLKEEIGKALE +KAGGKEFLETVKELRKSQGPLEVAEAALTQSSGLAAKFVIHCHIPQWGSDK--CEEQLEE +TIKNCLTAAEDKKLKSVAFPPFPSGRNCFPKQTAAQVTLRAISTHFDGTSSSSLKNIYFL +LFDSESIGIYVQEMAKLDTK- +>Gallus|NP_990338.1|macroH2A organism=Gallus gallus phylum=Chordata class=Aves +------------------------------------------------------------ +-------MSSRGG----------------------------------------------- +---------K--KKS---------------TKT---SRSAKAGVIFPVGRMLRYIKKGHP +KY-RIGVGAPVYMAAVLEYLTAEILELA----GNAA--RDN-K-KGRVTPRHILLAVAND +EELNQLL--K-GVTI-----------------AS---------GGVLPNIHPEL--LAKK +RGSKGKLEAIITPPPAKKAKSPS--QKKTVSKKTGGKKGARKSKKKQGEVSKSASADSTT +EGTPADGFTVLSTKS-LFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGSTLE +KKGGKEFVEAVIELRKKNGPLDIAGAVVSAGHGLPAKFVIHCNSPGWGSDK--CEELLEK +TVKNCLALADEKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFV +LFDSESIGIYVQEMAKLDAN- +>Nematostella|XP_001637578.1|macroH2A organism=Nematostella vectensis phylum=Cnidaria class=Anthozoa +------------------------------------------------------------ 
+-------MSARGG----------------------------------------------- +---------KAAKRA---------------KAV---SRSAKAGLQFPVSRVHRYLRKCTH +HY-RISAAAPVYQAAVMEYLTAEILELA----GNAA--RDN-K-KTRIIPRHILLAVAND +EELHKLL--K-GVTI-----------------AS---------GGVLPNIHPEL--LKKR +KGGK------LVSPEELKSKKPKPAPPPSPKKPVSSKKGRGKADK--------------- +--GPGDGFSVLSEKT-LFLGQKLTVVQGDIAAI---DADAVVLPTNAKFKLEGEVGEALK +KAGGKEFKDEIKKLSEDNGDLALLDAAICDGHNFPAAYVISLHSPVYSEDSTTASDDLEK +AVKNVLTIADEKNLKILAIPSIGTGSNKYPKDLAAQVTLKAISNYFVSAMASSLKQIYFV +LSDPENIGMYTMELARLDS-- +>Trichoplax|XP_002111582.1|macroH2A organism=Trichoplax adhaerens phylum=Placozoa class=Uniplacotomia +------------------------------------------------------------ +-------MSGRGG----------------------------------------------- +---------K--ARK---------------KPS---SRSARAGLQFPVGRMHRRLKSSTH +HL-RIGSGAPVYLAACIEYLTAEILELA----GNAA--RDN-K-KLRIIPRHIQLAIGND +EELHKLL--S-DVTI-----------------AS---------GGVLPHVHTEL--LSKK +AKGGGASVAAAAAPKKSKVRVSR-VGKSTPAKSNFSKKSGSSTKAFKN------------ +-----SEVTILSEKQ-LFLGQKLIVTKGDITKI---STDGIVHPTSSNFSHAGMIGGALS +SAGGKQYMDGVAKVEQETGSLPVAGVTGSPAANLSAQEVIHVHSPSWGSTD--CQGNLEK +AVRNILDYADKKGMKSVAIPSIGSGSNNFPKLTAAQIILRSIAKYFVGVMSSSLKEVYFV +LWDEESINIYTSELNKLDVSG +>Homo|NP_613075.1|macroH2A.1.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSSRGG----------------------------------------------- +---------K--KKS---------------TKT---SRSAKAGVIFPVGRMLRYIKKGHP +KY-RIGVGAPVYMAAVLEYLTAEILELA----GNAA--RDN-K-KGRVTPRHILLAVAND +EELNQLL--K-GVTI-----------------AS---------GGVLPNIHPEL--LAKK +RGSKGKLEAIITPPPAKKAKSPS--QKKPVSKKAGGKKGARKSKKKQGEVSKAASADSTT +EGTPADGFTVLSTKS-LFLGQKLQVVQADIASI---DSDAVVHPTNTDFYIGGEVGNTLE +KKGGKEFVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADK--CEELLEK +TVKNCLALADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFV +LFDSESIGIYVQEMAKLDAN- +>Homo|NP_001035248.1|macroH2A.1.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+------------------------------------------------------------ +-------MSSRGG----------------------------------------------- +---------K--KKS---------------TKT---SRSAKAGVIFPVGRMLRYIKKGHP +KY-RIGVGAPVYMAAVLEYLTAEILELA----GNAA--RDN-K-KGRVTPRHILLAVAND +EELNQLL--K-GVTI-----------------AS---------GGVLPNIHPEL--LAKK +RGSKGKLEAIITPPPAKKAKSPS--QKKPVSKKAGGKKGARKS-KKQGEVSKAASADSTT +EGTPADGFTVLSTKS-LFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGNTLE +KKGGKEFVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADK--CEELLEK +TVKNCLALADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFV +LFDSESIGIYVQEMAKLDAN- +>Homo|NP_613258.2|macroH2A.1.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSSRGG----------------------------------------------- +---------K--KKS---------------TKT---SRSAKAGVIFPVGRMLRYIKKGHP +KY-RIGVGAPVYMAAVLEYLTAEILELA----GNAA--RDN-K-KGRVTPRHILLAVAND +EELNQLL--K-GVTI-----------------AS---------GGVLPNIHPEL--LAKK +RGSKGKLEAIITPPPAKKAKSPS--QKKPVSKKAGGKKGARKSKKKQGEVSKAASADSTT +EGTPADGFTVLSTKS-LFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGNTLE +KKGGKEFVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADK--CEELLEK +TVKNCLALADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFV +LFDSESIGIYVQEMAKLDAN- +>Homo|NP_061119.1|macroH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRSG----------------------------------------------- +---------K--KKM---------------SKL---SRSARAGVIFPVGRLMRYLKKGTF +KY-RISVGAPVYMAAVIEYLAAEILELA----GNAA--RDN-K-KARIAPRHILLAVAND +EELNQLL--K-GVTI-----------------AS---------GGVLPRIHPEL--LAKK +RGTKGKSETILSPPPEKRGRKAT--SGKKGGKKSKAAKPRTSKKSKPKDSDKEGTSNSTS +EDGPGDGFTILSSKS-LVLGQKLSLTQSDISHIGSMRVEGIVHPTTAEIDLKEDIGKALE +KAGGKEFLETVKELRKSQGPLEVAEAAVSQSSGLAAKFVIHCHIPQWGSDK--CEEQLEE +TIKNCLSAAEDKKLKSVAFPPFPSGRNCFPKQTAAQVTLKAISAHFDDSSASSLKNVYFL +LFDSESIGIYVQEMAKLDAK- +>Ailuropoda|XP_011215272.1|H2A.B organism=Ailuropoda 
melanoleuca phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPGDR-------------------------SR--------------------- +---------RGSSSG--QR-----------RTR---SRTARAELSFSVSHVERLLREGHY +AQ-RLGSSAPVFLAAVIQYLTAKVLELA----GNEA--QNS-G-GRRITPQLVDMAVHNH +ALLSGFF--G-TTTI-----------------SQ---------VAPAWN----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Bos|NP_001069373.1|H2A.B organism=Bos taurus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPKKR-------------------------GH--------------------- +---------QRSSGI-----------------R---SRTAQSELSFSVSHMEHLLRKGHY +AQ-RLSSSAPVFLAAVIQDLTSKVLELA----GNEA--QKN-G-EKRITPKLVDMAIHNN +ALLSSIF--G-MTTI-----------------SL---------VAPGPH----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Callithrix|XP_002763866.2|H2A.B organism=Callithrix jacchus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSERR-------------------------SR--------------------- +---------RGSSAAGRRG-----------HTR---SRTARAELIFSVSKMERGLWEGHY +AQ-RLSDNAPVYLAAVIQYLTAKILELA----AKGA--DNR-G-ERIITPRLLDMAVHND +GLLSTLF--H-AITI-----------------SQ---------VGPGPN----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Cricetulus|XP_003514308.1|H2A.B 
organism=Cricetulus griseus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRTRQ-----------------------SSR--------------------- +---------RGSSSR-----------------R---SRTDRAELTFSVSLVEHHLRESGH +AR-RLSETVPILVTAILEFLTRRLLELA----SNEA--QRL-GAQRLITPEILDLTIYNN +ALLSEMF--Q-FTTI-----------------SQ---------TAPAGPRRRRRQI---- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Cricetulus|XP_003515491.1|H2A.B organism=Cricetulus griseus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRTRQ-----------------------SSL--------------------- +---------RGSSSR-----------------R---SRTDRAELTFSVSLVEHHLRESGH +AP-RLSETVPILLTAILEFLTRRLLELA----SNEA--QRL-GAQRLITPEILDLTVYNN +TLLSQLL--Q-FTTI-----------------SQ---------TAPAGRRRRRRQT---- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Heterocephalus|EHB05905.1|H2A.B organism=Heterocephalus glaber phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRQA------------------------------------------------ +---------------LALT-----------NER---PPQGRAEPIFSVSQVERALCDGRY +AQ-RLSCSASVFLAATLQFLSATVLELA----DREA--RYR-S-RRRITRELLDVATLKD +ALLCTLL--G-TTTI-----------------SR---------VAPARP----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- 
+>Loxodonta|XP_003421752.1|H2A.B organism=Loxodonta africana phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGKR-------------------------SR--------------------- +---------RGGGGGGGGGGGGGGGGGSSRRQR---RTRSRTELIFSASHVAHLLREGHY +AQ-RLSSSAPVFLAAILKCLTAKILELA----GNEA--QNS-G-RRLVTPELVDMAVHNN +ALLSGFF--L-TTTI-----------------SQ---------VAPAR------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Macaca|NP_001180843.1|H2A.B organism=Macaca mulatta phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSERR-------------------------SH--------------------- +---------RRSSRAGGRG-----------RTR---SRTVRAELSFSVSQVERGLREGHY +AQ-RLSPTAPVYLAAVIEYLTAKVLELA----GNEA--QNN-G-ERNITPLLLDMAVHNN +RLLSTLF--D-TTTI-----------------SQ---------VAPGGD----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Nomascus|XP_003282204.1|H2A.B organism=Nomascus leucogenys phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRRR-------------------------SH--------------------- +---------RGSSGAGGRG-----------RTC---SRTVRAELSFSVSQVERGLREGHY +AQ-RLSRTAPVYLAAVIEYLTAKVLELA----GNEA--QNN-G-ERNITPLLLDMVVHNN +RLLSTLF--H-TTTI-----------------SR---------VAPGGD----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+--------------------- +>Pan|XP_001145032.1|H2A.B organism=Pan troglodytes phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRRR-------------------------RH--------------------- +---------RGSSGAGGRG-----------RTC---SRTVRAELSFSVSQVERSLREGQY +AQ-RLSRTAPVYLAAVIEYLTAKVLELA----GNEA--QNS-G-ARNITPLLLDMVVHND +RLLSTLF--N-TTTI-----------------SQ---------VAPGED----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Sus|XP_003135571.1|H2A.B organism=Sus scrofa phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPGKR-------------------------SR--------------------- +---------RKSPGR--QG-----------RTC---ARTTRAGLSASVSHMERLLREGPY +AQ-CLSSSARVFLAATIEYLTARVLELA----GDEA--QIV-G-RRCITPELVAMAVHNN +ALLSAFF--G-TLAI-----------------SQ---------VAPTQE----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_001017990.1|H2A.B.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRRR-------------------------RR--------------------- +---------RGSSGAGGRG-----------RTC---SRTVRAELSFSVSQVERSLREGHY +AQ-RLSRTAPVYLAAVIEYLTAKVPELA----GNEA--QNS-G-ERNITPLLLDMVVHND +RLLSTLF--N-TTTI-----------------SQ---------VAPGED----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +--------------------- +>Homo|NP_001017991.1|H2A.B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRRR-------------------------RR--------------------- +---------RGSSGAGGRG-----------RTC---SRTVRAELSFSVSQVERSLREGHY +AQ-RLSRTAPVYLAAVIEYLTAKVLELA----GNEA--QNS-G-ERNITPLLLDMVVHND +RLLSTLF--N-TTTI-----------------SQ---------VAPGED----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_542451.1|H2A.B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRRR-------------------------RR--------------------- +---------RGSSGAGGRG-----------RTC---SRTVRAELSFSVSQVERSLREGHY +AQ-RLSRTAPVYLAAVIEYLTAKVLELA----GNEA--QNS-G-ERNITPLLLDMVVHND +RLLSTLF--N-TTTI-----------------SQ---------VAPGED----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Mus|NP_001268460.1|H2A.B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRNRE-----------------------NCL--------------------- +---------RESSGRR--------------HRR---SRTSRAELIFAVSLVEQHLREVSR +AR-RLSDTVPIFLAAILESLTRRLLELA----GNEA--QRR-GTERRITPELLDLAVYSN +MELSDVF--Q-FITI-----------------SQ---------VAPAHR----------- +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Mus|NP_001268459.1|H2A.B.2_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRNTE-----------------------NCL--------------------- +---------QRSSGHR--------------QHH---SRTSRGELIFAVSLVEQHLREVSR +AR-RLSDMVPVSLVAILEFLTSRLLELA----GNEA--QRR-GTQRLITPQPLDLEVYSS +MELSDVF--Q-FITI-----------------SQ---------VAPAHR----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Mus|NP_001096135.1|H2A.B.3_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRNRE-----------------------NCL--------------------- +---------RESSGRR--------------HRR---SRTSRAELIFAVSLVEQHLREISR +AW-RLSDMVPIFLAAILESLTRRLLELA----GNEA--QRR-GTERRITPELLDLAVYSN +MELSDVF--Q-FITI-----------------SQ---------VAPAHR----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Ailuropoda|XP_002927211.2|H2A.L organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRRS-----------------------PWH--------------------- +---------SHQHKR---------------HGL---SRSMRAELQFPVSRVDRLLREGCY +AQ-RLSSSTPVFLTGVLEYLTANILELA----GQEA--RNH-H-KMRITPEHVQRALVNN +QHLSCLF--E-DITS-----------------PP---------AKGTPQLRKC------- +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Bos|NP_001071426.1|H2A.L organism=Bos taurus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSPGRH-----------------------LWN--------------------- +---------CRRSRR---------------RSL---SRSTRAELQFPVSRVDRLLREGQF +AN-RLSSATPVFLTGILEYLIANILDLA----GKEA--CTN-H-RVRISPEHVQTALVNN +ENLRCLF--Q-PGAF-----------------SQ---------PAASPPAPEN------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Bos|XP_875023.2|H2A.L organism=Bos taurus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSPRRH-----------------------LWN--------------------- +---------CRRSRR---------------HSL---SRSTRAELQFPVSRVDRLLREGQG +AY-RLSSATPVFLTAVLEYLIANILDLA----GKEA--CTN-H-RVRISPEHVQTALINN +ENLRRLF--Q-PGAF-----------------SQ---------PTASPHLPEN------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Canis|XP_548938.1|H2A.L organism=Canis lupus familiaris phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGERG-----------------------PGH--------------------- +---------SRRPRR---------------HGL---SRSRRAELQFPVSRVDRLLREGHY +AH-RLSSSTPVFLAGILEYLTSNILELA----GQEA--RNS-H-KMRITPEHLQKALGNN +QYLSQLF--E-ENTY-----------------SQ---------GDGMVQARKWSGPGTGA +DSRI-------------------------------------------------------- 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Canis|XP_548947.1|H2A.L organism=Canis lupus familiaris phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGERG-----------------------PGH--------------------- +---------SRRPRR---------------HGL---SRSRRAELQFPVSRVDRLLREGCY +AH-RLSSSTPVFLTGILEYLTSNILELV----GQEA--CNS-H-KMRITPEHMQKALGNN +QYLSQLF--E-ENTY-----------------SQ---------GDGMVQARKWSGPGTGA +DSRI-------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Cavia|XP_003469395.1|H2A.L organism=Cavia porcellus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAAKKC-----------------------KGI--------------------- +---------SSKPRK---------------HPV---SRSTRAQLQFPVSRVERYLRENGY +L--RLSACTPVFLAGILEYLTASALHLA----ARVA--HRR-H-KKRISPEHLARALEKS +EQLRQVF--G-DSTK-----------------AL---------LDEIIQAKKK------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Cavia|XP_003469399.1|H2A.L organism=Cavia porcellus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MDGKKS-----------------------NEK--------------------- +---------SSQLRK---------------HPV---SCSRRAELQLPVSRMERYLRENSY +AP-HLPFSTPVFLEGVLEYLTASILDLA----RKEA--RGK-R-KKHILPQHLETAAENN +QQLGLRF--G-DSRK-----------------SM---------LDEMTQNKKK------- 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Cricetulus|XP_003508207.1|H2A.L organism=Cricetulus griseus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MEEKK------------------------------------------------ +-------------RK---------------ETI---SRITRGQLQFSLDRIERFFRDGNF +SQ-RLSASAPVFLAGVLEFLTSNILDLA----GREA--HAN-G-TRLITPEHVTQVVQNN +DQLREVF--K-EHED-----------------PV---------VSETPEPEKN------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Heterocephalus|EHB04253.1|H2A.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGKKR-----------------------RQN--------------------- +---------SSKPRK---------------QAV---SRSTRAELQFPVSRVEHYLREGGY +AH-RLASSTPVFLAGVLEYLRANILDLA----GKEA--QGK-R-KKCITPQHLETAMENN +QHLRPLF--Q-DDPK-----------------SL---------LDETSQPNPRRSDEAWV +PEHGKTFSTSSSSPQNTHR----------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Heterocephalus|EHB10563.1|H2A.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGKKR-----------------------RQN--------------------- +---------SSKPRK---------------QAV---SRSTRAELQFPVSRVEHYLREGGY +AH-RLGSSTPVFLAGVLKYLRANILDLA----GKEA--QGK-R-KKCIAPQHLETAMENN 
+QHLRPLF--Q-DGPK-----------------SL---------LDETSQPKKK------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Heterocephalus|EHB17227.1|H2A.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MDIHRQRPNQSPVDTNDITKPRLSQRQLLTSEKHRETSGWAQALLFGEAQARREIKLGQL +KPNTASIMAGKKR-----------------------RQN--------------------- +---------SSKPRK---------------QAV---SRSTRAELQFPVSRVERYLREGGY +AQ-RLGSSTPVFLAGVLEYLTANILDLA----GKEA--EGN-C-KKRITPQHLETVMENN +QQLRALF--Q-GDTK-----------------SL---------LDETSQLKKKC------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Oryctolagus|XP_002719866.1|H2A.L organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MRGKKP-----------------------SKK--------------------- +---------AGKRRK---------------PNV---SRSTRAELQFPVSRVDRHLHHDRY +AQ-RLSSSTPVFLAGVLEYLTSNILELA----GEEA--HKN-S-RIRITPEHMRKAIESS +EHLRDLL--E-EDPK-----------------PR---------DEDVAQPEEKE------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Oryctolagus|XP_002720052.1|H2A.L organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MRGKKP-----------------------SKK--------------------- +---------AGKRRK---------------PNV---SRSTRAELQFPVSRVDRHLHHDRY 
+AQ-RLSSSTPVFLAGVLEYLTSNILELA----GEEA--HKN-S-RVRITPEHMRKAIESS +EHLRDLL--E-EDPK-----------------PR---------DEDVAQPEEKE------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Sus|XP_003360303.2|H2A.L organism=Sus scrofa phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSRKRN-----------------------LPQ--------------------- +---------CNRRKK---------------HAL---SRSSRAELQFPVSRVDRYLREGRY +AQ-RLSSQAPVFLAGVLEYLTANILELA----ASEA--RSN-N-KMRIAPEHVQRAASHN +QTLSSLF--Q-ASSV-----------------SRGAEE-----GAEEPLPEAGR------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Sus|XP_003484147.1|H2A.L organism=Sus scrofa phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSRKRN-----------------------LPQ--------------------- +---------CSHRKK---------------HAL---SCSSRAELQFPMSSLDCVLPEGQY +AQ-RLSSYTPVFLAGVLEHLMAHILELA----AREA--RSS-R-KVRITPEHVQRALNNN +ETLSRLF--Q-ASSV-----------------SRGAEEGAEEPGAEEPLPEAGQ------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|HISTDB_H2A_L_0|H2A.L.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGNKH-----------------------SRS--------------------- +---------SCKPRR---------------QCL---SRSRRAELQFPVSHMERCLREGQY 
+AR-HLSSTTPVFLAAVLEYLTANILEQA----GKEA--QNS-H-RVCITPEHLKRALQKN +EQLRWILEEE-DDIH-----------------SQ---------EEEMPQPEEEEEEDERM +EEEEEEKKEEEEKEEEEDERMEEEEEEKKEEEEKKEEKEKEEEKEKKKKKGGFLSFRAVQ +DFISNLFQLLKFP----------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|HISTDB_H2A_L_1|H2A.L.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGNK--------------------------M--------------------- +---------FCRPRR---------------QRL---SHSRRAELQFPVSHLERCLRESQH +AR-HLSSTTPVFLAGVLEYLTANILEKV----GKEV--KNS-C-RLCITPEHVKRALQKD +EQLRWILELE-DDTH-----------------SQ---------VEEMPQSEEEEE----- +--------EEEEKEEEMVVLVVMGGRRRR--------------RRRRRRKDS-------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Mus|NP_083864.1|H2A.L.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAKKM------------------------------------------------ +----------QRRRR---------------QKR---TRSQRGEL--PLSLVDRFLREEFH +SS-RLSSSALSFLTSVLEYLTSNILELA----GEVA--HTT-G-RKRVTPEDVRLVVQNN +EQLRQLF--K----------PGG---------TS---------VNE----DDN------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Mus|NP_080903.1|H2A.L.2_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MARKR------------------------------------------------ 
+----------QRRRR---------------RKV---TRSQRAELQFPVSRVDRFLREGNY +SR-RLSSSAPVFLAGVLEYLTSNILELA----GEVA--HTT-G-RKRIAPEHVCRVVQNN +EQLHQLF--K----------QGG---------TS---------VFEPPEPDDN------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Mus|Q9D4U4|H2A.L.3_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------ME--------------------------------------------------- +----------DKRQK---------------DSV---APSSGAKLQFPVSEAEHLLQERNL +SK-CLNSSTPVLFTDMLNYVTSSILELTVK--DRDS--HTS-C-NKLIAPEQKSKPTDNI +DELCQLF--K-DSQYMADETPGCYKTPRSNKITG---------LYEAPRPGPK------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Ailuropoda|XP_002913536.1|H2A.P organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKKS-----------------------HES--------------------- +---------SY-------------------QTQ---AHLITTELQVPVSYVDRLLQENQY +NH-PLSSSTTDFLLTMLDYLTDYILDVV----GTEA---NN-S-NMPTAPQDVERAVDSS +GEPYHRS--K-DTAF-----------------TL---------FDEMPGSRRNG------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Canis|XP_005641297.1|H2A.P organism=Canis lupus familiaris phylum=Chordata class=Mammalia +-----------------------------------------------------------M 
+EPNPANIMSGNKN-----------------------HES--------------------- +---------SN-------------------QTQ---AHLVTTELQFPVSYVDRLLQEDQR +TH-CLSSTSTEFLLAMLDSLTDYILERV----GTEA---NN-N-NMQTAPQDVERAVGSN +REPQQCL--K-DTAF-----------------TL---------FDEMPRSRRNG------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Cricetulus|XP_003508203.1|H2A.P organism=Cricetulus griseus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKKN-----------------------QGK--------------------- +---------SCSDNK---------------KME---DPSSKPEVQIPVNYVYHLLQEEQY +TP-CLGSTTSDFLLAMLDYITDYILEVV----GSEA---NI-N-SQQDIPQDRERQGDND +HDHSHAF--K-NAPF-----------------SL---------FDEMPGPRRNG------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Heterocephalus|EHB10562.1|H2A.P organism=Heterocephalus glaber phylum=Chordata class=Mammalia +------------------------------------------------------------ +------MMCEQKS-----------------------QYG--------------------- +---------SYKDNN---------------QQE---DPASRPEQQLPVSDIYCILHEE-Y +NP-YF-SSTSDLLLAMLESLTDYILTLV----GSEG---NN-V-GMPTNPQDGEREMDNN +HEHPPII--P-DVSF-----------------SF---------SDEMPGSRKKG------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Macaca|EHH30639.1|H2A.P organism=Macaca mulatta phylum=Chordata class=Mammalia 
+------------------------------------------------------------ +-------MSEKNN-----------------------RKN--------------------- +---------SSANNN---------------QIQ---DR-SRNELRVPMSFVDRVVQDEQD +AQ-SQSSSTINILLTLLDCLADYIMEQV----GLEA--INN-G-RMRNTSQDGEREGDNH +HEPHRTE--S-DGTR-----------------FV---------FDEMPKSGKND------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Pan|XP_003317470.2|H2A.P organism=Pan troglodytes phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSEKKN-----------------------CKN--------------------- +---------SSTNNN---------------QTQ---DP-SRNELQVPMSFVDRVVQDEQD +VQ-SQSSSTINTLLTLLDCLADYIMERV----GLEA--SNN-G-SMRNTSQDREREVDNN +REPHSAE--S-DVTR-----------------FL---------FDEMPKSRKND------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Rattus|NP_001128070.1|H2A.P organism=Rattus norvegicus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKKS-----------------------QEK--------------------- +---------ACSDNK---------------QTE---DPSSRPEVQVPVNYVYRLLQEEQY +TP-CLGSTTSDFLLAMLDYLTDYILEVV----GSEA---NI-N-SQQNISQDRERQREND +REPPQAF--K-NAPF-----------------SL---------FDEMPGPRRNG------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Sus|XP_003135058.1|H2A.P organism=Sus scrofa phylum=Chordata class=Mammalia 
+------------------------------------------------------------ +-------MSGKQS-----------------------PEG--------------------- +---------SY-------------------EAP---THLTTTEPQVPVSFVDHLLQEDQY +VH-TLSSSTTHFLFSVLEYLTDYILDLV----DTKA---NT-G-RMQMTPQDVERAVDSN +AEPHRQV--K-DTAF-----------------AL---------FDEMPGSRRNG------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Homo|NP_036406.1|H2A.P_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSEKKN-----------------------CKN--------------------- +---------SSTNNN---------------QTQ---DP-SRNELQVPRSFVDRVVQDERD +VQ-SQSSSTINTLLTLLDCLADYIMERV----GLEA--SNN-G-SMRNTSQDREREVDNN +REPHSAE--S-DVTR-----------------FL---------FDEMPKSRKND------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Mus|NP_080372.1|H2A.P_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSEKKS-----------------------QEK--------------------- +---------PCSDNN---------------QIE---DPSSRPEVQVPVNYVYRILQEEQY +TP-CIGSTTSDFLLAMLDYLTDYILEVV----GSEA---NI-N-NQQNISQDRERQRDND +REPSRGF--K-NAPF-----------------SL---------FDEMPGPRRNG------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Ammotragus|HISTDB_H2A_Q_2|H2A.Q organism=Ammotragus lervia 
phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRS-----------------------YHS--------------------- +---------SDGFRK---------------HIL---PCTMKTKLQFSKSHVDHHLQENQD +AQ-NQNLTNQIYLSAILKYLSSNALKLV----SNET--QSD-C-RIH-------KAMDNH +PQLHHAF--D-KDAK-----------------SQ---------VHEM------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Bison|HISTDB_H2A_Q_3|H2A.Q organism=Bison bison bison phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRS-----------------------YHS--------------------- +---------SDGLRK---------------RTV---PCTMKTKLQFSKSHVDHHLQGNQD +AH-NQNLTTQIYLSAILKNVSSNVLKLV----SDET--QSN-C-KIH-------RAKHNH +PQLQHVF--D-KDAK-----------------SQ---------VHEMF------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Bos|HISTDB_H2A_Q_14|H2A.Q organism=Bos mutus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRS-----------------------YHS--------------------- +---------SDGLRK---------------RTV---PCTMKTKLQFSKSHVDHHLQGNQD +AH-NQNLTTQIYLSAILKNVSSNVLKLV----SDET--QSN-C-KIH-------RAKHNH +PQLQHVF--D-KDAK-----------------SQ---------VHEMF------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Bos|HISTDB_H2A_Q_16|H2A.Q organism=Bos taurus 
phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRS-----------------------YHS--------------------- +---------SDGLKK---------------RTV---PCTMKTKLQFSKSHVDHHLQGNQD +AH-NQNLTTQIYLSAILKNVSSNVLKLV----SDET--QSN-C-KIH-------RAKHNH +PQLQHVF--D-KDAK-----------------SQ---------VHEMF------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Camelus|XP_010950849.1|H2A.Q organism=Camelus bactrianus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRS-----------------------CQS--------------------- +---------SCWLRK---------------QTF---SCSTKNKPLFPKSHADHLLQENHF +AQ-QLNLPTQVFLSAILKYVTSNVLEVV----GNKT--HSN-C-RIQ-------KAVDND +LQLSHLF--E-EDTN-----------------SQ---------ARETF------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Camelus|XP_010995375.1|H2A.Q organism=Camelus dromedarius phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRS-----------------------CQS--------------------- +---------SCWLRK---------------QTF---SCSTKNKPLFPKSHADHLLQENHF +AQ-QLNLPTQVFLSAILKYVTSNVLEVV----GNKT--HSN-C-RIQ-------KAVDND +LQLSHLF--E-EDTN-----------------SQ---------ARETF------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Canis|XP_013966888.1|H2A.Q organism=Canis 
lupus familiaris phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKRS-----------------------SQN--------------------- +---------SCRLGK---------------QTL---SSSTKTKLKFSVSHEDHLLQENHP +AQ-HLRFSSQVCLSAILKYVATNILELV----GNEA--HND-C-RVQ-------RAVNNN +MQSSHLF--E-DDTT-----------------SQ---------VSEMF------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Ceratotherium|HISTDB_H2A_Q_13|H2A.Q organism=Ceratotherium simum phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSEKRS-----------------------CQN--------------------- +---------SSRLKK---------------QTF---SCSTKAKLHFPVSHMDRHQQENHS +AQ-QLSLSTPVFLSAIRKYVTNNILELV----GNES--HNN-R-RIR-------RAVDNA +EQLSHLF--E-DDNQ-----------------FS---------G---------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Cercocebus|HISTDB_H2A_Q_11|H2A.Q organism=Cercocebus atys phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRR-----------------------QRR--------------------- +---------SYRCRN---------------QTF---SYSIKAKQQFPLSCVHCLLWKNHC +PR-PE-------LVHYFQILGGNILELM----GNKV--HKN-Y-RMHITPKYVERAVDNN +SLISHLF--E-GDTN--------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- 
+>Cercopithecus|HISTDB_H2A_Q_0|H2A.Q organism=Cercopithecus aethiops phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRR-----------------------QRR--------------------- +---------SYRCRN---------------QTF---SYSIKAKQQFPSSCVHCLLWKNHC +PR-PE-------LVHYFQILGGNILELM----GNKV--HKT-Y-RMHITPKYVERAVDNN +SLISHLF--E-GDTN--------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Equus|HISTDB_H2A_Q_17|H2A.Q organism=Equus caballus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKRS-----------------------CQN--------------------- +----------YKLMK---------------QTF---SCSAKTKLQFPVSHVDCLQQENHS +AQ-HLSLSTQVFLPAILKYVTNNILEWV----GNEA--HNS-C-RIR-------KAVANN +QQLSHLF--E-DDTD-----------------SQ---------VNEMF------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Giraffa|HISTDB_H2A_Q_7|H2A.Q organism=Giraffa camelopardalis tippelskirchi phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRS-----------------------YHS--------------------- +---------SDGLKK---------------RTL---PCTMKTKLQFSKSHVDHLLQENQA +AQ-NQNLTTQIFLSAILRHMSSNILKLM----SNET--QSN-C-RIH-------RAMDNP +TQLHHVF--D-NDAK-----------------SQ---------VHEMF------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +--------------------- +>Mandrillus|HISTDB_H2A_Q_5|H2A.Q organism=Mandrillus leucophaeus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRR-----------------------QRR--------------------- +---------SYRCRN---------------QTF---SYSIKAKQQFPLSCVHCLLWKNHC +PR-PE-------LVHYFQILGGNILELM----GNKV--HKN-Y-RMHITPKYVERAVDNN +SLISHLF--E-GDTN--------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Mustela|HISTDB_H2A_Q_6|H2A.Q organism=Mustela putorius furo phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKRS-----------------------SQT--------------------- +---------SYRLGK---------------QTS---SCSTKTKLQVPVSHVEPILQEHQP +VQ-NLSFSSQVCLSAILKYVATNILELV----GNEA--QHN-C-RVQ-------RAMNNN +MQNSHLF--E-DDTT-----------------SQ---------L---------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Nasalis|HISTDB_H2A_Q_21|H2A.Q organism=Nasalis larvatus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRR-----------------------QRR--------------------- +---------SYRCRN---------------QTF---SYSIKAKQQFPLSCVHCLLWKNHC +PR-PE-------LVHYFQILGGNILELM----GNKV--HKN-Y-RMHITPKYVERAVDNN +SLISHLF--E-GDTN-----------------SG-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Nomascus|HISTDB_H2A_Q_22|H2A.Q organism=Nomascus leucogenys phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRR-----------------------QRS--------------------- +---------SYRCRN---------------QTF---SYSIRAKQQFPLSCVHCLLWKNHC +PR-PE-------LVHYFQILGGNILELM----GNKV--HKN-Y-RMHITPKYVERVVDNN +LLLSHLF--E-GDTN-----------------SG-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Odobenus|HISTDB_H2A_Q_12|H2A.Q organism=Odobenus rosmarus divergens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKRS-----------------------GRS--------------------- +---------SYRLGK---------------QTS---SCSTKTKLQFSMSHEEPLLQENHP +AQ-KLSFSRQVCLSAILKYVATNILELV----GNKA--HHN-C-RVQ-------TAMDHD +MQISHLF--E-DDTT-----------------SQ---------VSEMF------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Okapia|HISTDB_H2A_Q_9|H2A.Q organism=Okapia johnstoni phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRS-----------------------YHS--------------------- +---------SDGLKK---------------HTL---PCTMKTKLQFSKSHVDHLLRENQA +AQ-NQNLTTQIFLSAILKYTSSNILKLV----SNET--QSN-C-RIH-------RAMDNP +TQLHHVF--D-NGAK-----------------SQ---------VHEIF------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Ovis|HISTDB_H2A_Q_8|H2A.Q organism=Ovis aries musimon phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRS-----------------------YHS--------------------- +---------SDGFRK---------------HIL---PCTMKTKLQFSKSHVDHHLQENQD +AQ-NQNLTNQIYLSAILKYLSSNVLKLV----SNET--QSD-C-RIH-------KAMDNH +PQLHHAF--D-KDAK-----------------SQ---------VHEM------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Pan|HISTDB_H2A_Q_4|H2A.Q organism=Pan paniscus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRR-----------------------QRS--------------------- +---------SYRCRN---------------QTF---SYSIRAKQQFLLSCVHCLLWKNHC +PR-PE-------LVHYFQILGGNILELM----GNKV--HKN-Y-RMHITPKYVERVVDNN +PLLSHLF--E-GDTN-----------------SG-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Pan|HISTDB_H2A_Q_15|H2A.Q organism=Pan troglodytes phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRR-----------------------QRS--------------------- +---------SYRCRN---------------QTF---SYSIRAKQQFLLSCVHCLLWKNHC +PR-PE-------LVHYFQILGGNILELM----GNKV--HKN-Y-RMHITPKYVERVVDNN +PLLSHLF--E-GDRN-----------------SG-------------------------- +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Pantholops|HISTDB_H2A_Q_1|H2A.Q organism=Pantholops hodgsonii phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRS-----------------------YHS--------------------- +---------SDGFRK---------------HIL---PCTMKTKLQFSKSHVDHHLQENQD +AQ-NQNLTNQIYLSAILKYLSSNVLKLV----SNET--QSN-C-RIH-------KAMDNH +PQLHHVF--D-KDAK-----------------SQ---------VHEM------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Papio|HISTDB_H2A_Q_18|H2A.Q organism=Papio anubis phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRR-----------------------QRR--------------------- +---------SYRCRN---------------QTF---SYSIKAKQQFPLSCVHCLLWKNHC +PR-QE-------LVHYFQILGGNILELM----GNKV--HKN-Y-RMHITPKYVERAVDNN +SLISHLF--E-GDTN--------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Pongo|HISTDB_H2A_Q_19|H2A.Q organism=Pongo pygmaeus abelii phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRR-----------------------QRS--------------------- +---------SYRCRN---------------QTF---SYSIRAKQQFPLCCVHCLLWKDHC +PR-PE-------LVHYFQILGGNILELM----GNKV--HKN-Y-RMHITPKYVERVVDNN +PLLSHLL----------------------------------------------------- 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Pteropus|ELK02218.1|H2A.Q organism=Pteropus alecto phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKRS-----------------------CLN--------------------- +----------YKLKK---------------QTL---SCTTKTKLHLPVSHEDSLLQGNNS +AR-NLRFSTQVFLSVILKYVTTNILELV----DNEA--HNN-C-CVQ-------RAVDKN +PQLGQHF--K-NGNN-----------------SQ---------VDEMF------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Sus|HISTDB_H2A_Q_20|H2A.Q organism=Sus scrofa phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSANRS-----------------------CQN--------------------- +---------SCGLRK---------------HTL---SRSTKTKLQFPESQGGPLLQENHS +AQ-HLNLSTRVFLSAILKYVTANVLELV----GNET--PSN-C-RIQ-------RTVGNN +PQLSQLF--E-NETD-----------------PQ---------VREMF------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Ursus|HISTDB_H2A_Q_10|H2A.Q organism=Ursus maritimus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKRS-----------------------GPS--------------------- +---------SYRLGK---------------QTS---SCFPKSKLQFPVSHEEPLLQEDHP +AQ-QLSLSSQVCLSAILKYVATNILELV----GNEA--QHN-C-RVQ-------RAVNNN +MQSGHLF--K-DDAA-----------------SQ---------VSEMF------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- +>Vicugna|XP_015107649.1|H2A.Q organism=Vicugna pacos phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRS-----------------------CQS--------------------- +---------SCWLRK---------------QTF---SCSTKNKPLFPKSHADHLLQENHF +AQ-QLNLSTQVFLSAILKYVTSNVLEVV----GNKS--HSN-C-RIQ-------KAADND +LQLSHLF--E-EDTN-----------------SQ---------ARETF------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------- diff --git a/CURATED_SET/draft_seeds/H2A_only.fasta b/CURATED_SET/draft_seeds/H2A_only.fasta old mode 100755 new mode 100644 diff --git a/CURATED_SET/draft_seeds/H2B.K.fasta b/CURATED_SET/draft_seeds/H2B.K.fasta new file mode 100644 index 0000000..2471494 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.K.fasta @@ -0,0 +1,80 @@ +>Latimeria|XP_006007163.1|H2B.K organism=Latimeria chalumnae phylum=Chordata class= +-------------------MTNDPGK-----------------KKSK--NPGEKKSSKKK +AKRRETYSVYIYKVLKQVHPDTGISSKAMSIMNSFVNDVFERIATEASRLAQYNKRSTIT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Danio|NP_001002724.1|H2B.K organism=Danio rerio phylum=Chordata class=Actinopteri +-------------------MSNEGAK-----------------KKGK--APGDKKGSKRK +SKRRETYAVYIYKVLKQVHPDTGISSRAMSIMNSFVNDVFERIATEASRLAHYNKRSTIT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Gallus|XP_423715.4|H2B.K organism=Gallus gallus phylum=Chordata class=Aves +-------------------MSAESGR-----------------MRGHPSSSGDKK-SKRK +PKRKETYSVYIYKVLKQVHPDTGISSKAMSIMNSFVNDIFERLAVEASRLAQYNHRSTIT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK 
+>Taeniopygia|XP_002190629.1|H2B.K organism=Taeniopygia guttata phylum=Chordata class=Aves +-------------------MSSERLK-----------------KRGHAVASGKKS-SKRK +PKRKEAFSVYIYKVLKQVHPDLAISSKAMSIMNSFVNDMLERLAAEASRLARYRCHTTVS +SREVQAAARQLLPGQLAQHAVSEGTKAVTKYTTSK +>Ailuropoda|XP_019651116.1|H2B.K organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +-------------------MSAEHGRQQQP-----------GGRRGR--SSGDKK-SRKR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDVFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Bos|XP_010799227.1|H2B.K organism=Bos taurus phylum=Chordata class=Mammalia +-------------------MSAEHGQLQQS-----------GGRRGR--SPGDKK-SRRR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIVNLFVNDLFERLAGKAAWLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Bos|XP_024846715.1|H2B.K organism=Bos taurus phylum=Chordata class=Mammalia +-------------------MSAEHGQLQQS-----------GGRRGR--SPGDKK-SRRR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDLFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Bos|XP_059747847.1|H2B.K organism=Bos taurus phylum=Chordata class=Mammalia +MMDTRSSKLVCHNILRTRRMSAEHGQLQQS-----------GGRRGR--SPGDKK-SRRR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDLFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Canis|HISTDB_H2B_K_1|H2B.K organism=Canis lupus familiaris phylum=Chordata class=Mammalia +-------------------MGTEHGQQPQS-----------GGRRGH--GSGDKK-SKKH +SRRKETYSMYIYKVLKQVHPDIGIFSKAMSIMNSFVNDVFERLAGKAAQLAQYLGQTTLT +SWEVQTAVRWLLPGELAKHAISEGTKAITKYTGSK +>Canis|XP_022259586.1|H2B.K organism=Canis lupus familiaris phylum=Chordata class=Mammalia +-------------------MGAEHGQQPQS-----------GGRRGR--GSGDKK-SKKR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDVFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Ceratotherium|XP_014643104.1|H2B.K organism=Ceratotherium simum simum phylum=Chordata class=Mammalia +-------------------MSTEHGQQHHP-----------GGRRGC--SPGDKK-FKKR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDVFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK 
+>Equus|XP_005609614.1|H2B.K organism=Equus caballus phylum=Chordata class=Mammalia +-------------------MSTEHGQQHQS-----------GGRRGC--SSGDKK-SKKR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDVFERLAGEAAQLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Felis|XP_019681595.1|H2B.K organism=Felis catus phylum=Chordata class=Mammalia +-------------------MSAEHGQQQQS-----------GGRRGR--SSGDKK-SKKR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDVFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Loxodonta|XP_023403847.1|H2B.K organism=Loxodonta africana phylum=Chordata class=Mammalia +-------------------MSAELGQQQQQ--------QQSGGQRGR--SSGDKK-PKKR +SRRKENYSVYIYKVLKQVHPDISISSKAMSIMNSFVNDVFERLAGEATRLAQYSGRTTLT +SREVQTAARLLLPGELAKHAVSEGTKAVTKYISSK +>Ornithorhynchus|HISTDB_H2B_K_2|H2B.K organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +-------------------MSPEGGQQQQQ------------QPRPR--ARGDRR-PKRR +TRRKETYSVYIYKVLKQVHPDTGISSKAMSIMNSFVNDVFEQLAGEAARLAQYLGRSTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Oryctolagus|XP_002715119.2|H2B.K organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +-------------------MSAERGQQQQQ----------ASSRRGR--SSGNKK-SRKR +SKRKETYSMYIYKVLKQVHPDIGISARAMSIMNSFVNDVFERLAGEAAQLAQYSGRSTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Ovis|HISTDB_H2B_K_0|H2B.K organism=Ovis aries phylum=Chordata class=Mammalia +-------------------MSAEHGQLQQA-----------GGRRGR--SPGDKK-SRRR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDLFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Ovis|XP_027824938.1|H2B.K organism=Ovis aries phylum=Chordata class=Mammalia +-------------------MSAEHGQLQQS-----------GGRRGR--SPGDKK-SRRR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDLFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Sus|XP_013846203.1|H2B.K organism=Sus scrofa phylum=Chordata class=Mammalia +-------------------MSSAHGQQQQQQQQQQQQQQQGGGRRGR--SSGEKK-SKKR +NRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDVFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK 
+>Homo|NP_001356054.2|H2B.K_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MSAEYGQRQQP-----------GGRGGR--SSGNKK-SKKR +CRRKESYSMYIYKVLKQVHPDIGISAKAMSIMNSFVNDVFEQLACEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK diff --git a/CURATED_SET/draft_seeds/H2B.K_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2B.K_(Homo_sapiens).fasta new file mode 100644 index 0000000..b23f9b2 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.K_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_001356054.2|H2B.K_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSAEYGQRQQPGGRGGRSSGNKKSKKRCRRKESYSMYIYKVLKQVHPDIGISAKAMSIMN +SFVNDVFEQLACEAARLAQYSGRTTLTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTS +SK diff --git a/CURATED_SET/draft_seeds/H2B.K_only.fasta b/CURATED_SET/draft_seeds/H2B.K_only.fasta new file mode 100644 index 0000000..63e5407 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.K_only.fasta @@ -0,0 +1,76 @@ +>Latimeria|XP_006007163.1|H2B.K organism=Latimeria chalumnae phylum=Chordata class= +-------------------MTNDPGK-----------------KKSK--NPGEKKSSKKK +AKRRETYSVYIYKVLKQVHPDTGISSKAMSIMNSFVNDVFERIATEASRLAQYNKRSTIT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Danio|NP_001002724.1|H2B.K organism=Danio rerio phylum=Chordata class=Actinopteri +-------------------MSNEGAK-----------------KKGK--APGDKKGSKRK +SKRRETYAVYIYKVLKQVHPDTGISSRAMSIMNSFVNDVFERIATEASRLAHYNKRSTIT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Gallus|XP_423715.4|H2B.K organism=Gallus gallus phylum=Chordata class=Aves +-------------------MSAESGR-----------------MRGHPSSSGDKK-SKRK +PKRKETYSVYIYKVLKQVHPDTGISSKAMSIMNSFVNDIFERLAVEASRLAQYNHRSTIT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Taeniopygia|XP_002190629.1|H2B.K organism=Taeniopygia guttata phylum=Chordata class=Aves +-------------------MSSERLK-----------------KRGHAVASGKKS-SKRK +PKRKEAFSVYIYKVLKQVHPDLAISSKAMSIMNSFVNDMLERLAAEASRLARYRCHTTVS +SREVQAAARQLLPGQLAQHAVSEGTKAVTKYTTSK +>Ailuropoda|XP_019651116.1|H2B.K organism=Ailuropoda melanoleuca 
phylum=Chordata class=Mammalia +-------------------MSAEHGRQQQP-----------GGRRGR--SSGDKK-SRKR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDVFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Bos|XP_010799227.1|H2B.K organism=Bos taurus phylum=Chordata class=Mammalia +-------------------MSAEHGQLQQS-----------GGRRGR--SPGDKK-SRRR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIVNLFVNDLFERLAGKAAWLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Bos|XP_024846715.1|H2B.K organism=Bos taurus phylum=Chordata class=Mammalia +-------------------MSAEHGQLQQS-----------GGRRGR--SPGDKK-SRRR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDLFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Bos|XP_059747847.1|H2B.K organism=Bos taurus phylum=Chordata class=Mammalia +MMDTRSSKLVCHNILRTRRMSAEHGQLQQS-----------GGRRGR--SPGDKK-SRRR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDLFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Canis|HISTDB_H2B_K_1|H2B.K organism=Canis lupus familiaris phylum=Chordata class=Mammalia +-------------------MGTEHGQQPQS-----------GGRRGH--GSGDKK-SKKH +SRRKETYSMYIYKVLKQVHPDIGIFSKAMSIMNSFVNDVFERLAGKAAQLAQYLGQTTLT +SWEVQTAVRWLLPGELAKHAISEGTKAITKYTGSK +>Canis|XP_022259586.1|H2B.K organism=Canis lupus familiaris phylum=Chordata class=Mammalia +-------------------MGAEHGQQPQS-----------GGRRGR--GSGDKK-SKKR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDVFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Ceratotherium|XP_014643104.1|H2B.K organism=Ceratotherium simum simum phylum=Chordata class=Mammalia +-------------------MSTEHGQQHHP-----------GGRRGC--SPGDKK-FKKR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDVFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Equus|XP_005609614.1|H2B.K organism=Equus caballus phylum=Chordata class=Mammalia +-------------------MSTEHGQQHQS-----------GGRRGC--SSGDKK-SKKR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDVFERLAGEAAQLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Felis|XP_019681595.1|H2B.K organism=Felis catus phylum=Chordata 
class=Mammalia +-------------------MSAEHGQQQQS-----------GGRRGR--SSGDKK-SKKR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDVFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Loxodonta|XP_023403847.1|H2B.K organism=Loxodonta africana phylum=Chordata class=Mammalia +-------------------MSAELGQQQQQ--------QQSGGQRGR--SSGDKK-PKKR +SRRKENYSVYIYKVLKQVHPDISISSKAMSIMNSFVNDVFERLAGEATRLAQYSGRTTLT +SREVQTAARLLLPGELAKHAVSEGTKAVTKYISSK +>Ornithorhynchus|HISTDB_H2B_K_2|H2B.K organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +-------------------MSPEGGQQQQQ------------QPRPR--ARGDRR-PKRR +TRRKETYSVYIYKVLKQVHPDTGISSKAMSIMNSFVNDVFEQLAGEAARLAQYLGRSTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Oryctolagus|XP_002715119.2|H2B.K organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +-------------------MSAERGQQQQQ----------ASSRRGR--SSGNKK-SRKR +SKRKETYSMYIYKVLKQVHPDIGISARAMSIMNSFVNDVFERLAGEAAQLAQYSGRSTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Ovis|HISTDB_H2B_K_0|H2B.K organism=Ovis aries phylum=Chordata class=Mammalia +-------------------MSAEHGQLQQA-----------GGRRGR--SPGDKK-SRRR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDLFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Ovis|XP_027824938.1|H2B.K organism=Ovis aries phylum=Chordata class=Mammalia +-------------------MSAEHGQLQQS-----------GGRRGR--SPGDKK-SRRR +SRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDLFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK +>Sus|XP_013846203.1|H2B.K organism=Sus scrofa phylum=Chordata class=Mammalia +-------------------MSSAHGQQQQQQQQQQQQQQQGGGRRGR--SSGEKK-SKKR +NRRKETYSMYIYKVLKQVHPDIGISSKAMSIMNSFVNDVFERLAGEAARLAQYSGRTTLT +SREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK diff --git a/CURATED_SET/draft_seeds/H2B.L.fasta b/CURATED_SET/draft_seeds/H2B.L.fasta new file mode 100644 index 0000000..28b2431 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.L.fasta @@ -0,0 +1,44 @@ +>Ailuropoda|XP_002921383.1|H2B.L organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia 
+MARSITKKNKRSRGHRSPISKKKS-HSSTDFGRRNYSLYINRVLKEVVPQRSISSRTLDV +MNTLIKDIFERISVEARSLMCFRNRCTLTPEDIQKAVYLLLPGKLAKYAVAFGSEAVQRY +VQS- +>Bos|NP_991343.1|H2B.L organism=Bos taurus phylum=Chordata class=Mammalia +MARNVTKRNKRCRGHQKAIYKKKS-HSSSESGLRNYSLYINRVLKEVVPQKGISSRTIDI +INTMINDMFERISTEACNLMYYRKRCTLTPEDIEKAVYLLLPEKLAKYAVAFGKEAVQRY +VRS- +>Cavia|XP_003461969.1|H2B.L organism=Cavia porcellus phylum=Chordata class=Mammalia +MVKSVIKPRRYFRGRRTSISSKKS-CLSSNSGYRNYSLYVSRVLKEVVPERAISSCTVNI +MNTLIDDIFERISEEAHHLMCSQKRCTLTPKDIQKAVYLLLPRKLAKYAVAFGDGAVDRY +VHS- +>Cricetulus|XP_003515979.1|H2B.L organism=Cricetulus griseus phylum=Chordata class=Mammalia +MAKSIIKRYQFVKRRQRRAFRKR--YSSINFGQRNYSLYISRVLKEVVPMRGLSSNTVDI +MNTLINDLFERIATEACQLMYFRKRCTLTLEDIQKAVYLLVPKKLAKSAVTFGSKAVHRF +IHS- +>Equus|XP_001504072.2|H2B.L organism=Equus caballus phylum=Chordata class=Mammalia +MARDSTKKSRCSRRRQSPASRKKS-HASTYRGHRNYSLYINRVLKEVVPQRGISARTLDT +MNILINNIFERISTEACSMMYFRNRCTLTPQDVQKAVYSLFPGKLAKYAVAFGSEAVQRY +LHS- +>Heterocephalus|XP_004885290.1|H2B.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MVRSIIKQYGYSRRHLTPTYRKKS-YLSTSFGHRNYSLYISRVLKEVAPQRHISSRTLDM +MNALINNIFERIATEAHHLMCSRNRCTLAPEDIQRAVYLLLPGKLAKYAMAFGDEAVHRY +VHS- +>Loxodonta|XP_003407998.1|H2B.L organism=Loxodonta africana phylum=Chordata class=Mammalia +MARSIIKKYRYSNGHLSSISIKKL-HSSTNFGHRNYSLYVNRVLKEVVPQRGISSRTLDV +MNTLINNIFKCIATEACNLMYFRNRCTLTPEDIQRAVYVRLPGKLAKHAVAFGSEAVNRY +VHS- +>Macaca|XP_001095287.1|H2B.L organism=Macaca mulatta phylum=Chordata class=Mammalia +MARSSTKKHKYSKRHQSPTSRKKA-HSSIDFVHGNYSFFVNKVLKEVVSHRGTSSRTLDL +MNTLINNFFQHISMKAYRLMYFRNRCTLTPEDILKAAYLLLPQKTANYAVAFGSEVFRRY +VHS- +>Monodelphis|XP_001366286.1|H2B.L organism=Monodelphis domestica phylum=Chordata class=Mammalia +MTKAV--RSNESQKLTTNQEKKKK-KKKKLLLSRNYSLYTHRVLKEVIPNQGLTYKTTEI +MNSMINNILERIAEEAGNLLCYKRHLTLGHQDIQMAVYRLLPDELAKHAVAFGTRAVTTY +NDSK +>Mus|NP_081340.1|H2B.L organism=Mus musculus phylum=Chordata class=Mammalia 
+MAKPTFKRQCYIKRHLRPLYRKHSRCSSINLGHGNYSLYINRVLKEVVPNRGISSYSVDI +MNILINDIFERIATEACQQMFLRKRCTLTPGDIQQAVHLLLPKKLATLAVTFGSKAVHRF +IHS- +>Sus|XP_003134084.1|H2B.L organism=Sus scrofa phylum=Chordata class=Mammalia +MARYITKKNRCSRGHRHPNSRKKT-CSSTECGRRNYSLYVNRVLKEVVPQSGISSRTLDM +MNTVINDIFERISMEASNLMYFRNRCTLTPEDVQKAVYLLLPRKLAKHAVAFGSDAVHRY +VHS- diff --git a/CURATED_SET/draft_seeds/H2B.N.fasta b/CURATED_SET/draft_seeds/H2B.N.fasta new file mode 100644 index 0000000..2ec7621 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.N.fasta @@ -0,0 +1,70 @@ +>Ailuropoda|HISTDB_H2B_N_3|H2B.N organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +------MYYVCLHDPRF------PKKRTTL---YIPAKAKYECANSAL------RHKRKK +KEVYFSYMGKILKQT--HPDFSGCSWILDAL-----------------GSLEDWLLEWVS +LEAVRLSFYNHRRAVTSREILGAVKQRSFRKSFCINKVF--------------------- +--- +>Bos|XP_059734412.1|H2B.N organism=Bos taurus phylum=Chordata class=Mammalia +------MYFICLHGLQF------PKRKLTI---YIPAKEKDEWVHSATG-----KKRRKK +KETYFNYMGKLLKQV--HPDFSGCSWILDAL-----------------RVLEDWQLEWVS +LEAVRLSLYNHRRTITSREILEAVKQRCSQKSLGINEVDLHGSVVEMIALVQKQKIGSFG +GLS +>Bos|XP_059746706.1|H2B.N organism=Bos taurus phylum=Chordata class=Mammalia +------MHFICLHGLQF------PKRKLTI---YIPAKEKDEWVCSATG-----KKRRKK +KEAYFNYMEKLLKQV--HPDFSGCSWILDAL-----------------RVLEDWQLEWVS +LEAVRLSFYNHRRTITTKEILKAVKQRCSQKSLGINEVDLHGSVVEMIALVQKQKIGSFG +GLS +>Canis|HISTDB_H2B_N_2|H2B.N organism=Canis lupus familiaris phylum=Chordata class=Mammalia +------MYYICLHGLRF------PEKRTIL---YIPAREKYEWANSAL------RKKRKK +KEVYFSYMGKILKQT--HPDFSGCSWILDAL-----------------GSLEDWLLEQVS +LEAVRLSFYNHRRAVTSREILGAIKQRSFLKSFCVNEVF--------------------- +--- +>Ceratotherium|HISTDB_H2B_N_1|H2B.N organism=Ceratotherium simum phylum=Chordata class=Mammalia +------MYFICLRGLRF------PKKTTN----YILAKKKYEWTSSAIGK----KRRRKK +KEAYFSYMGKILEQIAHYRKLSRLCLILVPFLPRPTQTSVGAPGSWMHWALEAWRLEWVS +LEAVRLSFCNHRRAVTSREILEAVKRRSSWKSF--------------------------- +--- +>Dasypus|XP_058139847.1|H2B.N organism=Dasypus novemcinctus 
phylum=Chordata class=Mammalia +------MYYVCLDSLKF------PKKKTDV---YSLAERKYEWARSAFGKRRRRRWRRKK +KEVYFSYMRKILKQV--HADFSGCSWVLDAL-----------------GSLDDWRLEWVS +LEAVRLSFYNHRRAVTSREILEAVKQRLSWKSF--------------------------- +--- +>Loxodonta|HISTDB_H2B_N_4|H2B.N organism=Loxodonta africana phylum=Chordata class=Mammalia +------MYYVCLGGLKF------PKKSEV----HIPAKKKYEWANSAFEKKRR-RRRRKK +KEAHFCYMGKILKQT--HPDFSGCSWVLEAL-----------------GCLDDWQLEWVS +LEAVRLSFYKHRRAITSREILEAMKQRSPRRSF--------------------------- +--- +>Monodelphis|XP_007485268.2|H2B.N organism=Monodelphis domestica phylum=Chordata class=Mammalia +--------------MKI------PRAGATVPRSFITTGKRRGYMRTVSG---------KK +KDFYFSYIAKILKQV--HQDFSGYSWVLDAL-----------------WSLDYYLFEQAT +LEAVRLSFYNHRRVVTSREMLETLSKVPLEGWM--------------------------- +--- +>Monodelphis|XP_007485606.1|H2B.N organism=Monodelphis domestica phylum=Chordata class=Mammalia +--------MEVGTEMKI------PRAGATVPRSFLRTGKRRGYMRTVSG---------KK +KDFYFSYIAKILKQV--HQDFSGYSWVLDAL-----------------WSLDYYLFEQAT +LEAVRLSFYNHRRVVTSREMLEALNKVPLEGWM--------------------------- +--- +>Monodelphis|XP_007485607.1|H2B.N organism=Monodelphis domestica phylum=Chordata class=Mammalia +------ME---GTEMKI------PRAGATVPRSFLRTGKRRGYMRTVSG---------KK +KDFYFSYIAKILKQV--HQDFSGYSWVLDAL-----------------WSLDYYLFEQAT +LEAVRLSFYNHRRVVTSREMLEALNKVPLEGWM--------------------------- +--- +>Ornithorhynchus|HISTDB_H2B_N_5|H2B.N organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +MGGPLGPGFLSLLETETKSPAVAPEAEMEG---PVEARKEYRCVRTSLS---------KK +KEAYSSYIAHVLKQT--QPEPRGWGRAEGNL-----------------ESRDGQLLERVA +GEAVRLTLLQAAKTVTSRVVRGALELVLAELVEE-------------------------- +--- +>Ovis|HISTDB_H2B_N_0|H2B.N organism=Ovis aries phylum=Chordata class=Mammalia +------MHFICLHGLQF------PKRKLTI---SIPAKEKDEWVHSATG-----KKRRKK +KEAYFNYMGKLLKQG--HPDFSGCSWILDAL-----------------RALEDWQLEWVS +LEAVRLSLYNHRRTVTSREILEAVQQRCSQKTLGINEVALHGSVVEMIALVQKQKIGSFG +GLS +>Ovis|XP_060251208.1|H2B.N organism=Ovis aries phylum=Chordata 
class=Mammalia +------MHFICLHGLQF------PKRKLTI---SIPAKEKDEWVHSATG-----KKRRKK +KEAYFNYMGKLLKQG--HPDFSGCSWILDAL-----------------RALEDWQLEWIS +LEAVRLSLYNHRRTVTSREILEAVQQRCSQKTLGINEVALHGSVVEMIALVQKQKIGSFG +GLS +>Homo|NP_001388269.1|H2B.N_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------MYFICLNDLRF------PKNKTEL---YFPVKKKHEWANSATGKKRRWRKK-RR +KEAYFSYMGKILKQI--HPDFSGRSWVLYAL-----------------GALNAWQLEWVS +LEAFRLSFYNHRRAITGREILGAVKQRSSQKSF--------------------------- +--- diff --git a/CURATED_SET/draft_seeds/H2B.N_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2B.N_(Homo_sapiens).fasta new file mode 100644 index 0000000..2043bb3 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.N_(Homo_sapiens).fasta @@ -0,0 +1,3 @@ +>Homo|NP_001388269.1|H2B.N_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MYFICLNDLRFPKNKTELYFPVKKKHEWANSATGKKRRWRKKRRKEAYFSYMGKILKQIH +PDFSGRSWVLYALGALNAWQLEWVSLEAFRLSFYNHRRAITGREILGAVKQRSSQKSF diff --git a/CURATED_SET/draft_seeds/H2B.N_only.fasta b/CURATED_SET/draft_seeds/H2B.N_only.fasta new file mode 100644 index 0000000..bcc3dfa --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.N_only.fasta @@ -0,0 +1,65 @@ +>Ailuropoda|HISTDB_H2B_N_3|H2B.N organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +------MYYVCLHDPRF------PKKRTTL---YIPAKAKYECANSAL------RHKRKK +KEVYFSYMGKILKQT--HPDFSGCSWILDAL-----------------GSLEDWLLEWVS +LEAVRLSFYNHRRAVTSREILGAVKQRSFRKSFCINKVF--------------------- +--- +>Bos|XP_059734412.1|H2B.N organism=Bos taurus phylum=Chordata class=Mammalia +------MYFICLHGLQF------PKRKLTI---YIPAKEKDEWVHSATG-----KKRRKK +KETYFNYMGKLLKQV--HPDFSGCSWILDAL-----------------RVLEDWQLEWVS +LEAVRLSLYNHRRTITSREILEAVKQRCSQKSLGINEVDLHGSVVEMIALVQKQKIGSFG +GLS +>Bos|XP_059746706.1|H2B.N organism=Bos taurus phylum=Chordata class=Mammalia +------MHFICLHGLQF------PKRKLTI---YIPAKEKDEWVCSATG-----KKRRKK +KEAYFNYMEKLLKQV--HPDFSGCSWILDAL-----------------RVLEDWQLEWVS +LEAVRLSFYNHRRTITTKEILKAVKQRCSQKSLGINEVDLHGSVVEMIALVQKQKIGSFG 
+GLS +>Canis|HISTDB_H2B_N_2|H2B.N organism=Canis lupus familiaris phylum=Chordata class=Mammalia +------MYYICLHGLRF------PEKRTIL---YIPAREKYEWANSAL------RKKRKK +KEVYFSYMGKILKQT--HPDFSGCSWILDAL-----------------GSLEDWLLEQVS +LEAVRLSFYNHRRAVTSREILGAIKQRSFLKSFCVNEVF--------------------- +--- +>Ceratotherium|HISTDB_H2B_N_1|H2B.N organism=Ceratotherium simum phylum=Chordata class=Mammalia +------MYFICLRGLRF------PKKTTN----YILAKKKYEWTSSAIGK----KRRRKK +KEAYFSYMGKILEQIAHYRKLSRLCLILVPFLPRPTQTSVGAPGSWMHWALEAWRLEWVS +LEAVRLSFCNHRRAVTSREILEAVKRRSSWKSF--------------------------- +--- +>Dasypus|XP_058139847.1|H2B.N organism=Dasypus novemcinctus phylum=Chordata class=Mammalia +------MYYVCLDSLKF------PKKKTDV---YSLAERKYEWARSAFGKRRRRRWRRKK +KEVYFSYMRKILKQV--HADFSGCSWVLDAL-----------------GSLDDWRLEWVS +LEAVRLSFYNHRRAVTSREILEAVKQRLSWKSF--------------------------- +--- +>Loxodonta|HISTDB_H2B_N_4|H2B.N organism=Loxodonta africana phylum=Chordata class=Mammalia +------MYYVCLGGLKF------PKKSEV----HIPAKKKYEWANSAFEKKRR-RRRRKK +KEAHFCYMGKILKQT--HPDFSGCSWVLEAL-----------------GCLDDWQLEWVS +LEAVRLSFYKHRRAITSREILEAMKQRSPRRSF--------------------------- +--- +>Monodelphis|XP_007485268.2|H2B.N organism=Monodelphis domestica phylum=Chordata class=Mammalia +--------------MKI------PRAGATVPRSFITTGKRRGYMRTVSG---------KK +KDFYFSYIAKILKQV--HQDFSGYSWVLDAL-----------------WSLDYYLFEQAT +LEAVRLSFYNHRRVVTSREMLETLSKVPLEGWM--------------------------- +--- +>Monodelphis|XP_007485606.1|H2B.N organism=Monodelphis domestica phylum=Chordata class=Mammalia +--------MEVGTEMKI------PRAGATVPRSFLRTGKRRGYMRTVSG---------KK +KDFYFSYIAKILKQV--HQDFSGYSWVLDAL-----------------WSLDYYLFEQAT +LEAVRLSFYNHRRVVTSREMLEALNKVPLEGWM--------------------------- +--- +>Monodelphis|XP_007485607.1|H2B.N organism=Monodelphis domestica phylum=Chordata class=Mammalia +------ME---GTEMKI------PRAGATVPRSFLRTGKRRGYMRTVSG---------KK +KDFYFSYIAKILKQV--HQDFSGYSWVLDAL-----------------WSLDYYLFEQAT 
+LEAVRLSFYNHRRVVTSREMLEALNKVPLEGWM--------------------------- +--- +>Ornithorhynchus|HISTDB_H2B_N_5|H2B.N organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +MGGPLGPGFLSLLETETKSPAVAPEAEMEG---PVEARKEYRCVRTSLS---------KK +KEAYSSYIAHVLKQT--QPEPRGWGRAEGNL-----------------ESRDGQLLERVA +GEAVRLTLLQAAKTVTSRVVRGALELVLAELVEE-------------------------- +--- +>Ovis|HISTDB_H2B_N_0|H2B.N organism=Ovis aries phylum=Chordata class=Mammalia +------MHFICLHGLQF------PKRKLTI---SIPAKEKDEWVHSATG-----KKRRKK +KEAYFNYMGKLLKQG--HPDFSGCSWILDAL-----------------RALEDWQLEWVS +LEAVRLSLYNHRRTVTSREILEAVQQRCSQKTLGINEVALHGSVVEMIALVQKQKIGSFG +GLS +>Ovis|XP_060251208.1|H2B.N organism=Ovis aries phylum=Chordata class=Mammalia +------MHFICLHGLQF------PKRKLTI---SIPAKEKDEWVHSATG-----KKRRKK +KEAYFNYMGKLLKQG--HPDFSGCSWILDAL-----------------RALEDWQLEWIS +LEAVRLSLYNHRRTVTSREILEAVQQRCSQKTLGINEVALHGSVVEMIALVQKQKIGSFG +GLS diff --git a/CURATED_SET/draft_seeds/H2B.O.fasta b/CURATED_SET/draft_seeds/H2B.O.fasta new file mode 100644 index 0000000..5eb2241 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.O.fasta @@ -0,0 +1,9 @@ +>Ornithorhynchus|XP_001511074.1|H2B.O organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +YSIYVYKVLKQVHPLTSISTKAVGIMDSFINDIFERIASEASRLARYNKRSTITSREIQT +AVLLTLPGELARHAVSEGTKAITKYTS +>Ornithorhynchus|XP_001521160.2|H2B.O organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +WENYVYKVLKQVHPLTSISTKAVGIVDSFI-DIFKRITSDASHLARYNKCSTITSREIQT +AVQLMLPGELDRYAGSEGTKAITKYTT +>Ornithorhynchus|XP_028926523.1|H2B.O organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +YSIYVYKVLKQVHPLTSISTKAVGIMDSFINDIFDRIASEASRLARYTKRSTIASREIQT +AVLLTLPGELARHAVSEGTKAITKYTS diff --git a/CURATED_SET/draft_seeds/H2B.S.fasta b/CURATED_SET/draft_seeds/H2B.S.fasta new file mode 100644 index 0000000..8683328 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.S.fasta @@ -0,0 +1,376 @@ +>Chara|GBG59214.1|H2B.S organism=Chara braunii phylum=Streptophyta class=Charophyceae 
+------MAEGGYPLEA----VAGDICG-TSMDPLDPSGTVRRRS-PRGDGPDDQGVGRDQ +Q--------------------------------SDQMHLPAEEQRKSDTQSVVLSRQSTP +QQAESAKLPAN---YPEVEIGADRKASKRSKTQ--------------------------- +------------KKRTAAAENVPGTV--------PGETRPGGKKESGSRPEGKNK----G +RR----RRKRT-------TLIVPSGRTYKIYIYKVLKEIHPELAISSQGMSIMNSFMVDV +CERIAAEASRLSRHAKRATLSSRDIQSAVQLCLPGELAVNAMSEAKKAVIKFI-RHGKAS +ERRNIIIKSNRFYLINKMIVI-------------------------------------- +>Chara|GBG60584.1|H2B.S organism=Chara braunii phylum=Streptophyta class=Charophyceae +MWNLLRMPPGQWSSRS----AASSLPRQNGVVGVRRGRSAAVVV-LEDSGDGDANGIGIG +TGIANGNGNG-----------------------KDMNRRMEKGGALGTSTATGMMKESSA +KKKGGGSGSGA---GSELGSEPGSEPGTGPGAGRHK------------------------ +------------GSPSGVAKGLPMMW--------PTQRTGYVTPKKKERVTRAKKKAISG +SR----RTARK---------TNAFQPSFATHIRRVLKQVHPNLSITCDGIDIMNDFLIDI +FERIAGEAALLIRVHKRCTLTCREIMAAVQLVLGGELSKHAIHLATEVLTLFS-Q----- +----------------------------------------------------------- +>Amaranthus|HISTDB_H2B_S_7|H2B.S organism=Amaranthus hypochondriacus phylum=Streptophyta class=Magnoliopsida +------MAPKK---------TARKVVK--TTKIVEETVEVVSIP-GSQSQQNPTQI---- +--------------------------------QTELISERKEEQSSITTKSIPIQESDHD +DQDE-------------------------------------------------------- +------------EETETQDQDHPDLS---------TPPRKEAPPRQVEPKPEPKG----- +---------KG---------EGGGE-GYKRYVFRVLKQVHPGMGISSRAMIVINNFMNDM +FERIAEEGSRLNKQNKKMTMSAREIQGAVKLVLPGELGKHAVAEGAKAVTNYV-NFGFHK +K---------------------------------------------------------- +>Ananas|XP_020113371.1|H2B.S organism=Ananas comosus phylum=Streptophyta class=Magnoliopsida +------MAPRKPRK------MVGAVVK-TTAKVVEETVKVAPVV-GVGDGDGDGDG---- +-----------------------------------------AEEVEEAAVPLKDSKVVQV +VVVGGEKGDGEVPEANDGRDEPEKRKEAMEVDENQAPKETGEESKRRGRGRPL------- +------------KERGPETPTEKSEI---------PPANKKKEKDRGGRSEGEAEEGKGR +RR----RRRKRRFGSAGDAGSGGVG-GYKRYVFRVLKQVHPELGASARAMQVLDMMMADM +FQRLAEEAARLSKYTGRATLTSREIQNAVRLVLPGELGRHAVSEGTKAVTNYMASQSS-- 
+----------------------------------------------------------- +>Aquilegia|PIA54901.1|H2B.S organism=Aquilegia coerulea phylum=Streptophyta class=Magnoliopsida +------MAPKRSQKK-----VIGSLVR-KTNKVVEETVNITVVEKTKGAEAGNKET---- +-----------------------------------------EQEETAISTKDPVSTPGEK +QQKEQPKKGVG---KQLKLEAPTQK----------------------------------- +------------KEETKSKKNEKSTT---------TTTTTTSSAKEGEEKKGKKS----- +------GRKRM---------LDTGE-TYNTYVYKVLKQVHPDLGITFKGMMVLNGFMNDM +FERLAREASKLTDYTGKKTMSAREIQGAVRLVLPGELGKHAIVEGTKAITTYF-SNSS-- +----------------------------------------------------------- +>Arabidopsis|XP_002892444.1|H2B.S organism=Arabidopsis lyrata subsp. lyrata phylum=Streptophyta class=Magnoliopsida +------MAPRKPK-------VVSVTKK---KKVVEETIKVTV-T-EGEDPCVTTET---- +---------------------------------------ANDQETQDLTFSIPVGENVTT +VEI------------------------------------------------PVEVR---- +------------DEQSPQPPETPASK---------SEGTL-KKTDTVEKKKKKKK----- +------KKKRD---------DLAGD-EYRRYVYKVMKQVHPDLGITSKAMTVVNMFMGDM +FERIAQEAARLSDYTKRKTLSSREIEAAVRLVLPGELSRHAVAEGSKAVSNYV-GYGSRK +R---------------------------------------------------------- +>Arabidopsis|NP_172295.1|H2B.S organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------MAPRKPK-------VVSVTKK---KKVVEETIKVTV-T-EEGDPCVITET---- +---------------------------------------ANDQETQDLTFSIPVGENVTT +VEIPVEVPDER---SLPVGENVTTVKIPVDDRDESS---------PQPPETPVEVR---- +------------DEPSPQPPETPASK---------SEGTL-KKTDKVEKKQENKK----K +KK----KKKRD---------DLAGD-EYRRYVYKVMKQVHPDLGITSKAMTVVNMFMGDM +FERIAQEAARLSDYTKRRTLSSREIEAAVRLVLPGELSRHAVAEGSKAVSNFV-GYDSRK +R---------------------------------------------------------- +>Boechera|HISTDB_H2B_S_1|H2B.S organism=Boechera stricta phylum=Streptophyta class=Magnoliopsida +------MAPRKPK-------VVSVTKK---KKVVEETVKVTV-T-EGGDPNATTEI---- +--------------------------------------TENDQETQDLTFSIPVGENVTT +VEI------------------------------------------------PVEVR---- 
+------------DEQSPQPPETPAST---------SEGIVKKKTKKVEKKQAKKK----- +-K----KKKRD---------DLAGD-EYRRYVYKVMKQVHPDLGITSKAMTVVNMFMGDM +FERIAQEAARLGDYTKRRTLSSREIEAAVRLVLPGELSRHAVAEGSKAISNYV-AYDSRK +R---------------------------------------------------------- +>Brachypodium|XP_014757906.1|H2B.S organism=Brachypodium distachyon phylum=Streptophyta class=Magnoliopsida +------MAPKRRVKK-----VVSSVVRKKTTKVVQETVQVSTAILPDDSAQPEPEVVVDV +STPATV---------------------------VKHVEVTSDVGDDQATAAGATAVNNKP +PQSKSPDKPADDNQATAPAVPSLQSQETQTQDPNEKKKTPQQEIVVVTTKGPPGLEPEEK +T-----------KKLQPDAPETPKQA--------GGAGTGGKDEAAAPKKKKKKKKKKRK +ARRGG-GRRRG---PVGDMGMMGG--GYKRYVYRVLKQVHPDMGASGRAMEVLDMMMGDM +FERLADEAARLAKVAGRATLSSREVQNAVRLVLPGELAKHAISEGTKAVTSYM-SLA--- +----------------------------------------------------------- +>Brachypodium|HISTDB_H2B_S_5|H2B.S organism=Brachypodium stacei phylum=Streptophyta class=Magnoliopsida +------MAPKRRGKQ-----VVSSVVR-KTTKVVKETVQVSTAAIVADDSTHPEYTEPEV +VDMS----------TPATVVKHV----------EITTTSDGDQAADAGATTVNKSPDKQP +AEANQAPQPAVASLQSQETQDPNEEAAAAEAPQTEKPQQEIVVVSTKAPEPEEKIT---- +------------KKQHPEAPETPKQE--------AGAGTTGGKEKAAAAPKKKKA----- +------RRGRG---PVGDMGMMGG--GYKRYVYRVLKQVHPDMGASGRAMEVLDMMMGDM +FERLADEAARLAKVARRATLSSREVQSAVRLVLPGELAKHAISEGTKAVTSYM-SLA--- +----------------------------------------------------------- +>Brassica|CAF1924216.1|H2B.S organism=Brassica napus phylum=Streptophyta class=Magnoliopsida +------MAPKKSKK------VVSVTKK---KKVVEETIKVTV---TDGVPNVTTET---- +-----------------------------------DTQETQELETQDLPLSIPVEEENVT +RVEIPVDVGDD---RSPPPSETVTPASEGTVKETHK------------VEIPVDVR---- +------------DDRSPQPPETPAPA----------SEVPSKETHKVEEKEGNKKKKMLK +KR----NKNRS---------EVAGD-EYKRYVYKVMKQVHPDLGISSKAMTVINMFMGDM +FERLAVEAAKLNDYSKRRTLSSREIEAAVRLVLPGELSRHAVAEGSKAISNFV-AYGAKK +R---------------------------------------------------------- +>Brassica|RID57103.1|H2B.S organism=Brassica rapa phylum=Streptophyta class=Magnoliopsida 
+------MAPRKSKK------VVSVTKK---KKVVEETIKVTV---TDGVPNVTTNTDTQE +TQYL-----------------------------ETQELDTQELETQDLPFSLPLEEENVT +RVEIPVDVGYD---RSPPPPETVAPASEGTVKETHK------------VEIPVEFG---- +------------DDRSPQPPETPAPA----------SEVPAKETHKVEEKQGNKK-TTSK +KR----KKNRS---------EVAGD-EYKRYVYKVMKQVHPDLGISSKAMTVINMFMGDM +FERLAVEAAKLNDYSKRRTLSSREIEAAVRLVLPGELSRHAVAEGSKAVSNFV-AYGAKK +R---------------------------------------------------------- +>Capsella|HISTDB_H2B_S_0|H2B.S organism=Capsella grandiflora phylum=Streptophyta class=Magnoliopsida +------MAPRKPK-------VVSVTKK---KTVVEETVKVTVA--EGGDPNVTTEI---- +--------------------------------------TENDQETQDLTFSIPVGENVTT +VEVPVEVL---------------------------------------------------- +------------GERSPQPPETPVST---------SEGTLKKKTNEVEKKQEKKK----- +KK----NKKRD---------DLAGD-EYRRYVYKVLKQVHPDLGITSKAMTVVNMFMGDM +FERIAQEAARLSDYTKRRTLSSREIESAVRLVLPGELSRHAVAEGSKAISNYV-SYDSRK +L---------------------------------------------------------- +>Capsella|XP_006306103.1|H2B.S organism=Capsella rubella phylum=Streptophyta class=Magnoliopsida +------MAPRKPK-------VVSVTKK---KTVVEETVKVTVA--EGGDPNVTTEI---- +--------------------------------------TENDQETQDLTFSIPVGENVTT +VEVPVEVL---------------------------------------------------- +------------DERSPQPPETPAST---------SEGTLKKKTNEVEKKQEKKK----- +KK----NKKRD---------DLAGD-EYRRYVYKVLKQVHPDLGITSKAMTVVNMFMGDM +FERIAQEAARLSDYTKRRTLSSREIESAVRLVLPGELSRHAVAEGSKAISNYV-SYDSRK +L---------------------------------------------------------- +>Carica|XP_021901556.1|H2B.S organism=Carica papaya phylum=Streptophyta class=Magnoliopsida +------MAPKRS--------TRLLALK-TTQKIIEK-VEVSVVP-SSGREQEITDVAVQK +SPVKVIPVEEKS---------------------RKTVRIPVEETPSLKTIPVKTPEKEQE +TID---------------DQEPVTTSEEVAADNEQE------------------------ +------------QEKEEETDQTQEGI-------TSSEPAGTTKEEKVEKRPSRRG-RPRR +RR----KKKKG-------SDEGNYK-GYKRYVFKVLKQVHPELAISSKAMVIINGFMNDM +FERLADEAANLSRYSHKATLSSKEIQGAVRLVLPGELSKHATAEGSKAVTNYM-SFPLHN 
+S---------------------------------------------------------- +>Citrus|ESR37664.1|H2B.S organism=Citrus clementina phylum=Streptophyta class=Magnoliopsida +------MPPRR---------SARVVLT---KKVVTETVEVSVVN-EKKKGKQEIAIHSEE +TLP------------------------------SKTITVEDKEEGKRTTVEVPIEEPEPP +TEPEAEPSSSV---AAEAATPAKKEEKKTSIETSPE------------PEHDVAA----- +------------SAEQEEPPEQSKNK-------EKEKPNEAQKTAQQEARAAHEKPGSKK +RK----RRKRN---------EGTGE-EYKTYVFRVLKQVHPGMAISSKAMTVINNLMNDM +FERIAGEAATLSKHCHRTTMSSREIQGAVKLVLPGELGKHAVAEGTKAVTNYT-SYDAKR +SKA-------------------------------------------------------- +>Citrus|KDO60308.1|H2B.S organism=Citrus sinensis phylum=Streptophyta class=Magnoliopsida +------MPPRR---------SARVVLT---KKVVEETVQVSVVNEKKKKGKREIAIHSEE +TLP------------------------------SKTITVEDKEEGKHTTVEVPIEEPEPP +TEPEAEPSSSVAAEAATPAKKEEKKTSIETSPEPEHDVAA-------------------- +------------SAEQEEPPEQSKNK-------EKEKPNEAQKTAQQEARAAHEKPGSKK +RK----RRKRN---------EGTGE-EYKTYVFRVLKQVHPGMAISSKAMTVINNLMNDM +FERIAGEAATLSKHCHRTTMSSREIQGAVKLVLPGELGKHAVAEGTKAVTNYT-SYDAKR +SKA-------------------------------------------------------- +>Daucus|XP_017234272.1|H2B.S organism=Daucus carota subsp. 
sativus phylum=Streptophyta class=Magnoliopsida +------MAPKKSPKKK----AVGAVVK-TTTKVIQETVQVSV---IQTKPKPQQET---- +---------------------------------------PQTENNKNGPKDIEIQDV--- +------------------------------------------------------------ +------------TTPTPTPKKATKTI----------PTQDTAKKTKKDSAQGATK----- +------KRKRS------------VE-GYKRYVYKVLKQVHPDIGISSKAMTIVNNLMTDM +FERLADEAARLTKYTKKMTLSSREIQGAVKLVLPGELGKHAVAEGAKAVTNYV-QYASGP +SKP-------------------------------------------------------- +>Erythranthe|XP_012838320.1|H2B.S organism=Erythranthe guttata phylum=Streptophyta class=Magnoliopsida +------MAPKKRPGR-----AKKTVVT--STKVVEETVKVVVTP-GGSGGEDDDNDNNES +VEMI----------SSSTKQNTEN---------VEIFTSSPEKEHVLRTIPVEDKEEQIP +APD---IVPQE---QDEDETQPYSEPETASTPPRKEAP---------------------- +------------PPKSSEPLETPPPA-------EKRETRKKKFQEKAKEAGQEKKATTEK +LRPK--RRRRS------VAGAGAGE-SYKRYVFKVMKQVHPEMGISSKAMTIVNNLMTDM +FERFAEEAARLQKYTGRKTMSSREVQGAVKLVLPGELGKHAVAEGAKAVTNYV-SYVPKS +----------------------------------------------------------- +>Eutrema|XP_006417719.1|H2B.S organism=Eutrema salsugineum phylum=Streptophyta class=Magnoliopsida +------MAPRREKK------VVSVTKK---KKVVEETLKVTV---TDGDPNVITET---- +--------------------------------------QTQDEETQDLTFSIPVEENVTT +VEI------------------------------------------------PVDVR---- +------------DDQSPQAPETPAPV---------SEGTV-KEAHKVEKKQSKKK----K +KK----MMKRG---------DLAGDHEYKRYVYKVMKQVHPELGITSKAMSVINTFMGDM +FERIAEEAARLSDHTKRRTLSSREIEAAVRLVLPGELSRHAVSEGAKAISNYV-AYGAKK +R---------------------------------------------------------- +>Fragaria|XP_004295898.2|H2B.S organism=Fragaria vesca subsp. 
vesca phylum=Streptophyta class=Magnoliopsida +------MSPKRRS-------SSRLVVK-TTKQVVKETVEVSVVRSKKRQKKVNDDEQQEP +VESTIAFETKKENQTEKIEVSVEKDPAKRAIESQEETQIAEEDKEPEEIRTIPVEVPGME +TPEKLRGATLSVDKDEEPKEPSTVLDDKEPVSNDVVTETQLDSDETQNFEEDGEQARTIS +MDSQEGSE----TQNFEETPMTPEKK----EVNQSSKVEKEKSDDVKQTGDGEKK----D +KK----RKRRS------PGNKREGGKGYKRYVYKVFKQVHPELGMSAKAMVVLNNYMNDM +FERLAGEAAKLTMYTSRKTLSSREIQGAVKLVLPGELGKHAMAEGTKAVSNYLSKNAAMS +HKS-------------------------------------------------------- +>Glycine|KAH1159328.1|H2B.S organism=Glycine max phylum=Streptophyta class=Magnoliopsida +------MAPKR---------AEKLVVR-STKKVVESIVQVSV-V-GKRLTQVIPQA---- +---------------------------------------QKVSPNSDITTENKAEQENNT +HQDGGVQ----------------------------------------------------- +------------NQEEEQKGVVNEEA---------KEEKNKSKTAKEQNGKEKKR----G +RK----KRNIE---------------GYQRYVYGVLKQVHPEMGISSKCMTALNNLMNDM +FERLTFEVSKLTDYTGHMTLSSREIQGVVRLVLPGELEKHAIAEGVKAVNNYT-SYDA-- +----------------------------------------------------------- +>Glycine|XP_003539797.1|H2B.S organism=Glycine max phylum=Streptophyta class=Magnoliopsida +------MAPKR---------AEKLVVR-STKKVVESSVLVSVVGKRLTRGKKDTQT---- +------------------------------------TDGEEEVGSQEHLVVIPIQE-VTP +QAQKDSPNSAI---TTENKAEQENNTQDGGV----------------------------- +------------ENNQEEEEEHEEVK----------EKKNKAKTPKGKNGKEKKR----- +------GRKKG---------RRSVE-GYQRYVYRVLKQVHPEMGISSKCMTVLNNLMNDM +FERLAFEASKLKDYTGHMTLSSREIQGAVRLVLPGELGKHAIAEGVKAVNNYT-SYDA-- +----------------------------------------------------------- +>Gossypium|XP_012484989.1|H2B.S organism=Gossypium raimondii phylum=Streptophyta class=Magnoliopsida +------MAPKR---------RAKVVVR-STKKIVKETVQVAVIDKTEGDNNGDQQQ---- +---------------------------------LDTVPLEDIEEAGERVITEIPIQGSTE +DK---------------------------------------------------------- +------------AEKEPRKVEAPGQK---------NRVQGEEKTEPVHEEEEPRKEEKKG +KR----KRGKK--------KELVGHEGYKTYVFRVLKQVHPGMAISSKAMSVINSLMNDM 
+FEKITNEATKLSQYTDRKTLSSREIQGAVRLVLPGELGKHAVAEGSKAVTNYA-SYDIKR +SKLV------------------------------------------------------- +>Linum|HISTDB_H2B_S_8|H2B.S organism=Linum usitatissimum phylum=Streptophyta class=Magnoliopsida +------MAPRRRSAGR----VVGVVRS--TRKVVKETVEVSILAGDTQETTPEDNT---- +---------------------------------EDINLLDTEELIDVVTPEAGVKLQEDA +TTT------------STVRTIPVEDAGPEREEELVI------------------------ +------------SEDRQFEDAKPKKE--------------EKKAPEKEKKVNKKK----- +------RKSRF---------VEGGE-GYRRYVYKVMKQVHPDMKISGVAMSIINSLMKDM +FERIADEAATLSRYSKRMTISSKEIQDAVKLVLPGELGKHAVAEGSKAVANYA-SYSHNK +----------------------------------------------------------- +>Malus|XP_008359166.3|H2B.S organism=Malus domestica phylum=Streptophyta class=Magnoliopsida +------MAPKR---------SAKMVVK-TTKRVVKEMVEVSVVK-TRRKKQQEDRP---- +-----------------------------------LETISVENNDSNQTQNVEVSVGKEP +LKTSIIPIETL---EQVIPIETQAENQTLKTQNAEVQVDREAEENPTTPDPQETEKLS-- +------------KEEEQKSEEDKTLR-----GGENKDAEDLTKTEEQASKKGEKKSEVKG +GK----RREKR---------RSRGREEYKTYVYKVLKQVHPGMGVSSKAMTVLNNLMNDM +FEKLADEAARLTTYTARKTLSSREIQGAVKLVLTGELGRHAMAEGTKAVSTYV-SYGGGS +SKS-------------------------------------------------------- +>Manihot|XP_021629190.1|H2B.S organism=Manihot esculenta phylum=Streptophyta class=Magnoliopsida +------MAPKGKRGKKK---VLGTVLR-SSKRVIKETVKIAV---FEGDTQESTQE---- +---------------------------------------DQNGDTEELPENEPLVVRTIP +VEERV---------EEEEEAQTIEVSVKKPKEEKRK------------------------ +------------QEKIETHEEKQEPA----------KTTKTKKRTQEEKGQEKKR----- +RR----RRRRG---------VEEGGEGYKRYVFRVLKQVHPELRISSMAMSVINSLMKDM +FERIADEAAKLSQHSHKMTLSSREIQGAVKLVLPGELGRHAIAEGSKAVTNYM-SYEAKG +SKA-------------------------------------------------------- +>Medicago|XP_013464866.1|H2B.S organism=Medicago truncatula phylum=Streptophyta class=Magnoliopsida +------MAPKS---------AKKVVVR-STRKVVQESVQVSVVSSHKRSTRGNNKD---- +---------------------------------VEIDKDAGNATQQEHVRIIPVQEVTSQ +TKEDTNTNTNTTTVTSEDTTNQENT----------------------------------- 
+------------PNDATMEPKTPLSN---------KEQEKKVRTKEGGNDGKGKR----- +------KKKRG---------RRMGE-GYQRYVYRVLKQVHPQMGISSQAMTILNNLMNDM +FEKLADEAAKLTAYTKHMTLTSREIQGAVKLVLPGELGKHAIAEGAKAVTNYV-SYVA-- +----------------------------------------------------------- +>Musa|HISTDB_H2B_S_6|H2B.S organism=Musa acuminata phylum=Streptophyta class=Magnoliopsida +------MAPKR---------TSRVLKT--TKTVIEETVEVVVEA-KDAQGPKEDLG---- +---------------------------------------EGKEAEPEGRQQAEEEEPS-- +------------------------------------------------------------ +------------REKEAEPPFEKSII-------QQEETAGEGKEAESITDKETQEEAAAV +DK----RFSGG-----DGGEMDGTGRGYKRYVFRVLKHVHPGMGISSRAMVVLDGMMGDM +FERLAGEASRLSTYTGKATLSSREIQGAVRLVLPGELGKRAISEGTKAVSNYM-AADRHE +QQ--------------------------------------------------------- +>Nymphaea|XP_031483632.1|H2B.S organism=Nymphaea colorata phylum=Streptophyta class=Magnoliopsida +------MGPRRSGR------LVGSVVK--ETKVVEETVKVVADV-DDLSSLEPSAI---- +-----------------------------------------SAGIIREIPVVEIKESKTP +QKK------------VEPEAAPFP------------------------------------ +------------LEKKQKGIEAKEVE--------EKEQKGHVDQKKERKPQELNR----R +RK----RLRRN---------EEGGVGAYRSYVYKVLKQVHPELGISSKAMDVLNGFMGDM +FERLAEEAATLQKHTGRRTLSSREIQMAVRLVLPGELGKHAISEGSKAITNYY-RYPEKK +RR--------------------------------------------------------- +>Oryza|XP_015612586.1|H2B.S organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +------MAPKQKE-------AANKKKKKKEVAVLVKTKTKVVQL-TTTTAELELEP---- +------------------------------------------------TVTVQVDDNKTG +AAA-----------DETPPVVPL------------------------------------- +------------QSQETQDPNEPKAA-----------------AAKKKKRAGHGR----K +RS----RRRRG--------GALEYG-GYKRYVWRVLKQVHPDLGASAQTMDVLDMMMADM +FERLADEASRLSKLSGRLTLTSREVQSAVRLVLPADLANHAISEGTKAISNYL-S----- +----------------------------------------------------------- +>Panicum|PAN11031.1|H2B.S organism=Panicum hallii phylum=Streptophyta class=Magnoliopsida +------MAPKRRSGGK----VVGSVVK---TKVVQETVEVTTAFVADGEPGQRATEDLAL 
+APPAVDASGG-----------------------SRSRVVHIEVTTPDGDTTTGGSNAKQA +TSKRGRGGRR----EEEKPAPPAEEAAQEPPVAQSQETQDPNEEQEEEEDAGKKKKKKKP +PQQ---------ELQDEEPPETPRVASERKTAAAKRTPQQQQKRGGGGAGGGDKTKTTKA +KKGG--RRRLGQASPGGDAGMGGVG-GYKRYVWRVLKQVHPELGVSGNAMRVLDMMMADM +FERLADEAARLSKVSGRATLSSREVQSAVRLVLPGELSRHAMSEGTKAISKYM-SYDA-- +----------------------------------------------------------- +>Panicum|HISTDB_H2B_S_3|H2B.S organism=Panicum virgatum phylum=Streptophyta class=Magnoliopsida +------MAPKRRGGGK----VVGSVVK---TKVVQETVEVTTAVVPDGEPEQRGTEALAL +APPAVDVSG------------------------GSRRVVHIEVTTPDGDATTGGSNAKKQ +ATSKRGRRGGR--REEEKPAPPAEEAAQEPPVAQSQETQDPNEEQEEEVEEDARKKKKRK +PPPPPQ------ERQDEEPPETPRVASERKAAAAKTTPQQQKKRGGGDKARAKKG----G +QR----RRRLGQASPGGDAGMGGVG-GYKRYVWRVLKQVHPELGVSGLAMRVLDMMMADM +FERLADEAARLSKASGRATLSSREVQSAVRLVLPGELGRHAMSEGTKAISKYM-SYDA-- +----------------------------------------------------------- +>Phaseolus|XP_007132352.1|H2B.S organism=Phaseolus vulgaris phylum=Streptophyta class=Magnoliopsida +------MAPKR---------AQKLVVR-STKKVVESSVQVSVVS-SSSRKRQTRGN---- +----------------------------------KDNIQTEEAVGEEKHVMVIPIQEGNP +QAQKDSSTSAM---TNENKGEQENSVQDDGVQNEEK------------------------ +------------KKKGENEEVKEEKK---------------GRFPKGSNGKEKKL----G +KK----KGRRS------------AE-GYQRYVYRVLKQVHPEMGISSKCMTILNNLMNDM +FERLAGEASKLKDYTGHMTLSSREIQGAVKLVLPGELGKHAIAEGVKAVNNFT-SYDA-- +----------------------------------------------------------- +>Piper|HISTDB_H2B_S_10|H2B.S organism=Piper nigrum phylum=Streptophyta class=Magnoliopsida +------MASTRQGRRNTPEVVSTVVKKKTTRKVVNETTIAAVAV-VESNEPPIVKT---- +--------------------------------------VPVEEESSDSVINVE--AGKTP +PKE-------------VPIARPRSDAAKGTNQRKDG---------------AATIAET-- +------------QQPPEKLREEKHAM---------EEVKKGSKRRKGERKEGEKK----R +KR----RKKRW-------SYNEEMS-GYSRYVFRVLKQVHPDLAISSKAMAVLNAFVWDM +FERLAGEAGKLADYTRRATLSSREIQDAVRLVLPGELGKHAISEGSKAVTNYV-TNDD-- +----------------------------------------------------------- +>Piper|HISTDB_H2B_S_9|H2B.S 
organism=Piper nigrum phylum=Streptophyta class=Magnoliopsida +------MASTRQGRRNTPEVVSTVVKKKTTRKVVNETTIAAVAV-VESNEPPIVKT---- +--------------------------------------VPVEEESSDSVINVE--AGKTP +PKE-------------VPIARPRSDAAKGTNQRKDG---------------AATIAET-- +------------QQPPEKPREEKHAM---------EEVKKGTKRRKGERKVEEKK----R +KR----RKKRW-------SYNEEMS-GYSRYVFRVLKQVHPDLAISSKAMAVLNAFVWDM +FERLAGEAGKLADYTRRATLSSREIQDAVRLVLPGELGKHAISEGSKAVTNYVHALGSWR +RPALQYGRHEEERRQRKCRQDRVTSVFGFGFEVLGLRISFLFATLRTAPGGRKHGVQPT +>Prunus|ONI00968.1|H2B.S organism=Prunus persica phylum=Streptophyta class=Magnoliopsida +------MAPKR---------STKTVVK-TTKQVVRETVQVSSVV-QSKRRKKQSEDSGDQ +TRKP----------VKTIK--------------TISIETQEENQTQNVEISEPLKTRRIP +IQTEEENQILK---GQNAEAITTLTSDQKEAAAEEEEEGEGEKKEDQEDTVETSITSDEK +EDEQKSEEVKTLEGEKEDSMETNTTSDEREEEEEETTKKEEHKSDEVKTQKGGKKSSEKK +RK----RKRRE---------RGRGE-EYKIYVHRVLKQVHPGMGVSSKGMTVLNNLMNDM +FERLADEAARLTKYTARKTLSSREIQAAVKLVLPGELGRHAMAEGTKAVSTYVSNNNGRQ +SKS-------------------------------------------------------- +>Setaria|XP_022680431.1|H2B.S organism=Setaria italica phylum=Streptophyta class=Magnoliopsida +------MAPKRRSSGK----VVGSVVK---TKVVQETVEVTTAIVADGEPEQQLAPGALA +LAPRTGEV-------------------------SRSKVVHVEITTPDSDNTTGRSSAKQQ +PTAKRGRGGRR---EEEKPPAPAEEAAQEPPPQSLETQEPNEEEEEEDVDVSKKRRKPPP +QQ----------RRRDEEEPETPRVA-------SERKTAGTKTTPQKQKKRGGGGGGGGK +AKTGR-RRRLGEASPGGDAGMGGMG-GYKRYVWRVLKQVHPELGVSGHAMRVLDMMMADM +FERLADEAARLSKAAGRATLSSREVQSAVRLVLPGELGRHAMSEGTKAISKYM-SYDDA- +----------------------------------------------------------- +>Setaria|XP_034583365.1|H2B.S organism=Setaria viridis phylum=Streptophyta class=Magnoliopsida +------MAPKRRSSGK----VVGSVVK---TKVVQETVEVTTAIVADGEPEQQLAPGALA +LAPRTGEV-------------------------SRSKVVHVEITTPDSDNTTGRSSAKQQ +PTAKRGRGGRR---EEEKPPAPAEEAAQEPPPQSLETQEPNEEEEEDVDVSKKRRKPPPQ +Q-----------RRRDEEEPETPRVA-------SERKTTGTKTTPQKQKKRGGGGGGGGK +AKTGR-RRRLGEASPGGDAGMGGMG-GYKRYVWRVLKQVHPELGVSGHAMRVLDMMMADM 
+FERLADEAARLSKAAGRATLSSREVQSAVRLVLPGELGRHAMSEGTKAISKYM-SYDDT- +----------------------------------------------------------- +>Solanum|XP_004241294.1|H2B.S organism=Solanum lycopersicum phylum=Streptophyta class=Magnoliopsida +------MAPKKRGGR-----VRATVVT--ARKVVEETVSVVVTP-VAGETETESQTLAEE +NQSF-----------------------------EILTPAPSEEPTPKRTINVQDISEGKK +APRRKPDPAQQ---VDEDETQPADEPEEMPSPPKKESGQKKTQKRKPEPAQRGDEDE--- +------------TQPSEEPEEMPTPP-------KMEADQRKAQKRKPDPAQKAKGGGERK +KK----RAKVG-------GGVGPSE-GYRRYVFRVMKQVHPDMGISSKAMTVLNNLMGDM +FERIANEAAILTKYVGRTTLASVDIQDAVKLVLPGELGKHAIAEGTNAVANYVTNVEKSK +SKP-------------------------------------------------------- +>Solanum|XP_006347209.1|H2B.S organism=Solanum tuberosum phylum=Streptophyta class=Magnoliopsida +------MAPKKRGGR-----ARATVVT--ARKVVEETVSVVVSGETETESQTLTEENQSF +---------------------------------EILTPLPYEEPTPKRTINVQDKSEGKK +AQQRKPDPAQQ---VDEVETQPADEPEEMPSPPKKEAVRKKAQKRKPDPAQRV------- +------------DEDETQPAEEPEEM--------PTPPKMEADQKKAQKAKGGGG--ERK +KK----RAKVG-------GGVGPSE-GYRRYVFRVMKQVHPDMGISSKAMMILNNLMGDM +FERIANEAAILTKYAGRATLASVDIQDAVKLVLPGELGKHAIAEGTKAVANYVTSVEKSK +SKP-------------------------------------------------------- +>Sorghum|XP_002462010.1|H2B.S organism=Sorghum bicolor phylum=Streptophyta class=Magnoliopsida +------MAPKRRGNK-----VVGSVVK---TKVVQETVEVIVAD-DDDTAEAEQQMVPEA +LAVAPSAVDVS----------------------GSTVVHVVEVTTPDGGDNATGSNVKQP +AVAKRGRGRRE--EEKEKQPAPPEDSVLVPQSQETQDPNEEEEDQEDASKKKKQKQKQRQ +Q-----------DEDDEAQPETPRVA------SERKKATPKKAKAKAQPQQQAGGGGDAG +KKRPKARRRLGQASAGGDAGMGGVG-GYKRYVWRVLKQVHPDLGVSGHAMQVLDMMMADM +FERLADEAARLSKATGRMTLTSREVQSAVRLVLPGDLGKHAIAEGTKAISKYM-SYA--- +----------------------------------------------------------- +>Spirodela|HISTDB_H2B_S_2|H2B.S organism=Spirodela polyrhiza phylum=Streptophyta class=Magnoliopsida +------M------------------VR-TTRKVVQETIEVSVVK-EKDATAGRKKVVEVK +VQDTTEMPQP-----------------------QAEAFEGEREGEAAVEEKSAATGEKEA 
+EEAGVQEKKEQ---RQPPQVETISREMVETAAAPEGEKHAVEKTRQREDETPPALESEQQ +PPQIGEE-----RKSVEEHEEKKKEK---------VEGDLRAEEAASGKEKEETKSAMRR +RR----RKRAG-----RDVGGFGGKRGYKRYVFRVLKQVHPELRVSSMAMTVLDSLVKDM +FERLAGEASRLSKYSGRATLSTREIQAAVRLVLPRELGEHALAEGNKAVANFMTAATAKP +SS--------------------------------------------------------- +>Trifolium|XP_045802699.1|H2B.S organism=Trifolium pratense phylum=Streptophyta class=Magnoliopsida +------MAPKRA--------NKKMVVR-STRKVVEESVQVSVVSSNKRSTRANKDN---- +---------------------------------EIDKDVGSDHDQREEHVRIIPVQEVTP +SAKEDSNASTT---TFT------------------------------------------- +------------TEDKTNQENTPNEA---------TMEPKESENKKVKNKEGNYGKEKRK +RK----RVRRM------------GE-GYQRYVYRVLKQVHPDMGISFKAMTILNNLMNDM +FEKLADEAAKLTTYIGHMTLSSREIQGAVKLVLPGELGKHAIAEGAKAVTNYISSYGA-- +----------------------------------------------------------- +>Vitis|XP_002272312.1|H2B.S organism=Vitis vinifera phylum=Streptophyta class=Magnoliopsida +------MAPKRSGKT-----RSKVVVK-ATRKVVQQTVEVTVLA-SKQKPPREEQG---- +------------------------------------KKISKKDKAPEELQREQVSADEEP +PKELPTPVT--------------------------------------------------- +------------QEEPPKKEEEKKTT---------TTQEGREEKKRGRRRRRRTS----- +------GRRRK----------EGGE-GYKRYVYRVLKQVHPGLGVSSKAMTVLSGFMNDM +FERIAEEAAKLSKYTGKTTLSAREIQGAVKLVLPGELQKHAMAEGTKAVSNYM-DYAAAG +GHKQ------------------------------------------------------- +>Zea|HISTDB_H2B_S_4|H2B.S organism=Zea mays phylum=Streptophyta class=Magnoliopsida +------MAPKRRGNK-----VVGSVVK---TKLVQETVEVIVAD-DDGLHAEKQQVPE-- +---------------------------------ALALAHPTVDVSGSTVVHVVEVTAKRG +RGGGGGGGGGR---RNEGKPPPEEDSAAVPVPQSQE---------TQDPNEELEFEL--- +------------EDEEEKQPETPRVA---------SEKRKKAATPTKKTKTQQPR----- +RR----RQRLGQASSGGDAGMGGVG-GYRRYVWRVLKQVHPDLGVSGHAMQVLDMMMADM +FERLAEEAARLSKATGRATLTSREVQSAVRLVLPGELGRHAISEGTKAISKYM-SYAA-- +----------------------------------------------------------- +>Zea|XP_008670031.1|H2B.S organism=Zea mays phylum=Streptophyta class=Magnoliopsida 
+------MAPKRRGNK-----VVGSVVK---TKLVQETVEVIVAD-DDGLHAEKQQVPEA- +---------------------------------LALAHPTVDVSGSTVVHVVEVTAKRGR +GGGGGGGGGGR---RNEGKPPPEEDSAAVPVPQSQE---------TQDPNEELEFEL--- +------------EDEEEKQPETPRVA---------SEKRKKAATPTKKTKTQQPR----- +RR----RQRLGQASSGGDAGMGGVG-GYRRYVWRVLKQVHPDLGVSGHAMQVLDMMMADM +FERLAEEAARLSKATGRATLTSREVQSAVRLVLPGELGRHAISEGTKAISKYM-SYAA-- +----------------------------------------------------------- diff --git a/CURATED_SET/draft_seeds/H2B.V.fasta b/CURATED_SET/draft_seeds/H2B.V.fasta new file mode 100644 index 0000000..de6f635 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.V.fasta @@ -0,0 +1,4 @@ +>Trypanosoma|AAO24603.1|H2B.V organism=Trypanosoma brucei brucei phylum=Euglenozoa class=Kinetoplastea +MPPTKGGKRPLPLGGKGKGKRPPGQTTKSSSSRKKSGARRGKKQQRWDLYIHRTLRQVYK +RGTLSKAAVRVLSSFIEDMYGKIQAEAVHVACINNVKTLTAREIQTSARLLLPPELAKHA +MSEGTKAVAKYNASREEAYSKVL diff --git a/CURATED_SET/draft_seeds/H2B.W.1_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2B.W.1_(Homo_sapiens).fasta new file mode 100644 index 0000000..4f90608 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.W.1_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_001002916.4|H2B.W.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGPSSETTSEEQLITQEPKEANSTTSQKQSKQRKRGRHGPRRCHSNCRGDSFATYFRRV +LKQVHQGLSLSREAVSVMDSLVHDILDRIATEAGHLARSTKRQTITAWETRMAVRLLLPG +QMGKLAESEGTKAVLRTSLYAIQQQRK diff --git a/CURATED_SET/draft_seeds/H2B.W.2_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2B.W.2_(Homo_sapiens).fasta new file mode 100644 index 0000000..f8ec79c --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.W.2_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_001375393.1|H2B.W.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAEASSETTSEEGQSIQEPKEANSTKAQKQKRRGCRGSRRRHANRRGDSFGDSFTPYFPR +VLKQVHQGLSLSQEAVSVMDSMIHDILDRIATEAGQLAHYTKRVTITSRDIQMAVRLLLP +GKMGKLAEAQGTNAALRTSLCAIWQQRK diff --git a/CURATED_SET/draft_seeds/H2B.W.fasta b/CURATED_SET/draft_seeds/H2B.W.fasta new file 
mode 100644 index 0000000..e18d7df --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.W.fasta @@ -0,0 +1,35 @@ +>Ailuropoda|XP_002925981.1|H2B.W organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +-------------------------------------------------------MAEPG +CETSSEESLGTEEPSAANPKSPKQKQKQPR---------RQCRRRCR---RCP-----NS +FATYFPRVLKQVHEGLSLSKKAVSVMDSFLKDIFEHIADEAAHLARSTKGSNKHSTINSR +EIQTAVRLLLPGEIGKHTVSKATKAVIRFQ--------------RCE +>Bos|DAA13058.1|H2B.W organism=Bos taurus phylum=Chordata class=Mammalia +-------MGIGGSILSETSSDSYEEDVITKETGISEIEPSEKEMAKVETSKPDPYDAEPI +KVETSKPDPYDAEPKKAETSKPDPYDAEPKKAKQKTAKGRRRRRHCH---HDS----FSS +FATYFPRVLRQIHKGMSLSHDSVNILDSFVKDTFERIAEEAGRLAGDN----KRRTITTE +DIEAAVRLLLPGKLGKYAVLKATKSLITYR--------------TCK +>Canis|XP_548517.2|H2B.W organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MIPGKPEEGKGSSEGPICDTEVACDVRKCSDYKGASAPQQPPLSVLLSCWEGEPHMAEPG +CETSSEECLGTKEPREAEPETP--KRKKPR---------RQCHRRCR---RSR----SDS +FAIYFPRVLKQVDEGLSLSQKAVSIMDSFVKDIFERIADEASRLARST----KRSTITSR +EIQTAVRLLLPGEIGKYAVSEATKALMRNQVGALEKQVKNVLSLGCG +>Macaca|NP_001180847.1|H2B.W organism=Macaca mulatta phylum=Chordata class=Mammalia +---------MLRTQVPPLLRSTTAIVWSCRVMAAASA------------------MAEPS +SETTSEEQLITQEPKEANSTMAQKQSKQRK---------RGRRGPCRCHANCR----GDS +FATYFRRVLKQVHQGLSLSREAVSVMDSLVHDILDRIATEAGRLARST----KRQTITAW +ETRIAVRLLLPGEMGKLAESEGTKAVLRTSLYAVQQ--------QRK +>Oryctolagus|XP_002720211.1|H2B.W organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +-------------------------------------------------------MAEPA +SHVASEENL-SLEPKTTASSTP--KEKQPR---------RRRRRR-----QGH----NYS +FASYFPKVLKHVHKGLSLSKEAKGVMDSIVRDVFERIAHEAASLVRYS----KHSTLTSR +DVQSAVRLLLPGQLHKHADVEGTKALLKFI--------------THP +>Homo|NP_001002916.4|H2B.W.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------------------------------------------MAGPS +SETTSEEQLITQEPKEANSTTSQKQSKQRK---------RGRHGPRRCHSNCR----GDS 
+FATYFRRVLKQVHQGLSLSREAVSVMDSLVHDILDRIATEAGHLARST----KRQTITAW +ETRMAVRLLLPGQMGKLAESEGTKAVLRTSLYAIQQ--------QRK +>Homo|NP_001375393.1|H2B.W.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------------------------------------------MAEAS +SETTSEEGQSIQEPKEANSTKAQ---KQKR---------RGCRGSRRRHANRRGDSFGDS +FTPYFPRVLKQVHQGLSLSQEAVSVMDSMIHDILDRIATEAGQLAHYT----KRVTITSR +DIQMAVRLLLPGKMGKLAEAQGTNAALRTSLCAIWQ--------QRK diff --git a/CURATED_SET/draft_seeds/H2B.W_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H2B.W_(Homo_sapiens).fasta new file mode 100644 index 0000000..446cb13 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.W_(Homo_sapiens).fasta @@ -0,0 +1,8 @@ +>Homo|NP_001002916.4|H2B.W.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAGPSSETTSEEQLITQEPKEANSTTSQKQSKQRKRGRHGPRRCHSNCR----GDSFATY +FRRVLKQVHQGLSLSREAVSVMDSLVHDILDRIATEAGHLARSTKRQTITAWETRMAVRL +LLPGQMGKLAESEGTKAVLRTSLYAIQQQRK +>Homo|NP_001375393.1|H2B.W.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MAEASSETTSEEGQSIQEPKEANSTKAQ---KQKRRGCRGSRRRHANRRGDSFGDSFTPY +FPRVLKQVHQGLSLSQEAVSVMDSMIHDILDRIATEAGQLAHYTKRVTITSRDIQMAVRL +LLPGKMGKLAEAQGTNAALRTSLCAIWQQRK diff --git a/CURATED_SET/draft_seeds/H2B.W_(Homo_sapiens)_only.fasta b/CURATED_SET/draft_seeds/H2B.W_(Homo_sapiens)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H2B.W_only.fasta b/CURATED_SET/draft_seeds/H2B.W_only.fasta new file mode 100644 index 0000000..694b14a --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.W_only.fasta @@ -0,0 +1,25 @@ +>Ailuropoda|XP_002925981.1|H2B.W organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +-------------------------------------------------------MAEPG +CETSSEESLGTEEPSAANPKSPKQKQKQPR---------RQCRRRCR---RCP-NSFATY +FPRVLKQVHEGLSLSKKAVSVMDSFLKDIFEHIADEAAHLARSTKGSNKHSTINSREIQT +AVRLLLPGEIGKHTVSKATKAVIRFQ--------------RCE +>Bos|DAA13058.1|H2B.W organism=Bos taurus phylum=Chordata class=Mammalia 
+-------MGIGGSILSETSSDSYEEDVITKETGISEIEPSEKEMAKVETSKPDPYDAEPI +KVETSKPDPYDAEPKKAETSKPDPYDAEPKKAKQKTAKGRRRRRHCH---HDSFSSFATY +FPRVLRQIHKGMSLSHDSVNILDSFVKDTFERIAEEAGRLAGDN----KRRTITTEDIEA +AVRLLLPGKLGKYAVLKATKSLITYR--------------TCK +>Canis|XP_548517.2|H2B.W organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MIPGKPEEGKGSSEGPICDTEVACDVRKCSDYKGASAPQQPPLSVLLSCWEGEPHMAEPG +CETSSEECLGTKEPREAEPETP--KRKKPR---------RQCHRRCR---RSRSDSFAIY +FPRVLKQVDEGLSLSQKAVSIMDSFVKDIFERIADEASRLARST----KRSTITSREIQT +AVRLLLPGEIGKYAVSEATKALMRNQVGALEKQVKNVLSLGCG +>Macaca|NP_001180847.1|H2B.W organism=Macaca mulatta phylum=Chordata class=Mammalia +---------MLRTQVPPLLRSTTAIVWSCRVMAAASA------------------MAEPS +SETTSEEQLITQEPKEANSTMAQKQSKQRK---------RGRRGPCRCHANCRGDSFATY +FRRVLKQVHQGLSLSREAVSVMDSLVHDILDRIATEAGRLARST----KRQTITAWETRI +AVRLLLPGEMGKLAESEGTKAVLRTSLYAVQQ--------QRK +>Oryctolagus|XP_002720211.1|H2B.W organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +-------------------------------------------------------MAEPA +SHVASEENL-SLEPKTTASSTP--KEKQPR---------RRRRRR-----QGHNYSFASY +FPKVLKHVHKGLSLSKEAKGVMDSIVRDVFERIAHEAASLVRYS----KHSTLTSRDVQS +AVRLLLPGQLHKHADVEGTKALLKFI--------------THP diff --git a/CURATED_SET/draft_seeds/H2B.Z.fasta b/CURATED_SET/draft_seeds/H2B.Z.fasta new file mode 100644 index 0000000..0a38bf7 --- /dev/null +++ b/CURATED_SET/draft_seeds/H2B.Z.fasta @@ -0,0 +1,36 @@ +>Vitrella|CEM32013.1|H2B.Z organism=Vitrella brassicaformis CCMP3155 phylum= class= +MPG------KGPAEKRQAAKKTAGKTPAE---AGKKRRRKRTESFALYIYKVLKQVHPET +GISKKSMSIMNSFINDIFDRMATEATNLIRFNKKKTLSSREVQTSVRLMLPGELSKHAVS +EGTKAVTKYTTAAGN +>Babesia|XP_001610608.1|H2B.Z organism=Babesia bovis T2Bo phylum=Apicomplexa class=Aconoidasida +MSG------KVPSSKSQAAKKTAGKSL-----GIRYRRKKRIESFSLYIYKVLKQVHPET +GVSKKSMSIMNSFINDIFDRMALEATRLIRYNKKSTLSSREIQTAVRLLLPGELSKHAVS +EGTKAVTKYTTSGA- +>Cytauxzoon|HISTDB_H2B_Z_0|H2B.Z organism=Cytauxzoon felis phylum=Apicomplexa class=Aconoidasida 
+MSG------KVPSTKSQAAKKTAGKTL-----GVRYRRKKRIESFALYIYKVLKQVHPET +GVSKKSMSIMNSFINDIFDRLALEATRLIRYNKKSTLSSREIQTAVRLLLPGELSKHAVS +EGTKAVTKYTTSGV- +>Plasmodium|XP_678689.1|H2B.Z organism=Plasmodium berghei ANKA phylum=Apicomplexa class=Aconoidasida +MSG------KGPAQKSQAAKKTAGKTL-----GPRHKRKRRTESFSLYIFKVLKQVHPET +GVTKKSMNIMNSFINDIFDRLVTEATRLIRYNKKRTLSSREIQTAVRLLLPGELSKHAVS +EGTKAVTKYTTSGA- +>Plasmodium|XP_001349046.1|H2B.Z organism=Plasmodium falciparum 3D7 phylum=Apicomplexa class=Aconoidasida +MSG------KGPAQKSQAAKKTAGKTL-----GPRHKRKRRTESFSLYIFKVLKQVHPET +GVTKKSMNIMNSFINDIFDRLVTEATRLIRYNKKRTLSSREIQTAVRLLLPGELSKHAVS +EGTKAVTKYTTSAA- +>Cryptosporidium|XP_628349.1|H2B.Z organism=Cryptosporidium parvum Iowa II phylum=Apicomplexa class=Conoidasida +MSGKSGKSIKGPAQKQQAAKKTAGKSPAD---GGKRKRRKRTESFALYIYKVLKQVHPET +GISKKSMSIMNSFINDVFDRLSAEAVKLVQYNKKRTLSSREVQTSVRLMLPGELSKHAVS +EGTKAVTKYTSASA- +>Eimeria|XP_013228334.1|H2B.Z organism=Eimeria tenella phylum=Apicomplexa class=Conoidasida +MSG------KGPAQKSQAAKKTAGKSL-----GPRYRRRKRTESFALYIYKVLKQVHPET +GVSKKSMSIMNSFINDIFDRLADEAVRLIRYNKKRTLSSREIQTAVRLLLPGELSKHAVS +EGTKAVSKYTTSGA- +>Gregarina|XP_011128492.1|H2B.Z organism=Gregarina niphandrodes phylum=Apicomplexa class=Conoidasida +MSG------KG--GKQQLAKKTAANKLPSHHLDKNKKRRRRGETFSIYIYKVLRQVQPKL +GMSRKSMAIMNSFINDIFERLATEAVKLIQYNKKRTLSSREMQTSVRLLLPGELSKHAAT +EGAKAVEKYENRPIA +>Toxoplasma|XP_002369740.1|H2B.Z organism=Toxoplasma gondii ME49 phylum=Apicomplexa class=Conoidasida +MSG------KGPAQKSQAAKKTAGKSL-----GPRYRRRKRTESFALYIYKVLKQVHPET +GVSKKSMSIMNSFINDIFDRLADEAVRLIRYNKKRTLSSREIQTAVRLLLPGELSKHAVS +EGTKAVTKYTTSGA- diff --git a/CURATED_SET/draft_seeds/H2B.fasta b/CURATED_SET/draft_seeds/H2B.fasta old mode 100755 new mode 100644 index 049fc39..f00906b --- a/CURATED_SET/draft_seeds/H2B.fasta +++ b/CURATED_SET/draft_seeds/H2B.fasta @@ -1,450 +1,1962 @@ ->subH2B_Mus_musculus_NP_081340.1 NP_081340.1 histone: H2B variant: subH2B organism: Mus musculus 
------------------------------------------------------------- ----MAKPT---------------FKRQCYIKRHLRPLYR--------------------- -KHS-RCSSINLGHGNYSL--YINRVLKEVVPNRGISSYSVDIMNILINDIFERIATEACQ -QMF----LRKRCTLTPGDIQQAVHLLLPKKLATLAVTFGSKAVHRFI------------- --HS--------- ->subH2B_Cricetulus_griseus_XP_003515979.1 XP_003515979.1 histone: H2B variant: subH2B organism: Cricetulus griseus ------------------------------------------------------------- ----MAKSII---------------------KRYQFVKRRQR------RA----------- -FRK-RYSSINFGQRNYSL--YISRVLKEVVPMRGLSSNTVDIMNTLINDLFERIATEACQ -LMY----FRKRCTLTLEDIQKAVYLLVPKKLAKSAVTFGSKAVHRFI------------- --HS--------- ->subH2B_Macaca_mulatta_XP_001095287.1 XP_001095287.1 histone: H2B variant: subH2B organism: Macaca mulatta ------------------------------------------------------------- ----MARSST---------------------KKHKYSKRHQS------PT----------- -SRKKAHSSIDFVHGNYSF--FVNKVLKEVVSHRGTSSRTLDLMNTLINNFFQHISMKAYR -LMY----FRNRCTLTPEDILKAAYLLLPQKTANYAVAFGSEVFRRYV------------- --HS--------- ->subH2B_Cavia_porcellus_XP_003461969.1 XP_003461969.1 histone: H2B variant: subH2B organism: Cavia porcellus ------------------------------------------------------------- ----MVKSVI--------------KPR----RYFRGRRTSIS------------------- -SKK-SCLSSNSGYRNYSL--YVSRVLKEVVPERAISSCTVNIMNTLIDDIFERISEEAHH -LMC----SQKRCTLTPKDIQKAVYLLLPRKLAKYAVAFGDGAVDRYV------------- --HS--------- ->subH2B_Heterocephalus_glaber_XP_004885290.1 XP_004885290.1 histone: H2B variant: subH2B organism: Heterocephalus glaber ------------------------------------------------------------- ----MVRSII---------------------KQYGYSRRHLT------PT----------- -YRKKSYLSTSFGHRNYSL--YISRVLKEVAPQRHISSRTLDMMNALINNIFERIATEAHH -LMC----SRNRCTLAPEDIQRAVYLLLPGKLAKYAMAFGDEAVHRYV------------- --HS--------- ->subH2B_Equus_caballus_XP_001504072.2 XP_001504072.2 histone: H2B variant: subH2B organism: Equus caballus ------------------------------------------------------------- ----MARAST---------------------KKSRCSRRRQS------PA----------- 
-SRKKSHASTYRGHRNYSL--YINRVLKEVVPQRGISARTLDTMNILINNIFERISTEACS -MMY----FRNRCTLTPQDVQKAVYSLFPGKLAKYAVAFGSEAVQRYL------------- --HS--------- ->subH2B_Bos_taurus_NP_991343.1 NP_991343.1 histone: H2B variant: subH2B organism: Bos taurus ------------------------------------------------------------- ----MARNVT---------------KRNKRCRGHQKAIYKKK------------------- -SHS-SS---ESGLRNYSL--YINRVLKEVVPQKGISSRTIDIINTMINDMFERISTEACN -LMY----YRKRCTLTPEDIEKAVYLLLPEKLAKYAVAFGKEAVQRYV------------- --RS--------- ->subH2B_Loxodonta_africana_XP_003407998.1 XP_003407998.1 histone: H2B variant: subH2B organism: Loxodonta africana ------------------------------------------------------------- ----MARSII---------------KKYRYSNGHLSSISIKK------------------- -LHS-ST---NFGHRNYSL--YVNRVLKEVVPQRGISSRTLDVMNTLINNIFKCIATEACN -LMY----FRNRCTLTPEDIQRAVYVRLPGKLAKHAVAFGSEAVNRYV------------- --HS--------- ->subH2B_Sus_scrofa_XP_003134084.1 XP_003134084.1 histone: H2B variant: subH2B organism: Sus scrofa ------------------------------------------------------------- ----MARYIT---------------------KKNRCSRGHRH------PN----------- -SRKKTCSSTECGRRNYSL--YVNRVLKEVVPQSGISSRTLDMMNTVINDIFERISMEASN -LMY----FRNRCTLTPEDVQKAVYLLLPRKLAKHAVAFGSDAVHRYV------------- --HS--------- ->subH2B_Ailuropoda_melanoleuca_XP_002921383.1 XP_002921383.1 histone: H2B variant: subH2B organism: Ailuropoda melanoleuca ------------------------------------------------------------- ----MARSIT---------------KKNKRSRGHRSPISKKK------------------- -SHS-ST---DFGRRNYSL--YINRVLKEVVPQRSISSRTLDVMNTLIKDIFERISVEARS -LMC----FRNRCTLTPEDIQKAVYLLLPGKLAKYAVAFGSEAVQRYV------------- --QS--------- ->subH2B_Monodelphis_domestica_XP_001366286.1 XP_001366286.1 histone: H2B variant: subH2B organism: Monodelphis domestica ------------------------------------------------------------- ----MTKAVR--------------------SNESQKLTTNQE------KK----------- -KKKKKK---LLLSRNYSL--YTHRVLKEVIPNQGLTYKTTEIMNSMINNILERIAEEAGN -LLC----YKRHLTLGHQDIQMAVYRLLPDELAKHAVAFGTRAVTTYN------------- 
--DSK-------- ->H2B.W_Bos_taurus_DAA13058.1 DAA13058.1 histone: H2B variant: H2B.W organism: Bos taurus --------MGIGGSILSETSSDSYEEDVITKETGISEIEPSEKEMAKVETSKPDPYDAE-- ----PIKVET--------------SKPDPYDAEPKKAETSKPDPYDAEPK--KAKQKTAKG -RRR-RRHCHHDSFSSFAT--YFPRVLRQIHKGMSLSHDSVNILDSFVKDTFERIAEEAGR -LAG----DNKRRTITTEDIEAAVRLLLPGKLGKYAVLKATKSLITYR------------- --TCK-------- ->canonical_H2B_Trypanosoma_brucei_brucei_TREU927_XP_823266.1 XP_823266.1 histone: H2B variant: canonical_H2B organism: Trypanosoma brucei brucei TREU927 -----------------------------------------------------MA------ ----TPKSTP---------------------AKTRKEAKKTR------------------- -----RQ---RK--RTWNV--YVSRSLRSINSQMSMTSRTMKIVNSFVNDLFERIAAEAAT -IVR----VNRKRTLGARELQTAVRLVLPADLAKHAMAEGTKAVSHAS------------- --S---------- ->H2B.Z_Toxoplasma_gondii_ME49_XP_002369740.1 XP_002369740.1 histone: H2B variant: H2B.Z organism: Toxoplasma gondii ME49 ----------------------------------------------------MSG--KG-- ----PAQKSQ---------------------AAKKTAGKSLG------PR----------- -YRR-RK---RT--ESFAL--YIYKVLKQVHPETGVSKKSMSIMNSFINDIFDRLADEAVR -LIR----YNKKRTLSSREIQTAVRLLLPGELSKHAVSEGTKAVTKYT------------- --TSGA------- ->H2B.Z_Plasmodium_berghei_ANKA_XP_678689.1 XP_678689.1 histone: H2B variant: H2B.Z organism: Plasmodium berghei ANKA ----------------------------------------------------MSG--KG-- ----PAQKSQ---------------------AAKKTAGKTLG------PR----------- -HKR-KR---RT--ESFSL--YIFKVLKQVHPETGVTKKSMNIMNSFINDIFDRLVTEATR -LIR----YNKKRTLSSREIQTAVRLLLPGELSKHAVSEGTKAVTKYT------------- --TSGA------- ->H2B.Z_Plasmodium_falciparum_3D7_XP_001349046.1 XP_001349046.1 histone: H2B variant: H2B.Z organism: Plasmodium falciparum 3D7 ----------------------------------------------------MSG--KG-- ----PAQKSQ---------------------AAKKTAGKTLG------PR----------- -HKR-KR---RT--ESFSL--YIFKVLKQVHPETGVTKKSMNIMNSFINDIFDRLVTEATR -LIR----YNKKRTLSSREIQTAVRLLLPGELSKHAVSEGTKAVTKYT------------- --TSAA------- ->canonical_H2B_Saccharomyces_cerevisiae_S288C_NP_009553.1 
NP_009553.1 histone: H2B variant: canonical_H2B organism: Saccharomyces cerevisiae S288C --------------------------------------------------MSSAAEKK--- ----PASKAP--------------AEK----KPAAKKTSTSV-----------DGK----- -KRS-KV---RK--ETYSS--YIYKVLKQTHPDTGISQKSMSILNSFVNDIFERIATEASK -LAA----YNKKSTISAREIQTAVRLILPGELAKHAVSEGTRAVTKYS------------- --SSTQA------ ->canonical_H2B_Saccharomyces_cerevisiae_S288C_NP_010510.3 NP_010510.3 histone: H2B variant: canonical_H2B organism: Saccharomyces cerevisiae S288C --------------------------------------------------MSAKAEKK--- ----PASKAP--------------AEK----KPAAKKTSTST-----------DGK----- -KRS-KA---RK--ETYSS--YIYKVLKQTHPDTGISQKSMSILNSFVNDIFERIATEASK -LAA----YNKKSTISAREIQTAVRLILPGELAKHAVSEGTRAVTKYS------------- --SSTQA------ ->canonical_H2B_Arabidopsis_thaliana_Q9LZ45.3 Q9LZ45.3 histone: H2B variant: canonical_H2B organism: Arabidopsis thaliana ----------------------------------------------------MAP------ ----KAEKKP--------------AEK----APAPKAEKKIA------KE---GGTSEIVK -KKK-KT---KKSTETYKI--YIFKVLKQVHPDIGISGKAMGIMNSFINDIFEKLAQESSR -LAR----YNKKPTITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFT------------- --SS--------- ->canonical_H2B_Arabidopsis_thaliana_Q9FFC0.3 Q9FFC0.3 histone: H2B variant: canonical_H2B organism: Arabidopsis thaliana -----------------------------------------------MAKADKKPAEKK-- ----PAEKTPA-------------AEPAAAAEKKPKAGKKLP------KEPAGAGD----- -KKKKRS---KKNVETYKI--YIFKVLKQVHPDIGISSKAMGIMNSFINDIFEKLAGESSK -LAR----YNKKPTITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFT------------- --SS--------- ->canonical_H2B_Arabidopsis_thaliana_Q9LQQ4.3 Q9LQQ4.3 histone: H2B variant: canonical_H2B organism: Arabidopsis thaliana ----------------------------------------------MAPRAEKKPAEKKTA -AERPVEENK--------------AAEKAPAEKKPKAGKKLP------PK--EAGD----- -KKKKRS---KKNVETYKI--YIFKVLKQVHPDIGISSKAMGIMNSFINDIFEKLAQESSK -LAR----YNKKPTITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFT------------- --SS--------- ->canonical_H2B_Arabidopsis_thaliana_Q9LZT0.3 Q9LZT0.3 histone: H2B variant: 
canonical_H2B organism: Arabidopsis thaliana ----------------------------------------------MAPKAEKKPAEKK-- ----PVEEKS--------------KAEKAPAEKKPKAGKKLP------KEAGAGGD----- -KKKKMK---KKSVETYKI--YIFKVLKQVHPDIGISSKAMGIMNSFINDIFEKLASESSK -LAR----YNKKPTITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFT------------- --SS--------- ->sperm_H2B_Psammechinus_miliaris_Q27749.3 Q27749.3 histone: H2B variant: sperm_H2B organism: Psammechinus miliaris -----------------------------------------------------MPSQKS-- ----PTKRSPTK------------RSPQKGGKGAKRGGKAGK------RR---RGVAV--- -KRR-RR---RR--ESYGI--YIYKVLKQVHPDTGISSRAMSVMNSFVNDVFERIASEAGR -LTT----YNRRNTVSSREVQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --TSR-------- ->sperm_H2B_Strongylocentrotus_purpuratus_NP_999706.1 NP_999706.1 histone: H2B variant: sperm_H2B organism: Strongylocentrotus purpuratus -----------------------------------------------------MPSQRS-- ----PTKRSP---------------TKRSPQKGAGKGGKGSK------RGGKARRRGGAAV -RRR-RR---RR--ESYGI--YIYKVLKQVHPDTGISSRGMSIMNSFVNDVFERVAAEASR -LTK----YNRRSTVSSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --TSR-------- ->sperm_H2B_Strongylocentrotus_purpuratus_NP_999721.1 NP_999721.1 histone: H2B variant: sperm_H2B organism: Strongylocentrotus purpuratus -----------------------------------------------------MP--RS-- ----PSKTSPRKGSPRRGSPSRKASPK----RGGKGAKRAGK-----------GGRRRNVV -RRR-RR---RR--ESYGI--YIYKVLKQVHPDTGISSRGMSVMNSFVNDIFGRIAGEASR -LTR----ANRRSTISSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --TSR-------- ->sperm_H2B_Parechinus_angulosus_P02291.2 P02291.2 histone: H2B variant: sperm_H2B organism: Parechinus angulosus -----------------------------------------------------MP--RS-- ----PAKTSPRKGSPRKGSPSRKASPK----RGGKGAKRAGK-----------GGRRRRVV -KRR-RR---RR--ESYGI--YIYKVLKQVHPDTGISSRAMSVMNSFVNDVFERIAGEASR -LTS----ANRRSTVSSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --TSR-------- ->sperm_H2B_Psammechinus_miliaris_Q27750.3 Q27750.3 histone: H2B variant: sperm_H2B organism: Psammechinus miliaris 
-----------------------------------------------------MP--KS-- ----PSKSSPRKG-----------SPR----KGSPRKGSPKRGGKGAKRA--GKGGRRNVV -KRR-RR---RR--ESYGI--YIYKVLKQVHPDTGISSRGMSVMNSFVNDVFERIAGEASR -LTS----ANRRSTISSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --TARR------- ->canonical_H2B_Psammechinus_miliaris_AAA30025.1 AAA30025.1 histone: H2B variant: canonical_H2B organism: Psammechinus miliaris ------------------------------------------------------------- ----MAPTAQ--------------VAK----KGSKKAVKAPR------PS---GGK----- -KRN-RK---RK--ESYGI--YIYKVLKQVHPDTGISSRAMIIMNSFVNDIFERIAGESSR -LAQ----YNKKSTISSREIQTAVRLILPGELAKHAVSEGTKAVTKYT------------- --TSK-------- ->canonical_H2B_Strongylocentrotus_purpuratus_NP_999710.2 NP_999710.2 histone: H2B variant: canonical_H2B organism: Strongylocentrotus purpuratus ------------------------------------------------------------- ----MAPTAQ--------------VAK----KGSKKAVKGTK------TA--XGGK----- -KRN-RK---RK--ESYGI--YIYKVLKQVHPDTGISSRAMVIMNSXVBDIFERIAGESSR -LAQ----YNKKXTXSSREIQTAVRLILPGELAKHAVSEGTKAVTKYT------------- --TSK-------- ->canonical_H2B_Caenorhabditis_elegans_NP_505464.1 NP_505464.1 histone: H2B variant: canonical_H2B organism: Caenorhabditis elegans -----------------------------------------------------MA------ ----PPKPSA---------------------KGAKKAAKTVS------KP--KDGK----- -KRK-HA---RK--ESYSV--YIYRVLKQVHPDTGVSSKAMSIMNSFVNDVFERIASEASR -LAH----YNKRSTISSREIQTAVRLILPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Drosophila_melanogaster_NP_724342.1 NP_724342.1 histone: H2B variant: canonical_H2B organism: Drosophila melanogaster -----------------------------------------------------MP------ ----PKTSGK--------------AAK----KAGK--AQKNI------TK---TDK----- -KKK-RK---RK--ESYAI--YIYKVLKQVHPDTGISSKAMSIMNSFVNDIFERIAAEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Xenopus_laevis_NP_001086753.1 NP_001086753.1 histone: H2B variant: canonical_H2B organism: Xenopus laevis 
-----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKTQ------KK---DGK----- -KRR-KS---RK--ESYAI--YVYKVLKQVHPDTGISSKAMSIMNSFVNDVFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SAK-------- ->canonical_H2B_Homo_sapiens_NP_778225.1 NP_778225.1 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--D--- ----PSKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RG---RK--ESYSI--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIASEASR -LAH----YNKRSTITSREVQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Gallus_gallus_NP_001073188.1 NP_001073188.1 histone: H2B variant: canonical_H2B organism: Gallus gallus -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKTQ------KK---GDK----- -KRK-KS---RK--ESYSI--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Homo_sapiens_NP_003512.1 NP_003512.1 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--E--- ----PVKSAP--------------VPK----KGSKKAINKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Rattus_norvegicus_NP_072173.2 NP_072173.2 histone: H2B variant: canonical_H2B organism: Rattus norvegicus -----------------------------------------------------MP--E--- ----PAKSRP--------------APK----KGSKKAVTKAQ------KK---DGK----- -ERK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE-RR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Mus_musculus_NP_783595.1 NP_783595.1 histone: H2B variant: canonical_H2B organism: Mus musculus -----------------------------------------------------MP--E--- 
----PSKSAP--------------APK----KGSKKAISKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIASEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Homo_sapiens_NP_003510.1 NP_003510.1 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--E--- ----LAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIASEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Monodelphis_domestica_XP_007485418.1 XP_007485418.1 histone: H2B variant: canonical_H2B organism: Monodelphis domestica -----------------------------------------------------MP--E--- ----PGKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIASEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->H2B.S_Homo_sapiens_NP_059141.1 NP_059141.1 histone: H2B variant: H2B.S organism: Homo sapiens -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGR----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LPH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SAK-------- ->canonical_H2B_Homo_sapiens_NP_066406.1 NP_066406.1 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--E--- ----PSKSAP--------------APK----KGSKKAITKAQ------KK---DGK----- -KRK-RS---RK--ESYSI--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Ailuropoda_melanoleuca_XP_011216221.1 XP_011216221.1 histone: H2B variant: canonical_H2B organism: Ailuropoda melanoleuca -----------------------------------------------------MP--D--- ----PAKSAP--------------APK----KGSKKAVTKVQ------KK---DGK----- 
-KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Homo_sapiens_NP_001019770.1 NP_001019770.1 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--D--- ----PAKSAP--------------APK----KGSKKAVTKVQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Homo_sapiens_NP_001154806.1 NP_001154806.1 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--D--- ----PAKSAP--------------APK----KGSKKAVTKVQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSKLIGPILWK ->canonical_H2B_Homo_sapiens_NP_003515.1 NP_003515.1 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--D--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Sus_scrofa_XP_005665716.1 XP_005665716.1 histone: H2B variant: canonical_H2B organism: Sus scrofa -----------------------------------------------------MP--D--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Homo_sapiens_NP_066407.1 NP_066407.1 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--E--- ----PTKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR 
-LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Homo_sapiens_NP_066402.2 NP_066402.2 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSI--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SAK-------- ->canonical_H2B_Loxodonta_africana_XP_003422331.1 XP_003422331.1 histone: H2B variant: canonical_H2B organism: Loxodonta africana -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSI--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SAK-------- ->canonical_H2B_Homo_sapiens_NP_003518.2 NP_003518.2 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--D--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSI--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Homo_sapiens_NP_003511.1 NP_003511.1 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--E--- ----PSKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Equus_caballus_XP_005610255.1 XP_005610255.1 histone: H2B variant: canonical_H2B organism: Equus caballus -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSI--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- 
->canonical_H2B_Homo_sapiens_NP_003519.1 NP_003519.1 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSI--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Bos_taurus_NP_001032546.1 NP_001032546.1 histone: H2B variant: canonical_H2B organism: Bos taurus -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Cavia_porcellus_XP_013006892.1 XP_013006892.1 histone: H2B variant: canonical_H2B organism: Cavia porcellus -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Heterocephalus_glaber_XP_012928647.1 XP_012928647.1 histone: H2B variant: canonical_H2B organism: Heterocephalus glaber -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Homo_sapiens_NP_003509.1 NP_003509.1 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Homo_sapiens_NP_003513.1 NP_003513.1 histone: H2B variant: canonical_H2B 
organism: Homo sapiens -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Homo_sapiens_NP_003514.2 NP_003514.2 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Homo_sapiens_NP_003516.1 NP_003516.1 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Homo_sapiens_NP_003517.2 NP_003517.2 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Macaca_nemestrina_XP_011741102.1 XP_011741102.1 histone: H2B variant: canonical_H2B organism: Macaca nemestrina -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Oryctolagus_cuniculus_XP_008246696.1 XP_008246696.1 histone: H2B variant: canonical_H2B organism: Oryctolagus cuniculus -----------------------------------------------------MP--E--- 
----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->canonical_H2B_Cricetulus_griseus_XP_007634673.1 XP_007634673.1 histone: H2B variant: canonical_H2B organism: Cricetulus griseus -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SAK-------- ->canonical_H2B_Homo_sapiens_NP_001299582.1 NP_001299582.1 histone: H2B variant: canonical_H2B organism: Homo sapiens -----------------------------------------------------MP--E--- ----PAKSAP--------------APK----KGSKKAVTKAQ------KK---DGK----- -KRK-RS---RK--ESYSV--YVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SAK-------- ->H2B.1_Canis_lupus_familiaris_XP_005640164.1 XP_005640164.1 histone: H2B variant: H2B.1 organism: Canis lupus familiaris -----------------------------------------------------MP--EL-- ----TSKGTT--------------ISK----KGFKRAVAKTQ------KK---EGK----- -KRR-RC---RK--ESYSI--YIYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->H2B.1_Homo_sapiens_NP_733759.1 NP_733759.1 histone: H2B variant: H2B.1 organism: Homo sapiens -----------------------------------------------------MP--EV-- ----SSKGAT--------------ISK----KGFKKAVVKTQ------KK---EGK----- -KRK-RT---RK--ESYSI--YIYKVLKQVHPDTGISSKAMSIMNSFVTDIFERIASEASR -LAH----YSKRSTISSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->H2B.1_Mus_musculus_NP_783594.1 NP_783594.1 histone: H2B variant: H2B.1 organism: Mus musculus -----------------------------------------------------MP--EV-- ----AVKGAT--------------ISK----KGFKKAVTKTQ------KK---EGR----- -KRK-RC---RK--ESYSI--YIYKVLKQVHPDTGISSKAMSIMNSFVTDIFERIASEASR 
-LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->H2B.1_Rattus_norvegicus_NP_072169.1 NP_072169.1 histone: H2B variant: H2B.1 organism: Rattus norvegicus -----------------------------------------------------MP--EV-- ----SAKGTT--------------ISK----KGFKKAVTKTQ------KK---EGR----- -KRK-RC---RE--ESYSI--YIYKVLKQVHPDTGISSKAMSIMNSFVTDIFERIAGEASR -LAH----YNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYT------------- --SSK-------- ->H2B.W_Macaca_mulatta_NP_001180847.1 NP_001180847.1 histone: H2B variant: H2B.W organism: Macaca mulatta ---------------------------------MLRTQVPPLLRSTTAIVWSCRVMAAASA -MAEPSSETT--------------SEEQLITQEPKEANSTMAQKQSKQRK---RGR----- -RGPCRCHANCRG-DSFAT--YFRRVLKQVHQGLSLSREAVSVMDSLVHDILDRIATEAGR -LAR----STKRQTITAWETRIAVRLLLPGEMGKLAESEGTKAVLRTSLYAVQQ------- --QRK-------- ->H2B.W_Homo_sapiens_NP_001002916.4 NP_001002916.4 histone: H2B variant: H2B.W organism: Homo sapiens -----------------------------------------------------MAG----- ----PSSETT--------------SEEQLITQEPKEANSTTS------QKQSKQRKRGRHG -PRRCHS---NCRGDSFAT--YFRRVLKQVHQGLSLSREAVSVMDSLVHDILDRIATEAGH -LAR----STKRQTITAWETRMAVRLLLPGQMGKLAESEGTKAVLRTSLYAIQQ------- --QRK-------- ->H2B.W_Homo_sapiens_Q7Z2G1.2 Q7Z2G1.2 histone: H2B variant: H2B.W organism: Homo sapiens -------------------------MLRTEVPRLPRSTTAIVWSCHLMATASAMA--G--- ----PSSETT--------------SEEQLITQEPKEANSTTS------QK---QSKQRKRG -RHGPRRCHSNCRGDSFAT--YFRRVLKQVHQGLSLSREAVSVMDSLVHDILDRIATEAGR -LAR----STKRQTITAWETRMAVRLLLPGQMGKLAESEGTKAVLRTSLYAIQQ------- --QRK-------- ->H2B.W_Oryctolagus_cuniculus_XP_002720211.1 XP_002720211.1 histone: H2B variant: H2B.W organism: Oryctolagus cuniculus -----------------------------------------------------MA--E--- ----PASHVA---------------SEENLSLEPKTTASSTP------KE---KQP----- -RRR-RR---RRQGHNYSFASYFPKVLKHVHKGLSLSKEAKGVMDSIVRDVFERIAHEAAS -LVR----YSKHSTLTSRDVQSAVRLLLPGQLHKHADVEGTKALLKFI------------- --THP-------- ->H2B.W_Ailuropoda_melanoleuca_XP_002925981.1 XP_002925981.1 histone: H2B variant: H2B.W 
organism: Ailuropoda melanoleuca -----------------------------------------------------MA--E--- ----PGCETS--------------SEESLGTEEPSAANPKSP------KQ---KQKQPRRQ -CRR-RC---RRCPNSFAT--YFPRVLKQVHEGLSLSKKAVSVMDSFLKDIFEHIADEAAH -LARSTKGSNKHSTINSREIQTAVRLLLPGEIGKHTVSKATKAVIRFQ------------- --RCE-------- ->H2B.W_Canis_lupus_familiaris_XP_548517.2 XP_548517.2 histone: H2B variant: H2B.W organism: Canis lupus familiaris -MIPGKPEEGKGSSEGPICDTEVACDVRKCSDYKGASAPQQPPLSVLLSCWEGEPHMAE-- ----PGCETS--------------SEECLGTKEPREAEPETP------KR---KKP----- -RRQCHRRCRRSRSDSFAI--YFPRVLKQVDEGLSLSQKAVSIMDSFVKDIFERIADEASR -LAR----STKRSTITSREIQTAVRLLLPGEIGKYAVSEATKALMRNQVGALEKQVKNVLS -LGCG-------- +>Drosophila|NP_724342.1|cH2B_(Animals) organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +--------------------------MP-P-----------------------------K +T----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------SGKAAK-----------KAGKAQ +-KNITK--TDKKK------K----RKRKE---------------SYAIYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFVNDIFERIAAEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Psammechinus|AAA30025.1|cH2B_(Animals) organism=Psammechinus miliaris phylum=Echinodermata class=Echinoidea +-----------------------M--APTA-----------------------------Q +V----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------AKKGSK-----------KAVKAP +-R--PS--GGKKR------N----RKRKE---------------SYGIYIYKVLKQV--H +PDTGISSRAMIIM-----------------NSFVNDIFERIAGESSRLAQYN----KKST +ISSREIQTAVRLILPGELAKHAVSEGTKAVTKYTTSK----------------------- +---------------------------------------- +>Strongylocentrotus|NP_999710.2|cH2B_(Animals) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea 
+-----------------------M--APTA-----------------------------Q +V----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------AKKGSK-----------KAVKGT +-KT-AX--GGKKR------N----RKRKE---------------SYGIYIYKVLKQV--H +PDTGISSRAMVIM-----------------NSXVBDIFERIAGESSRLAQYN----KKXT +XSSREIQTAVRLILPGELAKHAVSEGTKAVTKYTTSK----------------------- +---------------------------------------- +>Caenorhabditis|NP_505464.1|cH2B_(Animals) organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +-----------------------M--AP-P-----------------------------K +P----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------SAKGAK-----------KAAKTV +-SK-PK--DGKKR------K----HARKE---------------SYSVYIYRVLKQV--H +PDTGVSSKAMSIM-----------------NSFVNDVFERIASEASRLAHYN----KRST +ISSREIQTAVRLILPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Xenopus|NP_001086753.1|cH2B_(Vertebrata) organism=Xenopus laevis phylum=Chordata class=Amphibia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKT +-Q--KK--DGKKR------R----KSRKE---------------SYAIYVYKVLKQV--H +PDTGISSKAMSIM-----------------NSFVNDVFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSAK----------------------- +---------------------------------------- +>Gallus|NP_001073188.1|cH2B_(Vertebrata) organism=Gallus gallus phylum=Chordata class=Aves +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKT 
+-Q--KK--GDKKR------K----KSRKE---------------SYSIYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Ailuropoda|XP_011216221.1|cH2B_(Mammalia) organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +-------------------MPD-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKV +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Bos|NP_001032546.1|cH2B_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Canis|XP_005640164.1|cH2B_(Mammalia) organism=Canis lupus familiaris phylum=Chordata class=Mammalia +-------------------MPELT--SKGT-----------------------------T +I----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------SKKGFK-----------RAVAKT +-Q--KK--EGKKR------R----RCRKE---------------SYSIYIYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Cavia|XP_013006892.1|cH2B_(Mammalia) 
organism=Cavia porcellus phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Cricetulus|XP_007634673.1|cH2B_(Mammalia) organism=Cricetulus griseus phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSAK----------------------- +---------------------------------------- +>Equus|XP_005610255.1|cH2B_(Mammalia) organism=Equus caballus phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSIYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Heterocephalus|XP_012928647.1|cH2B_(Mammalia) organism=Heterocephalus glaber phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ 
+-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Loxodonta|XP_003422331.1|cH2B_(Mammalia) organism=Loxodonta africana phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSIYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSAK----------------------- +---------------------------------------- +>Macaca|XP_011741102.1|cH2B_(Mammalia) organism=Macaca nemestrina phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Monodelphis|XP_007485418.1|cH2B_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +-------------------MPE-P--GKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIASEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- 
+---------------------------------------- +>Mus|NP_783594.1|cH2B_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +-------------------MPEVA--VKGA-----------------------------T +I----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------SKKGFK-----------KAVTKT +-Q--KK--EGRKR------K----RCRKE---------------SYSIYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFVTDIFERIASEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Oryctolagus|XP_008246696.1|cH2B_(Mammalia) organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Rattus|HISTDB_cH2B_1_0|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +-------------------MPEVS--AKGT-----------------------------T +I----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------SKKGFK-----------KAVTKT +-Q--KK--EGRKR------K----RCREE---------------SYSIYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFVTDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Rattus|NP_072169.1|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +-------------------MPEVS--AKGT-----------------------------T +I----------------------------------------------------------- 
+------------------------------------------------------------ +-------------------------------------SKKGFK-----------KAVTKT +-Q--KK--EGRKR------K----RCREE---------------SYSIYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFVTDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Rattus|NP_072173.2|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Sus|XP_005665716.1|cH2B_(Mammalia) organism=Sus scrofa phylum=Chordata class=Mammalia +-------------------MPD-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_003511.1|cH2B.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPE-P--SKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST 
+ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_003518.2|cH2B.11_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPD-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSIYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_001019770.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPD-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKV +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_001154806.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPD-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKV +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSKLIGPILWK--------------- +---------------------------------------- +>Homo|NP_003519.1|cH2B.13_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P 
+A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSIYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_778225.1|cH2B.14_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPD-P--SKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RGRKE---------------SYSIYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIASEASRLAHYN----KRST +ITSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_733759.1|cH2B.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPEVS--SKGA-----------------------------T +I----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------SKKGFK-----------KAVVKT +-Q--KK--EGKKR------K----RTRKE---------------SYSIYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFVTDIFERIASEASRLAHYS----KRST +ISSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_066406.1|cH2B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPE-P--SKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAITKA +-Q--KK--DGKKR------K----RSRKE---------------SYSIYVYKVLKQV--H 
+PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_003509.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_003513.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_003514.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_003516.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_003517.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_066407.1|cH2B.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPE-P--TKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_003515.1|cH2B.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPD-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA 
+-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_066402.2|cH2B.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSIYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSAK----------------------- +---------------------------------------- +>Homo|NP_001299582.1|cH2B.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPE-P--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSAK----------------------- +---------------------------------------- +>Homo|NP_003510.1|cH2B.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MPE-L--AKSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIASEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_003512.1|cH2B.9_(Homo_sapiens) organism=Homo 
sapiens phylum=Chordata class=Mammalia +-------------------MPE-P--VKSA-----------------------------P +V----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAINKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIAGEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Mus|NP_783595.1|cH2B_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +-------------------MPEPS---KSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAISKA +-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIASEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Mus|CAA62299.1|cH2B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +-------------------MPEVA--VKGA-----------------------------T +I----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------SKKGFK-----------KAVTKT +-Q--KK--EGRKR------K----RCRKE---------------SYSIYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFVTDIFERIASEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Mus|NP_835502.1|cH2B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +-------------------MPEPA---KSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA 
+-Q--KK--DGKKR------K----RSRKE---------------SYSVYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIASEASRLAHYN----KRST +ITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Mus|Q64524.3|cH2B.E_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +-------------------MPELA---KSA-----------------------------P +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKKGSK-----------KAVTKA +-Q--KK--DGKKR------K----RSRKE---------------SYSIYVYKVLKQV--H +PDTGISSKAMGIM-----------------NSFVNDIFERIANEASRLAHYN----KRST +ITSREIQTSVRLLLPGELAKHAVSEGTKAVTKYTSAK----------------------- +---------------------------------------- +>Saccharomyces|NP_009553.1|cH2B_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +-------------------MSSAA--EKKP-----------------------------A +S----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KAPAEK-----------KPAAKK +-TSTSV--DGKKR------S----KVRKE---------------TYSSYIYKVLKQT--H +PDTGISQKSMSIL-----------------NSFVNDIFERIATEASKLAAYN----KKST +ISAREIQTAVRLILPGELAKHAVSEGTRAVTKYSSSTQA--------------------- +---------------------------------------- +>Saccharomyces|NP_010510.3|cH2B_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +-------------------MSAKA--EKKP-----------------------------A +S----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KAPAEK-----------KPAAKK +-TSTST--DGKKR------S----KARKE---------------TYSSYIYKVLKQT--H +PDTGISQKSMSIL-----------------NSFVNDIFERIATEASKLAAYN----KKST +ISAREIQTAVRLILPGELAKHAVSEGTRAVTKYSSSTQA--------------------- +---------------------------------------- 
+>Chlamydomonas|XP_001690668.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKP----VTAEAGAEAPAKAEAKPKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKEPSK-----------KAAKEP +-KGDGE--KKDKK--KKK-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_001691007.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKP----ATQEAAAEAPAKAEAKPKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKEPSK-----------KAAKEP +-KGDGE--KKDKK--KKK-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_001691162.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKEPSK-----------KAAKEP +-KGDGE--KKDKK--KKK-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_001691541.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAG-KK 
+A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKEPAK-----------KAAKEP +-KGDGE--KKDKK--KKK-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_001691693.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKS----ATQEAGAEAPAKAEAKPKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKEPSK-----------KAAKEP +-KGDGE--KKDKK--KKK-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_001692948.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKK---DAAAPEAAEPKAEKESKPKADKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKSPAK-----------KAAKE- +-GGDGE--KGDKKKGKKK-------SSVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_001693071.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKK---DAAAPEAAEPKAEKESKPKADKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKAPAK-----------KAAKE- 
+-GGDGE--KGDKKKGKKK-------SSVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_001693722.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKPATRAATQEAGAEATAKAEAKPKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKEPAK-----------KAAKEP +-KGDGE--KKDKK--KK--------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_001696245.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKEPSK-----------KTAKEP +-KGDGE--KKDKK--KKK-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_001696283.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKP----ATAEAGAEAPAKADAKPKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKEPSK-----------KAAKEP +-KGDGE--KKDKK--KKK-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ 
+---------------------------------------- +>Chlamydomonas|XP_001696556.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKEPSK-----------KAAKEP +-KGEGE--KKDKK--KKK-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_001700194.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKA--AEKA------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------PAK-----------KTPAKT +-AEGSK--KKKKL------------NKAE---------------TYKVYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFDKMANEAVRLAQYN----KKPT +LTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTST------------------------ +---------------------------------------- +>Chlamydomonas|XP_001700403.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKP----ATQEAGAEAPAKAEAKPKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKEPSK-----------KAAKEP +-KGDGA--KKDKK--KKK-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_001700461.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae 
+-------------------MAPKK--DEKP----ATQEAGAEAPAKAEAKPKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKEPSK-----------KAAKEP +-KGDGE--KKDKK--KKK-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_001702223.2|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKP----ATQEAAAEAPAKAEAKPKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +--------------------------------------KEPSK-----------KAAKEP +-KGDGE--KKDKK--KKK-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_042914553.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKP----PTQEAGAEAPAKAEAKPKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKEPSK-----------KAAKEP +-KGDGE--KKDKK--KKK-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_042914596.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKR--DEKP----ATQEAGAEAPAKAEAKPKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ 
+-------------------------------------KKEPSK-----------KAAKEP +-KGDGA--KKDKK--KKK-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Chlamydomonas|XP_042923653.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKP--------------------PKAEKAA-KK +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKEPSK-----------KAAKEP +-KGDGE--KKDKK--KKS-------SAVE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Volvox|XP_002946193.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKNVKEEKA-----EEKAEAGAAAKAEAKAKAAKPA-KK +E----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKAPAK-----------KAAKEP +-SAGGE--GEGDKKAKKKAK----VAKSE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS------------------------ +---------------------------------------- +>Volvox|XP_002946194.1|cH2B_(Chlorophyta) organism=Volvox carteri f. 
nagariensis phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKNVKEEKV---EEKAEAGAAAKAKAEAKAKAAKPA-KK +E----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKAPAK-----------KAAKEP +-SAGGE--GEGDKKAKKKAK----VAKSE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS------------------------ +---------------------------------------- +>Volvox|XP_002946213.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKG--KKADAAGAEAKPDPSELKAEPVSEGKADAKP-KK +E----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKAAGK-----------KAAKD- +-SGAGE--EVEKK-GKKKAK----VAKSE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASRLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTAS------------------------ +---------------------------------------- +>Volvox|XP_002947412.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--EEKP-----ASQAAEAPEVKAEAKPKAVKAPKKK +E----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKAPAK-----------KVAKEP +-SAGGE--DGDKK-SKKKAK----VAKSE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA------------------------ +---------------------------------------- +>Volvox|XP_002947842.1|cH2B_(Chlorophyta) organism=Volvox carteri f. 
nagariensis phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKT------ASQPADAEPKAEAKPKETKV--KK +E----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKAPAK-----------KAAKEP +-AAGGE--EGDKK-AKKKTK----VSKSE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASRLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA------------------------ +---------------------------------------- +>Volvox|XP_002948133.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +--------------------MAKK--DEKT-----ASQPADAAEPKADAKPKAAKV--KK +E----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKAPAK-----------KTAKEP +-VAGGE--EGDKK-AKKKAK----VAKSE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATESSRLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA------------------------ +---------------------------------------- +>Volvox|XP_002948276.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKT-----ASQPADAAEPKADAKPKAAKV--KK +E----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKAPAK-----------KAAKEP +-VAGSE--EGEKK-AKKKAK----VAKSE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSVM-----------------NSFINDIFEKVATEASRLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA------------------------ +---------------------------------------- +>Volvox|XP_002948472.1|cH2B_(Chlorophyta) organism=Volvox carteri f. 
nagariensis phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKA--AEKA------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------PAK-----------KTPAKT +-AEGSK--KKKKI------------NKAE---------------TYKVYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFDKMANEAVRLAQYN----KKPT +LTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS------------------------ +---------------------------------------- +>Volvox|XP_002951705.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKK--DEKA-----AAPAADAPEVKVEAKPKKARVP-KK +E----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKAPAK-----------KVAKEP +-AAGGE--EGDKK-AKKKAK----VAKSE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS------------------------ +---------------------------------------- +>Volvox|XP_002955481.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKNVKEEKQ-------EKAEAVEPKAEAKPKKEKAP-KK +E----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKAPAK-----------KAAKEP +-SAAGD--DGDKK-AKKKAK----VSKSE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA------------------------ +---------------------------------------- +>Volvox|XP_002956800.1|cH2B_(Chlorophyta) organism=Volvox carteri f. 
nagariensis phylum=Chlorophyta class=Chlorophyceae +-------------------MAPKTK-EEKP--------ASEAVEPKAEAKPKAEKAPKKK +E----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KKAPAK-----------KSAKEP +AAGDAA--EGDKK--KKKAK----VAKSE---------------TYKLYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKVATEASKLSRYN----KKPT +VTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA------------------------ +---------------------------------------- +>Micromonas|XP_002501781.1|cH2B_(Chlorophyta) organism=Micromonas commoda phylum=Chlorophyta class=Mamiellophyceae +--------------------------MAKP------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------TSK-----------KPAKKT +-VAKGG--SKAKK------------SKTE---------------TYKIYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKIATEASKLARYN----KKPT +VTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSN------------------------ +---------------------------------------- +>Micromonas|XP_002503973.1|cH2B_(Chlorophyta) organism=Micromonas commoda phylum=Chlorophyta class=Mamiellophyceae +--------------------------MAKP------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------TSK-----------KPAKKT +-VKGAG--GKAKK------------SKTE---------------TYKIYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKIATEASKLARYN----KKPT +VTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSN------------------------ +---------------------------------------- +>Micromonas|XP_003055512.1|cH2B_(Chlorophyta) organism=Micromonas pusilla CCMP1545 phylum=Chlorophyta class=Mamiellophyceae +--------------------------MAKP------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +----------------------------------------TSK-----------KPAKKS +-LKGGK--KGGKK------------SKTE---------------TYKIYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKIATEASKLARYN----KKPT +VTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSA------------------------ +---------------------------------------- +>Ostreococcus|XP_001419128.1|cH2B_(Chlorophyta) organism=Ostreococcus lucimarinus CCE9901 phylum=Chlorophyta class=Mamiellophyceae +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------MAK-----------KPAQKK +-PSGAK--KVGRK------------SKSE---------------TYKIYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFINDIFEKIATEAAKLARYN----KKPT +VTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSA------------------------ +---------------------------------------- +>Coccomyxa|XP_005643326.1|cH2B_(Chlorophyta) organism=Coccomyxa subellipsoidea C-169 phylum=Chlorophyta class=Trebouxiophyceae +-------------------MAPKG-----------------------------------S +Q----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------GREEAR-----------QKGHQD +-RQDWC--KKKTK------------AKVE---------------SFKIYMCEVLKQA--H +PYTGISSWAISIL-----------------NAFVTDTFGKMATETAQLARYN----KKPT +VASGKIQTALRLILPGKLAKHTVSEGSKAVTESTSAAITP-------------------- +---------------------------------------- +>Coccomyxa|XP_005643701.1|cH2B_(Chlorophyta) organism=Coccomyxa subellipsoidea C-169 phylum=Chlorophyta class=Trebouxiophyceae +-------------------MAPKA--E--------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------KKPAK-----------KVAKTA 
+-KTGGK--RKSK-------------AKVE---------------SFKIYIYKVLKQV--H +PDTGISSRAISIL-----------------NSFITDIFEKIATETAQLARYN----KKPT +VTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSA------------------------ +---------------------------------------- +>Arabidopsis|Q9FFC0.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +--------------------MAKA--DKKP------AEKK-----PAEKTPAAEPAAAAE +K----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KPKAGK-----------KLPKEP +-AGAGD--KKKKR------S----KKNVE---------------TYKIYIFKVLKQV--H +PDIGISSKAMGIM-----------------NSFINDIFEKLAGESSKLARYN----KKPT +ITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS------------------------ +---------------------------------------- +>Arabidopsis|Q9LQQ4.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-------------------MAPRA--EKKP------AEKKTAAERPVEENKAAEK-APAE +K----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KPKAGK-----------KLPPK- +--EAGD--KKKKR------S----KKNVE---------------TYKIYIFKVLKQV--H +PDIGISSKAMGIM-----------------NSFINDIFEKLAQESSKLARYN----KKPT +ITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS------------------------ +---------------------------------------- +>Arabidopsis|Q9LZ45.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-------------------MAPKA--EKKP------AEKA-----PA------------- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------PKAEK-----------KIAKE- +-GGTSEIVKKKKK------T----KKSTE---------------TYKIYIFKVLKQV--H +PDIGISGKAMGIM-----------------NSFINDIFEKLAQESSRLARYN----KKPT +ITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS------------------------ +---------------------------------------- 
+>Arabidopsis|Q9LZT0.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-------------------MAPKA--EKKP------AEKK-----PVEEKSKAEK-APAE +K----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------KPKAGK-----------KLPKEA +-GAGGD--KKKKM------K----KKSVE---------------TYKIYIFKVLKQV--H +PDIGISSKAMGIM-----------------NSFINDIFEKLASESSKLARYN----KKPT +ITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS------------------------ +---------------------------------------- +>Trypanosoma|XP_823266.1|cH2B_(Protists) organism=Trypanosoma brucei brucei TREU927 phylum=Euglenozoa class=Kinetoplastea +--------------------------MATP-----------------------------K +S----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------TPAKTR-----------KEAKKT +-------------------R----RQRKR---------------TWNVYVSRSLRSI--N +SQMSMTSRTMKIV-----------------NSFVNDLFERIAAEAATIVRVN----RKRT +LGARELQTAVRLVLPADLAKHAMAEGTKAVSHASS------------------------- +---------------------------------------- +>Psammechinus|AAB48832.1|CS_H2B_(Echinoidea) organism=Psammechinus miliaris phylum=Echinodermata class=Echinoidea +-------------------MPAKG-----------------------------------A +A----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------TKGEKK-----------QAVKSK +-AMASSRTGDKKR------R----RRRLE---------------SYNIYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFVNDIFERIAAEASRLAQYN----KKST +ISSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR----------------------- +---------------------------------------- +>Strongylocentrotus|P02289.2|early_H2B_(Echinoidea) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +-------------------MAPTA-----------------------------------Q 
+V----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------AKKGSK-----------KAVKGT +-KTAX---GGKKR------N----RKRKE---------------SYGIYIYKVLKQV--H +PDTGISSRAMVIM-----------------NSFVNDIFERIAGESSRLAQYN----KKST +ISSREIQTAVRLILPGELAKHAVSEGTKAVTKYTTSK----------------------- +---------------------------------------- +>Lilium|CUT18445.1|gH2B organism=Lilium davidii var. unicolor phylum=Streptophyta class=Magnoliopsida +-------------------MAPKSEKKPAEKKPVAEKPAAEEEKKSAPAPAAAEKKPAEK +K----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKAGKK-----------LPASKG +-KEGEK--KK-------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +---------------------------------------- +>Lilium|CUT18446.1|gH2B organism=Lilium davidii var. unicolor phylum=Streptophyta class=Magnoliopsida +---------------------------------------------------------AEK +K----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKAGKK-----------VPASKE +-GEKKK--KR-SK------------KSVE---------------TYKIYIFKVLKQV--H +PDIGISSKAMGIM-----------------NSFINDLFEKMAQESARLARYN----KKNT +ITSREIQTSVRLVLPGELAKHAVSEGTKAVTKFTSA------------------------ +---------------------------------------- +>Lilium|CUT18447.1|gH2B organism=Lilium davidii var. 
unicolor phylum=Streptophyta class=Magnoliopsida +-------------------MAPKAEKKPAAKKPAATPPPEEEKE---VVPPP----PAEK +K----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKAGKK-----------LPAAKE +-GDAKK--KKKAK------------KSIE---------------TYKIYIFKVLKQV--H +PDIGISSKAMGIM-----------------NSFINDIFEKLAQESARLARYN----KKPT +ITSREIQTSVRLVLPGELAKHAVSEGTKAVTKFTSA------------------------ +---------------------------------------- +>Lilium|CUT18448.1|gH2B organism=Lilium davidii var. unicolor phylum=Streptophyta class=Magnoliopsida +---------------------------------AATPPPEEEKE---VVPPPAEKKPAEK +K----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKAGKK-----------LPASKE +-GDAKK--KKKSK------------KSIE---------------TYKIYIFKVLKQV--H +PDIGISSKAMGIM-----------------NSFINDIFEKLAQESARLARYN----KKPT +ITSREIQTSVRLVLPGELAKHAVSEGTKAVTKFTSA------------------------ +---------------------------------------- +>Lilium|CUT18449.1|gH2B organism=Lilium davidii var. unicolor phylum=Streptophyta class=Magnoliopsida +-------------------MPPR-------------------RKKTAAGAAAGGKAAAAA +V----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------GKAGF------------MPPKKP +-KKGKK--KT----------------PIM---------------RYKRYIYKVLKQV--R +PELGISEKSTMIM-----------------NNFVAHNFQNIAKEASILAYYS----KKRT +ITVDELKAAVAMVLPNLLADYANRDGEKAVSNFEGEASAKKSQGRKRGRGQQA------- +---------------------------------------- +>Lilium|CUT18450.1|gH2B organism=Lilium davidii var. 
unicolor phylum=Streptophyta class=Magnoliopsida +-------------------MAPK--KKPSKLVGTVTKTRKVTETQTLKVSLTKGLKPEDQ +QTTTNKFEVSVTGKQSKTQPLIV------------------------------------- +------------------------------------------------------------ +-------------------------STNTNLV-----PKKEKE-----------ESPTTT +-LMVKK--KRKNR------------KAGG---------------EYKRYVYMVLKTV--H +PDMTVSSKAMMVM-----------------EGMMQDMFERLVTEAVRLVQYM----KKAT +LTCREIQTAVMLVLPGELGKHAVSEGAKAITNYMAAVGSGNGGAA--------------- +---------------------------------------- +>Lilium|CUT18451.1|gH2B organism=Lilium davidii var. unicolor phylum=Streptophyta class=Magnoliopsida +-------------------MAPKAEKKPAAKKPAATPPPEEEKE---VVPPP----PAEK +K----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKAGKK-----------LPAAKE +-GDAKK--KKKAK------------KSIE---------------TYKIYIFKVLKQV--H +PDIGISSKAMGIM-----------------NSFINDIFEKLAQESARLARYN----KKPT +ITSREIQTSVRLVLPGELAKHAVSEGTKAVTKFTSG------------------------ +---------------------------------------- +>Lilium|CUT18452.1|gH2B organism=Lilium davidii var. 
unicolor phylum=Streptophyta class=Magnoliopsida +-------------------MAPKSEKKPAEKKPVAEKPAAEEEKKAAPAAAPAEKKAAEK +K----------------------------------------------------------- +------------------------------------------------------------ +-------------------------------------PKA-------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------------------------------- +>Latimeria|XP_006007163.1|H2B.K organism=Latimeria chalumnae phylum=Chordata class= +-------------------MTNDPGK---------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------KKSK-------------NPGEK +--------KSSKK------K----AKRRE---------------TYSVYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFVNDVFERIATEASRLAQYN----KRST +ITSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Danio|NP_001002724.1|H2B.K organism=Danio rerio phylum=Chordata class=Actinopteri +-------------------MSNEGAK---------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------KKGK-------------APGDK +--------KGSKR------K----SKRRE---------------TYAVYIYKVLKQV--H +PDTGISSRAMSIM-----------------NSFVNDVFERIATEASRLAHYN----KRST +ITSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Gallus|XP_423715.4|H2B.K organism=Gallus gallus phylum=Chordata class=Aves +-------------------MSAESGR---------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------MRGHP-----------SSSGDK 
+--------K-SKR------K----PKRKE---------------TYSVYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFVNDIFERLAVEASRLAQYN----HRST +ITSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Taeniopygia|XP_002190629.1|H2B.K organism=Taeniopygia guttata phylum=Chordata class=Aves +-------------------MSSERLK---------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------KRGHA-----------VASGKK +--------S-SKR------K----PKRKE---------------AFSVYIYKVLKQV--H +PDLAISSKAMSIM-----------------NSFVNDMLERLAAEASRLARYR----CHTT +VSSREVQAAARQLLPGQLAQHAVSEGTKAVTKYTTSK----------------------- +---------------------------------------- +>Ailuropoda|XP_019651116.1|H2B.K organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +-------------------MSAEHGRQQQP-----------G------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GRRGR-------------SSGDK +--------K-SRK------R----SRRKE---------------TYSMYIYKVLKQV--H +PDIGISSKAMSIM-----------------NSFVNDVFERLAGEAARLAQYS----GRTT +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Bos|XP_010799227.1|H2B.K organism=Bos taurus phylum=Chordata class=Mammalia +-------------------MSAEHGQLQQS-----------G------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GRRGR-------------SPGDK +--------K-SRR------R----SRRKE---------------TYSMYIYKVLKQV--H +PDIGISSKAMSIV-----------------NLFVNDLFERLAGKAAWLAQYS----GRTT +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Bos|XP_024846715.1|H2B.K organism=Bos taurus phylum=Chordata class=Mammalia 
+-------------------MSAEHGQLQQS-----------G------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GRRGR-------------SPGDK +--------K-SRR------R----SRRKE---------------TYSMYIYKVLKQV--H +PDIGISSKAMSIM-----------------NSFVNDLFERLAGEAARLAQYS----GRTT +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Bos|XP_059747847.1|H2B.K organism=Bos taurus phylum=Chordata class=Mammalia +MMDTRSSKLVCHNILRTRRMSAEHGQLQQS-----------G------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GRRGR-------------SPGDK +--------K-SRR------R----SRRKE---------------TYSMYIYKVLKQV--H +PDIGISSKAMSIM-----------------NSFVNDLFERLAGEAARLAQYS----GRTT +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Canis|HISTDB_H2B_K_1|H2B.K organism=Canis lupus familiaris phylum=Chordata class=Mammalia +-------------------MGTEHGQQPQS-----------G------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GRRGH-------------GSGDK +--------K-SKK------H----SRRKE---------------TYSMYIYKVLKQV--H +PDIGIFSKAMSIM-----------------NSFVNDVFERLAGKAAQLAQYL----GQTT +LTSWEVQTAVRWLLPGELAKHAISEGTKAITKYTGSK----------------------- +---------------------------------------- +>Canis|XP_022259586.1|H2B.K organism=Canis lupus familiaris phylum=Chordata class=Mammalia +-------------------MGAEHGQQPQS-----------G------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GRRGR-------------GSGDK +--------K-SKK------R----SRRKE---------------TYSMYIYKVLKQV--H 
+PDIGISSKAMSIM-----------------NSFVNDVFERLAGEAARLAQYS----GRTT +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Ceratotherium|XP_014643104.1|H2B.K organism=Ceratotherium simum simum phylum=Chordata class=Mammalia +-------------------MSTEHGQQHHP-----------G------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GRRGC-------------SPGDK +--------K-FKK------R----SRRKE---------------TYSMYIYKVLKQV--H +PDIGISSKAMSIM-----------------NSFVNDVFERLAGEAARLAQYS----GRTT +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Equus|XP_005609614.1|H2B.K organism=Equus caballus phylum=Chordata class=Mammalia +-------------------MSTEHGQQHQS-----------G------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GRRGC-------------SSGDK +--------K-SKK------R----SRRKE---------------TYSMYIYKVLKQV--H +PDIGISSKAMSIM-----------------NSFVNDVFERLAGEAAQLAQYS----GRTT +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Felis|XP_019681595.1|H2B.K organism=Felis catus phylum=Chordata class=Mammalia +-------------------MSAEHGQQQQS-----------G------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GRRGR-------------SSGDK +--------K-SKK------R----SRRKE---------------TYSMYIYKVLKQV--H +PDIGISSKAMSIM-----------------NSFVNDVFERLAGEAARLAQYS----GRTT +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Loxodonta|XP_023403847.1|H2B.K organism=Loxodonta africana phylum=Chordata class=Mammalia 
+-------------------MSAELGQQQQQ--------QQSG------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GQRGR-------------SSGDK +--------K-PKK------R----SRRKE---------------NYSVYIYKVLKQV--H +PDISISSKAMSIM-----------------NSFVNDVFERLAGEATRLAQYS----GRTT +LTSREVQTAARLLLPGELAKHAVSEGTKAVTKYISSK----------------------- +---------------------------------------- +>Ornithorhynchus|HISTDB_H2B_K_2|H2B.K organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +-------------------MSPEGGQQQQQ------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------QPRPR-------------ARGDR +--------R-PKR------R----TRRKE---------------TYSVYIYKVLKQV--H +PDTGISSKAMSIM-----------------NSFVNDVFEQLAGEAARLAQYL----GRST +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Oryctolagus|XP_002715119.2|H2B.K organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +-------------------MSAERGQQQQQ----------AS------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------SRRGR-------------SSGNK +--------K-SRK------R----SKRKE---------------TYSMYIYKVLKQV--H +PDIGISARAMSIM-----------------NSFVNDVFERLAGEAAQLAQYS----GRST +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Ovis|HISTDB_H2B_K_0|H2B.K organism=Ovis aries phylum=Chordata class=Mammalia +-------------------MSAEHGQLQQA-----------G------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GRRGR-------------SPGDK 
+--------K-SRR------R----SRRKE---------------TYSMYIYKVLKQV--H +PDIGISSKAMSIM-----------------NSFVNDLFERLAGEAARLAQYS----GRTT +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Ovis|XP_027824938.1|H2B.K organism=Ovis aries phylum=Chordata class=Mammalia +-------------------MSAEHGQLQQS-----------G------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GRRGR-------------SPGDK +--------K-SRR------R----SRRKE---------------TYSMYIYKVLKQV--H +PDIGISSKAMSIM-----------------NSFVNDLFERLAGEAARLAQYS----GRTT +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Sus|XP_013846203.1|H2B.K organism=Sus scrofa phylum=Chordata class=Mammalia +-------------------MSSAHGQQQQQQQQQQQQQQQGG------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GRRGR-------------SSGEK +--------K-SKK------R----NRRKE---------------TYSMYIYKVLKQV--H +PDIGISSKAMSIM-----------------NSFVNDVFERLAGEAARLAQYS----GRTT +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Homo|NP_001356054.2|H2B.K_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MSAEYGQRQQP-----------G------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------GRGGR-------------SSGNK +--------K-SKK------R----CRRKE---------------SYSMYIYKVLKQV--H +PDIGISAKAMSIM-----------------NSFVNDVFEQLACEAARLAQYS----GRTT +LTSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK----------------------- +---------------------------------------- +>Ailuropoda|XP_002921383.1|H2B.L organism=Ailuropoda melanoleuca phylum=Chordata 
class=Mammalia +-------------------MARSITKKNKR------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------SRGHRS-----------PISKKK +--------S-HSS------T----DFGRR---------------NYSLYINRVLKEV--V +PQRSISSRTLDVM-----------------NTLIKDIFERISVEARSLMCFR----NRCT +LTPEDIQKAVYLLLPGKLAKYAVAFGSEAVQRYVQS------------------------ +---------------------------------------- +>Bos|NP_991343.1|H2B.L organism=Bos taurus phylum=Chordata class=Mammalia +-------------------MARNVTKRNKR------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------CRGHQK-----------AIYKKK +--------S-HSS------S----ESGLR---------------NYSLYINRVLKEV--V +PQKGISSRTIDII-----------------NTMINDMFERISTEACNLMYYR----KRCT +LTPEDIEKAVYLLLPEKLAKYAVAFGKEAVQRYVRS------------------------ +---------------------------------------- +>Cavia|XP_003461969.1|H2B.L organism=Cavia porcellus phylum=Chordata class=Mammalia +-------------------MVKSVIKPRRY------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------FRGRRT-----------SISSKK +--------S-CLS------S----NSGYR---------------NYSLYVSRVLKEV--V +PERAISSCTVNIM-----------------NTLIDDIFERISEEAHHLMCSQ----KRCT +LTPKDIQKAVYLLLPRKLAKYAVAFGDGAVDRYVHS------------------------ +---------------------------------------- +>Cricetulus|XP_003515979.1|H2B.L organism=Cricetulus griseus phylum=Chordata class=Mammalia +-------------------MAKSIIKRYQF------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------VKRRQR-----------RAFRKR 
+----------YSS------I----NFGQR---------------NYSLYISRVLKEV--V +PMRGLSSNTVDIM-----------------NTLINDLFERIATEACQLMYFR----KRCT +LTLEDIQKAVYLLVPKKLAKSAVTFGSKAVHRFIHS------------------------ +---------------------------------------- +>Equus|XP_001504072.2|H2B.L organism=Equus caballus phylum=Chordata class=Mammalia +-------------------MARDSTKKSRC------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------SRRRQS-----------PASRKK +--------S-HAS------T----YRGHR---------------NYSLYINRVLKEV--V +PQRGISARTLDTM-----------------NILINNIFERISTEACSMMYFR----NRCT +LTPQDVQKAVYSLFPGKLAKYAVAFGSEAVQRYLHS------------------------ +---------------------------------------- +>Heterocephalus|XP_004885290.1|H2B.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia +-------------------MVRSIIKQYGY------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------SRRHLT-----------PTYRKK +--------S-YLS------T----SFGHR---------------NYSLYISRVLKEV--A +PQRHISSRTLDMM-----------------NALINNIFERIATEAHHLMCSR----NRCT +LAPEDIQRAVYLLLPGKLAKYAMAFGDEAVHRYVHS------------------------ +---------------------------------------- +>Loxodonta|XP_003407998.1|H2B.L organism=Loxodonta africana phylum=Chordata class=Mammalia +-------------------MARSIIKKYRY------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------SNGHLS-----------SISIKK +--------L-HSS------T----NFGHR---------------NYSLYVNRVLKEV--V +PQRGISSRTLDVM-----------------NTLINNIFKCIATEACNLMYFR----NRCT +LTPEDIQRAVYVRLPGKLAKHAVAFGSEAVNRYVHS------------------------ +---------------------------------------- +>Macaca|XP_001095287.1|H2B.L organism=Macaca mulatta 
phylum=Chordata class=Mammalia +-------------------MARSSTKKHKY------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------SKRHQS-----------PTSRKK +--------A-HSS------I----DFVHG---------------NYSFFVNKVLKEV--V +SHRGTSSRTLDLM-----------------NTLINNFFQHISMKAYRLMYFR----NRCT +LTPEDILKAAYLLLPQKTANYAVAFGSEVFRRYVHS------------------------ +---------------------------------------- +>Monodelphis|XP_001366286.1|H2B.L organism=Monodelphis domestica phylum=Chordata class=Mammalia +-------------------MTKAV--RSNE------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------SQKLTT-----------NQEKKK +--------K-KKK------K----LLLSR---------------NYSLYTHRVLKEV--I +PNQGLTYKTTEIM-----------------NSMINNILERIAEEAGNLLCYK----RHLT +LGHQDIQMAVYRLLPDELAKHAVAFGTRAVTTYNDSK----------------------- +---------------------------------------- +>Mus|NP_081340.1|H2B.L organism=Mus musculus phylum=Chordata class=Mammalia +-------------------MAKPTFKRQCY------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------IKRHLR-----------PLYRKH +--------SRCSS------I----NLGHG---------------NYSLYINRVLKEV--V +PNRGISSYSVDIM-----------------NILINDIFERIATEACQQMFLR----KRCT +LTPGDIQQAVHLLLPKKLATLAVTFGSKAVHRFIHS------------------------ +---------------------------------------- +>Sus|XP_003134084.1|H2B.L organism=Sus scrofa phylum=Chordata class=Mammalia +-------------------MARYITKKNRC------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------SRGHRH-----------PNSRKK 
+--------T-CSS------T----ECGRR---------------NYSLYVNRVLKEV--V +PQSGISSRTLDMM-----------------NTVINDIFERISMEASNLMYFR----NRCT +LTPEDVQKAVYLLLPRKLAKHAVAFGSDAVHRYVHS------------------------ +---------------------------------------- +>Ailuropoda|HISTDB_H2B_N_3|H2B.N organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +-------------------MYYVCLHDPRF------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PKKRTTL----------YIPAKA +KYECAN--SAL------RHK----RKKKE---------------VYFSYMGKILKQT--H +PDFSGCSWILDAL-----------------GSLEDWLLEWVSLEAVRLSFYN----HRRA +VTSREILGAVKQRSFRKSFCINKVF----------------------------------- +---------------------------------------- +>Bos|XP_059734412.1|H2B.N organism=Bos taurus phylum=Chordata class=Mammalia +-------------------MYFICLHGLQF------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PKRKLTI----------YIPAKE +KDEWVH--SATG-----KKR----RKKKE---------------TYFNYMGKLLKQV--H +PDFSGCSWILDAL-----------------RVLEDWQLEWVSLEAVRLSLYN----HRRT +ITSREILEAVKQRCSQKSLGINEVDLHGSVVEMIALVQKQKIGSFGGLS----------- +---------------------------------------- +>Bos|XP_059746706.1|H2B.N organism=Bos taurus phylum=Chordata class=Mammalia +-------------------MHFICLHGLQF------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PKRKLTI----------YIPAKE +KDEWVC--SATG-----KKR----RKKKE---------------AYFNYMEKLLKQV--H +PDFSGCSWILDAL-----------------RVLEDWQLEWVSLEAVRLSFYN----HRRT +ITTKEILKAVKQRCSQKSLGINEVDLHGSVVEMIALVQKQKIGSFGGLS----------- +---------------------------------------- +>Canis|HISTDB_H2B_N_2|H2B.N organism=Canis lupus familiaris phylum=Chordata 
class=Mammalia +-------------------MYYICLHGLRF------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PEKRTIL----------YIPARE +KYEWAN--SAL------RKK----RKKKE---------------VYFSYMGKILKQT--H +PDFSGCSWILDAL-----------------GSLEDWLLEQVSLEAVRLSFYN----HRRA +VTSREILGAIKQRSFLKSFCVNEVF----------------------------------- +---------------------------------------- +>Ceratotherium|HISTDB_H2B_N_1|H2B.N organism=Ceratotherium simum phylum=Chordata class=Mammalia +-------------------MYFICLRGLRF------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PKKTTN-----------YILAKK +KYEWTS--SAIGK----KRR----RKKKE---------------AYFSYMGKILEQIAHY +RKLSRLCLILVPFLPRPTQTSVGAPGSWMHWALEAWRLEWVSLEAVRLSFCN----HRRA +VTSREILEAVKRRSSWKSF----------------------------------------- +---------------------------------------- +>Dasypus|XP_058139847.1|H2B.N organism=Dasypus novemcinctus phylum=Chordata class=Mammalia +-------------------MYYVCLDSLKF------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PKKKTDV----------YSLAER +KYEWAR--SAFGKRRRRRWR----RKKKE---------------VYFSYMRKILKQV--H +ADFSGCSWVLDAL-----------------GSLDDWRLEWVSLEAVRLSFYN----HRRA +VTSREILEAVKQRLSWKSF----------------------------------------- +---------------------------------------- +>Loxodonta|HISTDB_H2B_N_4|H2B.N organism=Loxodonta africana phylum=Chordata class=Mammalia +-------------------MYYVCLGGLKF------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PKKSEV-----------HIPAKK 
+KYEWAN--SAFEKKRR-RRR----RKKKE---------------AHFCYMGKILKQT--H +PDFSGCSWVLEAL-----------------GCLDDWQLEWVSLEAVRLSFYK----HRRA +ITSREILEAMKQRSPRRSF----------------------------------------- +---------------------------------------- +>Monodelphis|XP_007485268.2|H2B.N organism=Monodelphis domestica phylum=Chordata class=Mammalia +---------------------------MKI------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PRAGATV-------PRSFITTGK +RRGYMR--TVSG-------------KKKD---------------FYFSYIAKILKQV--H +QDFSGYSWVLDAL-----------------WSLDYYLFEQATLEAVRLSFYN----HRRV +VTSREMLETLSKVPLEGWM----------------------------------------- +---------------------------------------- +>Monodelphis|XP_007485606.1|H2B.N organism=Monodelphis domestica phylum=Chordata class=Mammalia +---------------------MEVGTEMKI------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PRAGATV-------PRSFLRTGK +RRGYMR--TVSG-------------KKKD---------------FYFSYIAKILKQV--H +QDFSGYSWVLDAL-----------------WSLDYYLFEQATLEAVRLSFYN----HRRV +VTSREMLEALNKVPLEGWM----------------------------------------- +---------------------------------------- +>Monodelphis|XP_007485607.1|H2B.N organism=Monodelphis domestica phylum=Chordata class=Mammalia +-------------------ME---GTEMKI------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PRAGATV-------PRSFLRTGK +RRGYMR--TVSG-------------KKKD---------------FYFSYIAKILKQV--H +QDFSGYSWVLDAL-----------------WSLDYYLFEQATLEAVRLSFYN----HRRV +VTSREMLEALNKVPLEGWM----------------------------------------- +---------------------------------------- +>Ornithorhynchus|HISTDB_H2B_N_5|H2B.N 
organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +-------------MGGPLGPGFLSLLETETKSPAVA------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PEAEMEG----------PVEARK +EYRCVR--TSLS-------------KKKE---------------AYSSYIAHVLKQT--Q +PEPRGWGRAEGNL-----------------ESRDGQLLERVAGEAVRLTLLQ----AAKT +VTSRVVRGALELVLAELVEE---------------------------------------- +---------------------------------------- +>Ovis|HISTDB_H2B_N_0|H2B.N organism=Ovis aries phylum=Chordata class=Mammalia +-------------------MHFICLHGLQF------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PKRKLTI----------SIPAKE +KDEWVH--SATG-----KKR----RKKKE---------------AYFNYMGKLLKQG--H +PDFSGCSWILDAL-----------------RALEDWQLEWVSLEAVRLSLYN----HRRT +VTSREILEAVQQRCSQKTLGINEVALHGSVVEMIALVQKQKIGSFGGLS----------- +---------------------------------------- +>Ovis|XP_060251208.1|H2B.N organism=Ovis aries phylum=Chordata class=Mammalia +-------------------MHFICLHGLQF------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PKRKLTI----------SIPAKE +KDEWVH--SATG-----KKR----RKKKE---------------AYFNYMGKLLKQG--H +PDFSGCSWILDAL-----------------RALEDWQLEWISLEAVRLSLYN----HRRT +VTSREILEAVQQRCSQKTLGINEVALHGSVVEMIALVQKQKIGSFGGLS----------- +---------------------------------------- +>Homo|NP_001388269.1|H2B.N_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-------------------MYFICLNDLRF------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------PKNKTEL----------YFPVKK 
+KHEWAN--SATGKKRRWRKK-----RRKE---------------AYFSYMGKILKQI--H +PDFSGRSWVLYAL-----------------GALNAWQLEWVSLEAFRLSFYN----HRRA +ITGREILGAVKQRSSQKSF----------------------------------------- +---------------------------------------- +>Ornithorhynchus|XP_001511074.1|H2B.O organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------------------------------------YSIYVYKVLKQV--H +PLTSISTKAVGIM-----------------DSFINDIFERIASEASRLARYN----KRST +ITSREIQTAVLLTLPGELARHAVSEGTKAITKYTS------------------------- +---------------------------------------- +>Ornithorhynchus|XP_001521160.2|H2B.O organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------------------------------------WENYVYKVLKQV--H +PLTSISTKAVGIV-----------------DSFI-DIFKRITSDASHLARYN----KCST +ITSREIQTAVQLMLPGELDRYAGSEGTKAITKYTT------------------------- +---------------------------------------- +>Ornithorhynchus|XP_028926523.1|H2B.O organism=Ornithorhynchus anatinus phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------------------------------------YSIYVYKVLKQV--H +PLTSISTKAVGIM-----------------DSFINDIFDRIASEASRLARYT----KRST +IASREIQTAVLLTLPGELARHAVSEGTKAITKYTS------------------------- +---------------------------------------- +>Chara|GBG59214.1|H2B.S 
organism=Chara braunii phylum=Streptophyta class=Charophyceae +-------------------MAEGGYPLEA----VAGDICG-TSMDPLDPSGTVRRRS-PR +GDGPDDQGVGRDQQ--------------------------------SDQMHLPAEEQRKS +DTQSVVLSRQSTPQQAESAKLPAN---YPEVEIGADRKASKRSKTQ-------------- +-------------------------KKRTAAA-----ENVPGTV--------PGETRPGG +KKESGSRPEGKNK----GRR----RRKRT-------TLIVPSGRTYKIYIYKVLKEI--H +PELAISSQGMSIM-----------------NSFMVDVCERIAAEASRLSRHA----KRAT +LSSRDIQSAVQLCLPGELAVNAMSEAKKAVIKFI-RHGKASERRNIIIKSNRFYLINKMI +VI-------------------------------------- +>Chara|GBG60584.1|H2B.S organism=Chara braunii phylum=Streptophyta class=Charophyceae +-------------MWNLLRMPPGQWSSRS----AASSLPRQNGVVGVRRGRSAAVVV-LE +DSGDGDANGIGIGTGIANGNGNG-----------------------KDMNRRMEKGGALG +TSTATGMMKESSAKKKGGGSGSGA---GSELGSEPGSEPGTGPGAGRHK----------- +-------------------------GSPSGVA-----KGLPMMW--------PTQRTGYV +TPKKKERVTRAKKKAISGSR----RTARK---------TNAFQPSFATHIRRVLKQV--H +PNLSITCDGIDIM-----------------NDFLIDIFERIAGEAALLIRVH----KRCT +LTCREIMAAVQLVLGGELSKHAIHLATEVLTLFS-Q------------------------ +---------------------------------------- +>Amaranthus|HISTDB_H2B_S_7|H2B.S organism=Amaranthus hypochondriacus phylum=Streptophyta class=Magnoliopsida +-------------------MAPKK---------TARKVVK--TTKIVEETVEVVSIP-GS +QSQQNPTQI------------------------------------QTELISERKEEQSSI +TTKSIPIQESDHDDQDE------------------------------------------- +-------------------------EETETQD-----QDHPDLS---------TPPRKEA +PPRQVEPKPEPKG--------------KG---------EGGGE-GYKRYVFRVLKQV--H +PGMGISSRAMIVI-----------------NNFMNDMFERIAEEGSRLNKQN----KKMT +MSAREIQGAVKLVLPGELGKHAVAEGAKAVTNYV-NFGFHKK------------------ +---------------------------------------- +>Ananas|XP_020113371.1|H2B.S organism=Ananas comosus phylum=Streptophyta class=Magnoliopsida +-------------------MAPRKPRK------MVGAVVK-TTAKVVEETVKVAPVV-GV +GDGDGDGDG---------------------------------------------AEEVEE +AAVPLKDSKVVQVVVVGGEKGDGEVPEANDGRDEPEKRKEAMEVDENQAPKETGEESKRR 
+GRGRPL-------------------KERGPET-----PTEKSEI---------PPANKKK +EKDRGGRSEGEAEEGKGRRR----RRRKRRFGSAGDAGSGGVG-GYKRYVFRVLKQV--H +PELGASARAMQVL-----------------DMMMADMFQRLAEEAARLSKYT----GRAT +LTSREIQNAVRLVLPGELGRHAVSEGTKAVTNYMASQSS--------------------- +---------------------------------------- +>Aquilegia|PIA54901.1|H2B.S organism=Aquilegia coerulea phylum=Streptophyta class=Magnoliopsida +-------------------MAPKRSQKK-----VIGSLVR-KTNKVVEETVNITVVEKTK +GAEAGNKET---------------------------------------------EQEETA +ISTKDPVSTPGEKQQKEQPKKGVG---KQLKLEAPTQK---------------------- +-------------------------KEETKSK-----KNEKSTT---------TTTTTTS +SAKEGEEKKGKKS-----------GRKRM---------LDTGE-TYNTYVYKVLKQV--H +PDLGITFKGMMVL-----------------NGFMNDMFERLAREASKLTDYT----GKKT +MSAREIQGAVRLVLPGELGKHAIVEGTKAITTYF-SNSS--------------------- +---------------------------------------- +>Arabidopsis|XP_002892444.1|H2B.S organism=Arabidopsis lyrata subsp. lyrata phylum=Streptophyta class=Magnoliopsida +-------------------MAPRKPK-------VVSVTKK---KKVVEETIKVTV-T-EG +EDPCVTTET-------------------------------------------ANDQETQD +LTFSIPVGENVTTVEI-------------------------------------------- +----PVEVR----------------DEQSPQP-----PETPASK---------SEGTL-K +KTDTVEKKKKKKK-----------KKKRD---------DLAGD-EYRRYVYKVMKQV--H +PDLGITSKAMTVV-----------------NMFMGDMFERIAQEAARLSDYT----KRKT +LSSREIEAAVRLVLPGELSRHAVAEGSKAVSNYV-GYGSRKR------------------ +---------------------------------------- +>Arabidopsis|NP_172295.1|H2B.S organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-------------------MAPRKPK-------VVSVTKK---KKVVEETIKVTV-T-EE +GDPCVITET-------------------------------------------ANDQETQD +LTFSIPVGENVTTVEIPVEVPDER---SLPVGENVTTVKIPVDDRDESS---------PQ +PPETPVEVR----------------DEPSPQP-----PETPASK---------SEGTL-K +KTDKVEKKQENKK----KKK----KKKRD---------DLAGD-EYRRYVYKVMKQV--H +PDLGITSKAMTVV-----------------NMFMGDMFERIAQEAARLSDYT----KRRT +LSSREIEAAVRLVLPGELSRHAVAEGSKAVSNFV-GYDSRKR------------------ 
+---------------------------------------- +>Boechera|HISTDB_H2B_S_1|H2B.S organism=Boechera stricta phylum=Streptophyta class=Magnoliopsida +-------------------MAPRKPK-------VVSVTKK---KKVVEETVKVTV-T-EG +GDPNATTEI------------------------------------------TENDQETQD +LTFSIPVGENVTTVEI-------------------------------------------- +----PVEVR----------------DEQSPQP-----PETPAST---------SEGIVKK +KTKKVEKKQAKKK------K----KKKRD---------DLAGD-EYRRYVYKVMKQV--H +PDLGITSKAMTVV-----------------NMFMGDMFERIAQEAARLGDYT----KRRT +LSSREIEAAVRLVLPGELSRHAVAEGSKAISNYV-AYDSRKR------------------ +---------------------------------------- +>Brachypodium|XP_014757906.1|H2B.S organism=Brachypodium distachyon phylum=Streptophyta class=Magnoliopsida +-------------------MAPKRRVKK-----VVSSVVRKKTTKVVQETVQVSTAILPD +DSAQPEPEVVVDVSTPATV---------------------------VKHVEVTSDVGDDQ +ATAAGATAVNNKPPQSKSPDKPADDNQATAPAVPSLQSQETQTQDPNEKKKTPQQEIVVV +TTKGPPGLEPEEKT-----------KKLQPDA-----PETPKQA--------GGAGTGGK +DEAAAPKKKKKKKKKKRKARRGG-GRRRG---PVGDMGMMGG--GYKRYVYRVLKQV--H +PDMGASGRAMEVL-----------------DMMMGDMFERLADEAARLAKVA----GRAT +LSSREVQNAVRLVLPGELAKHAISEGTKAVTSYM-SLA---------------------- +---------------------------------------- +>Brachypodium|HISTDB_H2B_S_5|H2B.S organism=Brachypodium stacei phylum=Streptophyta class=Magnoliopsida +-------------------MAPKRRGKQ-----VVSSVVR-KTTKVVKETVQVSTAAIVA +DDSTHPEYTEPEVVDMS----------TPATVVKHV----------EITTTSDGDQAADA +GATTVNKSPDKQPAEANQAPQPAVASLQSQETQDPNEEAAAAEAPQTEKPQQEIVVVSTK +APEPEEKIT----------------KKQHPEA-----PETPKQE--------AGAGTTGG +KEKAAAAPKKKKA-----------RRGRG---PVGDMGMMGG--GYKRYVYRVLKQV--H +PDMGASGRAMEVL-----------------DMMMGDMFERLADEAARLAKVA----RRAT +LSSREVQSAVRLVLPGELAKHAISEGTKAVTSYM-SLA---------------------- +---------------------------------------- +>Brassica|CAF1924216.1|H2B.S organism=Brassica napus phylum=Streptophyta class=Magnoliopsida +-------------------MAPKKSKK------VVSVTKK---KKVVEETIKVTV---TD +GVPNVTTET---------------------------------------DTQETQELETQD 
+LPLSIPVEEENVTRVEIPVDVGDD---RSPPPSETVTPASEGTVKETHK----------- +-VEIPVDVR----------------DDRSPQP-----PETPAPA----------SEVPSK +ETHKVEEKEGNKKKKMLKKR----NKNRS---------EVAGD-EYKRYVYKVMKQV--H +PDLGISSKAMTVI-----------------NMFMGDMFERLAVEAAKLNDYS----KRRT +LSSREIEAAVRLVLPGELSRHAVAEGSKAISNFV-AYGAKKR------------------ +---------------------------------------- +>Brassica|RID57103.1|H2B.S organism=Brassica rapa phylum=Streptophyta class=Magnoliopsida +-------------------MAPRKSKK------VVSVTKK---KKVVEETIKVTV---TD +GVPNVTTNTDTQETQYL-----------------------------ETQELDTQELETQD +LPFSLPLEEENVTRVEIPVDVGYD---RSPPPPETVAPASEGTVKETHK----------- +-VEIPVEFG----------------DDRSPQP-----PETPAPA----------SEVPAK +ETHKVEEKQGNKK-TTSKKR----KKNRS---------EVAGD-EYKRYVYKVMKQV--H +PDLGISSKAMTVI-----------------NMFMGDMFERLAVEAAKLNDYS----KRRT +LSSREIEAAVRLVLPGELSRHAVAEGSKAVSNFV-AYGAKKR------------------ +---------------------------------------- +>Capsella|HISTDB_H2B_S_0|H2B.S organism=Capsella grandiflora phylum=Streptophyta class=Magnoliopsida +-------------------MAPRKPK-------VVSVTKK---KTVVEETVKVTVA--EG +GDPNVTTEI------------------------------------------TENDQETQD +LTFSIPVGENVTTVEVPVEVL--------------------------------------- +-------------------------GERSPQP-----PETPVST---------SEGTLKK +KTNEVEKKQEKKK-----KK----NKKRD---------DLAGD-EYRRYVYKVLKQV--H +PDLGITSKAMTVV-----------------NMFMGDMFERIAQEAARLSDYT----KRRT +LSSREIESAVRLVLPGELSRHAVAEGSKAISNYV-SYDSRKL------------------ +---------------------------------------- +>Capsella|XP_006306103.1|H2B.S organism=Capsella rubella phylum=Streptophyta class=Magnoliopsida +-------------------MAPRKPK-------VVSVTKK---KTVVEETVKVTVA--EG +GDPNVTTEI------------------------------------------TENDQETQD +LTFSIPVGENVTTVEVPVEVL--------------------------------------- +-------------------------DERSPQP-----PETPAST---------SEGTLKK +KTNEVEKKQEKKK-----KK----NKKRD---------DLAGD-EYRRYVYKVLKQV--H +PDLGITSKAMTVV-----------------NMFMGDMFERIAQEAARLSDYT----KRRT 
+LSSREIESAVRLVLPGELSRHAVAEGSKAISNYV-SYDSRKL------------------ +---------------------------------------- +>Carica|XP_021901556.1|H2B.S organism=Carica papaya phylum=Streptophyta class=Magnoliopsida +-------------------MAPKRS--------TRLLALK-TTQKIIEK-VEVSVVP-SS +GREQEITDVAVQKSPVKVIPVEEKS---------------------RKTVRIPVEETPSL +KTIPVKTPEKEQETID---------------DQEPVTTSEEVAADNEQE----------- +-------------------------QEKEEET-----DQTQEGI-------TSSEPAGTT +KEEKVEKRPSRRG-RPRRRR----KKKKG-------SDEGNYK-GYKRYVFKVLKQV--H +PELAISSKAMVII-----------------NGFMNDMFERLADEAANLSRYS----HKAT +LSSKEIQGAVRLVLPGELSKHATAEGSKAVTNYM-SFPLHNS------------------ +---------------------------------------- +>Citrus|ESR37664.1|H2B.S organism=Citrus clementina phylum=Streptophyta class=Magnoliopsida +-------------------MPPRR---------SARVVLT---KKVVTETVEVSVVN-EK +KKGKQEIAIHSEETLP------------------------------SKTITVEDKEEGKR +TTVEVPIEEPEPPTEPEAEPSSSV---AAEAATPAKKEEKKTSIETSPE----------- +-PEHDVAA-----------------SAEQEEP-----PEQSKNK-------EKEKPNEAQ +KTAQQEARAAHEKPGSKKRK----RRKRN---------EGTGE-EYKTYVFRVLKQV--H +PGMAISSKAMTVI-----------------NNLMNDMFERIAGEAATLSKHC----HRTT +MSSREIQGAVKLVLPGELGKHAVAEGTKAVTNYT-SYDAKRSKA---------------- +---------------------------------------- +>Citrus|KDO60308.1|H2B.S organism=Citrus sinensis phylum=Streptophyta class=Magnoliopsida +-------------------MPPRR---------SARVVLT---KKVVEETVQVSVVNEKK +KKGKREIAIHSEETLP------------------------------SKTITVEDKEEGKH +TTVEVPIEEPEPPTEPEAEPSSSVAAEAATPAKKEEKKTSIETSPEPEHDVAA------- +-------------------------SAEQEEP-----PEQSKNK-------EKEKPNEAQ +KTAQQEARAAHEKPGSKKRK----RRKRN---------EGTGE-EYKTYVFRVLKQV--H +PGMAISSKAMTVI-----------------NNLMNDMFERIAGEAATLSKHC----HRTT +MSSREIQGAVKLVLPGELGKHAVAEGTKAVTNYT-SYDAKRSKA---------------- +---------------------------------------- +>Daucus|XP_017234272.1|H2B.S organism=Daucus carota subsp. 
sativus phylum=Streptophyta class=Magnoliopsida +-------------------MAPKKSPKKK----AVGAVVK-TTTKVIQETVQVSV---IQ +TKPKPQQET-------------------------------------------PQTENNKN +GPKDIEIQDV-------------------------------------------------- +-------------------------TTPTPTP-----KKATKTI----------PTQDTA +KKTKKDSAQGATK-----------KRKRS------------VE-GYKRYVYKVLKQV--H +PDIGISSKAMTIV-----------------NNLMTDMFERLADEAARLTKYT----KKMT +LSSREIQGAVKLVLPGELGKHAVAEGAKAVTNYV-QYASGPSKP---------------- +---------------------------------------- +>Erythranthe|XP_012838320.1|H2B.S organism=Erythranthe guttata phylum=Streptophyta class=Magnoliopsida +-------------------MAPKKRPGR-----AKKTVVT--STKVVEETVKVVVTP-GG +SGGEDDDNDNNESVEMI----------SSSTKQNTEN---------VEIFTSSPEKEHVL +RTIPVEDKEEQIPAPD---IVPQE---QDEDETQPYSEPETASTPPRKEAP--------- +-------------------------PPKSSEP-----LETPPPA-------EKRETRKKK +FQEKAKEAGQEKKATTEKLRPK--RRRRS------VAGAGAGE-SYKRYVFKVMKQV--H +PEMGISSKAMTIV-----------------NNLMTDMFERFAEEAARLQKYT----GRKT +MSSREVQGAVKLVLPGELGKHAVAEGAKAVTNYV-SYVPKS------------------- +---------------------------------------- +>Eutrema|XP_006417719.1|H2B.S organism=Eutrema salsugineum phylum=Streptophyta class=Magnoliopsida +-------------------MAPRREKK------VVSVTKK---KKVVEETLKVTV---TD +GDPNVITET------------------------------------------QTQDEETQD +LTFSIPVEENVTTVEI-------------------------------------------- +----PVDVR----------------DDQSPQA-----PETPAPV---------SEGTV-K +EAHKVEKKQSKKK----KKK----MMKRG---------DLAGDHEYKRYVYKVMKQV--H +PELGITSKAMSVI-----------------NTFMGDMFERIAEEAARLSDHT----KRRT +LSSREIEAAVRLVLPGELSRHAVSEGAKAISNYV-AYGAKKR------------------ +---------------------------------------- +>Fragaria|XP_004295898.2|H2B.S organism=Fragaria vesca subsp. 
vesca phylum=Streptophyta class=Magnoliopsida +-------------------MSPKRRS-------SSRLVVK-TTKQVVKETVEVSVVRSKK +RQKKVNDDEQQEPVESTIAFETKKENQTEKIEVSVEKDPAKRAIESQEETQIAEEDKEPE +EIRTIPVEVPGMETPEKLRGATLSVDKDEEPKEPSTVLDDKEPVSNDVVTETQLDSDETQ +NFEEDGEQARTISMDSQEGSE----TQNFEET-----PMTPEKK----EVNQSSKVEKEK +SDDVKQTGDGEKK----DKK----RKRRS------PGNKREGGKGYKRYVYKVFKQV--H +PELGMSAKAMVVL-----------------NNYMNDMFERLAGEAAKLTMYT----SRKT +LSSREIQGAVKLVLPGELGKHAMAEGTKAVSNYLSKNAAMSHKS---------------- +---------------------------------------- +>Glycine|KAH1159328.1|H2B.S organism=Glycine max phylum=Streptophyta class=Magnoliopsida +-------------------MAPKR---------AEKLVVR-STKKVVESIVQVSV-V-GK +RLTQVIPQA-------------------------------------------QKVSPNSD +ITTENKAEQENNTHQDGGVQ---------------------------------------- +-------------------------NQEEEQK-----GVVNEEA---------KEEKNKS +KTAKEQNGKEKKR----GRK----KRNIE---------------GYQRYVYGVLKQV--H +PEMGISSKCMTAL-----------------NNLMNDMFERLTFEVSKLTDYT----GHMT +LSSREIQGVVRLVLPGELEKHAIAEGVKAVNNYT-SYDA--------------------- +---------------------------------------- +>Glycine|XP_003539797.1|H2B.S organism=Glycine max phylum=Streptophyta class=Magnoliopsida +-------------------MAPKR---------AEKLVVR-STKKVVESSVLVSVVGKRL +TRGKKDTQT----------------------------------------TDGEEEVGSQE +HLVVIPIQE-VTPQAQKDSPNSAI---TTENKAEQENNTQDGGV---------------- +-------------------------ENNQEEE-----EEHEEVK----------EKKNKA +KTPKGKNGKEKKR-----------GRKKG---------RRSVE-GYQRYVYRVLKQV--H +PEMGISSKCMTVL-----------------NNLMNDMFERLAFEASKLKDYT----GHMT +LSSREIQGAVRLVLPGELGKHAIAEGVKAVNNYT-SYDA--------------------- +---------------------------------------- +>Gossypium|XP_012484989.1|H2B.S organism=Gossypium raimondii phylum=Streptophyta class=Magnoliopsida +-------------------MAPKR---------RAKVVVR-STKKIVKETVQVAVIDKTE +GDNNGDQQQ-------------------------------------LDTVPLEDIEEAGE +RVITEIPIQGSTEDK--------------------------------------------- 
+-------------------------AEKEPRK-----VEAPGQK---------NRVQGEE +KTEPVHEEEEPRKEEKKGKR----KRGKK--------KELVGHEGYKTYVFRVLKQV--H +PGMAISSKAMSVI-----------------NSLMNDMFEKITNEATKLSQYT----DRKT +LSSREIQGAVRLVLPGELGKHAVAEGSKAVTNYA-SYDIKRSKLV--------------- +---------------------------------------- +>Linum|HISTDB_H2B_S_8|H2B.S organism=Linum usitatissimum phylum=Streptophyta class=Magnoliopsida +-------------------MAPRRRSAGR----VVGVVRS--TRKVVKETVEVSILAGDT +QETTPEDNT-------------------------------------EDINLLDTEELIDV +VTPEAGVKLQEDATTT------------STVRTIPVEDAGPEREEELVI----------- +-------------------------SEDRQFE-----DAKPKKE--------------EK +KAPEKEKKVNKKK-----------RKSRF---------VEGGE-GYRRYVYKVMKQV--H +PDMKISGVAMSII-----------------NSLMKDMFERIADEAATLSRYS----KRMT +ISSKEIQDAVKLVLPGELGKHAVAEGSKAVANYA-SYSHNK------------------- +---------------------------------------- +>Malus|XP_008359166.3|H2B.S organism=Malus domestica phylum=Streptophyta class=Magnoliopsida +-------------------MAPKR---------SAKMVVK-TTKRVVKEMVEVSVVK-TR +RKKQQEDRP---------------------------------------LETISVENNDSN +QTQNVEVSVGKEPLKTSIIPIETL---EQVIPIETQAENQTLKTQNAEVQVDREAEENPT +TPDPQETEKLS--------------KEEEQKS-----EEDKTLR-----GGENKDAEDLT +KTEEQASKKGEKKSEVKGGK----RREKR---------RSRGREEYKTYVYKVLKQV--H +PGMGVSSKAMTVL-----------------NNLMNDMFEKLADEAARLTTYT----ARKT +LSSREIQGAVKLVLTGELGRHAMAEGTKAVSTYV-SYGGGSSKS---------------- +---------------------------------------- +>Manihot|XP_021629190.1|H2B.S organism=Manihot esculenta phylum=Streptophyta class=Magnoliopsida +-------------------MAPKGKRGKKK---VLGTVLR-SSKRVIKETVKIAV---FE +GDTQESTQE-------------------------------------------DQNGDTEE +LPENEPLVVRTIPVEERV---------EEEEEAQTIEVSVKKPKEEKRK----------- +-------------------------QEKIETH-----EEKQEPA----------KTTKTK +KRTQEEKGQEKKR-----RR----RRRRG---------VEEGGEGYKRYVFRVLKQV--H +PELRISSMAMSVI-----------------NSLMKDMFERIADEAAKLSQHS----HKMT +LSSREIQGAVKLVLPGELGRHAIAEGSKAVTNYM-SYEAKGSKA---------------- 
+---------------------------------------- +>Medicago|XP_013464866.1|H2B.S organism=Medicago truncatula phylum=Streptophyta class=Magnoliopsida +-------------------MAPKS---------AKKVVVR-STRKVVQESVQVSVVSSHK +RSTRGNNKD-------------------------------------VEIDKDAGNATQQE +HVRIIPVQEVTSQTKEDTNTNTNTTTVTSEDTTNQENT---------------------- +-------------------------PNDATME-----PKTPLSN---------KEQEKKV +RTKEGGNDGKGKR-----------KKKRG---------RRMGE-GYQRYVYRVLKQV--H +PQMGISSQAMTIL-----------------NNLMNDMFEKLADEAAKLTAYT----KHMT +LTSREIQGAVKLVLPGELGKHAIAEGAKAVTNYV-SYVA--------------------- +---------------------------------------- +>Musa|HISTDB_H2B_S_6|H2B.S organism=Musa acuminata phylum=Streptophyta class=Magnoliopsida +-------------------MAPKR---------TSRVLKT--TKTVIEETVEVVVEA-KD +AQGPKEDLG-------------------------------------------EGKEAEPE +GRQQAEEEEPS------------------------------------------------- +-------------------------REKEAEP-----PFEKSII-------QQEETAGEG +KEAESITDKETQEEAAAVDK----RFSGG-----DGGEMDGTGRGYKRYVFRVLKHV--H +PGMGISSRAMVVL-----------------DGMMGDMFERLAGEASRLSTYT----GKAT +LSSREIQGAVRLVLPGELGKRAISEGTKAVSNYM-AADRHEQQ----------------- +---------------------------------------- +>Nymphaea|XP_031483632.1|H2B.S organism=Nymphaea colorata phylum=Streptophyta class=Magnoliopsida +-------------------MGPRRSGR------LVGSVVK--ETKVVEETVKVVADV-DD +LSSLEPSAI---------------------------------------------SAGIIR +EIPVVEIKESKTPQKK------------VEPEAAPFP----------------------- +-------------------------LEKKQKG-----IEAKEVE--------EKEQKGHV +DQKKERKPQELNR----RRK----RLRRN---------EEGGVGAYRSYVYKVLKQV--H +PELGISSKAMDVL-----------------NGFMGDMFERLAEEAATLQKHT----GRRT +LSSREIQMAVRLVLPGELGKHAISEGSKAITNYY-RYPEKKRR----------------- +---------------------------------------- +>Oryza|XP_015612586.1|H2B.S organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +-------------------MAPKQKE-------AANKKKKKKEVAVLVKTKTKVVQL-TT +TTAELELEP--------------------------------------------------- 
+-TVTVQVDDNKTGAAA-----------DETPPVVPL------------------------ +-------------------------QSQETQD-----PNEPKAA---------------- +-AAKKKKRAGHGR----KRS----RRRRG--------GALEYG-GYKRYVWRVLKQV--H +PDLGASAQTMDVL-----------------DMMMADMFERLADEASRLSKLS----GRLT +LTSREVQSAVRLVLPADLANHAISEGTKAISNYL-S------------------------ +---------------------------------------- +>Panicum|PAN11031.1|H2B.S organism=Panicum hallii phylum=Streptophyta class=Magnoliopsida +-------------------MAPKRRSGGK----VVGSVVK---TKVVQETVEVTTAFVAD +GEPGQRATEDLALAPPAVDASGG-----------------------SRSRVVHIEVTTPD +GDTTTGGSNAKQATSKRGRGGRR----EEEKPAPPAEEAAQEPPVAQSQETQDPNEEQEE +EEDAGKKKKKKKPPQQ---------ELQDEEP-----PETPRVASERKTAAAKRTPQQQQ +KRGGGGAGGGDKTKTTKAKKGG--RRRLGQASPGGDAGMGGVG-GYKRYVWRVLKQV--H +PELGVSGNAMRVL-----------------DMMMADMFERLADEAARLSKVS----GRAT +LSSREVQSAVRLVLPGELSRHAMSEGTKAISKYM-SYDA--------------------- +---------------------------------------- +>Panicum|HISTDB_H2B_S_3|H2B.S organism=Panicum virgatum phylum=Streptophyta class=Magnoliopsida +-------------------MAPKRRGGGK----VVGSVVK---TKVVQETVEVTTAVVPD +GEPEQRGTEALALAPPAVDVSG------------------------GSRRVVHIEVTTPD +GDATTGGSNAKKQATSKRGRRGGR--REEEKPAPPAEEAAQEPPVAQSQETQDPNEEQEE +EVEEDARKKKKRKPPPPPQ------ERQDEEP-----PETPRVASERKAAAAKTTPQQQK +KRGGGDKARAKKG----GQR----RRRLGQASPGGDAGMGGVG-GYKRYVWRVLKQV--H +PELGVSGLAMRVL-----------------DMMMADMFERLADEAARLSKAS----GRAT +LSSREVQSAVRLVLPGELGRHAMSEGTKAISKYM-SYDA--------------------- +---------------------------------------- +>Phaseolus|XP_007132352.1|H2B.S organism=Phaseolus vulgaris phylum=Streptophyta class=Magnoliopsida +-------------------MAPKR---------AQKLVVR-STKKVVESSVQVSVVS-SS +SRKRQTRGN--------------------------------------KDNIQTEEAVGEE +KHVMVIPIQEGNPQAQKDSSTSAM---TNENKGEQENSVQDDGVQNEEK----------- +-------------------------KKKGENE-----EVKEEKK---------------G +RFPKGSNGKEKKL----GKK----KGRRS------------AE-GYQRYVYRVLKQV--H +PEMGISSKCMTIL-----------------NNLMNDMFERLAGEASKLKDYT----GHMT 
+LSSREIQGAVKLVLPGELGKHAIAEGVKAVNNFT-SYDA--------------------- +---------------------------------------- +>Piper|HISTDB_H2B_S_10|H2B.S organism=Piper nigrum phylum=Streptophyta class=Magnoliopsida +-------------------MASTRQGRRNTPEVVSTVVKKKTTRKVVNETTIAAVAV-VE +SNEPPIVKT------------------------------------------VPVEEESSD +SVINVE--AGKTPPKE-------------VPIARPRSDAAKGTNQRKDG----------- +----AATIAET--------------QQPPEKL-----REEKHAM---------EEVKKGS +KRRKGERKEGEKK----RKR----RKKRW-------SYNEEMS-GYSRYVFRVLKQV--H +PDLAISSKAMAVL-----------------NAFVWDMFERLAGEAGKLADYT----RRAT +LSSREIQDAVRLVLPGELGKHAISEGSKAVTNYV-TNDD--------------------- +---------------------------------------- +>Piper|HISTDB_H2B_S_9|H2B.S organism=Piper nigrum phylum=Streptophyta class=Magnoliopsida +-------------------MASTRQGRRNTPEVVSTVVKKKTTRKVVNETTIAAVAV-VE +SNEPPIVKT------------------------------------------VPVEEESSD +SVINVE--AGKTPPKE-------------VPIARPRSDAAKGTNQRKDG----------- +----AATIAET--------------QQPPEKP-----REEKHAM---------EEVKKGT +KRRKGERKVEEKK----RKR----RKKRW-------SYNEEMS-GYSRYVFRVLKQV--H +PDLAISSKAMAVL-----------------NAFVWDMFERLAGEAGKLADYT----RRAT +LSSREIQDAVRLVLPGELGKHAISEGSKAVTNYVHALGSWRRPALQYGRHEEERRQRKCR +QDRVTSVFGFGFEVLGLRISFLFATLRTAPGGRKHGVQPT +>Prunus|ONI00968.1|H2B.S organism=Prunus persica phylum=Streptophyta class=Magnoliopsida +-------------------MAPKR---------STKTVVK-TTKQVVRETVQVSSVV-QS +KRRKKQSEDSGDQTRKP----------VKTIK--------------TISIETQEENQTQN +VEISEPLKTRRIPIQTEEENQILK---GQNAEAITTLTSDQKEAAAEEEEEGEGEKKEDQ +EDTVETSITSDEKEDEQKSEEVKTLEGEKEDS-----METNTTSDEREEEEEETTKKEEH +KSDEVKTQKGGKKSSEKKRK----RKRRE---------RGRGE-EYKIYVHRVLKQV--H +PGMGVSSKGMTVL-----------------NNLMNDMFERLADEAARLTKYT----ARKT +LSSREIQAAVKLVLPGELGRHAMAEGTKAVSTYVSNNNGRQSKS---------------- +---------------------------------------- +>Setaria|XP_022680431.1|H2B.S organism=Setaria italica phylum=Streptophyta class=Magnoliopsida +-------------------MAPKRRSSGK----VVGSVVK---TKVVQETVEVTTAIVAD 
+GEPEQQLAPGALALAPRTGEV-------------------------SRSKVVHVEITTPD +SDNTTGRSSAKQQPTAKRGRGGRR---EEEKPPAPAEEAAQEPPPQSLETQEPNEEEEEE +DVDVSKKRRKPPPQQ----------RRRDEEE-----PETPRVA-------SERKTAGTK +TTPQKQKKRGGGGGGGGKAKTGR-RRRLGEASPGGDAGMGGMG-GYKRYVWRVLKQV--H +PELGVSGHAMRVL-----------------DMMMADMFERLADEAARLSKAA----GRAT +LSSREVQSAVRLVLPGELGRHAMSEGTKAISKYM-SYDDA-------------------- +---------------------------------------- +>Setaria|XP_034583365.1|H2B.S organism=Setaria viridis phylum=Streptophyta class=Magnoliopsida +-------------------MAPKRRSSGK----VVGSVVK---TKVVQETVEVTTAIVAD +GEPEQQLAPGALALAPRTGEV-------------------------SRSKVVHVEITTPD +SDNTTGRSSAKQQPTAKRGRGGRR---EEEKPPAPAEEAAQEPPPQSLETQEPNEEEEED +VDVSKKRRKPPPQQ-----------RRRDEEE-----PETPRVA-------SERKTTGTK +TTPQKQKKRGGGGGGGGKAKTGR-RRRLGEASPGGDAGMGGMG-GYKRYVWRVLKQV--H +PELGVSGHAMRVL-----------------DMMMADMFERLADEAARLSKAA----GRAT +LSSREVQSAVRLVLPGELGRHAMSEGTKAISKYM-SYDDT-------------------- +---------------------------------------- +>Solanum|XP_004241294.1|H2B.S organism=Solanum lycopersicum phylum=Streptophyta class=Magnoliopsida +-------------------MAPKKRGGR-----VRATVVT--ARKVVEETVSVVVTP-VA +GETETESQTLAEENQSF-----------------------------EILTPAPSEEPTPK +RTINVQDISEGKKAPRRKPDPAQQ---VDEDETQPADEPEEMPSPPKKESGQKKTQKRKP +EPAQRGDEDE---------------TQPSEEP-----EEMPTPP-------KMEADQRKA +QKRKPDPAQKAKGGGERKKK----RAKVG-------GGVGPSE-GYRRYVFRVMKQV--H +PDMGISSKAMTVL-----------------NNLMGDMFERIANEAAILTKYV----GRTT +LASVDIQDAVKLVLPGELGKHAIAEGTNAVANYVTNVEKSKSKP---------------- +---------------------------------------- +>Solanum|XP_006347209.1|H2B.S organism=Solanum tuberosum phylum=Streptophyta class=Magnoliopsida +-------------------MAPKKRGGR-----ARATVVT--ARKVVEETVSVVVSGETE +TESQTLTEENQSF---------------------------------EILTPLPYEEPTPK +RTINVQDKSEGKKAQQRKPDPAQQ---VDEVETQPADEPEEMPSPPKKEAVRKKAQKRKP +DPAQRV-------------------DEDETQP-----AEEPEEM--------PTPPKMEA +DQKKAQKAKGGGG--ERKKK----RAKVG-------GGVGPSE-GYRRYVFRVMKQV--H 
+PDMGISSKAMMIL-----------------NNLMGDMFERIANEAAILTKYA----GRAT +LASVDIQDAVKLVLPGELGKHAIAEGTKAVANYVTSVEKSKSKP---------------- +---------------------------------------- +>Sorghum|XP_002462010.1|H2B.S organism=Sorghum bicolor phylum=Streptophyta class=Magnoliopsida +-------------------MAPKRRGNK-----VVGSVVK---TKVVQETVEVIVAD-DD +DTAEAEQQMVPEALAVAPSAVDVS----------------------GSTVVHVVEVTTPD +GGDNATGSNVKQPAVAKRGRGRRE--EEKEKQPAPPEDSVLVPQSQETQDPNEEEEDQED +ASKKKKQKQKQRQQ-----------DEDDEAQ-----PETPRVA------SERKKATPKK +AKAKAQPQQQAGGGGDAGKKRPKARRRLGQASAGGDAGMGGVG-GYKRYVWRVLKQV--H +PDLGVSGHAMQVL-----------------DMMMADMFERLADEAARLSKAT----GRMT +LTSREVQSAVRLVLPGDLGKHAIAEGTKAISKYM-SYA---------------------- +---------------------------------------- +>Spirodela|HISTDB_H2B_S_2|H2B.S organism=Spirodela polyrhiza phylum=Streptophyta class=Magnoliopsida +-------------------M------------------VR-TTRKVVQETIEVSVVK-EK +DATAGRKKVVEVKVQDTTEMPQP-----------------------QAEAFEGEREGEAA +VEEKSAATGEKEAEEAGVQEKKEQ---RQPPQVETISREMVETAAAPEGEKHAVEKTRQR +EDETPPALESEQQPPQIGEE-----RKSVEEH-----EEKKKEK---------VEGDLRA +EEAASGKEKEETKSAMRRRR----RKRAG-----RDVGGFGGKRGYKRYVFRVLKQV--H +PELRVSSMAMTVL-----------------DSLVKDMFERLAGEASRLSKYS----GRAT +LSTREIQAAVRLVLPRELGEHALAEGNKAVANFMTAATAKPSS----------------- +---------------------------------------- +>Trifolium|XP_045802699.1|H2B.S organism=Trifolium pratense phylum=Streptophyta class=Magnoliopsida +-------------------MAPKRA--------NKKMVVR-STRKVVEESVQVSVVSSNK +RSTRANKDN-------------------------------------EIDKDVGSDHDQRE +EHVRIIPVQEVTPSAKEDSNASTT---TFT------------------------------ +-------------------------TEDKTNQ-----ENTPNEA---------TMEPKES +ENKKVKNKEGNYGKEKRKRK----RVRRM------------GE-GYQRYVYRVLKQV--H +PDMGISFKAMTIL-----------------NNLMNDMFEKLADEAAKLTTYI----GHMT +LSSREIQGAVKLVLPGELGKHAIAEGAKAVTNYISSYGA--------------------- +---------------------------------------- +>Vitis|XP_002272312.1|H2B.S organism=Vitis vinifera phylum=Streptophyta class=Magnoliopsida 
+-------------------MAPKRSGKT-----RSKVVVK-ATRKVVQQTVEVTVLA-SK +QKPPREEQG----------------------------------------KKISKKDKAPE +ELQREQVSADEEPPKELPTPVT-------------------------------------- +-------------------------QEEPPKK-----EEEKKTT---------TTQEGRE +EKKRGRRRRRRTS-----------GRRRK----------EGGE-GYKRYVYRVLKQV--H +PGLGVSSKAMTVL-----------------SGFMNDMFERIAEEAAKLSKYT----GKTT +LSAREIQGAVKLVLPGELQKHAMAEGTKAVSNYM-DYAAAGGHKQ--------------- +---------------------------------------- +>Zea|HISTDB_H2B_S_4|H2B.S organism=Zea mays phylum=Streptophyta class=Magnoliopsida +-------------------MAPKRRGNK-----VVGSVVK---TKLVQETVEVIVAD-DD +GLHAEKQQVPE-----------------------------------ALALAHPTVDVSGS +TVVHVVEVTAKRGRGGGGGGGGGR---RNEGKPPPEEDSAAVPVPQSQE---------TQ +DPNEELEFEL---------------EDEEEKQ-----PETPRVA---------SEKRKKA +ATPTKKTKTQQPR-----RR----RQRLGQASSGGDAGMGGVG-GYRRYVWRVLKQV--H +PDLGVSGHAMQVL-----------------DMMMADMFERLAEEAARLSKAT----GRAT +LTSREVQSAVRLVLPGELGRHAISEGTKAISKYM-SYAA--------------------- +---------------------------------------- +>Zea|XP_008670031.1|H2B.S organism=Zea mays phylum=Streptophyta class=Magnoliopsida +-------------------MAPKRRGNK-----VVGSVVK---TKLVQETVEVIVAD-DD +GLHAEKQQVPEA----------------------------------LALAHPTVDVSGST +VVHVVEVTAKRGRGGGGGGGGGGR---RNEGKPPPEEDSAAVPVPQSQE---------TQ +DPNEELEFEL---------------EDEEEKQ-----PETPRVA---------SEKRKKA +ATPTKKTKTQQPR-----RR----RQRLGQASSGGDAGMGGVG-GYRRYVWRVLKQV--H +PDLGVSGHAMQVL-----------------DMMMADMFERLAEEAARLSKAT----GRAT +LTSREVQSAVRLVLPGELGRHAISEGTKAISKYM-SYAA--------------------- +---------------------------------------- +>Trypanosoma|AAO24603.1|H2B.V organism=Trypanosoma brucei brucei phylum=Euglenozoa class=Kinetoplastea +-------------------MPPTKGG---------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------KRPLPLG-----GKGKGKR----------PPGQTT 
+KSSSSRKKSGARR-----------GKKQQ---------------RWDLYIHRTLRQV--Y +KRGTLSKAAVRVL-----------------SSFIEDMYGKIQAEAVHVACIN----NVKT +LTAREIQTSARLLLPPELAKHAMSEGTKAVAKYNASREEAYSKVL--------------- +---------------------------------------- +>Ailuropoda|XP_002925981.1|H2B.W organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +------------------------------------------------------------ +--------------M--------------------------------------------- +------------------------------------------------------------ +-------------------------AEPGCET-----SSEESLG---------TEEPSAA +NPKSPKQKQKQPR-----------RQCRRRCR---RCP-----NSFATYFPRVLKQV--H +EGLSLSKKAVSVM-----------------DSFLKDIFEHIADEAAHLARSTKGSNKHST +INSREIQTAVRLLLPGEIGKHTVSKATKAVIRFQ--------------RCE--------- +---------------------------------------- +>Bos|DAA13058.1|H2B.W organism=Bos taurus phylum=Chordata class=Mammalia +--------------------------MGIGGSILSETSSDSYEEDVITKETGISEIEPSE +KEMAKVETSKPDPYD--------------------------------------------- +------------------------------------------------------------ +-------------------------AEPIKVE-----TSKPDPY---------DAEPKKA +ETSKPDPYDAEPKKAKQKTAKG--RRRRRHCH---HDS----FSSFATYFPRVLRQI--H +KGMSLSHDSVNIL-----------------DSFVKDTFERIAEEAGRLAGDN----KRRT +ITTEDIEAAVRLLLPGKLGKYAVLKATKSLITYR--------------TCK--------- +---------------------------------------- +>Canis|XP_548517.2|H2B.W organism=Canis lupus familiaris phylum=Chordata class=Mammalia +-------------------MIPGKPEEGKGSSEGPICDTEVACDVRKCSDYKGASAPQQP +PLSVLLSCWEGEPHM--------------------------------------------- +------------------------------------------------------------ +-------------------------AEPGCET-----SSEECLG---------TKEPREA +EPETP--KRKKPR-----------RQCHRRCR---RSR----SDSFAIYFPRVLKQV--D +EGLSLSQKAVSIM-----------------DSFVKDIFERIADEASRLARST----KRST +ITSREIQTAVRLLLPGEIGKYAVSEATKALMRNQVGALEKQVKNVLSLGCG--------- +---------------------------------------- +>Macaca|NP_001180847.1|H2B.W organism=Macaca mulatta phylum=Chordata 
class=Mammalia +----------------------------MLRTQVPPLLRSTTAIVWSCRVMAAASA---- +--------------M--------------------------------------------- +------------------------------------------------------------ +-------------------------AEPSSET-----TSEEQLI---------TQEPKEA +NSTMAQKQSKQRK-----------RGRRGPCRCHANCR----GDSFATYFRRVLKQV--H +QGLSLSREAVSVM-----------------DSLVHDILDRIATEAGRLARST----KRQT +ITAWETRIAVRLLLPGEMGKLAESEGTKAVLRTSLYAVQQ--------QRK--------- +---------------------------------------- +>Oryctolagus|XP_002720211.1|H2B.W organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +--------------M--------------------------------------------- +------------------------------------------------------------ +-------------------------AEPASHV-----ASEENL----------SLEPKTT +ASSTP--KEKQPR-----------RRRRRR-----QGH----NYSFASYFPKVLKHV--H +KGLSLSKEAKGVM-----------------DSIVRDVFERIAHEAASLVRYS----KHST +LTSRDVQSAVRLLLPGQLHKHADVEGTKALLKFI--------------THP--------- +---------------------------------------- +>Homo|NP_001002916.4|H2B.W.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +--------------M--------------------------------------------- +------------------------------------------------------------ +-------------------------AGPSSET-----TSEEQLI---------TQEPKEA +NSTTSQKQSKQRK-----------RGRHGPRRCHSNCR----GDSFATYFRRVLKQV--H +QGLSLSREAVSVM-----------------DSLVHDILDRIATEAGHLARST----KRQT +ITAWETRMAVRLLLPGQMGKLAESEGTKAVLRTSLYAIQQ--------QRK--------- +---------------------------------------- +>Homo|NP_001375393.1|H2B.W.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +--------------M--------------------------------------------- +------------------------------------------------------------ +-------------------------AEASSET-----TSEEGQS---------IQEPKEA 
+NSTKAQ---KQKR-----------RGCRGSRRRHANRRGDSFGDSFTPYFPRVLKQV--H +QGLSLSQEAVSVM-----------------DSMIHDILDRIATEAGQLAHYT----KRVT +ITSRDIQMAVRLLLPGKMGKLAEAQGTNAALRTSLCAIWQ--------QRK--------- +---------------------------------------- +>Vitrella|CEM32013.1|H2B.Z organism=Vitrella brassicaformis CCMP3155 phylum= class= +-------------------MPG-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------KGP-----AEKRQAA----------KKTAGK +TPAE---AGKKRR-----------RKRTE---------------SFALYIYKVLKQV--H +PETGISKKSMSIM-----------------NSFINDIFDRMATEATNLIRFN----KKKT +LSSREVQTSVRLMLPGELSKHAVSEGTKAVTKYTTAAGN--------------------- +---------------------------------------- +>Babesia|XP_001610608.1|H2B.Z organism=Babesia bovis T2Bo phylum=Apicomplexa class=Aconoidasida +-------------------MSG-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------KVP-----SSKSQAA----------KKTAGK +SL-----GIRYRR-----------KKRIE---------------SFSLYIYKVLKQV--H +PETGVSKKSMSIM-----------------NSFINDIFDRMALEATRLIRYN----KKST +LSSREIQTAVRLLLPGELSKHAVSEGTKAVTKYTTSGA---------------------- +---------------------------------------- +>Cytauxzoon|HISTDB_H2B_Z_0|H2B.Z organism=Cytauxzoon felis phylum=Apicomplexa class=Aconoidasida +-------------------MSG-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------KVP-----STKSQAA----------KKTAGK +TL-----GVRYRR-----------KKRIE---------------SFALYIYKVLKQV--H +PETGVSKKSMSIM-----------------NSFINDIFDRLALEATRLIRYN----KKST +LSSREIQTAVRLLLPGELSKHAVSEGTKAVTKYTTSGV---------------------- +---------------------------------------- +>Plasmodium|XP_678689.1|H2B.Z organism=Plasmodium berghei ANKA 
phylum=Apicomplexa class=Aconoidasida +-------------------MSG-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------KGP-----AQKSQAA----------KKTAGK +TL-----GPRHKR-----------KRRTE---------------SFSLYIFKVLKQV--H +PETGVTKKSMNIM-----------------NSFINDIFDRLVTEATRLIRYN----KKRT +LSSREIQTAVRLLLPGELSKHAVSEGTKAVTKYTTSGA---------------------- +---------------------------------------- +>Plasmodium|XP_001349046.1|H2B.Z organism=Plasmodium falciparum 3D7 phylum=Apicomplexa class=Aconoidasida +-------------------MSG-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------KGP-----AQKSQAA----------KKTAGK +TL-----GPRHKR-----------KRRTE---------------SFSLYIFKVLKQV--H +PETGVTKKSMNIM-----------------NSFINDIFDRLVTEATRLIRYN----KKRT +LSSREIQTAVRLLLPGELSKHAVSEGTKAVTKYTTSAA---------------------- +---------------------------------------- +>Cryptosporidium|XP_628349.1|H2B.Z organism=Cryptosporidium parvum Iowa II phylum=Apicomplexa class=Conoidasida +-------------------MSGKS------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------GKSIKGP-----AQKQQAA----------KKTAGK +SPAD---GGKRKR-----------RKRTE---------------SFALYIYKVLKQV--H +PETGISKKSMSIM-----------------NSFINDVFDRLSAEAVKLVQYN----KKRT +LSSREVQTSVRLMLPGELSKHAVSEGTKAVTKYTSASA---------------------- +---------------------------------------- +>Eimeria|XP_013228334.1|H2B.Z organism=Eimeria tenella phylum=Apicomplexa class=Conoidasida +-------------------MSG-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ 
+-----------------------------KGP-----AQKSQAA----------KKTAGK +SL-----GPRYRR-----------RKRTE---------------SFALYIYKVLKQV--H +PETGVSKKSMSIM-----------------NSFINDIFDRLADEAVRLIRYN----KKRT +LSSREIQTAVRLLLPGELSKHAVSEGTKAVSKYTTSGA---------------------- +---------------------------------------- +>Gregarina|XP_011128492.1|H2B.Z organism=Gregarina niphandrodes phylum=Apicomplexa class=Conoidasida +-------------------MSG-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------KG-------GKQQLA----------KKTAAN +KLPSHHLDKNKKR-----------RRRGE---------------TFSIYIYKVLRQV--Q +PKLGMSRKSMAIM-----------------NSFINDIFERLATEAVKLIQYN----KKRT +LSSREMQTSVRLLLPGELSKHAATEGAKAVEKYENRPIA--------------------- +---------------------------------------- +>Toxoplasma|XP_002369740.1|H2B.Z organism=Toxoplasma gondii ME49 phylum=Apicomplexa class=Conoidasida +-------------------MSG-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------KGP-----AQKSQAA----------KKTAGK +SL-----GPRYRR-----------RKRTE---------------SFALYIYKVLKQV--H +PETGVSKKSMSIM-----------------NSFINDIFDRLADEAVRLIRYN----KKRT +LSSREIQTAVRLLLPGELSKHAVSEGTKAVTKYTTSGA---------------------- +---------------------------------------- +>Strongylocentrotus|P16888.2|late_H2B_(Echinoidea) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +-------------------MPAKAQPAG-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------KKGSKK +AKAPRPSGGKKRR-----------RRRKE---------------SYGIYIYKVLKQV--H +PDTGISSRAMSIM-----------------NSFVNDVFERIAAEASRLAHYN----KKST 
+ITSREVQTVVRLLLPGELAKHAVSEGTKAVTKYTTSK----------------------- +---------------------------------------- +>Strongylocentrotus|P16889.3|late_H2B_(Echinoidea) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +-------------------MPAKAQAAG-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------KKGSKK +AKAPKPSGDKKRR-----------RKRKE---------------SYGIYIYKVLKQV--H +PDTGISSRAMSIM-----------------NSFVNDVFERIAAEASRLAHYN----KKST +ITSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTTSK----------------------- +---------------------------------------- +>Parechinus|P02290.2|sperm_H2B_(Echinoidea) organism=Parechinus angulosus phylum=Echinodermata class=Echinoidea +-------------------MPSQKSPTK-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------RSPTKRS-----PTKRSPQ---------KGGKGGK +GAKRGGKAGKRRRGVQVKRR----RRRRE---------------SYGIYIYKVLKQV--H +PDTGISSRAMSVM-----------------NSFVNDVFERIAAEAGRLTTYN----RRST +VSSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR----------------------- +---------------------------------------- +>Parechinus|P02291.2|sperm_H2B_(Echinoidea) organism=Parechinus angulosus phylum=Echinodermata class=Echinoidea +----------------MPRSPAKTSPRK-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------GSPRKGS-----PSRKASP---------K--RGGK +GAKRAGKGGRRRR--VVKRR----RRRRE---------------SYGIYIYKVLKQV--H +PDTGISSRAMSVM-----------------NSFVNDVFERIAGEASRLTSAN----RRST +VSSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR----------------------- +---------------------------------------- +>Parechinus|P02292.2|sperm_H2B_(Echinoidea) organism=Parechinus angulosus phylum=Echinodermata class=Echinoidea 
+----------------MPRSPAKTSPRK-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------GSPRKGSPRKGSPSRKASP---------K--RGGK +GAKRAGKGGRRRR--VVKRR----RRRRE---------------SYGIYIYKVLKQV--H +PDTGISSRAMSVM-----------------NSFVNDVFERIASEASRLTSAN----RRST +VSSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR----------------------- +---------------------------------------- +>Psammechinus|Q27749.3|sperm_H2B_(Echinoidea) organism=Psammechinus miliaris phylum=Echinodermata class=Echinoidea +-------------------MPSQKSPTK-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------RSPTKRS-----PQ-----------------KGGK +GAKRGGKAGKRRRGVAVKRR----RRRRE---------------SYGIYIYKVLKQV--H +PDTGISSRAMSVM-----------------NSFVNDVFERIASEAGRLTTYN----RRNT +VSSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR----------------------- +---------------------------------------- +>Psammechinus|Q27750.3|sperm_H2B_(Echinoidea) organism=Psammechinus miliaris phylum=Echinodermata class=Echinoidea +----------------MPKSPSKSSPRK-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------GSPRKGS-----PRKGSPK------------RGGK +GAKRAGKGGRRN---VVKRR----RRRRE---------------SYGIYIYKVLKQV--H +PDTGISSRGMSVM-----------------NSFVNDVFERIAGEASRLTSAN----RRST +ISSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTTARR---------------------- +---------------------------------------- +>Strongylocentrotus|NP_999706.1|sperm_H2B_(Echinoidea) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +-------------------MPSQRSPTK-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ 
+-------------------------RSPTKRS-----PQKGAG-------------KGGK +GSKRGGKA-RRRGGAAVRRR----RRRRE---------------SYGIYIYKVLKQV--H +PDTGISSRGMSIM-----------------NSFVNDVFERVAAEASRLTKYN----RRST +VSSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR----------------------- +---------------------------------------- +>Strongylocentrotus|NP_999721.1|sperm_H2B_(Echinoidea) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +----------------MPRSPSKTSPRK-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------GSPRRGS-----PSRKASP---------K--RGGK +GAKRAGKGGRRRN--VVRRR----RRRRE---------------SYGIYIYKVLKQV--H +PDTGISSRGMSVM-----------------NSFVNDIFGRIAGEASRLTRAN----RRST +ISSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR----------------------- +---------------------------------------- diff --git a/CURATED_SET/draft_seeds/H2B_only.fasta b/CURATED_SET/draft_seeds/H2B_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.1-like_(Plants).fasta b/CURATED_SET/draft_seeds/H3.1-like_(Plants).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.3-like?.fasta b/CURATED_SET/draft_seeds/H3.3-like?.fasta new file mode 100644 index 0000000..ced4ca7 --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.3-like?.fasta @@ -0,0 +1,24 @@ +>Macaca|HISTDB_H3_Y_0|H3.Y_(Primates?) organism=Macaca mulatta phylum=Chordata class=Mammalia +-ARTKQTARKATNWQAPRKPLATKAAAKRAPPRGGIKKPHRYKPGTQALREIRKYQKSTQ +LLLRKLPFQCLVREIAQVISLDLRFQSAAIGALQEASEAYLVNLFEDTNLCAIHARRVTI +MPRDMQLARRIRGEGAXEPTLLGNVAL +>Macaca|HISTDB_H3_Y_1|H3.Y_(Primates?) organism=Macaca mulatta phylum=Chordata class=Mammalia +-ARTKQTARKATNWQAPRKPLATKAPGKRLPPRGGIKKPHRYRPGTQALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVNLFEDTNLCAIHARRVTI +MPRDMQLARRIRGEGA----------- +>Pan|HISTDB_H3_Y_2|H3.Y_(Primates?) 
organism=Pan troglodytes phylum=Chordata class=Mammalia +-ARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPRDMQLARRLRREGP----------- +>Pan|HISTDB_H3_Y_3|H3.Y_(Primates?) organism=Pan troglodytes phylum=Chordata class=Mammalia +-ARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISLDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPQDMQLARRLRGEGAREPTLLGNLAL +>Homo|NP_001342187.1|H3.Y.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPRDMQLARRLRREGP----------- +>Homo|NP_001358848.1|H3.Y.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPRDMQLARRLRGEGAGEPTLLGNLAL diff --git a/CURATED_SET/draft_seeds/H3.3-like?_only.fasta b/CURATED_SET/draft_seeds/H3.3-like?_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.3-like_(Animals).fasta b/CURATED_SET/draft_seeds/H3.3-like_(Animals).fasta new file mode 100644 index 0000000..ced4ca7 --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.3-like_(Animals).fasta @@ -0,0 +1,24 @@ +>Macaca|HISTDB_H3_Y_0|H3.Y_(Primates?) organism=Macaca mulatta phylum=Chordata class=Mammalia +-ARTKQTARKATNWQAPRKPLATKAAAKRAPPRGGIKKPHRYKPGTQALREIRKYQKSTQ +LLLRKLPFQCLVREIAQVISLDLRFQSAAIGALQEASEAYLVNLFEDTNLCAIHARRVTI +MPRDMQLARRIRGEGAXEPTLLGNVAL +>Macaca|HISTDB_H3_Y_1|H3.Y_(Primates?) organism=Macaca mulatta phylum=Chordata class=Mammalia +-ARTKQTARKATNWQAPRKPLATKAPGKRLPPRGGIKKPHRYRPGTQALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVNLFEDTNLCAIHARRVTI +MPRDMQLARRIRGEGA----------- +>Pan|HISTDB_H3_Y_2|H3.Y_(Primates?) 
organism=Pan troglodytes phylum=Chordata class=Mammalia +-ARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPRDMQLARRLRREGP----------- +>Pan|HISTDB_H3_Y_3|H3.Y_(Primates?) organism=Pan troglodytes phylum=Chordata class=Mammalia +-ARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISLDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPQDMQLARRLRGEGAREPTLLGNLAL +>Homo|NP_001342187.1|H3.Y.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPRDMQLARRLRREGP----------- +>Homo|NP_001358848.1|H3.Y.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPRDMQLARRLRGEGAGEPTLLGNLAL diff --git a/CURATED_SET/draft_seeds/H3.3-like_(Animals)_only.fasta b/CURATED_SET/draft_seeds/H3.3-like_(Animals)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.3-like_(Plants).fasta b/CURATED_SET/draft_seeds/H3.3-like_(Plants).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.3-like_(Plants)_only.fasta b/CURATED_SET/draft_seeds/H3.3-like_(Plants)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.3.fasta b/CURATED_SET/draft_seeds/H3.3.fasta new file mode 100644 index 0000000..ecf25da --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.3.fasta @@ -0,0 +1,80 @@ +>Plasmodium|XP_966164.1|H3.3 organism=Plasmodium falciparum 3D7 phylum=Apicomplexa class=Aconoidasida +MARTKQTARKSTGGKAPRKQLASKAARKSAP--VSTGIKKPHRYRPGTVALREIRKFQKS +TDLLIRKLPFQRLVREIAQEYKTDLRFQSQAVLALQEAAEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERS +>Plasmodium|XP_725474.1|H3.3 organism=Plasmodium yoelii phylum=Apicomplexa class=Aconoidasida 
+MARTKQTARKSTGGKAPRKQLASKAARKSAP--VSTGIKKPHRYRPGTVALREIRKFQKS +TDLLIRKLPFQRLVREIAQEYKTDLRFQSQAVLALQEAAEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERS +>Toxoplasma|AAM95790.1|H3.3 organism=Toxoplasma gondii phylum=Apicomplexa class=Conoidasida +MARTKQTARKSTGGKAPRKQLASKAARKSAP--MSGGIKKPHRYRPGTVALREIRKFQKS +TDLLIRKLPFQRLVREIAQDFKTDLRFQSQAILALQEAAEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERS +>Drosophila|NP_511095.1|H3.3 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +MARTKQTARKSTGGKAPRKQLATKAARKSAP--STGGVKKPHRYRPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA +>Candida|XP_713710.1|H3.3 organism=Candida albicans SC5314 phylum=Ascomycota class=Saccharomycetes +MARTKQTARKSTGGKAPRKQLASKAARKSAP--STGGVKKPHRYKPGTVALREIRRFQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSSAIGALQEAVEAYLV-GLFEDTNLCAIHAKR +VTIQKKDMQLARRLRGERS +>Debaryomyces|XP_456791.1|H3.3 organism=Debaryomyces hansenii CBS767 phylum=Ascomycota class=Saccharomycetes +MARTKQTARKSTGGKAPRKQLASKAARKSAP--STGGVKKPHRYKPGTVALREIRRFQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSSAIGALQESVEAYLV-SLFEDTNLCAIHAKR +VTIQKKDIQLARRLRGERS +>Saccharomyces|NP_009564.1|H3.3 organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +MARTKQTARKSTGGKAPRKQLASKAARKSAP--STGGVKKPHRYKPGTVALREIRRFQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSSAIGALQESVEAYLV-SLFEDTNLAAIHAKR +VTIQKKDIKLARRLRGERS +>Schizosaccharomyces|P10651.2|H3.3 organism=Schizosaccharomyces pombe 972h- phylum=Ascomycota class=Schizosaccharomycetes +MARTKQTARKSTGGKAPRKQLASKAARKAAP--ATGGVKKPHRYRPPTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSSAIGALQEAVEAYLV-SLFEDTNLCAIHGKR +VTIQPKDMQLARRLRGERS +>Neurospora|XP_956003.1|H3.3 organism=Neurospora crassa OR74A phylum=Ascomycota class=Sordariomycetes +MARTKQTARKSTGGKAPRKQLASKAARKSAP--STGGVKKPHRYKPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKSDLRFQSSAIGALQESVESYLV-SLFEDTNLCAIHAKR +VTIQSKDIQLARRLRGERN +>Trichoderma|XP_006969783.1|H3.3 organism=Trichoderma reesei QM6a phylum=Ascomycota 
class=Sordariomycetes +MARTKQTARKSTGGKAPRKQLASKAARKSAP--STGGVKKPHRYKPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKSDLRFQSSAIGALQESVESYLV-SLFEDTNLCAIHAKR +VTIQSKDIQLARRLRGERN +>Thalassiosira|XP_002293701.1|H3.3 organism=Thalassiosira pseudonana CCMP1335 phylum=Bacillariophyta class=Coscinodiscophyceae +MARTKQTARKSTGGKAPRKQLATKAARKSAP--TAGGVKKPHRYRPGTVALREIRKYQKS +TDLLIRKAPFQRLVREIAQDFKTDLRFQSTAVLALQEASEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA +>Cryptococcus|XP_567545.1|H3.3 organism=Cryptococcus neoformans var. neoformans JEC21 phylum=Basidiomycota class=Tremellomycetes +MARTKQTARKSTGGKAPRKQLATKAARKQAPSQVSGGVKKPHRYRPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSSAIGALQEASEAYLV-SLFEDTNLAAIHAKR +VTIQPKDLQLARRLRGERS +>Mus|NP_032236.1|H3.3 organism=Mus musculus phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAP--STGGVKKPHRYRPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA +>Rattus|NP_446437.1|H3.3 organism=Rattus norvegicus phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAP--STGGVKKPHRYRPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA +>Tetrahymena|XP_001008397.1|H3.3 organism=Tetrahymena thermophila SB210 phylum=Ciliophora class=Oligohymenophorea +MARTKQTARKSTGVKAPRKQLATKAARKSAP--VSGGVKKPHKFRPGTVALREIRKYQKT +TDLLIRKLPFQRLVRDIAMEMKSDIRFQSQAILALQEAAEAYLV-GLFEDTNLCAIHARR +VTIMTKDLHLARRIRGERF +>Phytophthora|XP_002895320.1|H3.3 organism=Phytophthora infestans T30-4 phylum=Oomycota class= +MARTKQTARKSTGGKAPRKQLATKAARKSAP--TAGGVKKPHRYRPGTVALREIRKYQKS +TDLLIRKLPFQRLVREIAQDYKTDLRFQSTAILALQEASEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERT +>Griffithsia|AAP80725.1|H3.3 organism=Griffithsia japonica phylum=Rhodophyta class=Florideophyceae +MARTKQTARKSTGGKAPRKQLATKAARKSAP--ITGGVKKPHRYRPGTVALREIRKFQKS +TDLLIRKLPFQRLVREIAQDFKSDLRFQSSAVLALQEAAEAYLRGGLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA +>Arabidopsis|NP_195713.1|H3.3 organism=Arabidopsis thaliana phylum=Streptophyta 
class=Magnoliopsida +MARTKQTARKSTGGKAPRKQLATKAARKSAP--TTGGVKKPHRYRPGTVALREIRKYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSHAVLALQEAAEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA +>Homo|NP_002098.1|H3.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAP--STGGVKKPHRYRPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA +>Homo|NP_005315.1|H3.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAP--STGGVKKPHRYRPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/H3.3_(Animals).fasta b/CURATED_SET/draft_seeds/H3.3_(Animals).fasta new file mode 100644 index 0000000..a580d5a --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.3_(Animals).fasta @@ -0,0 +1,8 @@ +>Homo|NP_002098.1|H3.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_005315.1|H3.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/H3.3_(Animals)_only.fasta b/CURATED_SET/draft_seeds/H3.3_(Animals)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.3_(Ascomycota).fasta b/CURATED_SET/draft_seeds/H3.3_(Ascomycota).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.3_(Fungi).fasta b/CURATED_SET/draft_seeds/H3.3_(Fungi).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.3_(Fungi)_only.fasta b/CURATED_SET/draft_seeds/H3.3_(Fungi)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git 
a/CURATED_SET/draft_seeds/H3.3_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H3.3_(Homo_sapiens).fasta new file mode 100644 index 0000000..a580d5a --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.3_(Homo_sapiens).fasta @@ -0,0 +1,8 @@ +>Homo|NP_002098.1|H3.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_005315.1|H3.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPSTGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/H3.3_(Plants).fasta b/CURATED_SET/draft_seeds/H3.3_(Plants).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.3_(Protists).fasta b/CURATED_SET/draft_seeds/H3.3_(Protists).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.3_only.fasta b/CURATED_SET/draft_seeds/H3.3_only.fasta new file mode 100644 index 0000000..b567239 --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.3_only.fasta @@ -0,0 +1,72 @@ +>Plasmodium|XP_966164.1|H3.3 organism=Plasmodium falciparum 3D7 phylum=Apicomplexa class=Aconoidasida +MARTKQTARKSTGGKAPRKQLASKAARKSAP--VSTGIKKPHRYRPGTVALREIRKFQKS +TDLLIRKLPFQRLVREIAQEYKTDLRFQSQAVLALQEAAEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERS +>Plasmodium|XP_725474.1|H3.3 organism=Plasmodium yoelii phylum=Apicomplexa class=Aconoidasida +MARTKQTARKSTGGKAPRKQLASKAARKSAP--VSTGIKKPHRYRPGTVALREIRKFQKS +TDLLIRKLPFQRLVREIAQEYKTDLRFQSQAVLALQEAAEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERS +>Toxoplasma|AAM95790.1|H3.3 organism=Toxoplasma gondii phylum=Apicomplexa class=Conoidasida +MARTKQTARKSTGGKAPRKQLASKAARKSAP--MSGGIKKPHRYRPGTVALREIRKFQKS +TDLLIRKLPFQRLVREIAQDFKTDLRFQSQAILALQEAAEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERS +>Drosophila|NP_511095.1|H3.3 organism=Drosophila 
melanogaster phylum=Arthropoda class=Insecta +MARTKQTARKSTGGKAPRKQLATKAARKSAP--STGGVKKPHRYRPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA +>Candida|XP_713710.1|H3.3 organism=Candida albicans SC5314 phylum=Ascomycota class=Saccharomycetes +MARTKQTARKSTGGKAPRKQLASKAARKSAP--STGGVKKPHRYKPGTVALREIRRFQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSSAIGALQEAVEAYLV-GLFEDTNLCAIHAKR +VTIQKKDMQLARRLRGERS +>Debaryomyces|XP_456791.1|H3.3 organism=Debaryomyces hansenii CBS767 phylum=Ascomycota class=Saccharomycetes +MARTKQTARKSTGGKAPRKQLASKAARKSAP--STGGVKKPHRYKPGTVALREIRRFQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSSAIGALQESVEAYLV-SLFEDTNLCAIHAKR +VTIQKKDIQLARRLRGERS +>Saccharomyces|NP_009564.1|H3.3 organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +MARTKQTARKSTGGKAPRKQLASKAARKSAP--STGGVKKPHRYKPGTVALREIRRFQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSSAIGALQESVEAYLV-SLFEDTNLAAIHAKR +VTIQKKDIKLARRLRGERS +>Schizosaccharomyces|P10651.2|H3.3 organism=Schizosaccharomyces pombe 972h- phylum=Ascomycota class=Schizosaccharomycetes +MARTKQTARKSTGGKAPRKQLASKAARKAAP--ATGGVKKPHRYRPPTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSSAIGALQEAVEAYLV-SLFEDTNLCAIHGKR +VTIQPKDMQLARRLRGERS +>Neurospora|XP_956003.1|H3.3 organism=Neurospora crassa OR74A phylum=Ascomycota class=Sordariomycetes +MARTKQTARKSTGGKAPRKQLASKAARKSAP--STGGVKKPHRYKPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKSDLRFQSSAIGALQESVESYLV-SLFEDTNLCAIHAKR +VTIQSKDIQLARRLRGERN +>Trichoderma|XP_006969783.1|H3.3 organism=Trichoderma reesei QM6a phylum=Ascomycota class=Sordariomycetes +MARTKQTARKSTGGKAPRKQLASKAARKSAP--STGGVKKPHRYKPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKSDLRFQSSAIGALQESVESYLV-SLFEDTNLCAIHAKR +VTIQSKDIQLARRLRGERN +>Thalassiosira|XP_002293701.1|H3.3 organism=Thalassiosira pseudonana CCMP1335 phylum=Bacillariophyta class=Coscinodiscophyceae +MARTKQTARKSTGGKAPRKQLATKAARKSAP--TAGGVKKPHRYRPGTVALREIRKYQKS +TDLLIRKAPFQRLVREIAQDFKTDLRFQSTAVLALQEASEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA 
+>Cryptococcus|XP_567545.1|H3.3 organism=Cryptococcus neoformans var. neoformans JEC21 phylum=Basidiomycota class=Tremellomycetes +MARTKQTARKSTGGKAPRKQLATKAARKQAPSQVSGGVKKPHRYRPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSSAIGALQEASEAYLV-SLFEDTNLAAIHAKR +VTIQPKDLQLARRLRGERS +>Mus|NP_032236.1|H3.3 organism=Mus musculus phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAP--STGGVKKPHRYRPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA +>Rattus|NP_446437.1|H3.3 organism=Rattus norvegicus phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAP--STGGVKKPHRYRPGTVALREIRRYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA +>Tetrahymena|XP_001008397.1|H3.3 organism=Tetrahymena thermophila SB210 phylum=Ciliophora class=Oligohymenophorea +MARTKQTARKSTGVKAPRKQLATKAARKSAP--VSGGVKKPHKFRPGTVALREIRKYQKT +TDLLIRKLPFQRLVRDIAMEMKSDIRFQSQAILALQEAAEAYLV-GLFEDTNLCAIHARR +VTIMTKDLHLARRIRGERF +>Phytophthora|XP_002895320.1|H3.3 organism=Phytophthora infestans T30-4 phylum=Oomycota class= +MARTKQTARKSTGGKAPRKQLATKAARKSAP--TAGGVKKPHRYRPGTVALREIRKYQKS +TDLLIRKLPFQRLVREIAQDYKTDLRFQSTAILALQEASEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERT +>Griffithsia|AAP80725.1|H3.3 organism=Griffithsia japonica phylum=Rhodophyta class=Florideophyceae +MARTKQTARKSTGGKAPRKQLATKAARKSAP--ITGGVKKPHRYRPGTVALREIRKFQKS +TDLLIRKLPFQRLVREIAQDFKSDLRFQSSAVLALQEAAEAYLRGGLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA +>Arabidopsis|NP_195713.1|H3.3 organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MARTKQTARKSTGGKAPRKQLATKAARKSAP--TTGGVKKPHRYRPGTVALREIRKYQKS +TELLIRKLPFQRLVREIAQDFKTDLRFQSHAVLALQEAAEAYLV-GLFEDTNLCAIHAKR +VTIMPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/H3.4_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H3.4_(Homo_sapiens).fasta new file mode 100644 index 0000000..31c68a9 --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.4_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ 
+>Homo|NP_003484.1|H3.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKVARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLMREIAQDFKTDLRFQSSAVMALQEACESYLVGLFEDTNLCVIHAKRVTI +MPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/H3.4_(Mammalia).fasta b/CURATED_SET/draft_seeds/H3.4_(Mammalia).fasta new file mode 100644 index 0000000..18e9608 --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.4_(Mammalia).fasta @@ -0,0 +1,8 @@ +>Pan|XP_003804825.1|H3.4_(Mammalia) organism=Pan paniscus phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLVTKVARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLMREIAQDFKTDLRFQSSAVMALQEACESYLVGLFEDTNLCVIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003484.1|H3.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKVARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLMREIAQDFKTDLRFQSSAVMALQEACESYLVGLFEDTNLCVIHAKRVTI +MPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/H3.4_(Mammalia)_only.fasta b/CURATED_SET/draft_seeds/H3.4_(Mammalia)_only.fasta new file mode 100644 index 0000000..5525c12 --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.4_(Mammalia)_only.fasta @@ -0,0 +1,4 @@ +>Pan|XP_003804825.1|H3.4_(Mammalia) organism=Pan paniscus phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLVTKVARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLMREIAQDFKTDLRFQSSAVMALQEACESYLVGLFEDTNLCVIHAKRVTI +MPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/H3.5_(Hominidae).fasta b/CURATED_SET/draft_seeds/H3.5_(Hominidae).fasta new file mode 100644 index 0000000..ac8ec13 --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.5_(Hominidae).fasta @@ -0,0 +1,8 @@ +>Pan|XP_003954426.1|H3.5_(Hominidae) organism=Pan troglodytes phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSTPSTXGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFNTDLRFQSAAVGALQEASEAYLVGLLEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_001013721.2|H3.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+MARTKQTARKSTGGKAPRKQLATKAARKSTPSTCGV-KPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFNTDLRFQSAAVGALQEASEAYLVGLLEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/H3.5_(Hominidae)_only.fasta b/CURATED_SET/draft_seeds/H3.5_(Hominidae)_only.fasta new file mode 100644 index 0000000..b3f0e95 --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.5_(Hominidae)_only.fasta @@ -0,0 +1,4 @@ +>Pan|XP_003954426.1|H3.5_(Hominidae) organism=Pan troglodytes phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSTPSTXGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFNTDLRFQSAAVGALQEASEAYLVGLLEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/H3.5_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H3.5_(Homo_sapiens).fasta new file mode 100644 index 0000000..f90351f --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.5_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_001013721.2|H3.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSTPSTCGVKPHRYRPGTVALREIRRYQKSTEL +LIRKLPFQRLVREIAQDFNTDLRFQSAAVGALQEASEAYLVGLLEDTNLCAIHAKRVTIM +PKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/H3.6_(Mammals?)?.fasta b/CURATED_SET/draft_seeds/H3.6_(Mammals?)?.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.7_(Mammals?)?.fasta b/CURATED_SET/draft_seeds/H3.7_(Mammals?)?.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.8_(Mammals?)?.fasta b/CURATED_SET/draft_seeds/H3.8_(Mammals?)?.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.B_(Giardia?).fasta b/CURATED_SET/draft_seeds/H3.B_(Giardia?).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.P_(Moneuplotes?).fasta b/CURATED_SET/draft_seeds/H3.P_(Moneuplotes?).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.V_(Trypanosomatidae).fasta 
b/CURATED_SET/draft_seeds/H3.V_(Trypanosomatidae).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.X_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H3.X_(Homo_sapiens).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.X_(Primates?).fasta b/CURATED_SET/draft_seeds/H3.X_(Primates?).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.X_(Primates?)_only.fasta b/CURATED_SET/draft_seeds/H3.X_(Primates?)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.Y.1_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H3.Y.1_(Homo_sapiens).fasta new file mode 100644 index 0000000..633d6bf --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.Y.1_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_001342187.1|H3.Y.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPRDMQLARRLRREGP diff --git a/CURATED_SET/draft_seeds/H3.Y.2_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H3.Y.2_(Homo_sapiens).fasta new file mode 100644 index 0000000..6a582f9 --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.Y.2_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_001358848.1|H3.Y.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPRDMQLARRLRGEGAGEPTLLGNLAL diff --git a/CURATED_SET/draft_seeds/H3.Y_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/H3.Y_(Homo_sapiens).fasta new file mode 100644 index 0000000..9faf989 --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.Y_(Homo_sapiens).fasta @@ -0,0 +1,8 @@ +>Homo|NP_001342187.1|H3.Y.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKPGTLALREIRKYQKSTQ 
+LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPRDMQLARRLRREGP----------- +>Homo|NP_001358848.1|H3.Y.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPRDMQLARRLRGEGAGEPTLLGNLAL diff --git a/CURATED_SET/draft_seeds/H3.Y_(Homo_sapiens)_only.fasta b/CURATED_SET/draft_seeds/H3.Y_(Homo_sapiens)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3.Y_(Primates?).fasta b/CURATED_SET/draft_seeds/H3.Y_(Primates?).fasta new file mode 100644 index 0000000..ced4ca7 --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.Y_(Primates?).fasta @@ -0,0 +1,24 @@ +>Macaca|HISTDB_H3_Y_0|H3.Y_(Primates?) organism=Macaca mulatta phylum=Chordata class=Mammalia +-ARTKQTARKATNWQAPRKPLATKAAAKRAPPRGGIKKPHRYKPGTQALREIRKYQKSTQ +LLLRKLPFQCLVREIAQVISLDLRFQSAAIGALQEASEAYLVNLFEDTNLCAIHARRVTI +MPRDMQLARRIRGEGAXEPTLLGNVAL +>Macaca|HISTDB_H3_Y_1|H3.Y_(Primates?) organism=Macaca mulatta phylum=Chordata class=Mammalia +-ARTKQTARKATNWQAPRKPLATKAPGKRLPPRGGIKKPHRYRPGTQALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVNLFEDTNLCAIHARRVTI +MPRDMQLARRIRGEGA----------- +>Pan|HISTDB_H3_Y_2|H3.Y_(Primates?) organism=Pan troglodytes phylum=Chordata class=Mammalia +-ARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPRDMQLARRLRREGP----------- +>Pan|HISTDB_H3_Y_3|H3.Y_(Primates?) 
organism=Pan troglodytes phylum=Chordata class=Mammalia +-ARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISLDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPQDMQLARRLRGEGAREPTLLGNLAL +>Homo|NP_001342187.1|H3.Y.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPRDMQLARRLRREGP----------- +>Homo|NP_001358848.1|H3.Y.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKPGTLALREIRKYQKSTQ +LLLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTI +MPRDMQLARRLRGEGAGEPTLLGNLAL diff --git a/CURATED_SET/draft_seeds/H3.Y_(Primates?)_only.fasta b/CURATED_SET/draft_seeds/H3.Y_(Primates?)_only.fasta new file mode 100644 index 0000000..34d5677 --- /dev/null +++ b/CURATED_SET/draft_seeds/H3.Y_(Primates?)_only.fasta @@ -0,0 +1,16 @@ +>Macaca|HISTDB_H3_Y_0|H3.Y_(Primates?) organism=Macaca mulatta phylum=Chordata class=Mammalia +ARTKQTARKATNWQAPRKPLATKAAAKRAPPRGGIKKPHRYKPGTQALREIRKYQKSTQL +LLRKLPFQCLVREIAQVISLDLRFQSAAIGALQEASEAYLVNLFEDTNLCAIHARRVTIM +PRDMQLARRIRGEGAXEPTLLGNVAL +>Macaca|HISTDB_H3_Y_1|H3.Y_(Primates?) organism=Macaca mulatta phylum=Chordata class=Mammalia +ARTKQTARKATNWQAPRKPLATKAPGKRLPPRGGIKKPHRYRPGTQALREIRKYQKSTQL +LLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVNLFEDTNLCAIHARRVTIM +PRDMQLARRIRGEGA----------- +>Pan|HISTDB_H3_Y_2|H3.Y_(Primates?) organism=Pan troglodytes phylum=Chordata class=Mammalia +ARTKQTARKATAWQAPRKPLATKAAGKRAPPTGGIKKPHRYKPGTLALREIRKYQKSTQL +LLRKLPFQRLVREIAQAISPDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTIM +PRDMQLARRLRREGP----------- +>Pan|HISTDB_H3_Y_3|H3.Y_(Primates?) 
organism=Pan troglodytes phylum=Chordata class=Mammalia +ARTKQTARKATAWQAPRKPLATKAARKRASPTGGIKKPHRYKPGTLALREIRKYQKSTQL +LLRKLPFQRLVREIAQAISLDLRFQSAAIGALQEASEAYLVQLFEDTNLCAIHARRVTIM +PQDMQLARRLRGEGAREPTLLGNLAL diff --git a/CURATED_SET/draft_seeds/H3.fasta b/CURATED_SET/draft_seeds/H3.fasta old mode 100755 new mode 100644 index e69de29..def141b --- a/CURATED_SET/draft_seeds/H3.fasta +++ b/CURATED_SET/draft_seeds/H3.fasta @@ -0,0 +1,810 @@ +>Plasmodium|XP_001350068.1|cenH3_(Eukarya) organism=Plasmodium falciparum 3D7 phylum=Apicomplexa class=Aconoidasida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------MVRTKKNIPNHNPLNAF-----------------NRDKSFKTNK +TLP-------NRTVHHGISSKTTNINRPSVNRGGINEVAQKNL--HRTNIRK-------- +-----------PHR-YRPGVLALKEIRAYQASTQLLIPKIPFVRVVKEITRLFELP---D +-E-QFRYTPEALLALQTASEAYLV-SLFEDAYLCSLHANRVTLMPKDIHLARRIRGRD-- +------------- +>Thalassiosira|XP_002287626.1|cenH3_(Eukarya) organism=Thalassiosira pseudonana CCMP1335 phylum=Bacillariophyta class=Coscinodiscophyceae +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------MRPGEKALREIRQYQSSTSLLLRRLPFARLVREIQ--YGMT---R +-Q-PYRWQGSAILALQEAAEAHLV-GLFEDSNLCAIHGKRVTIMPKDMQLARRIRGWVRE +------------- +>Tetrahymena|XP_001011273.1|cenH3_(Eukarya) organism=Tetrahymena thermophila SB210 phylum=Ciliophora class=Oligohymenophorea +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +-----------------MARKAYQPKRRSNSNQ-----------------NQQRSDSLKK +NKQ-----DNLRSKSAGNQQGNEKNKKDIQDQRNKASTKKKRE--SSGEKYE-------- +-----SARDKVIRR-FRPGDNALKQLRQYNQTPSLLIRKLPFQRLIREIS--TRMT---E +-EDSLRWTSFALVLLQTVVEDYMV-SFFEDANACALHAKRVTLMSKDLALAARIRGQKNV +TGIFIPTKK---- +>Phytophthora|XP_009526809.1|cenH3_(Eukarya) organism=Phytophthora sojae phylum=Oomycota class= +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------------------MASPRPPALP +ASDASSSSVTDSGTDDSTPPNSPQPQRSPSPPF-----------------AATRGPAAAR +APP--AAPSSVSSASTPSPPPNLPARRPSVAPVGRQAAIHATP--TAPRRKRKATPPRRR +PSSPASATQTVKKR-RKPGEAALREIRLLQRSTKLLLRKLPFARVVREIQ--TEFT---G +-V-GYRWQAEALLALQEAAETYLV-RTFEDAYVGG------------------------- +------------- +>Perkinsus|XP_002767160.1|cenH3_(Eukarya) organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +MVGVENLGVGFDELLTRGGCGVRDDAVEIAFRGVEGLEDVLKDYMVRNKDGKILSVARPV +DAEHSEELLGLAAAIGRSYGSLICAAAHNGGVRLPVGKGDDDGDSNNSSDEEADSGCGGA +AEGDEAGDVGAGAGDVGDGAGDGAAEGDGAGDAGNGAGDVGDVGDGAGDGAAEGDGAGDG +AADDAHGAGDDGEGSRNGGPPLVVQMMVLVMMNGNGNGADDGGNGVDDGEGDGDGHQGNV +EGDGHGDGQDDGDGEGSVDSSGNGGDSEPSLEV-----------------SREGSENRPK +LLPPVEGRTSSSAAAIAAPPVPSAGSHIITGSGGKVPTAGKRP--RQFVKKS-------- +-----SAK---KGR-YRPGTVALREIRRHQEITDPLIEKRCFQALARSLS--REVE---A +---SMRWQPQSLVALQEASESFIV-GMLEASQLLAVHGRRITLMEKDVKMWTRLAAMFGS +TTFMDQEKQVGGT +>Drosophila|NP_523730.2|cenH3_(Animals) organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----MPRHSRAKRAPRPSANNSKSPNDD-----DTAFRSPEPEDGTDYGLEFTTSQLTLQ 
+DNNRRSSTL-----------RRDAGRRQPAARD---------------------SSTSGE +EEDQENRYPTTRSPQTRRMTVQQESKTRAAGPVAAQNQTRRRK--AANPMSR-------- +-----------AKR-MD------REIRRLQHHPGTLIPKLPFSRLVREFI--VKYS---D +-DEPLRVTEGALLAMQESCEMYLT-QRLADSYMLTKHRNRVTLEVRDMALMAYICDRGRQ +F------------ +>Caenorhabditis|NP_499128.1|cenH3_(Animals) organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------------------MADDTPIIEEIAEQNESVT +RIMQRLKHDMQRVTSVPGFNTSAAGVNDLIDILNQYKKELEDDAANDY-TEAHIHKIRLV +TGKRNQYVLKLKQAEDEYHARKEQARRRASSMDFTVGRNSTNLVDYSHGRHHMPSYRRHD +SSDEENYSMDGTNGDGNRAGPSNPDRGNRTGP-SSSDRVRMRA--GRNRVTK-------- +-----------TRR-YRPGQKALEEIRKYQKTEDLLIQKAPFARLVREIM--QTST---P +FGADCRIRSDAISALQEAAEAFLV-EMFEGSSLISTHAKRVTLMTTDIQLYRRLCL--RH +L------------ +>Mus|NP_031707.1|cenH3_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----MGPR---R------------------------------------------------ +--------------------KPQTPRRRPSS----------------------------- +------------------PAPGPSRQSSSVGS-QTLRR-RQKF--M-------------- +---------------------WLKEIKTLQKSTDLLFRKKPFSMVVREIC--EKFS---R +-GVDFWWQAQALLALQEAAEAFLI-HLFEDAYLLSLHAGRVTLFPKDIQLTRRIRGFEGG +LP----------- +>Homo|NP_001035891.1|cenH3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----MGPRRRSR------------------------------------------------ +--------------------KPEAPRRRSPSPT--------------------------- +------------------PTPGPSRRGPSLGA-SSHQHSRRRQ--G-------------- 
+---------------------WLKEIRKLQKSTHLLIRKLPFSRL--------------- +-----------------AAEAFLV-HLFEDAYLLTLHAGRVTLFPKDVQLARRIRGLEEG +LG----------- +>Homo|NP_001800.1|cenH3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----MGPRRRSR------------------------------------------------ +--------------------KPEAPRRRSPSPT--------------------------- +------------------PTPGPSRRGPSLGA-SSHQHSRRRQ--G-------------- +---------------------WLKEIRKLQKSTHLLIRKLPFSRLAREIC--VKFT---R +-GVDFNWQAQALLALQEAAEAFLV-HLFEDAYLLTLHAGRVTLFPKDVQLARRIRGLEEG +LG----------- +>Saccharomyces|NP_012875.2|cenH3_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------MSSKQQWVSSAIQSDSSGRSLSNVNRLA +GDQQSINDRALSLLQRTRATKNLFPRREERRRY----------------ESSKSDLDIET +DYEDQAGNLEIETENEEEAEMETEVPAPVRTHSYALDRYVRQK--RREKQRK-------- +-----QSLKRVEKK-YTPSELALYEIRKYQRSTDLLISKIPFARLVKEVT--DEFT---T +KDQDLRWQSMAIMALQEASEAYLV-GLLEHTNLLALHAKRITIMKKDMQLARRIRGQFI- +------------- +>Schizosaccharomyces|NP_596473.1|cenH3_(Fungi) organism=Schizosaccharomyces pombe phylum=Ascomycota class=Schizosaccharomycetes +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------MAKKSLM------------------------------------ +------------------AEPGDPIPRPRK------------------------------ +------------KR-YRPGTTALREIRKYQRSTDLLIQRLPFSRIVREIS--SEFVANFS 
+TDVGLRWQSTALQCLQEAAEAFLV-HLFEDTNLCAIHAKRVTIMQRDMQLARRIRGA--- +------------- +>Guillardia|AAK39657.1|cenH3_(Plants) organism=Guillardia theta phylum= class=Cryptophyceae +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------------MMKKQNLK------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------R-FKKSSNSLVDIRKFQKSTDLLIHRLPFARLVKEIS--LKYH---H +-S--LNWQQVAVEALQFASEDYII-GLLQDANLAAIHAKRVTVMPKDLKLAKIIRGEH-- +------------- +>Cyanidioschyzon|XP_005535607.1|cenH3_(Plants) organism=Cyanidioschyzon merolae strain 10D phylum=Rhodophyta class=Bangiophyceae +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-MVRVFGNPVGGAHQYRQTQALNRDESLREQLQRTTVAEQDENIEPEPERSSPDEAQSRL +KSLLGSVARSSSLLGRRVAGPPGSSLSNLAETRLLTEEAANRPTGAGRSERVSAGRAEPL +PPGPTTATTNADANTVQRGLSVASAFPPRAPLQAAGFTVARRT--TRSGVQR-------- +-----------KHR-FRPGSRAIMEIRKFQRSTELLLRRLPFARLVREIC--ERLF---G +-SSAFRWQASALEALQTAAEDYLI-HLFEDSNLCAIHARRVTIMPRDIALARRIRGYHSD +PHGYL-------- +>Arabidopsis|NP_563627.1|cenH3_(Plants) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------MARTKHRVTRSQPRNQTDA------------------------------- +-----------------------------------------------------AGASSSQ +AAGPTTTPTRRGGEGGDNTQQTNPTTSPATGTRRGAKRSRQAM--PRGSQKK-------- +-----------SYR-YRPGTVALKEIRHFQKQTNLLIPAASFIREVRSIT--HMLA---P +-PQINRWTAEALVALQEAAEDYLV-GLFSDSMLCAIHARRVTLMRKDFELARRLGGKGRP +W------------ +>Guillardia|XP_001713521.1|cH3 organism=Guillardia 
theta phylum= class=Cryptophyceae +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKTVGGDI-NSRISEKRAKSD---NS--KNIDLKK-------- +-----------VHR-YKPGTVALREIRKYQKSTNLLIRKLPFQRLVRELA--QDYK---S +---DLRFQNSAVLALQEASESYLV-NLFEDTNLCAIHAKRVTIMPKDIYLARRIRGEMIF +------------- +>Plasmodium|XP_724814.1|cH3 organism=Plasmodium yoelii phylum=Apicomplexa class=Aconoidasida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTAGKAPRKQLASKAARKS---AP--ISAGIKK-------- +-----------PHR-YRPGTVALREIRRYQKSTDLLIRKLPFQRLVREIA--QDYK---T +---DLRFQSSAVMALQEAAEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERS- +------------- +>Drosophila|CAA32434.1|cH3 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEASEAYLV-GLFEDTNLCAIHAKRITIMPKDIQLARRIRGERA- +------------- +>Drosophila|NP_001027285.1|cH3 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEASEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Candida|XP_719887.1|cH3 organism=Candida albicans SC5314 phylum=Ascomycota class=Saccharomycetes +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLASKAARKS---AP--VSGGVKK-------- +-----------PHR-YKPGTVALREIRRFQKSTELLIRKLPFQRLVREIA--QDFK---S +---DLRFQSSAIGALQEAVEAYLV-GLFEDTNLCAIHAKRVTIQKKDMQLARRLRGERS- +------------- +>Debaryomyces|XP_460476.1|cH3 organism=Debaryomyces hansenii CBS767 phylum=Ascomycota class=Saccharomycetes +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLASKAARKS---AP--VSGGVKK-------- +-----------PHR-YKPGTVALREIRRFQKSTELLIRKLPFQRLVREIA--QDFK---S +---DLRFQSSAIGALQEAVEAYLV-SLFEDTNLCAIHAKRVTIQKKDIQLARRLRGERS- +------------- +>Thalassiosira|XP_002288694.1|cH3 organism=Thalassiosira pseudonana CCMP1335 phylum=Bacillariophyta class=Coscinodiscophyceae +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTDLLIRKLPFQRLVREIA--QDFK---S +---DLRFQGSAVLALQEAAEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERS- +------------- +>Xenopus|NP_001091119.1|cH3 organism=Xenopus laevis phylum=Chordata class=Amphibia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEASEAYLV-GLFEDTNLCGIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Gallus|NP_001268409.1|cH3 organism=Gallus gallus phylum=Chordata class=Aves +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEASEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Macaca|NP_001180643.1|cH3 organism=Macaca mulatta phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T 
+---DLRFQSSAVMALQEACEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Mus|NP_659539.1|cH3 organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEACEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Pan|XP_527254.2|cH3 organism=Pan troglodytes phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEACEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Tetrahymena|XP_001016594.3|cH3 organism=Tetrahymena thermophila SB210 phylum=Ciliophora class=Oligohymenophorea +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGAKAPRKQLASKAARKS---AP--ATGGIKK-------- +-----------PHR-FRPGTVALREIRKYQKSTDLLIRKLPFQRLVRDIA--HEFK---A +---ELRFQSSAVLALQEAAEAYLV-GLFEDTNLCAIHARRVTIMTKDMQLARRIRGERF- +------------- +>Trypanosoma|XP_001218942.1|cH3 organism=Trypanosoma brucei brucei TREU927 phylum=Euglenozoa class=Kinetoplastea 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MSRTKETART-------KKTITSKKSKKASKGSD--AASGVKT-------- +-----------AQRRWRPGTVALREIRQFQRSTDLLLQKAPFQRLVREVS--GAQK---E +---GLRFQSSAILAAQEATESYIV-SLLADTNRACIHSGRVTIQPKDIHLALCLRGERA- +------------- +>Caenorhabditis|NP_509344.1|cH3 organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--TTGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Phytophthora|XP_002999294.1|cH3 organism=Phytophthora infestans T30-4 phylum=Oomycota class= +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQGSAVLALQEAAEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERS- +------------- +>Perkinsus|XP_002788889.1|cH3 organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLASKAARKS---TP--STGGIKK-------- +-----------PHR-YRPGTVALREIRRYQKSTDLLIRKLPFQRLVREVA--QDFK---T +---DLRFQSSAVMALQEAAEAYLV-GLFEDTNLCAIHAKRVTIMPKDMQLARRIRGERS- +------------- +>Cyanidioschyzon|XP_005537317.1|cH3 organism=Cyanidioschyzon merolae strain 10D phylum=Rhodophyta class=Bangiophyceae +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--SVGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVLALQEAAEAYLV-NLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Griffithsia|Q7XYZ0.3|cH3 organism=Griffithsia japonica phylum=Rhodophyta class=Florideophyceae +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ASGGVKK-------- +-----------PHR-FRPGTVALREIRRFQKSTELLVRKLPFQRLVREIA--QDFK---S +---DLRFQSSAVLALQEAAEAYMV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERT- +------------- +>Selaginella|XP_002977792.1|cH3 organism=Selaginella moellendorffii phylum=Streptophyta class=Lycopodiopsida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--TTGGVKK-------- +-----------PHR-YRPGTVALREIRKYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSHAVLALQEAAEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Arabidopsis|NP_189372.1|cH3 organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-FRPGTVALREIRKYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVAALQEAAEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Oryza|NP_001044904.1|cH3 organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-FRPGTVALREIRKYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVAALQEAAEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Oryza|NP_001050276.1|cH3 organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--TTGGVKK-------- 
+-----------PHR-YRPGTVALREIRKYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSHAVLALQEAAEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_003520.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEACEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_003521.2|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEACEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_003522.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEACEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_003523.1|cH3.1_(Homo_sapiens) 
organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEACEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_003524.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEACEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_003525.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEACEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_003526.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEACEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_003527.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEACEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_003528.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEACEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_066298.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEACEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_001005464.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEASEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_001116847.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAVMALQEASEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_066403.2|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T 
+---DLRFQSSAVMALQEASEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Plasmodium|XP_966164.1|H3.3 organism=Plasmodium falciparum 3D7 phylum=Apicomplexa class=Aconoidasida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLASKAARKS---AP--VSTGIKK-------- +-----------PHR-YRPGTVALREIRKFQKSTDLLIRKLPFQRLVREIA--QEYK---T +---DLRFQSQAVLALQEAAEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERS- +------------- +>Plasmodium|XP_725474.1|H3.3 organism=Plasmodium yoelii phylum=Apicomplexa class=Aconoidasida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLASKAARKS---AP--VSTGIKK-------- +-----------PHR-YRPGTVALREIRKFQKSTDLLIRKLPFQRLVREIA--QEYK---T +---DLRFQSQAVLALQEAAEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERS- +------------- +>Toxoplasma|AAM95790.1|H3.3 organism=Toxoplasma gondii phylum=Apicomplexa class=Conoidasida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLASKAARKS---AP--MSGGIKK-------- +-----------PHR-YRPGTVALREIRKFQKSTDLLIRKLPFQRLVREIA--QDFK---T +---DLRFQSQAILALQEAAEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERS- +------------- +>Drosophila|NP_511095.1|H3.3 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--STGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Candida|XP_713710.1|H3.3 organism=Candida albicans SC5314 phylum=Ascomycota class=Saccharomycetes +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLASKAARKS---AP--STGGVKK-------- +-----------PHR-YKPGTVALREIRRFQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAIGALQEAVEAYLV-GLFEDTNLCAIHAKRVTIQKKDMQLARRLRGERS- +------------- +>Debaryomyces|XP_456791.1|H3.3 organism=Debaryomyces hansenii CBS767 phylum=Ascomycota class=Saccharomycetes +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLASKAARKS---AP--STGGVKK-------- +-----------PHR-YKPGTVALREIRRFQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAIGALQESVEAYLV-SLFEDTNLCAIHAKRVTIQKKDIQLARRLRGERS- +------------- +>Saccharomyces|NP_009564.1|H3.3 organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLASKAARKS---AP--STGGVKK-------- +-----------PHR-YKPGTVALREIRRFQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAIGALQESVEAYLV-SLFEDTNLAAIHAKRVTIQKKDIKLARRLRGERS- +------------- +>Schizosaccharomyces|P10651.2|H3.3 organism=Schizosaccharomyces pombe 972h- phylum=Ascomycota class=Schizosaccharomycetes +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLASKAARKA---AP--ATGGVKK-------- +-----------PHR-YRPPTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAIGALQEAVEAYLV-SLFEDTNLCAIHGKRVTIQPKDMQLARRLRGERS- +------------- +>Neurospora|XP_956003.1|H3.3 organism=Neurospora crassa OR74A phylum=Ascomycota class=Sordariomycetes +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLASKAARKS---AP--STGGVKK-------- +-----------PHR-YKPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---S +---DLRFQSSAIGALQESVESYLV-SLFEDTNLCAIHAKRVTIQSKDIQLARRLRGERN- +------------- +>Trichoderma|XP_006969783.1|H3.3 organism=Trichoderma reesei QM6a phylum=Ascomycota class=Sordariomycetes +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLASKAARKS---AP--STGGVKK-------- +-----------PHR-YKPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---S +---DLRFQSSAIGALQESVESYLV-SLFEDTNLCAIHAKRVTIQSKDIQLARRLRGERN- +------------- +>Thalassiosira|XP_002293701.1|H3.3 organism=Thalassiosira pseudonana CCMP1335 phylum=Bacillariophyta class=Coscinodiscophyceae +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--TAGGVKK-------- +-----------PHR-YRPGTVALREIRKYQKSTDLLIRKAPFQRLVREIA--QDFK---T +---DLRFQSTAVLALQEASEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Cryptococcus|XP_567545.1|H3.3 organism=Cryptococcus neoformans var. neoformans JEC21 phylum=Basidiomycota class=Tremellomycetes +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKQ---APSQVSGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSSAIGALQEASEAYLV-SLFEDTNLAAIHAKRVTIQPKDLQLARRLRGERS- +------------- +>Mus|NP_032236.1|H3.3 organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--STGGVKK-------- 
+-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Rattus|NP_446437.1|H3.3 organism=Rattus norvegicus phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--STGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Tetrahymena|XP_001008397.1|H3.3 organism=Tetrahymena thermophila SB210 phylum=Ciliophora class=Oligohymenophorea +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGVKAPRKQLATKAARKS---AP--VSGGVKK-------- +-----------PHK-FRPGTVALREIRKYQKTTDLLIRKLPFQRLVRDIA--MEMK---S +---DIRFQSQAILALQEAAEAYLV-GLFEDTNLCAIHARRVTIMTKDLHLARRIRGERF- +------------- +>Phytophthora|XP_002895320.1|H3.3 organism=Phytophthora infestans T30-4 phylum=Oomycota class= +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--TAGGVKK-------- +-----------PHR-YRPGTVALREIRKYQKSTDLLIRKLPFQRLVREIA--QDYK---T +---DLRFQSTAILALQEASEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERT- +------------- +>Griffithsia|AAP80725.1|H3.3 
organism=Griffithsia japonica phylum=Rhodophyta class=Florideophyceae +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--ITGGVKK-------- +-----------PHR-YRPGTVALREIRKFQKSTDLLIRKLPFQRLVREIA--QDFK---S +---DLRFQSSAVLALQEAAEAYLRGGLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Arabidopsis|NP_195713.1|H3.3 organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--TTGGVKK-------- +-----------PHR-YRPGTVALREIRKYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSHAVLALQEAAEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_002098.1|H3.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--STGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_005315.1|H3.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---AP--STGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFK---T +---DLRFQSAAIGALQEASEAYLV-GLFEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Macaca|HISTDB_H3_Y_0|H3.Y_(Primates?) organism=Macaca mulatta phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------ARTKQTARKATNWQAPRKPLATKAAAKR---AP--PRGGIKK-------- +-----------PHR-YKPGTQALREIRKYQKSTQLLLRKLPFQCLVREIA--QVIS---L +---DLRFQSAAIGALQEASEAYLV-NLFEDTNLCAIHARRVTIMPRDMQLARRIRGEGAX +EPTLLGNVAL--- +>Macaca|HISTDB_H3_Y_1|H3.Y_(Primates?) organism=Macaca mulatta phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------ARTKQTARKATNWQAPRKPLATKAPGKR---LP--PRGGIKK-------- +-----------PHR-YRPGTQALREIRKYQKSTQLLLRKLPFQRLVREIA--QAIS---P +---DLRFQSAAIGALQEASEAYLV-NLFEDTNLCAIHARRVTIMPRDMQLARRIRGEGA- +------------- +>Pan|HISTDB_H3_Y_2|H3.Y_(Primates?) 
organism=Pan troglodytes phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------ARTKQTARKATAWQAPRKPLATKAAGKR---AP--PTGGIKK-------- +-----------PHR-YKPGTLALREIRKYQKSTQLLLRKLPFQRLVREIA--QAIS---P +---DLRFQSAAIGALQEASEAYLV-QLFEDTNLCAIHARRVTIMPRDMQLARRLRREGP- +------------- +>Pan|HISTDB_H3_Y_3|H3.Y_(Primates?) organism=Pan troglodytes phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------ARTKQTARKATAWQAPRKPLATKAARKR---AS--PTGGIKK-------- +-----------PHR-YKPGTLALREIRKYQKSTQLLLRKLPFQRLVREIA--QAIS---L +---DLRFQSAAIGALQEASEAYLV-QLFEDTNLCAIHARRVTIMPQDMQLARRLRGEGAR +EPTLLGNLAL--- +>Homo|NP_001342187.1|H3.Y.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKATAWQAPRKPLATKAAGKR---AP--PTGGIKK-------- +-----------PHR-YKPGTLALREIRKYQKSTQLLLRKLPFQRLVREIA--QAIS---P +---DLRFQSAAIGALQEASEAYLV-QLFEDTNLCAIHARRVTIMPRDMQLARRLRREGP- +------------- +>Homo|NP_001358848.1|H3.Y.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKATAWQAPRKPLATKAARKR---AS--PTGGIKK-------- +-----------PHR-YKPGTLALREIRKYQKSTQLLLRKLPFQRLVREIA--QAIS---P +---DLRFQSAAIGALQEASEAYLV-QLFEDTNLCAIHARRVTIMPRDMQLARRLRGEGAG +EPTLLGNLAL--- +>Pan|XP_003804825.1|H3.4_(Mammalia) organism=Pan paniscus phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLVTKVARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLMREIA--QDFK---T +---DLRFQSSAVMALQEACESYLV-GLFEDTNLCVIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_003484.1|H3.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKVARKS---AP--ATGGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLMREIA--QDFK---T +---DLRFQSSAVMALQEACESYLV-GLFEDTNLCVIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Pan|XP_003954426.1|H3.5_(Hominidae) organism=Pan troglodytes phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+---------MARTKQTARKSTGGKAPRKQLATKAARKS---TP--STXGVKK-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFN---T +---DLRFQSAAVGALQEASEAYLV-GLLEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- +>Homo|NP_001013721.2|H3.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------MARTKQTARKSTGGKAPRKQLATKAARKS---TP--STCGV-K-------- +-----------PHR-YRPGTVALREIRRYQKSTELLIRKLPFQRLVREIA--QDFN---T +---DLRFQSAAVGALQEASEAYLV-GLLEDTNLCAIHAKRVTIMPKDIQLARRIRGERA- +------------- diff --git a/CURATED_SET/draft_seeds/H3_(Lilly???).fasta b/CURATED_SET/draft_seeds/H3_(Lilly???).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/H3_only.fasta b/CURATED_SET/draft_seeds/H3_only.fasta old mode 100755 new mode 100644 diff --git a/CURATED_SET/draft_seeds/H4.fasta b/CURATED_SET/draft_seeds/H4.fasta old mode 100755 new mode 100644 index 0e7a6b5..07eb735 --- a/CURATED_SET/draft_seeds/H4.fasta +++ b/CURATED_SET/draft_seeds/H4.fasta @@ -1,81 +1,78 @@ ->canonical_H4_Trypanosoma_brucei_brucei_TREU927_XP_951561.1 XP_951561.1 histone: H4 variant: canonical_H4 organism: Trypanosoma brucei brucei TREU927 ----MAKGKRVGESKGAQKRQ-KKVLRDNVRGITRGSIRRLARRAGVKRISGVIYDEVRGV -LKTFVESIVRDAGAYTEYSRKKTVTAAHVVFALRKRGKVLYGYD- ->canonical_H4_Tetrahymena_thermophila_SB210_XP_001016593.1 XP_001016593.1 histone: H4 variant: canonical_H4 organism: Tetrahymena thermophila SB210 -MAG-GKGGK-GMGKVGAKRHSRKSNKASIEGITKPAIRRLARRGGVKRISSFIYDDSRQV -LKSFLENVVRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_003538.1 NP_003538.1 histone: H4 variant: canonical_H4 organism: Homo sapiens -MSVRGKAGK-GLGKGGAKCH-RKVLSDNIQGITKCTIRRLARHGGVKRILGLIYEETRRV 
-FKVFLENVIWYAVTNTEHAKRKTVTAMAVVYVLKRQGRTL----- ->canonical_H4_Saccharomyces_cerevisiae_S288C_NP_009563.1 NP_009563.1 histone: H4 variant: canonical_H4 organism: Saccharomyces cerevisiae S288C -MSGRGKGGK-GLGKGGAKRH-RKILRDNIQGITKPAIRRLARRGGVKRISGLIYEEVRAV -LKSFLESVIRDSVTYTEHAKRKTVTSLDVVYALKRQGRTLYGFGG ->canonical_H4_Drosophila_melanogaster_NP_001027352.1 NP_001027352.1 histone: H4 variant: canonical_H4 organism: Drosophila melanogaster +>Drosophila|NP_001027352.1|cH4 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta MTGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTALDVVYALKRQGRTLYGFGG ->canonical_H4_Arabidopsis_thaliana_NP_180441.1 NP_180441.1 histone: H4 variant: canonical_H4 organism: Arabidopsis thaliana -MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV -LKIFLENVIRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Zea_mays_NP_001131585.1 NP_001131585.1 histone: H4 variant: canonical_H4 organism: Zea mays -MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV -LKIFLENVIRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Gallus_gallus_NP_001032934.1 NP_001032934.1 histone: H4 variant: canonical_H4 organism: Gallus gallus +>Saccharomyces|NP_009563.1|cH4 organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +MSGRGKGGK-GLGKGGAKRH-RKILRDNIQGITKPAIRRLARRGGVKRISGLIYEEVRAV +LKSFLESVIRDSVTYTEHAKRKTVTSLDVVYALKRQGRTLYGFGG +>Xenopus|NP_001087926.1|cH4 organism=Xenopus laevis phylum=Chordata class=Amphibia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Heterocephalus_glaber_XP_012928609.2 XP_012928609.2 histone: H4 variant: canonical_H4 organism: Heterocephalus glaber +>Gallus|NP_001032934.1|cH4 organism=Gallus gallus phylum=Chordata class=Aves MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_001029249.1 NP_001029249.1 
histone: H4 variant: canonical_H4 organism: Homo sapiens +>Heterocephalus|XP_012928609.1|cH4 organism=Heterocephalus glaber phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_003486.1 NP_003486.1 histone: H4 variant: canonical_H4 organism: Homo sapiens +>Mus|NP_291074.1|cH4 organism=Mus musculus phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_003529.1 NP_003529.1 histone: H4 variant: canonical_H4 organism: Homo sapiens +>Tetrahymena|XP_001016593.1|cH4 organism=Tetrahymena thermophila SB210 phylum=Ciliophora class=Oligohymenophorea +MAG-GKGGK-GMGKVGAKRHSRKSNKASIEGITKPAIRRLARRGGVKRISSFIYDDSRQV +LKSFLENVVRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG +>Strongylocentrotus|NP_999707.1|cH4 organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV -LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_003530.1 NP_003530.1 histone: H4 variant: canonical_H4 organism: Homo sapiens +LKVFLENVIRDAVTYCEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Trypanosoma|XP_951561.1|cH4 organism=Trypanosoma brucei brucei TREU927 phylum=Euglenozoa class=Kinetoplastea +---MAKGKRVGESKGAQKRQ-KKVLRDNVRGITRGSIRRLARRAGVKRISGVIYDEVRGV +LKTFVESIVRDAGAYTEYSRKKTVTAAHVVFALRKRGKVLYGYD- +>Caenorhabditis|NP_492641.1|cH4 organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV -LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_003531.1 NP_003531.1 histone: H4 variant: canonical_H4 organism: Homo sapiens +LKVFLENVIRDAVTYCEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Arabidopsis|NP_180441.1|cH4 organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida 
+MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKIFLENVIRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG +>Zea|NP_001131585.1|cH4 organism=Zea mays phylum=Streptophyta class=Magnoliopsida +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKIFLENVIRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_001029249.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_003532.1 NP_003532.1 histone: H4 variant: canonical_H4 organism: Homo sapiens +>Homo|NP_003486.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_003533.1 NP_003533.1 histone: H4 variant: canonical_H4 organism: Homo sapiens +>Homo|NP_003529.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_003534.1 NP_003534.1 histone: H4 variant: canonical_H4 organism: Homo sapiens +>Homo|NP_003530.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_003535.1 NP_003535.1 histone: H4 variant: canonical_H4 organism: Homo sapiens +>Homo|NP_003531.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_003536.1 NP_003536.1 histone: H4 variant: canonical_H4 organism: Homo sapiens +>Homo|NP_003532.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV 
LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_003537.1 NP_003537.1 histone: H4 variant: canonical_H4 organism: Homo sapiens +>Homo|NP_003533.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_003539.1 NP_003539.1 histone: H4 variant: canonical_H4 organism: Homo sapiens +>Homo|NP_003534.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_068803.1 NP_068803.1 histone: H4 variant: canonical_H4 organism: Homo sapiens +>Homo|NP_003535.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Homo_sapiens_NP_778224.1 NP_778224.1 histone: H4 variant: canonical_H4 organism: Homo sapiens +>Homo|NP_003536.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Mus_musculus_NP_291074.1 NP_291074.1 histone: H4 variant: canonical_H4 organism: Mus musculus +>Homo|NP_003537.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Xenopus_laevis_NP_001087926.1 NP_001087926.1 histone: H4 variant: canonical_H4 organism: Xenopus laevis +>Homo|NP_003539.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Caenorhabditis_elegans_NP_492641.1 NP_492641.1 histone: H4 variant: canonical_H4 
organism: Caenorhabditis elegans +>Homo|NP_068803.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV -LKVFLENVIRDAVTYCEHAKRKTVTAMDVVYALKRQGRTLYGFGG ->canonical_H4_Strongylocentrotus_purpuratus_NP_999707.1 NP_999707.1 histone: H4 variant: canonical_H4 organism: Strongylocentrotus purpuratus +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_778224.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV -LKVFLENVIRDAVTYCEHAKRKTVTAMDVVYALKRQGRTLYGFGG +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG diff --git a/CURATED_SET/draft_seeds/H4_only.fasta b/CURATED_SET/draft_seeds/H4_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/OO_H1.8.fasta b/CURATED_SET/draft_seeds/OO_H1.8.fasta new file mode 100644 index 0000000..b3c7479 --- /dev/null +++ b/CURATED_SET/draft_seeds/OO_H1.8.fasta @@ -0,0 +1,21 @@ +>Mus|NP_612184.1|OO_H1.8 organism=Mus musculus phylum=Chordata class=Mammalia +MAPGSVSSVSSSSFPSRDTSPSGSCGLPGADKP----------GPSCRRIQAGQRNPTML +HMVLEALKAREARQGTSVVAIKVYIQHKYPTVDTTRFKYLLKQALETGVRRGLLTRPAHS +KAKGATGSFKLVPKPKTK----KACAPKAGRGAAGAKETGSKKSGLLKKDQVGKATMEKG +QKRRAY-----PCKAATLEMAPKK------AKAKPKEVRKAPLKQDKAAGAPLTANG-GQ +KVKRSGSRQ---EANAHGKTKGE--KSKPLASKVQNSVASLAKRKMADMAHTVTVVQGAE +TVQETKVPTPSQDIGHKVQPIPRVRKAKTP-------------------ENTQA +>Homo|NP_001295191.1|H1.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------MAPATAPRRAGEAKGKGPKKPSEAKEDPPNVGKVKKA +AKRPAKVQKPPPKPGAATEKARKQGGAAKDTRAQSGEARKVPPKPDKAMRAPSSAGGLSR +KAKAKGSRSSQGDAEAYRKTKAESKSSKPTASKVKNGAASPTKKKVVAKAKAPKAGQGP- +---NTKAAAPAKGSGSKVVPAHLSRKTEAPKGPRKAGLPIKASSSKVSSQRAEA +>Homo|NP_722575.1|H1.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata 
class=Mammalia +MAPGSVTSDISPS----STSTAGSSRSPESEKPGPSHGGVPPGGPSHSSLPVGRRHPPVL +RMVLEALQAGEQRRGTSVAAIKLYILHKYPTVDVLRFKYLLKQALATGMRRGLLARPLNS +KARGATGSFKLVPKHKKKIQPRKMAPATAPRRAGEAKGKGPKKPSEAKEDPPNVGKVKKA +AKRPAKVQKPPPKPGAATEKARKQGGAAKDTRAQSGEARKVPPKPDKAMRAPSSAGGLSR +KAKAKGSRSSQGDAEAYRKTKAESKSSKPTASKVKNGAASPTKKKVVAKAKAPKAGQGP- +---NTKAAAPAKGSGSKVVPAHLSRKTEAPKGPRKAGLPIKASSSKVSSQRAEA diff --git a/CURATED_SET/draft_seeds/OO_H1.8_only.fasta b/CURATED_SET/draft_seeds/OO_H1.8_only.fasta new file mode 100644 index 0000000..6b33182 --- /dev/null +++ b/CURATED_SET/draft_seeds/OO_H1.8_only.fasta @@ -0,0 +1,7 @@ +>Mus|NP_612184.1|OO_H1.8 organism=Mus musculus phylum=Chordata class=Mammalia +MAPGSVSSVSSSSFPSRDTSPSGSCGLPGADKPGPSCRRIQAGQRNPTMLHMVLEALKAR +EARQGTSVVAIKVYIQHKYPTVDTTRFKYLLKQALETGVRRGLLTRPAHSKAKGATGSFK +LVPKPKTKKACAPKAGRGAAGAKETGSKKSGLLKKDQVGKATMEKGQKRRAYPCKAATLE +MAPKKAKAKPKEVRKAPLKQDKAAGAPLTANGGQKVKRSGSRQEANAHGKTKGEKSKPLA +SKVQNSVASLAKRKMADMAHTVTVVQGAETVQETKVPTPSQDIGHKVQPIPRVRKAKTPE +NTQA diff --git a/CURATED_SET/draft_seeds/TS H3.10.fasta b/CURATED_SET/draft_seeds/TS H3.10.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/TS_H1.6.fasta b/CURATED_SET/draft_seeds/TS_H1.6.fasta new file mode 100644 index 0000000..bfe1622 --- /dev/null +++ b/CURATED_SET/draft_seeds/TS_H1.6.fasta @@ -0,0 +1,40 @@ +>Salmo|ACM09522.1|TS_H1.6 organism=Salmo salar phylum=Chordata class=Actinopteri +----MSGVIAIPLATPA-TTPKKRSKPKK-----------TGPTVSDRILKVVSASSGRS +GVSLAALKKSLAASGYDVVKNNARLKLAVRRLVAKGYLLQPKGTGASGSFKINKNK---- +-AVAKKKRPTKNKVKK--VGAKKVRRA--SPKKAAGAKKSPKKTKR-----KSPKKAKRP +AAAKKPKS-------------PRKTKRRVAKSTR---------AKTAPKKK +>Xenopus|NP_001087957.1|TS_H1.6 organism=Xenopus laevis phylum=Chordata class=Amphibia +MAATTESAPVAPPAEPAAAKKTKKQQPKKVAG-GAKAKKPSGPSASELIVKAVSASKERS +GVSLAALKKALAAGGYDVDKNNSRLKLALKALVTKGTLTQVKGSGASGSFKLNKKQ-PET +KDKAAKKKPAAPKAKKPAAGAKKAPKSPKKPKKVSAAAKSPKKVKKPAKAAKSPKKPKAV +KAKKVAKSPAKKATKPKTAKSPAKAKVAKPKAAK---------AKKPAPKK 
+>Canis|XP_005640152.1|TS_H1.6 organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MSETVPAVAAGTALASM-ENPSAKKRGRKPGGIPEAAPKAPGLSVSKLIMEALSVSQERA +GMSLAALKKALAAAGYDVEKNNSRIKLCLKSLVSKGTLVQTKGTGASGSFKLNKKALLPT +PAKSRVKRPPSTKTKR--LVLSRDSKSPKAAKTNK-AKKP--GGAGAQKAACSGRKAKGA +KDKQPRKS-------------PGKAPTGKPKAAKPRLNQQKVNPRKAVSKK +>Equus|NP_001243880.1|TS_H1.6 organism=Equus caballus phylum=Chordata class=Mammalia +MSETAPAAPAEPVLSSM-EKPPAKKRGKKPVGLTGGSRKVPGSSVSKLITEALSVSQERA +GMSLAALKKALAAAGYDVEKNNRRIKLGLKSLVSKGTLVQTRGTGASGSFKLSKKA-TPE +PAKGRVKKGASANAKK--LVLPKGSKSPKSAKTNKRTSKA--RTPAAQPSARGGRKSKGA +KGKQQLKS-------------PGKGRTGKPKTGKPKLTQQRTNPRKTASKK +>Macaca|NP_001074230.1|TS_H1.6 organism=Macaca mulatta phylum=Chordata class=Mammalia +MSETVPAASAGAVPAVM-EKPLTKKRGKKPAGLTSASRKAPNLSVSKLITEALSVSQERV +GMSLAALKKALAAAGYDVEKNNSRIKLSLKSLVNKGILVQTRGTGASGSFKLSKKV-LPK +STRRKANKSASAKTKK--LVLSRDSKSPKTAKTNKRAKKP--RATAPKKAVRSGRKAKGA +KGKQQQKS-------------PVKARATKPKLTQ----HHKANIRKATSRK +>Mus|NP_034507.2|TS_H1.6 organism=Mus musculus phylum=Chordata class=Mammalia +MSETAPAASSTLVPAPVEEKPSSKRRGKKPG--LAPARKPRGFSVSKLIPEALSTSQERA +GMSLAALKKALAAAGYDVEKNNSRIKLALKRLVNKGVLVQTKGTGASGSFKLSKKA-ASG +NDKGKGKKSASAKAKK--MGLPRASRSPKSSKTKA-VKKP--KAT-PTKASGSGRKTKGA +KGVQQRKS-------------PAKARAANPNSGKAKMVMQKTDLRKAAGRK +>Rattus|NP_036711.1|TS_H1.6 organism=Rattus norvegicus phylum=Chordata class=Mammalia +MSETAPAASSTLVPAPV-EKPATKRRGKKPG--MATARKPRGFSVSKLIPEALSMSQERA +GMSLAALKKALAAAGYDVEKNNSRIKLALKRLVNKGVLVQTKGTGASGSFKLSKKA-ASG +NDKGKGKKSASAKAKK--LGLSRASRSPKSSKTKV-VKKP--KAT-PTKGSGSRRKTKGA +KGLQQRKS-------------PAKARATNSNSGKSKMVMQKTDLRKAAGRK +>Homo|NP_005314.2|H1.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSETVPAASASAGVAAM-EKLPTKKRGRKPAGLISASRKVPNLSVSKLITEALSVSQERV +GMSLVALKKALAAAGYDVEKNNSRIKLSLKSLVNKGILVQTRGTGASGSFKLSKKV-IPK +STRSKAKKSVSAKTKK--LVLSRDSKSPKTAKTNKRAKKP--RATTP-KTVRSGRKAKGA +KGKQQQKS-------------PVKARASKSKLTQ----HHEVNVRKATSKK diff --git 
a/CURATED_SET/draft_seeds/TS_H1.6_only.fasta b/CURATED_SET/draft_seeds/TS_H1.6_only.fasta new file mode 100644 index 0000000..f99ae30 --- /dev/null +++ b/CURATED_SET/draft_seeds/TS_H1.6_only.fasta @@ -0,0 +1,35 @@ +>Salmo|ACM09522.1|TS_H1.6 organism=Salmo salar phylum=Chordata class=Actinopteri +----MSGVIAIPLATPA-TTPKKRSKPKK-----------TGPTVSDRILKVVSASSGRS +GVSLAALKKSLAASGYDVVKNNARLKLAVRRLVAKGYLLQPKGTGASGSFKINKNK---- +-AVAKKKRPTKNKVKK--VGAKKVRRA--SPKKAAGAKKSPKKTKR-----KSPKKAKRP +AAAKKPKS-------------PRKTKRRVAKSTR---------AKTAPKKK +>Xenopus|NP_001087957.1|TS_H1.6 organism=Xenopus laevis phylum=Chordata class=Amphibia +MAATTESAPVAPPAEPAAAKKTKKQQPKKVAG-GAKAKKPSGPSASELIVKAVSASKERS +GVSLAALKKALAAGGYDVDKNNSRLKLALKALVTKGTLTQVKGSGASGSFKLNKKQ-PET +KDKAAKKKPAAPKAKKPAAGAKKAPKSPKKPKKVSAAAKSPKKVKKPAKAAKSPKKPKAV +KAKKVAKSPAKKATKPKTAKSPAKAKVAKPKAAK---------AKKPAPKK +>Canis|XP_005640152.1|TS_H1.6 organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MSETVPAVAAGTALASM-ENPSAKKRGRKPGGIPEAAPKAPGLSVSKLIMEALSVSQERA +GMSLAALKKALAAAGYDVEKNNSRIKLCLKSLVSKGTLVQTKGTGASGSFKLNKKALLPT +PAKSRVKRPPSTKTKR--LVLSRDSKSPKAAKTNK-AKKP--GGAGAQKAACSGRKAKGA +KDKQPRKS-------------PGKAPTGKPKAAKPRLNQQKVNPRKAVSKK +>Equus|NP_001243880.1|TS_H1.6 organism=Equus caballus phylum=Chordata class=Mammalia +MSETAPAAPAEPVLSSM-EKPPAKKRGKKPVGLTGGSRKVPGSSVSKLITEALSVSQERA +GMSLAALKKALAAAGYDVEKNNRRIKLGLKSLVSKGTLVQTRGTGASGSFKLSKKA-TPE +PAKGRVKKGASANAKK--LVLPKGSKSPKSAKTNKRTSKA--RTPAAQPSARGGRKSKGA +KGKQQLKS-------------PGKGRTGKPKTGKPKLTQQRTNPRKTASKK +>Macaca|NP_001074230.1|TS_H1.6 organism=Macaca mulatta phylum=Chordata class=Mammalia +MSETVPAASAGAVPAVM-EKPLTKKRGKKPAGLTSASRKAPNLSVSKLITEALSVSQERV +GMSLAALKKALAAAGYDVEKNNSRIKLSLKSLVNKGILVQTRGTGASGSFKLSKKV-LPK +STRRKANKSASAKTKK--LVLSRDSKSPKTAKTNKRAKKP--RATAPKKAVRSGRKAKGA +KGKQQQKS-------------PVKARATKPKLTQ----HHKANIRKATSRK +>Mus|NP_034507.2|TS_H1.6 organism=Mus musculus phylum=Chordata class=Mammalia +MSETAPAASSTLVPAPVEEKPSSKRRGKKPG--LAPARKPRGFSVSKLIPEALSTSQERA 
+GMSLAALKKALAAAGYDVEKNNSRIKLALKRLVNKGVLVQTKGTGASGSFKLSKKA-ASG +NDKGKGKKSASAKAKK--MGLPRASRSPKSSKTKA-VKKP--KAT-PTKASGSGRKTKGA +KGVQQRKS-------------PAKARAANPNSGKAKMVMQKTDLRKAAGRK +>Rattus|NP_036711.1|TS_H1.6 organism=Rattus norvegicus phylum=Chordata class=Mammalia +MSETAPAASSTLVPAPV-EKPATKRRGKKPG--MATARKPRGFSVSKLIPEALSMSQERA +GMSLAALKKALAAAGYDVEKNNSRIKLALKRLVNKGVLVQTKGTGASGSFKLSKKA-ASG +NDKGKGKKSASAKAKK--LGLSRASRSPKSSKTKV-VKKP--KAT-PTKGSGSRRKTKGA +KGLQQRKS-------------PAKARATNSNSGKSKMVMQKTDLRKAAGRK diff --git a/CURATED_SET/draft_seeds/TS_H1.7.fasta b/CURATED_SET/draft_seeds/TS_H1.7.fasta new file mode 100644 index 0000000..f5fbcb3 --- /dev/null +++ b/CURATED_SET/draft_seeds/TS_H1.7.fasta @@ -0,0 +1,18 @@ +>Mus|NP_081580.2|TS_H1.7 organism=Mus musculus phylum=Chordata class=Mammalia +---------------------MAEAVQPSGESQGAELTIQIQQPAERALRTPAKRGTQSV +LRVSQLLLRAIAGHQHLTLDALKKELGNAGYEVRREISSHHEGKSTRLEKGTLLRVSGSD +AAGYFRVWKISKPREKAGQSRLTLGSHSSGKTVLKSPRPLRPRSR--RKAAKKAREVWRR +KARALKARSRRVRTRSTSGARSRTRSRASSRATSRATSRARSRARSRAQSSARSSARSSA +KSSAKSSTRSSAKSWARSKARSRARSRAKDLVRSKAREQAQAREQARARAREQAHARART +QDWVRAKAQEFVSAKEQQYVRAKEQERAKAREQVRIGARDEARIKAKDYNRVRPTKEDTS +PRPAEEKSSNSKLREEKGQEPERPVKQTIQKPALDNAPSIQGKACTKSFTKSGQPGDTES +P +>Homo|NP_861453.1|H1.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MEQALTGEAQSRWPRRGGSGAMAEAPGPSGESRGHSAT---QLPAEKTVGGPSRGCSSSV +LRVSQLVLQAISTHKGLTLAALKKELRNAGYEVRRKSGRHEAPRGQ--AKATLLRVSGSD +AAGYFRVWKVPKPRRKPGRARQEEGTRAPWRTP-AAPRSSRRRRQPLRKAARKAREVWRR +NARA----------------------------------KAKANARAR------------- +------------------------RTR-----------------RARPRAKEPPCAR--- +-------------------------------------AKEEAGATAADEGRGQAVKEDTT +PRSGKDKRRSSKPREEK-QEPKKPAQRTIQ------------------------------ +- diff --git a/CURATED_SET/draft_seeds/TS_H1.7_only.fasta b/CURATED_SET/draft_seeds/TS_H1.7_only.fasta new file mode 100644 index 0000000..862395a --- /dev/null +++ b/CURATED_SET/draft_seeds/TS_H1.7_only.fasta @@ -0,0 +1,8 @@ 
+>Mus|NP_081580.2|TS_H1.7 organism=Mus musculus phylum=Chordata class=Mammalia +MAEAVQPSGESQGAELTIQIQQPAERALRTPAKRGTQSVLRVSQLLLRAIAGHQHLTLDA +LKKELGNAGYEVRREISSHHEGKSTRLEKGTLLRVSGSDAAGYFRVWKISKPREKAGQSR +LTLGSHSSGKTVLKSPRPLRPRSRRKAAKKAREVWRRKARALKARSRRVRTRSTSGARSR +TRSRASSRATSRATSRARSRARSRAQSSARSSARSSAKSSAKSSTRSSAKSWARSKARSR +ARSRAKDLVRSKAREQAQAREQARARAREQAHARARTQDWVRAKAQEFVSAKEQQYVRAK +EQERAKAREQVRIGARDEARIKAKDYNRVRPTKEDTSPRPAEEKSSNSKLREEKGQEPER +PVKQTIQKPALDNAPSIQGKACTKSFTKSGQPGDTESP diff --git a/CURATED_SET/draft_seeds/TS_H1.9.fasta b/CURATED_SET/draft_seeds/TS_H1.9.fasta new file mode 100644 index 0000000..cf616c7 --- /dev/null +++ b/CURATED_SET/draft_seeds/TS_H1.9.fasta @@ -0,0 +1,15 @@ +>Mus|NP_061262.1|TS_H1.9 organism=Mus musculus phylum=Chordata class=Mammalia +-------------MAQMVAGDQDAGTLWVPSQSESQTESDISTQSLRKPTMSYVILKTLA +DKRVHNCVSLATLKKAVSITGYNMTHNTWRFKRVLQNLLDKGMIMHVTCCKGASGSLCLC +KERALKSNHRAKRCQDRQKSQKPQKPGQRESEPCQLLLSSKKKNDQLFKGVRRVAKGNRH +CHY +>Peromyscus|ACI22865.1|TS_H1.9 organism=Peromyscus californicus insignis phylum=Chordata class=Mammalia +MQRDTLLVSPSAAPNSAVAVDQDASTSDDPSKSE--TGPYTCPQTMRKPSMSKVILRAVT +DKGLHRRVSLAALKKAVSTTGYNMAHNSWRFKRVVKNLVKKGMLKQVT-GKGASGSFRLG +KKQAFKSKRKARR---RQRRQQRQKPRQRRSGPRQSLLGSGRSLKGLF------------ +--- +>Rattus|NP_001103035.1|TS_H1.9 organism=Rattus norvegicus phylum=Chordata class=Mammalia +----MSLVSPSPDSNAVMAGDQDASTSQVPSQSESKIGPNVATQTLRKPTMSKVILRTVA +DKGVHSRVSLAALKKAVSITGYNMAQNTWRFKRVLQNLVKKGMLKQVT-GKGASGSFRLG +KKQAFKSKCKAKR---RQRRQK---PGQRRTGSRRSLLGSKKSNNRLFKGVRRVAKGRRH +--- diff --git a/CURATED_SET/draft_seeds/Viral.fasta b/CURATED_SET/draft_seeds/Viral.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH1.fasta b/CURATED_SET/draft_seeds/cH1.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH2A.10_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2A.10_(Homo_sapiens).fasta new file mode 100644 index 0000000..f6a6fe4 --- /dev/null +++ 
b/CURATED_SET/draft_seeds/cH2A.10_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_778235.1|cH2A.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TESHKPGKNK diff --git a/CURATED_SET/draft_seeds/cH2A.11_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2A.11_(Homo_sapiens).fasta new file mode 100644 index 0000000..b213da8 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A.11_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_254280.1|cH2A.11_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TESHHKAKGK diff --git a/CURATED_SET/draft_seeds/cH2A.1_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2A.1_(Homo_sapiens).fasta new file mode 100644 index 0000000..d9a241f --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A.1_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_734466.1|cH2A.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKSKSRSSRAGLQFPVGRIHRLLRKGNYAERIGAGAPVYLAAVLEYLT +AEILELAGNASRDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TESHHHKAQSK diff --git a/CURATED_SET/draft_seeds/cH2A.1_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/cH2A.1_(Mus_musculus).fasta new file mode 100644 index 0000000..0f7a65c --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A.1_(Mus_musculus).fasta @@ -0,0 +1,4 @@ +>Mus|NP_783589.1|cH2A.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MSGPTKRGGKARAKVKSRSSRAGLQFPVGRVHRLLRQGNYAQRIGAGAPVYLAAVLEYLT +AEVLELAGNAARDNKKTRITPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TESHKSQTK diff --git a/CURATED_SET/draft_seeds/cH2A.2_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2A.2_(Homo_sapiens).fasta new file mode 100644 index 0000000..805920f --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A.2_(Homo_sapiens).fasta @@ -0,0 +1,8 @@ 
+>Homo|NP_003504.2|cH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TESHHKAKGK +>Homo|NP_066390.1|cH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TESHHKAKGK diff --git a/CURATED_SET/draft_seeds/cH2A.3_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2A.3_(Homo_sapiens).fasta new file mode 100644 index 0000000..74e483e --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A.3_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_003503.1|cH2A.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TESHHKAKGK diff --git a/CURATED_SET/draft_seeds/cH2A.4_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2A.4_(Homo_sapiens).fasta new file mode 100644 index 0000000..aa09ed4 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A.4_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_066409.1|cH2A.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TESHHKAKGK diff --git a/CURATED_SET/draft_seeds/cH2A.5_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2A.5_(Homo_sapiens).fasta new file mode 100644 index 0000000..83fe003 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A.5_(Homo_sapiens).fasta @@ -0,0 +1,20 @@ +>Homo|NP_003500.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TESHHKAKGK +>Homo|NP_003501.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TESHHKAKGK +>Homo|NP_003502.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TESHHKAKGK +>Homo|NP_003505.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TESHHKAKGK +>Homo|NP_066408.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TESHHKAKGK diff --git a/CURATED_SET/draft_seeds/cH2A.6_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2A.6_(Homo_sapiens).fasta new file mode 100644 index 0000000..344abc0 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A.6_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_542163.1|cH2A.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TESHHKAK diff --git a/CURATED_SET/draft_seeds/cH2A.7_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2A.7_(Homo_sapiens).fasta new file mode 100644 index 0000000..0924821 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A.7_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_066544.1|cH2A.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TESHHKTK diff --git a/CURATED_SET/draft_seeds/cH2A.8_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2A.8_(Homo_sapiens).fasta new file mode 100644 index 0000000..e906ad0 --- /dev/null +++ 
b/CURATED_SET/draft_seeds/cH2A.8_(Homo_sapiens).fasta @@ -0,0 +1,8 @@ +>Homo|NP_001035807.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TESHHKAKGK +>Homo|NP_003507.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TESHHKAKGK diff --git a/CURATED_SET/draft_seeds/cH2A.9_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2A.9_(Homo_sapiens).fasta new file mode 100644 index 0000000..079aa60 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A.9_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_003508.1|cH2A.9_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TESHKAKSK diff --git a/CURATED_SET/draft_seeds/cH2A.fasta b/CURATED_SET/draft_seeds/cH2A.fasta new file mode 100644 index 0000000..2df76bf --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A.fasta @@ -0,0 +1,228 @@ +>Ixodes|EEC09557.1|cH2A_(Animals) organism=Ixodes scapularis phylum=Arthropoda class=Arachnida +---------MSGRGK--GGKVK--GKSK---TRSSRAGLQFPVGRIHRLLRKGNYAERVG +AGAPVYLAAVLEYLAAEVLELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLSGVTI +AQGGVLPNIQAVLLPKKTEK-----KS------------- +>Apis|XP_001119899.1|cH2A_(Animals) organism=Apis mellifera phylum=Arthropoda class=Insecta +---------MSGRGK--GGKAK--AKAK---SRSNRAGLQFPVGRIHRLLRKGNYAERVG +AGAPVYLAAVMEYLAAEVLELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLSGVTI +AQGGVLPNIQAVLLPKKTEK-----KA------------- +>Drosophila|NP_724343.1|cH2A_(Animals) organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +---------MSGRGK--GGKVK--GKAK---SRSNRAGLQFPVGRIHRLLRKGNYAERVG +AGAPVYLAAVMEYLAAEVLELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLSGVTI 
+AQGGVLPNIQAVLLPKKTEK-----KA------------- +>Nematostella|EDO48405.1|cH2A_(Animals) organism=Nematostella vectensis phylum=Cnidaria class=Anthozoa +---------MSGRGK--GKAKG--TKSK---TRSSRAGLQFPVGRIHRHLRKGNYAERVG +AGAPVYMAAVLEYLSAEILELAGNAA--RDNKKTRIIPRHLQLAVRNDEELNRLLHGVTI +AQGGVLPNIQAVLLPKKTEK-----KAKA----------- +>Caenorhabditis|NP_505463.1|cH2A_(Animals) organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +---------MSGRGK--GGKAKTGGKAK---SRSSRAGLQFPVGRLHRILRKGNYAQRVG +AGAPVYLAAVLEYLAAEVLELAGNAA--RDNKKTRIAPRHLQLAVRNDEELNKLLAGVTI +AQGGVLPNIQAVLLPKKTAG-----DKE------------ +>Danio|XP_009296490.1|cH2A_(Vertebrata) organism=Danio rerio phylum=Chordata class=Actinopteri +---------MSGRGK-TGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAVRNDEELNKLLGGVTI +AQGGVLPNIQAVLLPKKTE---KAAKGK------------ +>Xenopus|NP_001087948.1|cH2A_(Vertebrata) organism=Xenopus laevis phylum=Chordata class=Amphibia +---------MSGRGK-QGGKTR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAVRNDEELNKLLGRVTI +AQGGVLPNIQSVLLPKKTES-SKSAKSK------------ +>Gallus|NP_001072943.1|cH2A_(Vertebrata) organism=Gallus gallus phylum=Chordata class=Aves +---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTD--SHKAKAK------------ +>Ailuropoda|XP_011215280.1|cH2A_(Mammalia) organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGRVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Bos|NP_001192525.1|cH2A_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGRVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Callithrix|XP_008992112.1|cH2A_(Mammalia) organism=Callithrix jacchus phylum=Chordata 
class=Mammalia +---------MSGRGK-QGGKVR--AKAK---SRSSRAGLQFPVGRIHRLLRKGKYADRIG +AGAPVYLAAVLEYLTAEILELAGNAS--RDNKKTRIIPRHLQLAIRNDEELNKLLGGVTI +AQGGVLPNIQAVLLPKKTESHHHKSQSK------------ +>Canis|XP_005640150.1|cH2A_(Mammalia) organism=Canis lupus familiaris phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGRVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Cavia|XP_003478913.1|cH2A_(Mammalia) organism=Cavia porcellus phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYMAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Cricetulus|XP_007634672.1|cH2A_(Mammalia) organism=Cricetulus griseus phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYSERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGRVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Heterocephalus|XP_004847876.1|cH2A_(Mammalia) organism=Heterocephalus glaber phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Loxodonta|XP_003422330.1|cH2A_(Mammalia) organism=Loxodonta africana phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Macaca|NP_001180653.1|cH2A_(Mammalia) organism=Macaca mulatta phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYSERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGRVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Monodelphis|XP_007485414.1|cH2A_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYAERVG 
+AGAPVYMAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKTKGK------------ +>Mus|NP_835489.1|cH2A_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYSERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGRVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Nomascus|XP_004088806.1|cH2A_(Mammalia) organism=Nomascus leucogenys phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGRVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Oryctolagus|XP_008246690.1|cH2A_(Mammalia) organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Pan|XP_009448979.1|cH2A_(Mammalia) organism=Pan troglodytes phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGKA----------- +>Rattus|NP_068612.2|cH2A_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYSERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGRVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Sus|XP_003356618.1|cH2A_(Mammalia) organism=Sus scrofa phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Homo|NP_778235.1|cH2A.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAVRNDEELNKLLGGVTI +AQGGVLPNIQAVLLPKKTES-HKPGKNK------------ +>Homo|NP_254280.1|cH2A.11_(Homo_sapiens) organism=Homo sapiens 
phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYSERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGRVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Homo|NP_734466.1|cH2A.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKSK---SRSSRAGLQFPVGRIHRLLRKGNYAERIG +AGAPVYLAAVLEYLTAEILELAGNAS--RDNKKTRIIPRHLQLAIRNDEELNKLLGGVTI +AQGGVLPNIQAVLLPKKTESHHHKAQSK------------ +>Homo|NP_003504.2|cH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYSERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGRVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Homo|NP_066390.1|cH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYSERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGRVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Homo|NP_003503.1|cH2A.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGRVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Homo|NP_066409.1|cH2A.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYSERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Homo|NP_003500.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Homo|NP_003501.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI 
+AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Homo|NP_003502.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Homo|NP_003505.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Homo|NP_066408.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Homo|NP_542163.1|cH2A.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAK-------------- +>Homo|NP_066544.1|cH2A.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---TRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYLAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKTK-------------- +>Homo|NP_001035807.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYMAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Homo|NP_003507.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYMAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES-HHKAKGK------------ +>Homo|NP_003508.1|cH2A.9_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+---------MSGRGK-QGGKAR--AKAK---SRSSRAGLQFPVGRVHRLLRKGNYAERVG +AGAPVYMAAVLEYLTAEILELAGNAA--RDNKKTRIIPRHLQLAIRNDEELNKLLGKVTI +AQGGVLPNIQAVLLPKKTES--HKAKSK------------ +>Mus|NP_783589.1|cH2A.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +---------MSGPTK-RGGKAR--AKVK---SRSSRAGLQFPVGRVHRLLRQGNYAQRIG +AGAPVYLAAVLEYLTAEVLELAGNAA--RDNKKTRITPRHLQLAIRNDEELNKLLGRVTI +AQGGVLPNIQAVLLPKKTES--HKSQTK------------ +>Encephalitozoon|NP_584598.1|cH2A_(Fungi) organism=Encephalitozoon cuniculi GB-M1 phylum=Microsporidia class= +---------MVVIQG-KGGKADPRVIGK---DEEHQKSI-VKLSQIKKIMKDRTRM-RIS +KDALVAVSACVMYLISEITDGAKNVA--STDGKKKVMPKHINNAICNDTELHFVGHDWLI +KNGG----MKSYIAPGDFAVSSKKGSSRD----------- +>Encephalitozoon|HISTDB_cH2A_0|cH2A_(Fungi) organism=Encephalitozoon intestinalis phylum=Microsporidia class= +---------MAMVQG-KGGKADPRVMGK---DEEHQKSI-VKLSQIKKIMKDRTRM-RIS +KDALIGVSACVMYLISEITDGAKNVA--NTDGKKKVIPKHINHAICNDTELHFVGHDWLI +KNGG----MKSYISPGDFSVSSKKGGSRD----------- +>Ostreococcus|XP_003080758.2|cH2A_(Chlorophyta) organism=Ostreococcus tauri phylum=Chlorophyta class=Mamiellophyceae +---------MSGRGK---GKTG--KKAM---SRSAKAGLQFPVGRVARYLKQGKYATRVG +AGAPVYLAAVLEYLAAEVLELAGNAS--RDNKKSRIVPRHIQLAIRNDEELSKLLGTVTI +ASGGVLPNIHSVLLPKKSKK-------------------- +>Arabidopsis|NP_001190852.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +---------MAGRGKQLGSGAA--KKST---SRSSKAGLQFPVGRIARFLKAGKYAERVG +AGAPVYLAAVLEYLAAEVLELAGNAA--RDNKKTRIVPRHIQLAVRNDEELSKLLGDVTI +ANGGVMPNIHNLLLPKKAGSSKPTEED------------- +>Arabidopsis|NP_175517.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +---------MAGRGKTLGSGSA--KKAT---TRSSKAGLQFPVGRIARFLKKGKYAERVG +AGAPVYLAAVLEYLAAEVLELAGNAA--RDNKKTRIVPRHIQLAVRNDEELSKLLGDVTI +ANGGVMPNIHNLLLPKKTGASKPSAEDD------------ +>Arabidopsis|NP_188703.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida 
+---------MAGRGKTLGSGVA--KKST---SRSSKAGLQFPVGRIARFLKNGKYATRVG +AGAPVYLAAVLEYLAAEVLELAGNAA--RDNKKTRIVPRHIQLAVRNDEELSKLLGDVTI +ANGGVMPNIHSLLLPKKAGASKPSADED------------ +>Arabidopsis|NP_200275.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +---------MAGRGKTLGSGGA--KKAT---SRSSKAGLQFPVGRIARFLKAGKYAERVG +AGAPVYLAAVLEYLAAEVLELAGNAA--RDNKKTRIVPRHIQLAVRNDEELSKLLGDVTI +ANGGVMPNIHNLLLPKKAGASKPQED-------------- +>Beta|XP_010685819.1|cH2A_(Embryophyta) organism=Beta vulgaris subsp. vulgaris phylum=Streptophyta class=Magnoliopsida +MDSTAGGKAKKGAGGRKGGGPK--KKPV---SRSVKAGLQFPVGRIGRYLKKGRYAQRVG +TGAPVYLAAVLEYLAAEVLELAGNAA--RDNKKNRIIPRHVLLAVRNDDELGKLLSGVTI +AHGGVLPNINPVLLPKKAGGDKATKEPKSPSKATKSPKKA +>Oryza|NP_001066688.1|cH2A_(Embryophyta) organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +---------MAGRGKAIGAGAA--KKAT---SRSSKAGLQFPVGRIARFLKAGKYAERVG +AGAPVYLAAVLEYLAAEVLELAGNAA--RDNKKTRIVPRHIQLAVRNDEELTKLLGGATI +ASGGVMPNIHQHLLPKKAGSSKASHADDDDN--------- +>Zea|NP_001132837.1|cH2A_(Embryophyta) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +---------MAGRGKAIGSGAA--KKAT---SRSSKAGLQFPVGRIARFLKAGKYAERVG +AGAPVYLAAVLEYLAAEVLELAGNAA--RDNKKTRIVPRHIQLAVRNDEELSRLLGTVTI +ASGGVMPNIHNLLLPKKAGGGSAKAAAGDED--------- +>Zea|NP_001141988.1|cH2A_(Embryophyta) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +---------MAGRGKAIGAGAA--KKAT---SRSSKAGLQFPVGRIARFLKAGKYAERVG +AGAPVYLAAVLEYLAAEVLELAGNAA--RDNKKTRIVPRHIQLAVRNDEELTKLLGGATI +ASGGVMPNIHQHLLPKKAASSKASVDDDDN---------- +>Toxoplasma|XP_002365268.1|cH2A_(Protists) organism=Toxoplasma gondii ME49 phylum=Apicomplexa class=Conoidasida +---------MSAKGK--GGRAKKSGKSS---SKSAKAGLQFPVGRIGRYLKKGRYAKRVG +AGAPVYMAAVLEYLCAEILELAGNAA--RDHKKTRIIPRHIQLAVRNDEELSKFLGGVTI +ASGGVMPNVHSVLLPKKSKGKKSQ---------------- +>Tetrahymena|AAC37292.1|cH2A_(Protists) organism=Tetrahymena thermophila phylum=Ciliophora class=Oligohymenophorea 
+---------MSTTGK--GGKAKGKTASSKQVSRSARAGLQFPVGRISRFLKNGRYSERIG +TGAPVYLAAVLEYLAAEVLELAGNAA--KDNKKTRIVPRHILLAIRNDEELNKLMANTTI +ADGGVLPNINPMLLPSKTKKSTEPEH-------------- +>Trypanosoma|XP_845905.1|cH2A_(Protists) organism=Trypanosoma brucei brucei TREU927 phylum=Euglenozoa class=Kinetoplastea +---------MATPKQ--AVKKASKGGS----SRSVKAGLIFPVGRVGTLLRRGQYARRIG +ASGAVYMAAVLEYLTAELLELSVKAAAQQTKKTKRLTPRTVTLAVRHDDDLGALLRNVTM +SRGGVMPSLNKALAKKQKSGKHAKATPSV----------- +>Perkinsus|EER16127.1|cH2A_(Protists) organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +---------MSGKGK--GAVLEGMHKDKK--TRSAKAGLQFPVGRIARYMKHGRYAKRVG +AGAPVYLAAVLEYLVAEILELAGNAA--RDHKKTRINPRHIQLAVRNDDELNEFLSNVTI +ASGGVLPNIHTSLLPKKSTKKSMEY--------------- diff --git a/CURATED_SET/draft_seeds/cH2A_(Animals).fasta b/CURATED_SET/draft_seeds/cH2A_(Animals).fasta new file mode 100644 index 0000000..4ace6ec --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A_(Animals).fasta @@ -0,0 +1,168 @@ +>Ixodes|EEC09557.1|cH2A_(Animals) organism=Ixodes scapularis phylum=Arthropoda class=Arachnida +MSGRGK-GGKVK--GKSKTRSSRAGLQFPVGRIHRLLRKGNYAERVGAGAPVYLAAVLEY +LAAEVLELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLSGVTIAQGGVLPNIQAVLLP +KKTEK-----KS-- +>Apis|XP_001119899.1|cH2A_(Animals) organism=Apis mellifera phylum=Arthropoda class=Insecta +MSGRGK-GGKAK--AKAKSRSNRAGLQFPVGRIHRLLRKGNYAERVGAGAPVYLAAVMEY +LAAEVLELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLSGVTIAQGGVLPNIQAVLLP +KKTEK-----KA-- +>Drosophila|NP_724343.1|cH2A_(Animals) organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +MSGRGK-GGKVK--GKAKSRSNRAGLQFPVGRIHRLLRKGNYAERVGAGAPVYLAAVMEY +LAAEVLELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLSGVTIAQGGVLPNIQAVLLP +KKTEK-----KA-- +>Nematostella|EDO48405.1|cH2A_(Animals) organism=Nematostella vectensis phylum=Cnidaria class=Anthozoa +MSGRGK-GKAKG--TKSKTRSSRAGLQFPVGRIHRHLRKGNYAERVGAGAPVYMAAVLEY +LSAEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNRLLHGVTIAQGGVLPNIQAVLLP +KKTEK-----KAKA +>Caenorhabditis|NP_505463.1|cH2A_(Animals) organism=Caenorhabditis elegans 
phylum=Nematoda class=Chromadorea +MSGRGK-GGKAKTGGKAKSRSSRAGLQFPVGRLHRILRKGNYAQRVGAGAPVYLAAVLEY +LAAEVLELAGNAARDNKKTRIAPRHLQLAVRNDEELNKLLAGVTIAQGGVLPNIQAVLLP +KKTAG-----DKE- +>Danio|XP_009296490.1|cH2A_(Vertebrata) organism=Danio rerio phylum=Chordata class=Actinopteri +MSGRGKTGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGGVTIAQGGVLPNIQAVLLP +KKTE---KAAKGK- +>Xenopus|NP_001087948.1|cH2A_(Vertebrata) organism=Xenopus laevis phylum=Chordata class=Amphibia +MSGRGKQGGKTR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGRVTIAQGGVLPNIQSVLLP +KKTES-SKSAKSK- +>Gallus|NP_001072943.1|cH2A_(Vertebrata) organism=Gallus gallus phylum=Chordata class=Aves +MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTD--SHKAKAK- +>Ailuropoda|XP_011215280.1|cH2A_(Mammalia) organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Bos|NP_001192525.1|cH2A_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Callithrix|XP_008992112.1|cH2A_(Mammalia) organism=Callithrix jacchus phylum=Chordata class=Mammalia +MSGRGKQGGKVR--AKAKSRSSRAGLQFPVGRIHRLLRKGKYADRIGAGAPVYLAAVLEY +LTAEILELAGNASRDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLP +KKTESHHHKSQSK- +>Canis|XP_005640150.1|cH2A_(Mammalia) organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Cavia|XP_003478913.1|cH2A_(Mammalia) organism=Cavia porcellus phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEY 
+LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Cricetulus|XP_007634672.1|cH2A_(Mammalia) organism=Cricetulus griseus phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Heterocephalus|XP_004847876.1|cH2A_(Mammalia) organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Loxodonta|XP_003422330.1|cH2A_(Mammalia) organism=Loxodonta africana phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Macaca|NP_001180653.1|cH2A_(Mammalia) organism=Macaca mulatta phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Monodelphis|XP_007485414.1|cH2A_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKTKGK- +>Mus|NP_835489.1|cH2A_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Nomascus|XP_004088806.1|cH2A_(Mammalia) organism=Nomascus leucogenys phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Oryctolagus|XP_008246690.1|cH2A_(Mammalia) organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP 
+KKTES-HHKAKGK- +>Pan|XP_009448979.1|cH2A_(Mammalia) organism=Pan troglodytes phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGKA +>Rattus|NP_068612.2|cH2A_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Sus|XP_003356618.1|cH2A_(Mammalia) organism=Sus scrofa phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Homo|NP_778235.1|cH2A.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGGVTIAQGGVLPNIQAVLLP +KKTES-HKPGKNK- +>Homo|NP_254280.1|cH2A.11_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Homo|NP_734466.1|cH2A.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKSKSRSSRAGLQFPVGRIHRLLRKGNYAERIGAGAPVYLAAVLEY +LTAEILELAGNASRDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLP +KKTESHHHKAQSK- +>Homo|NP_003504.2|cH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Homo|NP_066390.1|cH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Homo|NP_003503.1|cH2A.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Homo|NP_066409.1|cH2A.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Homo|NP_003500.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Homo|NP_003501.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Homo|NP_003502.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Homo|NP_003505.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Homo|NP_066408.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Homo|NP_542163.1|cH2A.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAK--- +>Homo|NP_066544.1|cH2A.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP 
+KKTES-HHKTK--- +>Homo|NP_001035807.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Homo|NP_003507.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES-HHKAKGK- +>Homo|NP_003508.1|cH2A.9_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKAR--AKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEY +LTAEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLP +KKTES--HKAKSK- +>Mus|NP_783589.1|cH2A.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MSGPTKRGGKAR--AKVKSRSSRAGLQFPVGRVHRLLRQGNYAQRIGAGAPVYLAAVLEY +LTAEVLELAGNAARDNKKTRITPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLP +KKTES--HKSQTK- diff --git a/CURATED_SET/draft_seeds/cH2A_(Animals)_only.fasta b/CURATED_SET/draft_seeds/cH2A_(Animals)_only.fasta new file mode 100644 index 0000000..f23079b --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A_(Animals)_only.fasta @@ -0,0 +1,20 @@ +>Ixodes|EEC09557.1|cH2A_(Animals) organism=Ixodes scapularis phylum=Arthropoda class=Arachnida +MSGRGKGGKVK--GKSKTRSSRAGLQFPVGRIHRLLRKGNYAERVGAGAPVYLAAVLEYL +AAEVLELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLSGVTIAQGGVLPNIQAVLLPK +KTEKKS-- +>Apis|XP_001119899.1|cH2A_(Animals) organism=Apis mellifera phylum=Arthropoda class=Insecta +MSGRGKGGKAK--AKAKSRSNRAGLQFPVGRIHRLLRKGNYAERVGAGAPVYLAAVMEYL +AAEVLELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLSGVTIAQGGVLPNIQAVLLPK +KTEKKA-- +>Drosophila|NP_724343.1|cH2A_(Animals) organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +MSGRGKGGKVK--GKAKSRSNRAGLQFPVGRIHRLLRKGNYAERVGAGAPVYLAAVMEYL +AAEVLELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLSGVTIAQGGVLPNIQAVLLPK +KTEKKA-- +>Nematostella|EDO48405.1|cH2A_(Animals) organism=Nematostella vectensis phylum=Cnidaria class=Anthozoa 
+MSGRGKGKAKG--TKSKTRSSRAGLQFPVGRIHRHLRKGNYAERVGAGAPVYMAAVLEYL +SAEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNRLLHGVTIAQGGVLPNIQAVLLPK +KTEKKAKA +>Caenorhabditis|NP_505463.1|cH2A_(Animals) organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +MSGRGKGGKAKTGGKAKSRSSRAGLQFPVGRLHRILRKGNYAQRVGAGAPVYLAAVLEYL +AAEVLELAGNAARDNKKTRIAPRHLQLAVRNDEELNKLLAGVTIAQGGVLPNIQAVLLPK +KTAGDKE- diff --git a/CURATED_SET/draft_seeds/cH2A_(Chlorophyta).fasta b/CURATED_SET/draft_seeds/cH2A_(Chlorophyta).fasta new file mode 100644 index 0000000..d7136d1 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A_(Chlorophyta).fasta @@ -0,0 +1,4 @@ +>Ostreococcus|XP_003080758.2|cH2A_(Chlorophyta) organism=Ostreococcus tauri phylum=Chlorophyta class=Mamiellophyceae +MSGRGKGKTGKKAMSRSAKAGLQFPVGRVARYLKQGKYATRVGAGAPVYLAAVLEYLAAE +VLELAGNASRDNKKSRIVPRHIQLAIRNDEELSKLLGTVTIASGGVLPNIHSVLLPKKSK +K diff --git a/CURATED_SET/draft_seeds/cH2A_(Embryophyta).fasta b/CURATED_SET/draft_seeds/cH2A_(Embryophyta).fasta new file mode 100644 index 0000000..294ad58 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A_(Embryophyta).fasta @@ -0,0 +1,32 @@ +>Arabidopsis|NP_001190852.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +---------MAGRGKQLGSGAAKKSTSRSSKAGLQFPVGRIARFLKAGKYAERVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSKLLGDVTIANGGVMP +NIHNLLLPKKAGSSKPTEED------------- +>Arabidopsis|NP_175517.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +---------MAGRGKTLGSGSAKKATTRSSKAGLQFPVGRIARFLKKGKYAERVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSKLLGDVTIANGGVMP +NIHNLLLPKKTGASKPSAEDD------------ +>Arabidopsis|NP_188703.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +---------MAGRGKTLGSGVAKKSTSRSSKAGLQFPVGRIARFLKNGKYATRVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSKLLGDVTIANGGVMP +NIHSLLLPKKAGASKPSADED------------ +>Arabidopsis|NP_200275.1|cH2A_(Embryophyta) organism=Arabidopsis 
thaliana phylum=Streptophyta class=Magnoliopsida +---------MAGRGKTLGSGGAKKATSRSSKAGLQFPVGRIARFLKAGKYAERVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSKLLGDVTIANGGVMP +NIHNLLLPKKAGASKPQED-------------- +>Beta|XP_010685819.1|cH2A_(Embryophyta) organism=Beta vulgaris subsp. vulgaris phylum=Streptophyta class=Magnoliopsida +MDSTAGGKAKKGAGGRKGGGPKKKPVSRSVKAGLQFPVGRIGRYLKKGRYAQRVGTGAPV +YLAAVLEYLAAEVLELAGNAARDNKKNRIIPRHVLLAVRNDDELGKLLSGVTIAHGGVLP +NINPVLLPKKAGGDKATKEPKSPSKATKSPKKA +>Oryza|NP_001066688.1|cH2A_(Embryophyta) organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +---------MAGRGKAIGAGAAKKATSRSSKAGLQFPVGRIARFLKAGKYAERVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELTKLLGGATIASGGVMP +NIHQHLLPKKAGSSKASHADDDDN--------- +>Zea|NP_001132837.1|cH2A_(Embryophyta) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +---------MAGRGKAIGSGAAKKATSRSSKAGLQFPVGRIARFLKAGKYAERVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSRLLGTVTIASGGVMP +NIHNLLLPKKAGGGSAKAAAGDED--------- +>Zea|NP_001141988.1|cH2A_(Embryophyta) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +---------MAGRGKAIGAGAAKKATSRSSKAGLQFPVGRIARFLKAGKYAERVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELTKLLGGATIASGGVMP +NIHQHLLPKKAASSKASVDDDDN---------- diff --git a/CURATED_SET/draft_seeds/cH2A_(Fungi).fasta b/CURATED_SET/draft_seeds/cH2A_(Fungi).fasta new file mode 100644 index 0000000..1ca99f5 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A_(Fungi).fasta @@ -0,0 +1,8 @@ +>Encephalitozoon|NP_584598.1|cH2A_(Fungi) organism=Encephalitozoon cuniculi GB-M1 phylum=Microsporidia class= +MVVIQGKGGKADPRVIGKDEEHQKSIVKLSQIKKIMKDRTRMRISKDALVAVSACVMYLI +SEITDGAKNVASTDGKKKVMPKHINNAICNDTELHFVGHDWLIKNGGMKSYIAPGDFAVS +SKKGSSRD +>Encephalitozoon|HISTDB_cH2A_0|cH2A_(Fungi) organism=Encephalitozoon intestinalis phylum=Microsporidia class= +MAMVQGKGGKADPRVMGKDEEHQKSIVKLSQIKKIMKDRTRMRISKDALIGVSACVMYLI +SEITDGAKNVANTDGKKKVIPKHINHAICNDTELHFVGHDWLIKNGGMKSYISPGDFSVS +SKKGGSRD diff 
--git a/CURATED_SET/draft_seeds/cH2A_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2A_(Homo_sapiens).fasta new file mode 100644 index 0000000..d569fbe --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A_(Homo_sapiens).fasta @@ -0,0 +1,68 @@ +>Homo|NP_778235.1|cH2A.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TES-HKPGKNK +>Homo|NP_254280.1|cH2A.11_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK +>Homo|NP_734466.1|cH2A.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKSKSRSSRAGLQFPVGRIHRLLRKGNYAERIGAGAPVYLAAVLEYLT +AEILELAGNASRDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TESHHHKAQSK +>Homo|NP_003504.2|cH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK +>Homo|NP_066390.1|cH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK +>Homo|NP_003503.1|cH2A.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK +>Homo|NP_066409.1|cH2A.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK +>Homo|NP_003500.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK +>Homo|NP_003501.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK +>Homo|NP_003502.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK +>Homo|NP_003505.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK +>Homo|NP_066408.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK +>Homo|NP_542163.1|cH2A.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAK-- +>Homo|NP_066544.1|cH2A.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKTK-- +>Homo|NP_001035807.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK +>Homo|NP_003507.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK 
+>Homo|NP_003508.1|cH2A.9_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES--HKAKSK diff --git a/CURATED_SET/draft_seeds/cH2A_(Homo_sapiens)_only.fasta b/CURATED_SET/draft_seeds/cH2A_(Homo_sapiens)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH2A_(Mammalia).fasta b/CURATED_SET/draft_seeds/cH2A_(Mammalia).fasta new file mode 100644 index 0000000..038a20d --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A_(Mammalia).fasta @@ -0,0 +1,136 @@ +>Ailuropoda|XP_011215280.1|cH2A_(Mammalia) organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Bos|NP_001192525.1|cH2A_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Callithrix|XP_008992112.1|cH2A_(Mammalia) organism=Callithrix jacchus phylum=Chordata class=Mammalia +MSGRGKQGGKVRAKAKSRSSRAGLQFPVGRIHRLLRKGKYADRIGAGAPVYLAAVLEYLT +AEILELAGNASRDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TESHHHKSQSK- +>Canis|XP_005640150.1|cH2A_(Mammalia) organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Cavia|XP_003478913.1|cH2A_(Mammalia) organism=Cavia porcellus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Cricetulus|XP_007634672.1|cH2A_(Mammalia) organism=Cricetulus griseus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT 
+AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Heterocephalus|XP_004847876.1|cH2A_(Mammalia) organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Loxodonta|XP_003422330.1|cH2A_(Mammalia) organism=Loxodonta africana phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Macaca|NP_001180653.1|cH2A_(Mammalia) organism=Macaca mulatta phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Monodelphis|XP_007485414.1|cH2A_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKTKGK- +>Mus|NP_835489.1|cH2A_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Nomascus|XP_004088806.1|cH2A_(Mammalia) organism=Nomascus leucogenys phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Oryctolagus|XP_008246690.1|cH2A_(Mammalia) organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Pan|XP_009448979.1|cH2A_(Mammalia) organism=Pan troglodytes phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGKA 
+>Rattus|NP_068612.2|cH2A_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Sus|XP_003356618.1|cH2A_(Mammalia) organism=Sus scrofa phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_778235.1|cH2A.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TES-HKPGKNK- +>Homo|NP_254280.1|cH2A.11_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_734466.1|cH2A.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKSKSRSSRAGLQFPVGRIHRLLRKGNYAERIGAGAPVYLAAVLEYLT +AEILELAGNASRDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TESHHHKAQSK- +>Homo|NP_003504.2|cH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_066390.1|cH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_003503.1|cH2A.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_066409.1|cH2A.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_003500.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_003501.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_003502.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_003505.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_066408.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_542163.1|cH2A.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAK--- +>Homo|NP_066544.1|cH2A.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKTK--- +>Homo|NP_001035807.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- 
+>Homo|NP_003507.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_003508.1|cH2A.9_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES--HKAKSK- +>Mus|NP_783589.1|cH2A.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MSGPTKRGGKARAKVKSRSSRAGLQFPVGRVHRLLRQGNYAQRIGAGAPVYLAAVLEYLT +AEVLELAGNAARDNKKTRITPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES--HKSQTK- diff --git a/CURATED_SET/draft_seeds/cH2A_(Mammalia)_only.fasta b/CURATED_SET/draft_seeds/cH2A_(Mammalia)_only.fasta new file mode 100644 index 0000000..2c6a16b --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A_(Mammalia)_only.fasta @@ -0,0 +1,64 @@ +>Ailuropoda|XP_011215280.1|cH2A_(Mammalia) organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Bos|NP_001192525.1|cH2A_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Callithrix|XP_008992112.1|cH2A_(Mammalia) organism=Callithrix jacchus phylum=Chordata class=Mammalia +MSGRGKQGGKVRAKAKSRSSRAGLQFPVGRIHRLLRKGKYADRIGAGAPVYLAAVLEYLT +AEILELAGNASRDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TESHHHKSQSK- +>Canis|XP_005640150.1|cH2A_(Mammalia) organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Cavia|XP_003478913.1|cH2A_(Mammalia) organism=Cavia porcellus phylum=Chordata class=Mammalia 
+MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Cricetulus|XP_007634672.1|cH2A_(Mammalia) organism=Cricetulus griseus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Heterocephalus|XP_004847876.1|cH2A_(Mammalia) organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Loxodonta|XP_003422330.1|cH2A_(Mammalia) organism=Loxodonta africana phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Macaca|NP_001180653.1|cH2A_(Mammalia) organism=Macaca mulatta phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Monodelphis|XP_007485414.1|cH2A_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKTKGK- +>Mus|NP_835489.1|cH2A_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Nomascus|XP_004088806.1|cH2A_(Mammalia) organism=Nomascus leucogenys phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Oryctolagus|XP_008246690.1|cH2A_(Mammalia) organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT 
+AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Pan|XP_009448979.1|cH2A_(Mammalia) organism=Pan troglodytes phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGKA +>Rattus|NP_068612.2|cH2A_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Sus|XP_003356618.1|cH2A_(Mammalia) organism=Sus scrofa phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- diff --git a/CURATED_SET/draft_seeds/cH2A_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/cH2A_(Mus_musculus).fasta new file mode 100644 index 0000000..0f7a65c --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A_(Mus_musculus).fasta @@ -0,0 +1,4 @@ +>Mus|NP_783589.1|cH2A.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MSGPTKRGGKARAKVKSRSSRAGLQFPVGRVHRLLRQGNYAQRIGAGAPVYLAAVLEYLT +AEVLELAGNAARDNKKTRITPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TESHKSQTK diff --git a/CURATED_SET/draft_seeds/cH2A_(Mus_musculus)_only.fasta b/CURATED_SET/draft_seeds/cH2A_(Mus_musculus)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH2A_(Plants).fasta b/CURATED_SET/draft_seeds/cH2A_(Plants).fasta new file mode 100644 index 0000000..5369c07 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A_(Plants).fasta @@ -0,0 +1,36 @@ +>Ostreococcus|XP_003080758.2|cH2A_(Chlorophyta) organism=Ostreococcus tauri phylum=Chlorophyta class=Mamiellophyceae +---------MSGRGK---GKTGKKAMSRSAKAGLQFPVGRVARYLKQGKYATRVGAGAPV +YLAAVLEYLAAEVLELAGNASRDNKKSRIVPRHIQLAIRNDEELSKLLGTVTIASGGVLP +NIHSVLLPKKSKK-------------------- +>Arabidopsis|NP_001190852.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana 
phylum=Streptophyta class=Magnoliopsida +---------MAGRGKQLGSGAAKKSTSRSSKAGLQFPVGRIARFLKAGKYAERVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSKLLGDVTIANGGVMP +NIHNLLLPKKAGSSKPTEED------------- +>Arabidopsis|NP_175517.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +---------MAGRGKTLGSGSAKKATTRSSKAGLQFPVGRIARFLKKGKYAERVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSKLLGDVTIANGGVMP +NIHNLLLPKKTGASKPSAEDD------------ +>Arabidopsis|NP_188703.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +---------MAGRGKTLGSGVAKKSTSRSSKAGLQFPVGRIARFLKNGKYATRVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSKLLGDVTIANGGVMP +NIHSLLLPKKAGASKPSADED------------ +>Arabidopsis|NP_200275.1|cH2A_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +---------MAGRGKTLGSGGAKKATSRSSKAGLQFPVGRIARFLKAGKYAERVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSKLLGDVTIANGGVMP +NIHNLLLPKKAGASKPQED-------------- +>Beta|XP_010685819.1|cH2A_(Embryophyta) organism=Beta vulgaris subsp. 
vulgaris phylum=Streptophyta class=Magnoliopsida +MDSTAGGKAKKGAGGRKGGGPKKKPVSRSVKAGLQFPVGRIGRYLKKGRYAQRVGTGAPV +YLAAVLEYLAAEVLELAGNAARDNKKNRIIPRHVLLAVRNDDELGKLLSGVTIAHGGVLP +NINPVLLPKKAGGDKATKEPKSPSKATKSPKKA +>Oryza|NP_001066688.1|cH2A_(Embryophyta) organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +---------MAGRGKAIGAGAAKKATSRSSKAGLQFPVGRIARFLKAGKYAERVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELTKLLGGATIASGGVMP +NIHQHLLPKKAGSSKASHADDDDN--------- +>Zea|NP_001132837.1|cH2A_(Embryophyta) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +---------MAGRGKAIGSGAAKKATSRSSKAGLQFPVGRIARFLKAGKYAERVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELSRLLGTVTIASGGVMP +NIHNLLLPKKAGGGSAKAAAGDED--------- +>Zea|NP_001141988.1|cH2A_(Embryophyta) organism=Zea mays phylum=Streptophyta class=Magnoliopsida +---------MAGRGKAIGAGAAKKATSRSSKAGLQFPVGRIARFLKAGKYAERVGAGAPV +YLAAVLEYLAAEVLELAGNAARDNKKTRIVPRHIQLAVRNDEELTKLLGGATIASGGVMP +NIHQHLLPKKAASSKASVDDDDN---------- diff --git a/CURATED_SET/draft_seeds/cH2A_(Plants)_only.fasta b/CURATED_SET/draft_seeds/cH2A_(Plants)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH2A_(Protists).fasta b/CURATED_SET/draft_seeds/cH2A_(Protists).fasta new file mode 100644 index 0000000..cc1550b --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A_(Protists).fasta @@ -0,0 +1,16 @@ +>Toxoplasma|XP_002365268.1|cH2A_(Protists) organism=Toxoplasma gondii ME49 phylum=Apicomplexa class=Conoidasida +MSAKGKGGRAKKSGKSS---SKSAKAGLQFPVGRIGRYLKKGRYAKRVGAGAPVYMAAVL +EYLCAEILELAGNAA--RDHKKTRIIPRHIQLAVRNDEELSKFLGGVTIASGGVMPNVHS +VLLPKKSKGKKSQ----- +>Tetrahymena|AAC37292.1|cH2A_(Protists) organism=Tetrahymena thermophila phylum=Ciliophora class=Oligohymenophorea +MSTTGKGGKAKGKTASSKQVSRSARAGLQFPVGRISRFLKNGRYSERIGTGAPVYLAAVL +EYLAAEVLELAGNAA--KDNKKTRIVPRHILLAIRNDEELNKLMANTTIADGGVLPNINP +MLLPSKTKKSTEPEH--- +>Trypanosoma|XP_845905.1|cH2A_(Protists) organism=Trypanosoma brucei brucei TREU927 
phylum=Euglenozoa class=Kinetoplastea +MATPKQAVKKASKGGS----SRSVKAGLIFPVGRVGTLLRRGQYARRIGASGAVYMAAVL +EYLTAELLELSVKAAAQQTKKTKRLTPRTVTLAVRHDDDLGALLRNVTMSRGGVMPSLNK +ALAKKQKSGKHAKATPSV +>Perkinsus|EER16127.1|cH2A_(Protists) organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +MSGKGKGAVLEGMHKDKK--TRSAKAGLQFPVGRIARYMKHGRYAKRVGAGAPVYLAAVL +EYLVAEILELAGNAA--RDHKKTRINPRHIQLAVRNDDELNEFLSNVTIASGGVLPNIHT +SLLPKKSTKKSMEY---- diff --git a/CURATED_SET/draft_seeds/cH2A_(Vertebrata).fasta b/CURATED_SET/draft_seeds/cH2A_(Vertebrata).fasta new file mode 100644 index 0000000..fec8b74 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A_(Vertebrata).fasta @@ -0,0 +1,148 @@ +>Danio|XP_009296490.1|cH2A_(Vertebrata) organism=Danio rerio phylum=Chordata class=Actinopteri +MSGRGKTGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TE---KAAKGK- +>Xenopus|NP_001087948.1|cH2A_(Vertebrata) organism=Xenopus laevis phylum=Chordata class=Amphibia +MSGRGKQGGKTRAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGRVTIAQGGVLPNIQSVLLPKK +TES-SKSAKSK- +>Gallus|NP_001072943.1|cH2A_(Vertebrata) organism=Gallus gallus phylum=Chordata class=Aves +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TD--SHKAKAK- +>Ailuropoda|XP_011215280.1|cH2A_(Mammalia) organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Bos|NP_001192525.1|cH2A_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Callithrix|XP_008992112.1|cH2A_(Mammalia) organism=Callithrix jacchus phylum=Chordata class=Mammalia 
+MSGRGKQGGKVRAKAKSRSSRAGLQFPVGRIHRLLRKGKYADRIGAGAPVYLAAVLEYLT +AEILELAGNASRDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TESHHHKSQSK- +>Canis|XP_005640150.1|cH2A_(Mammalia) organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Cavia|XP_003478913.1|cH2A_(Mammalia) organism=Cavia porcellus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Cricetulus|XP_007634672.1|cH2A_(Mammalia) organism=Cricetulus griseus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Heterocephalus|XP_004847876.1|cH2A_(Mammalia) organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Loxodonta|XP_003422330.1|cH2A_(Mammalia) organism=Loxodonta africana phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Macaca|NP_001180653.1|cH2A_(Mammalia) organism=Macaca mulatta phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Monodelphis|XP_007485414.1|cH2A_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKTKGK- +>Mus|NP_835489.1|cH2A_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT 
+AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Nomascus|XP_004088806.1|cH2A_(Mammalia) organism=Nomascus leucogenys phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Oryctolagus|XP_008246690.1|cH2A_(Mammalia) organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Pan|XP_009448979.1|cH2A_(Mammalia) organism=Pan troglodytes phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGKA +>Rattus|NP_068612.2|cH2A_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Sus|XP_003356618.1|cH2A_(Mammalia) organism=Sus scrofa phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_778235.1|cH2A.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TES-HKPGKNK- +>Homo|NP_254280.1|cH2A.11_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_734466.1|cH2A.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKSKSRSSRAGLQFPVGRIHRLLRKGNYAERIGAGAPVYLAAVLEYLT +AEILELAGNASRDNKKTRIIPRHLQLAIRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TESHHHKAQSK- +>Homo|NP_003504.2|cH2A.2_(Homo_sapiens) 
organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_066390.1|cH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_003503.1|cH2A.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_066409.1|cH2A.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYSERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_003500.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_003501.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_003502.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_003505.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_066408.1|cH2A.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT 
+AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_542163.1|cH2A.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAK--- +>Homo|NP_066544.1|cH2A.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKTK--- +>Homo|NP_001035807.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_003507.1|cH2A.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES-HHKAKGK- +>Homo|NP_003508.1|cH2A.9_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYMAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TES--HKAKSK- +>Mus|NP_783589.1|cH2A.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MSGPTKRGGKARAKVKSRSSRAGLQFPVGRVHRLLRQGNYAQRIGAGAPVYLAAVLEYLT +AEVLELAGNAARDNKKTRITPRHLQLAIRNDEELNKLLGRVTIAQGGVLPNIQAVLLPKK +TES--HKSQTK- diff --git a/CURATED_SET/draft_seeds/cH2A_(Vertebrata)_only.fasta b/CURATED_SET/draft_seeds/cH2A_(Vertebrata)_only.fasta new file mode 100644 index 0000000..ee2ced4 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2A_(Vertebrata)_only.fasta @@ -0,0 +1,12 @@ +>Danio|XP_009296490.1|cH2A_(Vertebrata) organism=Danio rerio phylum=Chordata class=Actinopteri +MSGRGKTGGKARAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGGVTIAQGGVLPNIQAVLLPKK +TE--KAAKGK +>Xenopus|NP_001087948.1|cH2A_(Vertebrata) 
organism=Xenopus laevis phylum=Chordata class=Amphibia +MSGRGKQGGKTRAKAKTRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAVRNDEELNKLLGRVTIAQGGVLPNIQSVLLPKK +TESSKSAKSK +>Gallus|NP_001072943.1|cH2A_(Vertebrata) organism=Gallus gallus phylum=Chordata class=Aves +MSGRGKQGGKARAKAKSRSSRAGLQFPVGRVHRLLRKGNYAERVGAGAPVYLAAVLEYLT +AEILELAGNAARDNKKTRIIPRHLQLAIRNDEELNKLLGKVTIAQGGVLPNIQAVLLPKK +TD-SHKAKAK diff --git a/CURATED_SET/draft_seeds/cH2A_only.fasta b/CURATED_SET/draft_seeds/cH2A_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH2B.10_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.10_(Homo_sapiens).fasta new file mode 100644 index 0000000..c8ba6a0 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.10_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_003511.1|cH2B.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEPSKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B.11_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.11_(Homo_sapiens).fasta new file mode 100644 index 0000000..840f885 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.11_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_003518.2|cH2B.11_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPDPAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B.12_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.12_(Homo_sapiens).fasta new file mode 100644 index 0000000..b8fd3fe --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.12_(Homo_sapiens).fasta @@ -0,0 +1,8 @@ +>Homo|NP_001019770.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPDPAKSAPAPKKGSKKAVTKVQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT 
+KYTSSK-------- +>Homo|NP_001154806.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPDPAKSAPAPKKGSKKAVTKVQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSKLIGPILWK diff --git a/CURATED_SET/draft_seeds/cH2B.13_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.13_(Homo_sapiens).fasta new file mode 100644 index 0000000..62730bd --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.13_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_003519.1|cH2B.13_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEPAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B.14_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.14_(Homo_sapiens).fasta new file mode 100644 index 0000000..ac820c6 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.14_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_778225.1|cH2B.14_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPDPSKSAPAPKKGSKKAVTKAQKKDGKKRKRGRKESYSIYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIASEASRLAHYNKRSTITSREVQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B.15_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.15_(Homo_sapiens).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH2B.1_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.1_(Homo_sapiens).fasta new file mode 100644 index 0000000..165ca2a --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.1_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_733759.1|cH2B.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEVSSKGATISKKGFKKAVVKTQKKEGKKRKRTRKESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIASEASRLAHYSKRSTISSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B.1_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/cH2B.1_(Mus_musculus).fasta new file mode 100644 
index 0000000..4e571cb --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.1_(Mus_musculus).fasta @@ -0,0 +1,8 @@ +>Mus|CAA62299.1|cH2B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEVAVKGATISKKGFKKAVTKTQKKEGRKRKRCRKESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Mus|NP_835502.1|cH2B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEPA-KSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B.2_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.2_(Homo_sapiens).fasta new file mode 100644 index 0000000..4ba051b --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.2_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_066406.1|cH2B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEPSKSAPAPKKGSKKAITKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B.3_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.3_(Homo_sapiens).fasta new file mode 100644 index 0000000..7424554 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.3_(Homo_sapiens).fasta @@ -0,0 +1,20 @@ +>Homo|NP_003509.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEPAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK +>Homo|NP_003513.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEPAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK +>Homo|NP_003514.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEPAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK 
+>Homo|NP_003516.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEPAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK +>Homo|NP_003517.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEPAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B.4_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.4_(Homo_sapiens).fasta new file mode 100644 index 0000000..afb95e5 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.4_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_066407.1|cH2B.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEPTKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B.5_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.5_(Homo_sapiens).fasta new file mode 100644 index 0000000..6c17988 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.5_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_003515.1|cH2B.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPDPAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B.6_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.6_(Homo_sapiens).fasta new file mode 100644 index 0000000..4627a24 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.6_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_066402.2|cH2B.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEPAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSAK diff --git a/CURATED_SET/draft_seeds/cH2B.7_(Homo_sapiens).fasta 
b/CURATED_SET/draft_seeds/cH2B.7_(Homo_sapiens).fasta new file mode 100644 index 0000000..183e3bd --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.7_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_001299582.1|cH2B.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEPAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSAK diff --git a/CURATED_SET/draft_seeds/cH2B.8_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.8_(Homo_sapiens).fasta new file mode 100644 index 0000000..7fc002a --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.8_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_003510.1|cH2B.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPELAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B.9_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B.9_(Homo_sapiens).fasta new file mode 100644 index 0000000..16c1abb --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.9_(Homo_sapiens).fasta @@ -0,0 +1,4 @@ +>Homo|NP_003512.1|cH2B.9_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEPVKSAPVPKKGSKKAINKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B.E_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/cH2B.E_(Mus_musculus).fasta new file mode 100644 index 0000000..daf743b --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B.E_(Mus_musculus).fasta @@ -0,0 +1,4 @@ +>Mus|Q64524.3|cH2B.E_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPELAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIANEASRLAHYNKRSTITSREIQTSVRLLLPGELAKHAVSEGTKAVT +KYTSAK diff --git a/CURATED_SET/draft_seeds/cH2B.fasta b/CURATED_SET/draft_seeds/cH2B.fasta new file mode 100644 index 0000000..ea2cab9 --- /dev/null +++ 
b/CURATED_SET/draft_seeds/cH2B.fasta @@ -0,0 +1,348 @@ +>Drosophila|NP_724342.1|cH2B_(Animals) organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +-------MP-P-----------------------------KTSGKAAKKAGKAQ-KNITK +--TDKKK------KRKRKESYAIYIYKVLKQVHPDTGISSKAMSIMNSFVNDIFERIAAE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Psammechinus|AAA30025.1|cH2B_(Animals) organism=Psammechinus miliaris phylum=Echinodermata class=Echinoidea +----M--APTA-----------------------------QVAKKGSKKAVKAP-R--PS +--GGKKR------NRKRKESYGIYIYKVLKQVHPDTGISSRAMIIMNSFVNDIFERIAGE +SSRLAQYNKKSTISSREIQTAVRLILPGELAKHAVSEGTKAVTKYTTSK-------- +>Strongylocentrotus|NP_999710.2|cH2B_(Animals) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +----M--APTA-----------------------------QVAKKGSKKAVKGT-KT-AX +--GGKKR------NRKRKESYGIYIYKVLKQVHPDTGISSRAMVIMNSXVBDIFERIAGE +SSRLAQYNKKXTXSSREIQTAVRLILPGELAKHAVSEGTKAVTKYTTSK-------- +>Caenorhabditis|NP_505464.1|cH2B_(Animals) organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +----M--AP-P-----------------------------KPSAKGAKKAAKTV-SK-PK +--DGKKR------KHARKESYSVYIYRVLKQVHPDTGVSSKAMSIMNSFVNDVFERIASE +ASRLAHYNKRSTISSREIQTAVRLILPGELAKHAVSEGTKAVTKYTSSK-------- +>Xenopus|NP_001086753.1|cH2B_(Vertebrata) organism=Xenopus laevis phylum=Chordata class=Amphibia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKT-Q--KK +--DGKKR------RKSRKESYAIYVYKVLKQVHPDTGISSKAMSIMNSFVNDVFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSAK-------- +>Gallus|NP_001073188.1|cH2B_(Vertebrata) organism=Gallus gallus phylum=Chordata class=Aves +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKT-Q--KK +--GDKKR------KKSRKESYSIYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Ailuropoda|XP_011216221.1|cH2B_(Mammalia) organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +MPD-P--AKSA-----------------------------PAPKKGSKKAVTKV-Q--KK 
+--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Bos|NP_001032546.1|cH2B_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Canis|XP_005640164.1|cH2B_(Mammalia) organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MPELT--SKGT-----------------------------TISKKGFKRAVAKT-Q--KK +--EGKKR------RRCRKESYSIYIYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Cavia|XP_013006892.1|cH2B_(Mammalia) organism=Cavia porcellus phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Cricetulus|XP_007634673.1|cH2B_(Mammalia) organism=Cricetulus griseus phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSAK-------- +>Equus|XP_005610255.1|cH2B_(Mammalia) organism=Equus caballus phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSIYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Heterocephalus|XP_012928647.1|cH2B_(Mammalia) organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Loxodonta|XP_003422331.1|cH2B_(Mammalia) organism=Loxodonta africana phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK 
+--DGKKR------KRSRKESYSIYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSAK-------- +>Macaca|XP_011741102.1|cH2B_(Mammalia) organism=Macaca nemestrina phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Monodelphis|XP_007485418.1|cH2B_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +MPE-P--GKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIASE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Mus|NP_783594.1|cH2B_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +MPEVA--VKGA-----------------------------TISKKGFKKAVTKT-Q--KK +--EGRKR------KRCRKESYSIYIYKVLKQVHPDTGISSKAMSIMNSFVTDIFERIASE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Oryctolagus|XP_008246696.1|cH2B_(Mammalia) organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Rattus|HISTDB_cH2B_1_0|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MPEVS--AKGT-----------------------------TISKKGFKKAVTKT-Q--KK +--EGRKR------KRCREESYSIYIYKVLKQVHPDTGISSKAMSIMNSFVTDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Rattus|NP_072169.1|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MPEVS--AKGT-----------------------------TISKKGFKKAVTKT-Q--KK +--EGRKR------KRCREESYSIYIYKVLKQVHPDTGISSKAMSIMNSFVTDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Rattus|NP_072173.2|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK 
+--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Sus|XP_005665716.1|cH2B_(Mammalia) organism=Sus scrofa phylum=Chordata class=Mammalia +MPD-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_003511.1|cH2B.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-P--SKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_003518.2|cH2B.11_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSIYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_001019770.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-P--AKSA-----------------------------PAPKKGSKKAVTKV-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_001154806.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-P--AKSA-----------------------------PAPKKGSKKAVTKV-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSKLIGPILWK +>Homo|NP_003519.1|cH2B.13_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSIYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_778225.1|cH2B.14_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-P--SKSA-----------------------------PAPKKGSKKAVTKA-Q--KK 
+--DGKKR------KRGRKESYSIYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIASE +ASRLAHYNKRSTITSREVQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_733759.1|cH2B.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEVS--SKGA-----------------------------TISKKGFKKAVVKT-Q--KK +--EGKKR------KRTRKESYSIYIYKVLKQVHPDTGISSKAMSIMNSFVTDIFERIASE +ASRLAHYSKRSTISSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_066406.1|cH2B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-P--SKSA-----------------------------PAPKKGSKKAITKA-Q--KK +--DGKKR------KRSRKESYSIYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_003509.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_003513.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_003514.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_003516.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_003517.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK 
+--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_066407.1|cH2B.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-P--TKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_003515.1|cH2B.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_066402.2|cH2B.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSIYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSAK-------- +>Homo|NP_001299582.1|cH2B.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-P--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSAK-------- +>Homo|NP_003510.1|cH2B.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-L--AKSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIASE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Homo|NP_003512.1|cH2B.9_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-P--VKSA-----------------------------PVPKKGSKKAINKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIAGE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Mus|NP_783595.1|cH2B_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEPS---KSA-----------------------------PAPKKGSKKAISKA-Q--KK 
+--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIASE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Mus|CAA62299.1|cH2B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEVA--VKGA-----------------------------TISKKGFKKAVTKT-Q--KK +--EGRKR------KRCRKESYSIYIYKVLKQVHPDTGISSKAMSIMNSFVTDIFERIASE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Mus|NP_835502.1|cH2B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEPA---KSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSVYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIASE +ASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTKYTSSK-------- +>Mus|Q64524.3|cH2B.E_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPELA---KSA-----------------------------PAPKKGSKKAVTKA-Q--KK +--DGKKR------KRSRKESYSIYVYKVLKQVHPDTGISSKAMGIMNSFVNDIFERIANE +ASRLAHYNKRSTITSREIQTSVRLLLPGELAKHAVSEGTKAVTKYTSAK-------- +>Saccharomyces|NP_009553.1|cH2B_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +MSSAA--EKKP-----------------------------ASKAPAEKKPAAKK-TSTSV +--DGKKR------SKVRKETYSSYIYKVLKQTHPDTGISQKSMSILNSFVNDIFERIATE +ASKLAAYNKKSTISAREIQTAVRLILPGELAKHAVSEGTRAVTKYSSSTQA------ +>Saccharomyces|NP_010510.3|cH2B_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +MSAKA--EKKP-----------------------------ASKAPAEKKPAAKK-TSTST +--DGKKR------SKARKETYSSYIYKVLKQTHPDTGISQKSMSILNSFVNDIFERIATE +ASKLAAYNKKSTISAREIQTAVRLILPGELAKHAVSEGTRAVTKYSSSTQA------ +>Chlamydomonas|XP_001690668.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----VTAEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_001691007.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae 
+MAPKK--DEKP----ATQEAAAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_001691162.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_001691541.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAG-KKAKKEPAKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_001691693.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKS----ATQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_001692948.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKK---DAAAPEAAEPKAEKESKPKADKAA-KKAKKSPAKKAAKE--GGDGE +--KGDKKKGKKK---SSVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_001693071.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKK---DAAAPEAAEPKAEKESKPKADKAA-KKAKKAPAKKAAKE--GGDGE +--KGDKKKGKKK---SSVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_001693722.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKPATRAATQEAGAEATAKAEAKPKAEKAA-KKAKKEPAKKAAKEP-KGDGE +--KKDKK--KK----SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE 
+ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_001696245.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKTAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_001696283.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKADAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_001696556.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGEGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_001700194.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKA--AEKA----------------------------------PAKKTPAKT-AEGSK +--KKKKL--------NKAETYKVYIYKVLKQVHPDTGISSKAMSIMNSFINDIFDKMANE +AVRLAQYNKKPTLTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTST--------- +>Chlamydomonas|XP_001700403.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGA +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_001700461.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_001702223.2|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii 
phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATQEAAAEAPAKAEAKPKAEKAA-KKA-KEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_042914553.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----PTQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_042914596.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKR--DEKP----ATQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGA +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Chlamydomonas|XP_042923653.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP--------------------PKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKS---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG--------- +>Volvox|XP_002946193.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKNVKEEKA-----EEKAEAGAAAKAEAKAKAAKPA-KKEKKAPAKKAAKEP-SAGGE +--GEGDKKAKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS--------- +>Volvox|XP_002946194.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKNVKEEKV---EEKAEAGAAAKAKAEAKAKAAKPA-KKEKKAPAKKAAKEP-SAGGE +--GEGDKKAKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS--------- +>Volvox|XP_002946213.1|cH2B_(Chlorophyta) organism=Volvox carteri f. 
nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKG--KKADAAGAEAKPDPSELKAEPVSEGKADAKP-KKEKKAAGKKAAKD--SGAGE +--EVEKK-GKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASRLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTAS--------- +>Volvox|XP_002947412.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKK--EEKP-----ASQAAEAPEVKAEAKPKAVKAPKKKEKKAPAKKVAKEP-SAGGE +--DGDKK-SKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA--------- +>Volvox|XP_002947842.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKT------ASQPADAEPKAEAKPKETKV--KKEKKAPAKKAAKEP-AAGGE +--EGDKK-AKKKTKVSKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASRLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA--------- +>Volvox|XP_002948133.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +-MAKK--DEKT-----ASQPADAAEPKADAKPKAAKV--KKEKKAPAKKTAKEP-VAGGE +--EGDKK-AKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +SSRLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA--------- +>Volvox|XP_002948276.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKT-----ASQPADAAEPKADAKPKAAKV--KKEKKAPAKKAAKEP-VAGSE +--EGEKK-AKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSVMNSFINDIFEKVATE +ASRLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA--------- +>Volvox|XP_002948472.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKA--AEKA----------------------------------PAKKTPAKT-AEGSK +--KKKKI--------NKAETYKVYIYKVLKQVHPDTGISSKAMSIMNSFINDIFDKMANE +AVRLAQYNKKPTLTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS--------- +>Volvox|XP_002951705.1|cH2B_(Chlorophyta) organism=Volvox carteri f. 
nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKA-----AAPAADAPEVKVEAKPKKARVP-KKEKKAPAKKVAKEP-AAGGE +--EGDKK-AKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS--------- +>Volvox|XP_002955481.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKNVKEEKQ-------EKAEAVEPKAEAKPKKEKAP-KKEKKAPAKKAAKEP-SAAGD +--DGDKK-AKKKAKVSKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA--------- +>Volvox|XP_002956800.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKTK-EEKP--------ASEAVEPKAEAKPKAEKAPKKKEKKAPAKKSAKEPAAGDAA +--EGDKK--KKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA--------- +>Micromonas|XP_002501781.1|cH2B_(Chlorophyta) organism=Micromonas commoda phylum=Chlorophyta class=Mamiellophyceae +-------MAKP----------------------------------TSKKPAKKT-VAKGG +--SKAKK--------SKTETYKIYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKIATE +ASKLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSN--------- +>Micromonas|XP_002503973.1|cH2B_(Chlorophyta) organism=Micromonas commoda phylum=Chlorophyta class=Mamiellophyceae +-------MAKP----------------------------------TSKKPAKKT-VKGAG +--GKAKK--------SKTETYKIYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKIATE +ASKLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSN--------- +>Micromonas|XP_003055512.1|cH2B_(Chlorophyta) organism=Micromonas pusilla CCMP1545 phylum=Chlorophyta class=Mamiellophyceae +-------MAKP----------------------------------TSKKPAKKS-LKGGK +--KGGKK--------SKTETYKIYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKIATE +ASKLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSA--------- +>Ostreococcus|XP_001419128.1|cH2B_(Chlorophyta) organism=Ostreococcus lucimarinus CCE9901 phylum=Chlorophyta class=Mamiellophyceae +---------------------------------------------MAKKPAQKK-PSGAK 
+--KVGRK--------SKSETYKIYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKIATE +AAKLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSA--------- +>Coccomyxa|XP_005643326.1|cH2B_(Chlorophyta) organism=Coccomyxa subellipsoidea C-169 phylum=Chlorophyta class=Trebouxiophyceae +MAPKG-----------------------------------SQGREEARQKGHQD-RQDWC +--KKKTK--------AKVESFKIYMCEVLKQAHPYTGISSWAISILNAFVTDTFGKMATE +TAQLARYNKKPTVASGKIQTALRLILPGKLAKHTVSEGSKAVTESTSAAITP----- +>Coccomyxa|XP_005643701.1|cH2B_(Chlorophyta) organism=Coccomyxa subellipsoidea C-169 phylum=Chlorophyta class=Trebouxiophyceae +MAPKA--E-----------------------------------KKPAKKVAKTA-KTGGK +--RKSK---------AKVESFKIYIYKVLKQVHPDTGISSRAISILNSFITDIFEKIATE +TAQLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSA--------- +>Arabidopsis|Q9FFC0.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-MAKA--DKKP------AEKK-----PAEKTPAAEPAAAAEKKPKAGKKLPKEP-AGAGD +--KKKKR------SKKNVETYKIYIFKVLKQVHPDIGISSKAMGIMNSFINDIFEKLAGE +SSKLARYNKKPTITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS--------- +>Arabidopsis|Q9LQQ4.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MAPRA--EKKP------AEKKTAAERPVEENKAAEK-APAEKKPKAGKKLPPK---EAGD +--KKKKR------SKKNVETYKIYIFKVLKQVHPDIGISSKAMGIMNSFINDIFEKLAQE +SSKLARYNKKPTITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS--------- +>Arabidopsis|Q9LZ45.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MAPKA--EKKP------AEKA-----PA---------------PKAEKKIAKE--GGTSE +IVKKKKK------TKKSTETYKIYIFKVLKQVHPDIGISGKAMGIMNSFINDIFEKLAQE +SSRLARYNKKPTITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS--------- +>Arabidopsis|Q9LZT0.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MAPKA--EKKP------AEKK-----PVEEKSKAEK-APAEKKPKAGKKLPKEA-GAGGD +--KKKKM------KKKSVETYKIYIFKVLKQVHPDIGISSKAMGIMNSFINDIFEKLASE +SSKLARYNKKPTITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS--------- +>Trypanosoma|XP_823266.1|cH2B_(Protists) organism=Trypanosoma brucei brucei 
TREU927 phylum=Euglenozoa class=Kinetoplastea +-------MATP-----------------------------KSTPAKTRKEAKKT------ +-------------RRQRKRTWNVYVSRSLRSINSQMSMTSRTMKIVNSFVNDLFERIAAE +AATIVRVNRKRTLGARELQTAVRLVLPADLAKHAMAEGTKAVSHASS---------- diff --git a/CURATED_SET/draft_seeds/cH2B_(Animals).fasta b/CURATED_SET/draft_seeds/cH2B_(Animals).fasta new file mode 100644 index 0000000..2a5070d --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B_(Animals).fasta @@ -0,0 +1,180 @@ +>Drosophila|NP_724342.1|cH2B_(Animals) organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +-----MP-PKTSGKAAKKAGKAQKNITKTDKKKKRKRKESYAIYIYKVLKQVHPDTGISS +KAMSIMNSFVNDIFERIAAEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Psammechinus|AAA30025.1|cH2B_(Animals) organism=Psammechinus miliaris phylum=Echinodermata class=Echinoidea +----MAPTAQVAKKGSKKAVKAPR--PSGGKKRNRKRKESYGIYIYKVLKQVHPDTGISS +RAMIIMNSFVNDIFERIAGESSRLAQYNKKSTISSREIQTAVRLILPGELAKHAVSEGTK +AVTKYTTSK-------- +>Strongylocentrotus|NP_999710.2|cH2B_(Animals) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +----MAPTAQVAKKGSKKAVKGTKT-AXGGKKRNRKRKESYGIYIYKVLKQVHPDTGISS +RAMVIMNSXVBDIFERIAGESSRLAQYNKKXTXSSREIQTAVRLILPGELAKHAVSEGTK +AVTKYTTSK-------- +>Caenorhabditis|NP_505464.1|cH2B_(Animals) organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +----MAP-PKPSAKGAKKAAKTVSK-PKDGKKRKHARKESYSVYIYRVLKQVHPDTGVSS +KAMSIMNSFVNDVFERIASEASRLAHYNKRSTISSREIQTAVRLILPGELAKHAVSEGTK +AVTKYTSSK-------- +>Xenopus|NP_001086753.1|cH2B_(Vertebrata) organism=Xenopus laevis phylum=Chordata class=Amphibia +MPE-PAKSAPAPKKGSKKAVTKTQ--KKDGKKRRKSRKESYAIYVYKVLKQVHPDTGISS +KAMSIMNSFVNDVFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSAK-------- +>Gallus|NP_001073188.1|cH2B_(Vertebrata) organism=Gallus gallus phylum=Chordata class=Aves +MPE-PAKSAPAPKKGSKKAVTKTQ--KKGDKKRKKSRKESYSIYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- 
+>Ailuropoda|XP_011216221.1|cH2B_(Mammalia) organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKVQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Bos|NP_001032546.1|cH2B_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Canis|XP_005640164.1|cH2B_(Mammalia) organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MPELTSKGTTISKKGFKRAVAKTQ--KKEGKKRRRCRKESYSIYIYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Cavia|XP_013006892.1|cH2B_(Mammalia) organism=Cavia porcellus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Cricetulus|XP_007634673.1|cH2B_(Mammalia) organism=Cricetulus griseus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSAK-------- +>Equus|XP_005610255.1|cH2B_(Mammalia) organism=Equus caballus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Heterocephalus|XP_012928647.1|cH2B_(Mammalia) organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Loxodonta|XP_003422331.1|cH2B_(Mammalia) organism=Loxodonta africana phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSAK-------- +>Macaca|XP_011741102.1|cH2B_(Mammalia) 
organism=Macaca nemestrina phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Monodelphis|XP_007485418.1|cH2B_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +MPE-PGKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Mus|NP_783594.1|cH2B_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +MPEVAVKGATISKKGFKKAVTKTQ--KKEGRKRKRCRKESYSIYIYKVLKQVHPDTGISS +KAMSIMNSFVTDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Oryctolagus|XP_008246696.1|cH2B_(Mammalia) organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Rattus|HISTDB_cH2B_1_0|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MPEVSAKGTTISKKGFKKAVTKTQ--KKEGRKRKRCREESYSIYIYKVLKQVHPDTGISS +KAMSIMNSFVTDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Rattus|NP_072169.1|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MPEVSAKGTTISKKGFKKAVTKTQ--KKEGRKRKRCREESYSIYIYKVLKQVHPDTGISS +KAMSIMNSFVTDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Rattus|NP_072173.2|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Sus|XP_005665716.1|cH2B_(Mammalia) organism=Sus scrofa phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_003511.1|cH2B.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+MPE-PSKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_003518.2|cH2B.11_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_001019770.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKVQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_001154806.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKVQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSKLIGPILWK +>Homo|NP_003519.1|cH2B.13_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_778225.1|cH2B.14_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PSKSAPAPKKGSKKAVTKAQ--KKDGKKRKRGRKESYSIYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIASEASRLAHYNKRSTITSREVQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_733759.1|cH2B.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEVSSKGATISKKGFKKAVVKTQ--KKEGKKRKRTRKESYSIYIYKVLKQVHPDTGISS +KAMSIMNSFVTDIFERIASEASRLAHYSKRSTISSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_066406.1|cH2B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PSKSAPAPKKGSKKAITKAQ--KKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_003509.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS 
+KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_003513.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_003514.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_003516.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_003517.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_066407.1|cH2B.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PTKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_003515.1|cH2B.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_066402.2|cH2B.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSAK-------- +>Homo|NP_001299582.1|cH2B.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSAK-------- 
+>Homo|NP_003510.1|cH2B.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-LAKSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Homo|NP_003512.1|cH2B.9_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PVKSAPVPKKGSKKAINKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Mus|NP_783595.1|cH2B_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEPS-KSAPAPKKGSKKAISKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Mus|CAA62299.1|cH2B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEVAVKGATISKKGFKKAVTKTQ--KKEGRKRKRCRKESYSIYIYKVLKQVHPDTGISS +KAMSIMNSFVTDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Mus|NP_835502.1|cH2B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEPA-KSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTK +AVTKYTSSK-------- +>Mus|Q64524.3|cH2B.E_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPELA-KSAPAPKKGSKKAVTKAQ--KKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISS +KAMGIMNSFVNDIFERIANEASRLAHYNKRSTITSREIQTSVRLLLPGELAKHAVSEGTK +AVTKYTSAK-------- diff --git a/CURATED_SET/draft_seeds/cH2B_(Animals)_only.fasta b/CURATED_SET/draft_seeds/cH2B_(Animals)_only.fasta new file mode 100644 index 0000000..a804940 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B_(Animals)_only.fasta @@ -0,0 +1,16 @@ +>Drosophila|NP_724342.1|cH2B_(Animals) organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +-MP-PKTSGKAAKKAGKAQKNITKTDKKKKRKRKESYAIYIYKVLKQVHPDTGISSKAMS +IMNSFVNDIFERIAAEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVTK +YTSSK +>Psammechinus|AAA30025.1|cH2B_(Animals) organism=Psammechinus miliaris phylum=Echinodermata 
class=Echinoidea +MAPTAQVAKKGSKKAVKAPR--PSGGKKRNRKRKESYGIYIYKVLKQVHPDTGISSRAMI +IMNSFVNDIFERIAGESSRLAQYNKKSTISSREIQTAVRLILPGELAKHAVSEGTKAVTK +YTTSK +>Strongylocentrotus|NP_999710.2|cH2B_(Animals) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +MAPTAQVAKKGSKKAVKGTKT-AXGGKKRNRKRKESYGIYIYKVLKQVHPDTGISSRAMV +IMNSXVBDIFERIAGESSRLAQYNKKXTXSSREIQTAVRLILPGELAKHAVSEGTKAVTK +YTTSK +>Caenorhabditis|NP_505464.1|cH2B_(Animals) organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +MAP-PKPSAKGAKKAAKTVSK-PKDGKKRKHARKESYSVYIYRVLKQVHPDTGVSSKAMS +IMNSFVNDVFERIASEASRLAHYNKRSTISSREIQTAVRLILPGELAKHAVSEGTKAVTK +YTSSK diff --git a/CURATED_SET/draft_seeds/cH2B_(Chlorophyta).fasta b/CURATED_SET/draft_seeds/cH2B_(Chlorophyta).fasta new file mode 100644 index 0000000..66a5e88 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B_(Chlorophyta).fasta @@ -0,0 +1,140 @@ +>Chlamydomonas|XP_001690668.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----VTAEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001691007.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATQEAAAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001691162.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001691541.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAG-KKAKKEPAKKAAKEP-KGDGE 
+KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001691693.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKS----ATQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001692948.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKK---DAAAPEAAEPKAEKESKPKADKAA-KKAKKSPAKKAAKE--GGDGE +KGDKKKGKKK---SSVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001693071.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKK---DAAAPEAAEPKAEKESKPKADKAA-KKAKKAPAKKAAKE--GGDGE +KGDKKKGKKK---SSVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001693722.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKPATRAATQEAGAEATAKAEAKPKAEKAA-KKAKKEPAKKAAKEP-KGDGE +KKDKK--KK----SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001696245.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKTAKEP-KGDGE +KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001696283.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKADAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001696556.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii 
phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGEGE +KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001700194.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKA--AEKA----------------------------------PAKKTPAKT-AEGSK +KKKKL--------NKAETYKVYIYKVLKQVHPDTGISSKAMSIMNSFINDIFDKMANEAV +RLAQYNKKPTLTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTST---- +>Chlamydomonas|XP_001700403.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGA +KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001700461.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001702223.2|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATQEAAAEAPAKAEAKPKAEKAA-KKA-KEPSKKAAKEP-KGDGE +KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_042914553.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----PTQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_042914596.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKR--DEKP----ATQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGA +KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS 
+KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_042923653.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP--------------------PKAEKAA-KKAKKEPSKKAAKEP-KGDGE +KKDKK--KKS---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Volvox|XP_002946193.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKNVKEEKA-----EEKAEAGAAAKAEAKAKAAKPA-KKEKKAPAKKAAKEP-SAGGE +GEGDKKAKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS---- +>Volvox|XP_002946194.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKNVKEEKV---EEKAEAGAAAKAKAEAKAKAAKPA-KKEKKAPAKKAAKEP-SAGGE +GEGDKKAKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS---- +>Volvox|XP_002946213.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKG--KKADAAGAEAKPDPSELKAEPVSEGKADAKP-KKEKKAAGKKAAKD--SGAGE +EVEKK-GKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +RLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTAS---- +>Volvox|XP_002947412.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKK--EEKP-----ASQAAEAPEVKAEAKPKAVKAPKKKEKKAPAKKVAKEP-SAGGE +DGDKK-SKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA---- +>Volvox|XP_002947842.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKT------ASQPADAEPKAEAKPKETKV--KKEKKAPAKKAAKEP-AAGGE +EGDKK-AKKKTKVSKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +RLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA---- +>Volvox|XP_002948133.1|cH2B_(Chlorophyta) organism=Volvox carteri f. 
nagariensis phylum=Chlorophyta class=Chlorophyceae +-MAKK--DEKT-----ASQPADAAEPKADAKPKAAKV--KKEKKAPAKKTAKEP-VAGGE +EGDKK-AKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATESS +RLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA---- +>Volvox|XP_002948276.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKT-----ASQPADAAEPKADAKPKAAKV--KKEKKAPAKKAAKEP-VAGSE +EGEKK-AKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSVMNSFINDIFEKVATEAS +RLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA---- +>Volvox|XP_002948472.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKA--AEKA----------------------------------PAKKTPAKT-AEGSK +KKKKI--------NKAETYKVYIYKVLKQVHPDTGISSKAMSIMNSFINDIFDKMANEAV +RLAQYNKKPTLTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS---- +>Volvox|XP_002951705.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKA-----AAPAADAPEVKVEAKPKKARVP-KKEKKAPAKKVAKEP-AAGGE +EGDKK-AKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS---- +>Volvox|XP_002955481.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKNVKEEKQ-------EKAEAVEPKAEAKPKKEKAP-KKEKKAPAKKAAKEP-SAAGD +DGDKK-AKKKAKVSKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA---- +>Volvox|XP_002956800.1|cH2B_(Chlorophyta) organism=Volvox carteri f. 
nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKTK-EEKP--------ASEAVEPKAEAKPKAEKAPKKKEKKAPAKKSAKEPAAGDAA +EGDKK--KKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATEAS +KLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA---- +>Micromonas|XP_002501781.1|cH2B_(Chlorophyta) organism=Micromonas commoda phylum=Chlorophyta class=Mamiellophyceae +-------MAKP----------------------------------TSKKPAKKT-VAKGG +SKAKK--------SKTETYKIYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKIATEAS +KLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSN---- +>Micromonas|XP_002503973.1|cH2B_(Chlorophyta) organism=Micromonas commoda phylum=Chlorophyta class=Mamiellophyceae +-------MAKP----------------------------------TSKKPAKKT-VKGAG +GKAKK--------SKTETYKIYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKIATEAS +KLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSN---- +>Micromonas|XP_003055512.1|cH2B_(Chlorophyta) organism=Micromonas pusilla CCMP1545 phylum=Chlorophyta class=Mamiellophyceae +-------MAKP----------------------------------TSKKPAKKS-LKGGK +KGGKK--------SKTETYKIYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKIATEAS +KLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSA---- +>Ostreococcus|XP_001419128.1|cH2B_(Chlorophyta) organism=Ostreococcus lucimarinus CCE9901 phylum=Chlorophyta class=Mamiellophyceae +---------------------------------------------MAKKPAQKK-PSGAK +KVGRK--------SKSETYKIYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKIATEAA +KLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSA---- +>Coccomyxa|XP_005643326.1|cH2B_(Chlorophyta) organism=Coccomyxa subellipsoidea C-169 phylum=Chlorophyta class=Trebouxiophyceae +MAPKG-----------------------------------SQGREEARQKGHQD-RQDWC +KKKTK--------AKVESFKIYMCEVLKQAHPYTGISSWAISILNAFVTDTFGKMATETA +QLARYNKKPTVASGKIQTALRLILPGKLAKHTVSEGSKAVTESTSAAITP +>Coccomyxa|XP_005643701.1|cH2B_(Chlorophyta) organism=Coccomyxa subellipsoidea C-169 phylum=Chlorophyta class=Trebouxiophyceae +MAPKA--E-----------------------------------KKPAKKVAKTA-KTGGK +RKSK---------AKVESFKIYIYKVLKQVHPDTGISSRAISILNSFITDIFEKIATETA 
+QLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSA---- diff --git a/CURATED_SET/draft_seeds/cH2B_(Embryophyta).fasta b/CURATED_SET/draft_seeds/cH2B_(Embryophyta).fasta new file mode 100644 index 0000000..9c58674 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B_(Embryophyta).fasta @@ -0,0 +1,16 @@ +>Arabidopsis|Q9FFC0.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-MAKADKKPAEKK-----PAEKTPAAEPAAAAEKKPKAGKKLPKEPAGAGD--KKKKRSK +KNVETYKIYIFKVLKQVHPDIGISSKAMGIMNSFINDIFEKLAGESSKLARYNKKPTITS +REIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS +>Arabidopsis|Q9LQQ4.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MAPRAEKKPAEKKTAAERPVEENKAAEK-APAEKKPKAGKKLPPK--EAGD--KKKKRSK +KNVETYKIYIFKVLKQVHPDIGISSKAMGIMNSFINDIFEKLAQESSKLARYNKKPTITS +REIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS +>Arabidopsis|Q9LZ45.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MAPKAEKKPAEKA-----PA---------------PKAEKKIAKE-GGTSEIVKKKKKTK +KSTETYKIYIFKVLKQVHPDIGISGKAMGIMNSFINDIFEKLAQESSRLARYNKKPTITS +REIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS +>Arabidopsis|Q9LZT0.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MAPKAEKKPAEKK-----PVEEKSKAEK-APAEKKPKAGKKLPKEAGAGGD--KKKKMKK +KSVETYKIYIFKVLKQVHPDIGISSKAMGIMNSFINDIFEKLASESSKLARYNKKPTITS +REIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS diff --git a/CURATED_SET/draft_seeds/cH2B_(Fungi).fasta b/CURATED_SET/draft_seeds/cH2B_(Fungi).fasta new file mode 100644 index 0000000..23ef1ff --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B_(Fungi).fasta @@ -0,0 +1,8 @@ +>Saccharomyces|NP_009553.1|cH2B_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +MSSAAEKKPASKAPAEKKPAAKKTSTSVDGKKRSKVRKETYSSYIYKVLKQTHPDTGISQ +KSMSILNSFVNDIFERIATEASKLAAYNKKSTISAREIQTAVRLILPGELAKHAVSEGTR +AVTKYSSSTQA +>Saccharomyces|NP_010510.3|cH2B_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes 
+MSAKAEKKPASKAPAEKKPAAKKTSTSTDGKKRSKARKETYSSYIYKVLKQTHPDTGISQ +KSMSILNSFVNDIFERIATEASKLAAYNKKSTISAREIQTAVRLILPGELAKHAVSEGTR +AVTKYSSSTQA diff --git a/CURATED_SET/draft_seeds/cH2B_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH2B_(Homo_sapiens).fasta new file mode 100644 index 0000000..e5029a0 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B_(Homo_sapiens).fasta @@ -0,0 +1,76 @@ +>Homo|NP_003511.1|cH2B.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PSKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003518.2|cH2B.11_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_001019770.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKVQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_001154806.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKVQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSKLIGPILWK +>Homo|NP_003519.1|cH2B.13_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_778225.1|cH2B.14_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PSKSAPAPKKGSKKAVTKAQKKDGKKRKRGRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREVQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_733759.1|cH2B.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEVSSKGATISKKGFKKAVVKTQKKEGKKRKRTRKESYSIYIYKVLKQVHPDTGISSKA 
+MSIMNSFVTDIFERIASEASRLAHYSKRSTISSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_066406.1|cH2B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PSKSAPAPKKGSKKAITKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003509.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003513.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003514.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003516.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003517.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_066407.1|cH2B.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PTKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003515.1|cH2B.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_066402.2|cH2B.6_(Homo_sapiens) 
organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSAK-------- +>Homo|NP_001299582.1|cH2B.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSAK-------- +>Homo|NP_003510.1|cH2B.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-LAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003512.1|cH2B.9_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PVKSAPVPKKGSKKAINKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- diff --git a/CURATED_SET/draft_seeds/cH2B_(Homo_sapiens)_only.fasta b/CURATED_SET/draft_seeds/cH2B_(Homo_sapiens)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH2B_(Mammalia).fasta b/CURATED_SET/draft_seeds/cH2B_(Mammalia).fasta new file mode 100644 index 0000000..040254a --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B_(Mammalia).fasta @@ -0,0 +1,156 @@ +>Ailuropoda|XP_011216221.1|cH2B_(Mammalia) organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKVQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Bos|NP_001032546.1|cH2B_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Canis|XP_005640164.1|cH2B_(Mammalia) organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MPELTSKGTTISKKGFKRAVAKTQKKEGKKRRRCRKESYSIYIYKVLKQVHPDTGISSKA 
+MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Cavia|XP_013006892.1|cH2B_(Mammalia) organism=Cavia porcellus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Cricetulus|XP_007634673.1|cH2B_(Mammalia) organism=Cricetulus griseus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSAK-------- +>Equus|XP_005610255.1|cH2B_(Mammalia) organism=Equus caballus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Heterocephalus|XP_012928647.1|cH2B_(Mammalia) organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Loxodonta|XP_003422331.1|cH2B_(Mammalia) organism=Loxodonta africana phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSAK-------- +>Macaca|XP_011741102.1|cH2B_(Mammalia) organism=Macaca nemestrina phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Monodelphis|XP_007485418.1|cH2B_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +MPE-PGKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Mus|NP_783594.1|cH2B_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +MPEVAVKGATISKKGFKKAVTKTQKKEGRKRKRCRKESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV 
+TKYTSSK-------- +>Oryctolagus|XP_008246696.1|cH2B_(Mammalia) organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Rattus|HISTDB_cH2B_1_0|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MPEVSAKGTTISKKGFKKAVTKTQKKEGRKRKRCREESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Rattus|NP_072169.1|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MPEVSAKGTTISKKGFKKAVTKTQKKEGRKRKRCREESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Rattus|NP_072173.2|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Sus|XP_005665716.1|cH2B_(Mammalia) organism=Sus scrofa phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003511.1|cH2B.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PSKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003518.2|cH2B.11_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_001019770.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKVQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_001154806.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata 
class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKVQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSKLIGPILWK +>Homo|NP_003519.1|cH2B.13_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_778225.1|cH2B.14_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PSKSAPAPKKGSKKAVTKAQKKDGKKRKRGRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREVQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_733759.1|cH2B.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEVSSKGATISKKGFKKAVVKTQKKEGKKRKRTRKESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIASEASRLAHYSKRSTISSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_066406.1|cH2B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PSKSAPAPKKGSKKAITKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003509.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003513.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003514.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003516.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA 
+MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003517.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_066407.1|cH2B.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PTKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003515.1|cH2B.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_066402.2|cH2B.6_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSAK-------- +>Homo|NP_001299582.1|cH2B.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSAK-------- +>Homo|NP_003510.1|cH2B.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-LAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003512.1|cH2B.9_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PVKSAPVPKKGSKKAINKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Mus|NP_783595.1|cH2B_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEPS-KSAPAPKKGSKKAISKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Mus|CAA62299.1|cH2B.1_(Mus_musculus) 
organism=Mus musculus phylum=Chordata class=Mammalia +MPEVAVKGATISKKGFKKAVTKTQKKEGRKRKRCRKESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Mus|NP_835502.1|cH2B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEPA-KSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Mus|Q64524.3|cH2B.E_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPELA-KSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIANEASRLAHYNKRSTITSREIQTSVRLLLPGELAKHAVSEGTKAV +TKYTSAK-------- diff --git a/CURATED_SET/draft_seeds/cH2B_(Mammalia)_only.fasta b/CURATED_SET/draft_seeds/cH2B_(Mammalia)_only.fasta new file mode 100644 index 0000000..664fb64 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B_(Mammalia)_only.fasta @@ -0,0 +1,64 @@ +>Ailuropoda|XP_011216221.1|cH2B_(Mammalia) organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKVQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Bos|NP_001032546.1|cH2B_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Canis|XP_005640164.1|cH2B_(Mammalia) organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MPELTSKGTTISKKGFKRAVAKTQKKEGKKRRRCRKESYSIYIYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Cavia|XP_013006892.1|cH2B_(Mammalia) organism=Cavia porcellus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Cricetulus|XP_007634673.1|cH2B_(Mammalia) organism=Cricetulus griseus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA 
+MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSAK +>Equus|XP_005610255.1|cH2B_(Mammalia) organism=Equus caballus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Heterocephalus|XP_012928647.1|cH2B_(Mammalia) organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Loxodonta|XP_003422331.1|cH2B_(Mammalia) organism=Loxodonta africana phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSAK +>Macaca|XP_011741102.1|cH2B_(Mammalia) organism=Macaca nemestrina phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Monodelphis|XP_007485418.1|cH2B_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +MPE-PGKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Mus|NP_783594.1|cH2B_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +MPEVAVKGATISKKGFKKAVTKTQKKEGRKRKRCRKESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Oryctolagus|XP_008246696.1|cH2B_(Mammalia) organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Rattus|HISTDB_cH2B_1_0|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MPEVSAKGTTISKKGFKKAVTKTQKKEGRKRKRCREESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Rattus|NP_072169.1|cH2B_(Mammalia) organism=Rattus 
norvegicus phylum=Chordata class=Mammalia +MPEVSAKGTTISKKGFKKAVTKTQKKEGRKRKRCREESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Rattus|NP_072173.2|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Sus|XP_005665716.1|cH2B_(Mammalia) organism=Sus scrofa phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B_(Mus_musculus).fasta b/CURATED_SET/draft_seeds/cH2B_(Mus_musculus).fasta new file mode 100644 index 0000000..1ea127e --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B_(Mus_musculus).fasta @@ -0,0 +1,16 @@ +>Mus|NP_783595.1|cH2B_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEPS-KSAPAPKKGSKKAISKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Mus|CAA62299.1|cH2B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEVAVKGATISKKGFKKAVTKTQKKEGRKRKRCRKESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Mus|NP_835502.1|cH2B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEPA-KSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK +>Mus|Q64524.3|cH2B.E_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPELA-KSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIANEASRLAHYNKRSTITSREIQTSVRLLLPGELAKHAVSEGTKAV +TKYTSAK diff --git a/CURATED_SET/draft_seeds/cH2B_(Mus_musculus)_only.fasta b/CURATED_SET/draft_seeds/cH2B_(Mus_musculus)_only.fasta new file mode 100644 index 0000000..6e62011 --- /dev/null +++ 
b/CURATED_SET/draft_seeds/cH2B_(Mus_musculus)_only.fasta @@ -0,0 +1,4 @@ +>Mus|NP_783595.1|cH2B_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEPSKSAPAPKKGSKKAISKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B_(Plants).fasta b/CURATED_SET/draft_seeds/cH2B_(Plants).fasta new file mode 100644 index 0000000..4124475 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B_(Plants).fasta @@ -0,0 +1,156 @@ +>Chlamydomonas|XP_001690668.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----VTAEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001691007.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATQEAAAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001691162.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001691541.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAG-KKAKKEPAKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001691693.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKS----ATQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE 
+ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001692948.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKK---DAAAPEAAEPKAEKESKPKADKAA-KKAKKSPAKKAAKE--GGDGE +--KGDKKKGKKK---SSVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001693071.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKK---DAAAPEAAEPKAEKESKPKADKAA-KKAKKAPAKKAAKE--GGDGE +--KGDKKKGKKK---SSVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001693722.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKPATRAATQEAGAEATAKAEAKPKAEKAA-KKAKKEPAKKAAKEP-KGDGE +--KKDKK--KK----SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001696245.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKTAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001696283.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKADAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001696556.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATAEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGEGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001700194.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae 
+MAPKA--AEKA----------------------------------PAKKTPAKT-AEGSK +--KKKKL--------NKAETYKVYIYKVLKQVHPDTGISSKAMSIMNSFINDIFDKMANE +AVRLAQYNKKPTLTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTST---- +>Chlamydomonas|XP_001700403.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGA +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001700461.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_001702223.2|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----ATQEAAAEAPAKAEAKPKAEKAA-KKA-KEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_042914553.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP----PTQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_042914596.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKR--DEKP----ATQEAGAEAPAKAEAKPKAEKAA-KKAKKEPSKKAAKEP-KGDGA +--KKDKK--KKK---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- +>Chlamydomonas|XP_042923653.1|cH2B_(Chlorophyta) organism=Chlamydomonas reinhardtii phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKP--------------------PKAEKAA-KKAKKEPSKKAAKEP-KGDGE +--KKDKK--KKS---SAVETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSG---- 
+>Volvox|XP_002946193.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKNVKEEKA-----EEKAEAGAAAKAEAKAKAAKPA-KKEKKAPAKKAAKEP-SAGGE +--GEGDKKAKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS---- +>Volvox|XP_002946194.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKNVKEEKV---EEKAEAGAAAKAKAEAKAKAAKPA-KKEKKAPAKKAAKEP-SAGGE +--GEGDKKAKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS---- +>Volvox|XP_002946213.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKG--KKADAAGAEAKPDPSELKAEPVSEGKADAKP-KKEKKAAGKKAAKD--SGAGE +--EVEKK-GKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASRLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTAS---- +>Volvox|XP_002947412.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKK--EEKP-----ASQAAEAPEVKAEAKPKAVKAPKKKEKKAPAKKVAKEP-SAGGE +--DGDKK-SKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA---- +>Volvox|XP_002947842.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKT------ASQPADAEPKAEAKPKETKV--KKEKKAPAKKAAKEP-AAGGE +--EGDKK-AKKKTKVSKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASRLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA---- +>Volvox|XP_002948133.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +-MAKK--DEKT-----ASQPADAAEPKADAKPKAAKV--KKEKKAPAKKTAKEP-VAGGE +--EGDKK-AKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +SSRLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA---- +>Volvox|XP_002948276.1|cH2B_(Chlorophyta) organism=Volvox carteri f. 
nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKT-----ASQPADAAEPKADAKPKAAKV--KKEKKAPAKKAAKEP-VAGSE +--EGEKK-AKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSVMNSFINDIFEKVATE +ASRLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA---- +>Volvox|XP_002948472.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKA--AEKA----------------------------------PAKKTPAKT-AEGSK +--KKKKI--------NKAETYKVYIYKVLKQVHPDTGISSKAMSIMNSFINDIFDKMANE +AVRLAQYNKKPTLTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS---- +>Volvox|XP_002951705.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKK--DEKA-----AAPAADAPEVKVEAKPKKARVP-KKEKKAPAKKVAKEP-AAGGE +--EGDKK-AKKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS---- +>Volvox|XP_002955481.1|cH2B_(Chlorophyta) organism=Volvox carteri f. nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKNVKEEKQ-------EKAEAVEPKAEAKPKKEKAP-KKEKKAPAKKAAKEP-SAAGD +--DGDKK-AKKKAKVSKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA---- +>Volvox|XP_002956800.1|cH2B_(Chlorophyta) organism=Volvox carteri f. 
nagariensis phylum=Chlorophyta class=Chlorophyceae +MAPKTK-EEKP--------ASEAVEPKAEAKPKAEKAPKKKEKKAPAKKSAKEPAAGDAA +--EGDKK--KKKAKVAKSETYKLYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKVATE +ASKLSRYNKKPTVTSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSA---- +>Micromonas|XP_002501781.1|cH2B_(Chlorophyta) organism=Micromonas commoda phylum=Chlorophyta class=Mamiellophyceae +-------MAKP----------------------------------TSKKPAKKT-VAKGG +--SKAKK--------SKTETYKIYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKIATE +ASKLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSN---- +>Micromonas|XP_002503973.1|cH2B_(Chlorophyta) organism=Micromonas commoda phylum=Chlorophyta class=Mamiellophyceae +-------MAKP----------------------------------TSKKPAKKT-VKGAG +--GKAKK--------SKTETYKIYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKIATE +ASKLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSN---- +>Micromonas|XP_003055512.1|cH2B_(Chlorophyta) organism=Micromonas pusilla CCMP1545 phylum=Chlorophyta class=Mamiellophyceae +-------MAKP----------------------------------TSKKPAKKS-LKGGK +--KGGKK--------SKTETYKIYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKIATE +ASKLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSA---- +>Ostreococcus|XP_001419128.1|cH2B_(Chlorophyta) organism=Ostreococcus lucimarinus CCE9901 phylum=Chlorophyta class=Mamiellophyceae +---------------------------------------------MAKKPAQKK-PSGAK +--KVGRK--------SKSETYKIYIYKVLKQVHPDTGISSKAMSIMNSFINDIFEKIATE +AAKLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSA---- +>Coccomyxa|XP_005643326.1|cH2B_(Chlorophyta) organism=Coccomyxa subellipsoidea C-169 phylum=Chlorophyta class=Trebouxiophyceae +MAPKG-----------------------------------SQGREEARQKGHQD-RQDWC +--KKKTK--------AKVESFKIYMCEVLKQAHPYTGISSWAISILNAFVTDTFGKMATE +TAQLARYNKKPTVASGKIQTALRLILPGKLAKHTVSEGSKAVTESTSAAITP +>Coccomyxa|XP_005643701.1|cH2B_(Chlorophyta) organism=Coccomyxa subellipsoidea C-169 phylum=Chlorophyta class=Trebouxiophyceae +MAPKA--E-----------------------------------KKPAKKVAKTA-KTGGK +--RKSK---------AKVESFKIYIYKVLKQVHPDTGISSRAISILNSFITDIFEKIATE 
+TAQLARYNKKPTVTSREIQTAVRLILPGELAKHAVSEGTKAVTKFTSA---- +>Arabidopsis|Q9FFC0.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +-MAKA--DKKP------AEKK-----PAEKTPAAEPAAAAEKKPKAGKKLPKEP-AGAGD +--KKKKR------SKKNVETYKIYIFKVLKQVHPDIGISSKAMGIMNSFINDIFEKLAGE +SSKLARYNKKPTITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS---- +>Arabidopsis|Q9LQQ4.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MAPRA--EKKP------AEKKTAAERPVEENKAAEK-APAEKKPKAGKKLPPK---EAGD +--KKKKR------SKKNVETYKIYIFKVLKQVHPDIGISSKAMGIMNSFINDIFEKLAQE +SSKLARYNKKPTITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS---- +>Arabidopsis|Q9LZ45.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MAPKA--EKKP------AEKA-----PA---------------PKAEKKIAKE--GGTSE +IVKKKKK------TKKSTETYKIYIFKVLKQVHPDIGISGKAMGIMNSFINDIFEKLAQE +SSRLARYNKKPTITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS---- +>Arabidopsis|Q9LZT0.3|cH2B_(Embryophyta) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MAPKA--EKKP------AEKK-----PVEEKSKAEK-APAEKKPKAGKKLPKEA-GAGGD +--KKKKM------KKKSVETYKIYIFKVLKQVHPDIGISSKAMGIMNSFINDIFEKLASE +SSKLARYNKKPTITSREIQTAVRLVLPGELAKHAVSEGTKAVTKFTSS---- diff --git a/CURATED_SET/draft_seeds/cH2B_(Plants)_only.fasta b/CURATED_SET/draft_seeds/cH2B_(Plants)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH2B_(Protists).fasta b/CURATED_SET/draft_seeds/cH2B_(Protists).fasta new file mode 100644 index 0000000..1f7f8d5 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B_(Protists).fasta @@ -0,0 +1,3 @@ +>Trypanosoma|XP_823266.1|cH2B_(Protists) organism=Trypanosoma brucei brucei TREU927 phylum=Euglenozoa class=Kinetoplastea +MATPKSTPAKTRKEAKKTRRQRKRTWNVYVSRSLRSINSQMSMTSRTMKIVNSFVNDLFE +RIAAEAATIVRVNRKRTLGARELQTAVRLVLPADLAKHAMAEGTKAVSHASS diff --git a/CURATED_SET/draft_seeds/cH2B_(Vertebrata).fasta b/CURATED_SET/draft_seeds/cH2B_(Vertebrata).fasta new file mode 100644 index 0000000..b3e9859 
--- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B_(Vertebrata).fasta @@ -0,0 +1,164 @@ +>Xenopus|NP_001086753.1|cH2B_(Vertebrata) organism=Xenopus laevis phylum=Chordata class=Amphibia +MPE-PAKSAPAPKKGSKKAVTKTQKKDGKKRRKSRKESYAIYVYKVLKQVHPDTGISSKA +MSIMNSFVNDVFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSAK-------- +>Gallus|NP_001073188.1|cH2B_(Vertebrata) organism=Gallus gallus phylum=Chordata class=Aves +MPE-PAKSAPAPKKGSKKAVTKTQKKGDKKRKKSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Ailuropoda|XP_011216221.1|cH2B_(Mammalia) organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKVQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Bos|NP_001032546.1|cH2B_(Mammalia) organism=Bos taurus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Canis|XP_005640164.1|cH2B_(Mammalia) organism=Canis lupus familiaris phylum=Chordata class=Mammalia +MPELTSKGTTISKKGFKRAVAKTQKKEGKKRRRCRKESYSIYIYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Cavia|XP_013006892.1|cH2B_(Mammalia) organism=Cavia porcellus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Cricetulus|XP_007634673.1|cH2B_(Mammalia) organism=Cricetulus griseus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSAK-------- +>Equus|XP_005610255.1|cH2B_(Mammalia) organism=Equus caballus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- 
+>Heterocephalus|XP_012928647.1|cH2B_(Mammalia) organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Loxodonta|XP_003422331.1|cH2B_(Mammalia) organism=Loxodonta africana phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSAK-------- +>Macaca|XP_011741102.1|cH2B_(Mammalia) organism=Macaca nemestrina phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Monodelphis|XP_007485418.1|cH2B_(Mammalia) organism=Monodelphis domestica phylum=Chordata class=Mammalia +MPE-PGKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Mus|NP_783594.1|cH2B_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +MPEVAVKGATISKKGFKKAVTKTQKKEGRKRKRCRKESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Oryctolagus|XP_008246696.1|cH2B_(Mammalia) organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Rattus|HISTDB_cH2B_1_0|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MPEVSAKGTTISKKGFKKAVTKTQKKEGRKRKRCREESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Rattus|NP_072169.1|cH2B_(Mammalia) organism=Rattus norvegicus phylum=Chordata class=Mammalia +MPEVSAKGTTISKKGFKKAVTKTQKKEGRKRKRCREESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Rattus|NP_072173.2|cH2B_(Mammalia) organism=Rattus norvegicus 
phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Sus|XP_005665716.1|cH2B_(Mammalia) organism=Sus scrofa phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003511.1|cH2B.10_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PSKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003518.2|cH2B.11_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_001019770.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKVQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_001154806.1|cH2B.12_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKVQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSKLIGPILWK +>Homo|NP_003519.1|cH2B.13_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_778225.1|cH2B.14_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PSKSAPAPKKGSKKAVTKAQKKDGKKRKRGRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREVQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_733759.1|cH2B.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPEVSSKGATISKKGFKKAVVKTQKKEGKKRKRTRKESYSIYIYKVLKQVHPDTGISSKA 
+MSIMNSFVTDIFERIASEASRLAHYSKRSTISSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_066406.1|cH2B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PSKSAPAPKKGSKKAITKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003509.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003513.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003514.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003516.1|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003517.2|cH2B.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_066407.1|cH2B.4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PTKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003515.1|cH2B.5_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPD-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_066402.2|cH2B.6_(Homo_sapiens) 
organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSAK-------- +>Homo|NP_001299582.1|cH2B.7_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSAK-------- +>Homo|NP_003510.1|cH2B.8_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-LAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Homo|NP_003512.1|cH2B.9_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MPE-PVKSAPVPKKGSKKAINKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Mus|NP_783595.1|cH2B_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEPS-KSAPAPKKGSKKAISKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Mus|CAA62299.1|cH2B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEVAVKGATISKKGFKKAVTKTQKKEGRKRKRCRKESYSIYIYKVLKQVHPDTGISSKA +MSIMNSFVTDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Mus|NP_835502.1|cH2B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPEPA-KSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSVYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIASEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAV +TKYTSSK-------- +>Mus|Q64524.3|cH2B.E_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +MPELA-KSAPAPKKGSKKAVTKAQKKDGKKRKRSRKESYSIYVYKVLKQVHPDTGISSKA +MGIMNSFVNDIFERIANEASRLAHYNKRSTITSREIQTSVRLLLPGELAKHAVSEGTKAV +TKYTSAK-------- diff --git a/CURATED_SET/draft_seeds/cH2B_(Vertebrata)_only.fasta b/CURATED_SET/draft_seeds/cH2B_(Vertebrata)_only.fasta new file mode 100644 index 0000000..8e8aa6e 
--- /dev/null +++ b/CURATED_SET/draft_seeds/cH2B_(Vertebrata)_only.fasta @@ -0,0 +1,8 @@ +>Xenopus|NP_001086753.1|cH2B_(Vertebrata) organism=Xenopus laevis phylum=Chordata class=Amphibia +MPEPAKSAPAPKKGSKKAVTKTQKKDGKKRRKSRKESYAIYVYKVLKQVHPDTGISSKAM +SIMNSFVNDVFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSAK +>Gallus|NP_001073188.1|cH2B_(Vertebrata) organism=Gallus gallus phylum=Chordata class=Aves +MPEPAKSAPAPKKGSKKAVTKTQKKGDKKRKKSRKESYSIYVYKVLKQVHPDTGISSKAM +GIMNSFVNDIFERIAGEASRLAHYNKRSTITSREIQTAVRLLLPGELAKHAVSEGTKAVT +KYTSSK diff --git a/CURATED_SET/draft_seeds/cH2B_only.fasta b/CURATED_SET/draft_seeds/cH2B_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH3.1_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH3.1_(Homo_sapiens).fasta new file mode 100644 index 0000000..6f84519 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH3.1_(Homo_sapiens).fasta @@ -0,0 +1,40 @@ +>Homo|NP_003520.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003521.2|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003522.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003523.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003524.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003525.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003526.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003527.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003528.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_066298.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/cH3.1_(Mammalia).fasta b/CURATED_SET/draft_seeds/cH3.1_(Mammalia).fasta new file mode 100644 index 0000000..6f84519 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH3.1_(Mammalia).fasta @@ -0,0 +1,40 @@ +>Homo|NP_003520.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003521.2|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE 
+LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003522.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003523.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003524.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003525.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003526.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003527.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003528.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_066298.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA diff --git 
a/CURATED_SET/draft_seeds/cH3.1_(Mammalia)_only.fasta b/CURATED_SET/draft_seeds/cH3.1_(Mammalia)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH3.2_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH3.2_(Homo_sapiens).fasta new file mode 100644 index 0000000..7fa14f6 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH3.2_(Homo_sapiens).fasta @@ -0,0 +1,12 @@ +>Homo|NP_001005464.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_001116847.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_066403.2|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/cH3.2_(Mammalia).fasta b/CURATED_SET/draft_seeds/cH3.2_(Mammalia).fasta new file mode 100644 index 0000000..7fa14f6 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH3.2_(Mammalia).fasta @@ -0,0 +1,12 @@ +>Homo|NP_001005464.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_001116847.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_066403.2|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE 
+LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/cH3.2_(Mammalia)_only.fasta b/CURATED_SET/draft_seeds/cH3.2_(Mammalia)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH3.fasta b/CURATED_SET/draft_seeds/cH3.fasta new file mode 100644 index 0000000..004181b --- /dev/null +++ b/CURATED_SET/draft_seeds/cH3.fasta @@ -0,0 +1,144 @@ +>Guillardia|XP_001713521.1|cH3 organism=Guillardia theta phylum= class=Cryptophyceae +MARTKQTARKTVGGDI-NSRISEKRAKSD---NSKNIDLKKVHR-YKPGTVALREIRKYQ +KSTNLLIRKLPFQRLVRELAQDYKSDLRFQNSAVLALQEASESYLVNLFEDTNLCAIHAK +RVTIMPKDIYLARRIRGEMIF +>Plasmodium|XP_724814.1|cH3 organism=Plasmodium yoelii phylum=Apicomplexa class=Aconoidasida +MARTKQTARKSTAGKAPRKQLASKAARKS---APISAGIKKPHR-YRPGTVALREIRRYQ +KSTDLLIRKLPFQRLVREIAQDYKTDLRFQSSAVMALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERS- +>Drosophila|CAA32434.1|cH3 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAK +RITIMPKDIQLARRIRGERA- +>Drosophila|NP_001027285.1|cH3 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Candida|XP_719887.1|cH3 organism=Candida albicans SC5314 phylum=Ascomycota class=Saccharomycetes +MARTKQTARKSTGGKAPRKQLASKAARKS---APVSGGVKKPHR-YKPGTVALREIRRFQ +KSTELLIRKLPFQRLVREIAQDFKSDLRFQSSAIGALQEAVEAYLVGLFEDTNLCAIHAK +RVTIQKKDMQLARRLRGERS- +>Debaryomyces|XP_460476.1|cH3 organism=Debaryomyces hansenii CBS767 phylum=Ascomycota class=Saccharomycetes +MARTKQTARKSTGGKAPRKQLASKAARKS---APVSGGVKKPHR-YKPGTVALREIRRFQ +KSTELLIRKLPFQRLVREIAQDFKSDLRFQSSAIGALQEAVEAYLVSLFEDTNLCAIHAK +RVTIQKKDIQLARRLRGERS- +>Thalassiosira|XP_002288694.1|cH3 organism=Thalassiosira pseudonana CCMP1335 
phylum=Bacillariophyta class=Coscinodiscophyceae +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTDLLIRKLPFQRLVREIAQDFKSDLRFQGSAVLALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERS- +>Xenopus|NP_001091119.1|cH3 organism=Xenopus laevis phylum=Chordata class=Amphibia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCGIHAK +RVTIMPKDIQLARRIRGERA- +>Gallus|NP_001268409.1|cH3 organism=Gallus gallus phylum=Chordata class=Aves +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Macaca|NP_001180643.1|cH3 organism=Macaca mulatta phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Mus|NP_659539.1|cH3 organism=Mus musculus phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Pan|XP_527254.2|cH3 organism=Pan troglodytes phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Tetrahymena|XP_001016594.3|cH3 organism=Tetrahymena thermophila SB210 phylum=Ciliophora class=Oligohymenophorea +MARTKQTARKSTGAKAPRKQLASKAARKS---APATGGIKKPHR-FRPGTVALREIRKYQ +KSTDLLIRKLPFQRLVRDIAHEFKAELRFQSSAVLALQEAAEAYLVGLFEDTNLCAIHAR +RVTIMTKDMQLARRIRGERF- +>Trypanosoma|XP_001218942.1|cH3 organism=Trypanosoma brucei brucei TREU927 phylum=Euglenozoa class=Kinetoplastea +MSRTKETART-------KKTITSKKSKKASKGSDAASGVKTAQRRWRPGTVALREIRQFQ +RSTDLLLQKAPFQRLVREVSGAQKEGLRFQSSAILAAQEATESYIVSLLADTNRACIHSG +RVTIQPKDIHLALCLRGERA- +>Caenorhabditis|NP_509344.1|cH3 organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea 
+MARTKQTARKSTGGKAPRKQLATKAARKS---APTTGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Phytophthora|XP_002999294.1|cH3 organism=Phytophthora infestans T30-4 phylum=Oomycota class= +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQGSAVLALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERS- +>Perkinsus|XP_002788889.1|cH3 organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +MARTKQTARKSTGGKAPRKQLASKAARKS---TPSTGGIKKPHR-YRPGTVALREIRRYQ +KSTDLLIRKLPFQRLVREVAQDFKTDLRFQSSAVMALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDMQLARRIRGERS- +>Cyanidioschyzon|XP_005537317.1|cH3 organism=Cyanidioschyzon merolae strain 10D phylum=Rhodophyta class=Bangiophyceae +MARTKQTARKSTGGKAPRKQLATKAARKS---APSVGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVLALQEAAEAYLVNLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Griffithsia|Q7XYZ0.3|cH3 organism=Griffithsia japonica phylum=Rhodophyta class=Florideophyceae +MARTKQTARKSTGGKAPRKQLATKAARKS---APASGGVKKPHR-FRPGTVALREIRRFQ +KSTELLVRKLPFQRLVREIAQDFKSDLRFQSSAVLALQEAAEAYMVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERT- +>Selaginella|XP_002977792.1|cH3 organism=Selaginella moellendorffii phylum=Streptophyta class=Lycopodiopsida +MARTKQTARKSTGGKAPRKQLATKAARKS---APTTGGVKKPHR-YRPGTVALREIRKYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSHAVLALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Arabidopsis|NP_189372.1|cH3 organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-FRPGTVALREIRKYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVAALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Oryza|NP_001044904.1|cH3 organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-FRPGTVALREIRKYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVAALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Oryza|NP_001050276.1|cH3 organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida 
+MARTKQTARKSTGGKAPRKQLATKAARKS---APTTGGVKKPHR-YRPGTVALREIRKYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSHAVLALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Homo|NP_003520.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Homo|NP_003521.2|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Homo|NP_003522.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Homo|NP_003523.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Homo|NP_003524.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Homo|NP_003525.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Homo|NP_003526.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Homo|NP_003527.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ 
+KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Homo|NP_003528.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Homo|NP_066298.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Homo|NP_001005464.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Homo|NP_001116847.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Homo|NP_066403.2|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- diff --git a/CURATED_SET/draft_seeds/cH3_(Animals).fasta b/CURATED_SET/draft_seeds/cH3_(Animals).fasta new file mode 100644 index 0000000..12a53bb --- /dev/null +++ b/CURATED_SET/draft_seeds/cH3_(Animals).fasta @@ -0,0 +1,52 @@ +>Homo|NP_003520.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003521.2|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA 
+>Homo|NP_003522.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003523.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003524.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003525.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003526.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003527.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003528.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_066298.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_001005464.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_001116847.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_066403.2|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/cH3_(Animals)_only.fasta b/CURATED_SET/draft_seeds/cH3_(Animals)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH3_(Chlorophyta).fasta b/CURATED_SET/draft_seeds/cH3_(Chlorophyta).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH3_(Embryophyta).fasta b/CURATED_SET/draft_seeds/cH3_(Embryophyta).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH3_(Fungi).fasta b/CURATED_SET/draft_seeds/cH3_(Fungi).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH3_(Mammalia).fasta b/CURATED_SET/draft_seeds/cH3_(Mammalia).fasta new file mode 100644 index 0000000..12a53bb --- /dev/null +++ b/CURATED_SET/draft_seeds/cH3_(Mammalia).fasta @@ -0,0 +1,52 @@ +>Homo|NP_003520.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003521.2|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003522.1|cH3.1_(Homo_sapiens) organism=Homo sapiens 
phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003523.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003524.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003525.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003526.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003527.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003528.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_066298.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_001005464.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE 
+LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_001116847.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_066403.2|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/cH3_(Mammalia)_only.fasta b/CURATED_SET/draft_seeds/cH3_(Mammalia)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH3_(Plants).fasta b/CURATED_SET/draft_seeds/cH3_(Plants).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH3_(Plants)_only.fasta b/CURATED_SET/draft_seeds/cH3_(Plants)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH3_(Protists).fasta b/CURATED_SET/draft_seeds/cH3_(Protists).fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH3_(Vertebrata).fasta b/CURATED_SET/draft_seeds/cH3_(Vertebrata).fasta new file mode 100644 index 0000000..12a53bb --- /dev/null +++ b/CURATED_SET/draft_seeds/cH3_(Vertebrata).fasta @@ -0,0 +1,52 @@ +>Homo|NP_003520.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003521.2|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003522.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia 
+MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003523.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003524.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003525.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003526.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003527.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_003528.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_066298.1|cH3.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_001005464.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE 
+LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_001116847.1|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA +>Homo|NP_066403.2|cH3.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKSAPATGGVKKPHRYRPGTVALREIRRYQKSTE +LLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAKRVTI +MPKDIQLARRIRGERA diff --git a/CURATED_SET/draft_seeds/cH3_(Vertebrata)_only.fasta b/CURATED_SET/draft_seeds/cH3_(Vertebrata)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/cH3_only.fasta b/CURATED_SET/draft_seeds/cH3_only.fasta new file mode 100644 index 0000000..5c40016 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH3_only.fasta @@ -0,0 +1,92 @@ +>Guillardia|XP_001713521.1|cH3 organism=Guillardia theta phylum= class=Cryptophyceae +MARTKQTARKTVGGDI-NSRISEKRAKSD---NSKNIDLKKVHR-YKPGTVALREIRKYQ +KSTNLLIRKLPFQRLVRELAQDYKSDLRFQNSAVLALQEASESYLVNLFEDTNLCAIHAK +RVTIMPKDIYLARRIRGEMIF +>Plasmodium|XP_724814.1|cH3 organism=Plasmodium yoelii phylum=Apicomplexa class=Aconoidasida +MARTKQTARKSTAGKAPRKQLASKAARKS---APISAGIKKPHR-YRPGTVALREIRRYQ +KSTDLLIRKLPFQRLVREIAQDYKTDLRFQSSAVMALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERS- +>Drosophila|CAA32434.1|cH3 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAK +RITIMPKDIQLARRIRGERA- +>Drosophila|NP_001027285.1|cH3 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Candida|XP_719887.1|cH3 organism=Candida albicans SC5314 phylum=Ascomycota class=Saccharomycetes 
+MARTKQTARKSTGGKAPRKQLASKAARKS---APVSGGVKKPHR-YKPGTVALREIRRFQ +KSTELLIRKLPFQRLVREIAQDFKSDLRFQSSAIGALQEAVEAYLVGLFEDTNLCAIHAK +RVTIQKKDMQLARRLRGERS- +>Debaryomyces|XP_460476.1|cH3 organism=Debaryomyces hansenii CBS767 phylum=Ascomycota class=Saccharomycetes +MARTKQTARKSTGGKAPRKQLASKAARKS---APVSGGVKKPHR-YKPGTVALREIRRFQ +KSTELLIRKLPFQRLVREIAQDFKSDLRFQSSAIGALQEAVEAYLVSLFEDTNLCAIHAK +RVTIQKKDIQLARRLRGERS- +>Thalassiosira|XP_002288694.1|cH3 organism=Thalassiosira pseudonana CCMP1335 phylum=Bacillariophyta class=Coscinodiscophyceae +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTDLLIRKLPFQRLVREIAQDFKSDLRFQGSAVLALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERS- +>Xenopus|NP_001091119.1|cH3 organism=Xenopus laevis phylum=Chordata class=Amphibia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCGIHAK +RVTIMPKDIQLARRIRGERA- +>Gallus|NP_001268409.1|cH3 organism=Gallus gallus phylum=Chordata class=Aves +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Macaca|NP_001180643.1|cH3 organism=Macaca mulatta phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Mus|NP_659539.1|cH3 organism=Mus musculus phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Pan|XP_527254.2|cH3 organism=Pan troglodytes phylum=Chordata class=Mammalia +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Tetrahymena|XP_001016594.3|cH3 organism=Tetrahymena thermophila SB210 phylum=Ciliophora class=Oligohymenophorea +MARTKQTARKSTGAKAPRKQLASKAARKS---APATGGIKKPHR-FRPGTVALREIRKYQ 
+KSTDLLIRKLPFQRLVRDIAHEFKAELRFQSSAVLALQEAAEAYLVGLFEDTNLCAIHAR +RVTIMTKDMQLARRIRGERF- +>Trypanosoma|XP_001218942.1|cH3 organism=Trypanosoma brucei brucei TREU927 phylum=Euglenozoa class=Kinetoplastea +MSRTKETART-------KKTITSKKSKKASKGSDAASGVKTAQRRWRPGTVALREIRQFQ +RSTDLLLQKAPFQRLVREVSGAQKEGLRFQSSAILAAQEATESYIVSLLADTNRACIHSG +RVTIQPKDIHLALCLRGERA- +>Caenorhabditis|NP_509344.1|cH3 organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +MARTKQTARKSTGGKAPRKQLATKAARKS---APTTGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSAAIGALQEASEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Phytophthora|XP_002999294.1|cH3 organism=Phytophthora infestans T30-4 phylum=Oomycota class= +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQGSAVLALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERS- +>Perkinsus|XP_002788889.1|cH3 organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +MARTKQTARKSTGGKAPRKQLASKAARKS---TPSTGGIKKPHR-YRPGTVALREIRRYQ +KSTDLLIRKLPFQRLVREVAQDFKTDLRFQSSAVMALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDMQLARRIRGERS- +>Cyanidioschyzon|XP_005537317.1|cH3 organism=Cyanidioschyzon merolae strain 10D phylum=Rhodophyta class=Bangiophyceae +MARTKQTARKSTGGKAPRKQLATKAARKS---APSVGGVKKPHR-YRPGTVALREIRRYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVLALQEAAEAYLVNLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Griffithsia|Q7XYZ0.3|cH3 organism=Griffithsia japonica phylum=Rhodophyta class=Florideophyceae +MARTKQTARKSTGGKAPRKQLATKAARKS---APASGGVKKPHR-FRPGTVALREIRRFQ +KSTELLVRKLPFQRLVREIAQDFKSDLRFQSSAVLALQEAAEAYMVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERT- +>Selaginella|XP_002977792.1|cH3 organism=Selaginella moellendorffii phylum=Streptophyta class=Lycopodiopsida +MARTKQTARKSTGGKAPRKQLATKAARKS---APTTGGVKKPHR-YRPGTVALREIRKYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSHAVLALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Arabidopsis|NP_189372.1|cH3 organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida 
+MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-FRPGTVALREIRKYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVAALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Oryza|NP_001044904.1|cH3 organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +MARTKQTARKSTGGKAPRKQLATKAARKS---APATGGVKKPHR-FRPGTVALREIRKYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVAALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- +>Oryza|NP_001050276.1|cH3 organism=Oryza sativa Japonica Group phylum=Streptophyta class=Magnoliopsida +MARTKQTARKSTGGKAPRKQLATKAARKS---APTTGGVKKPHR-YRPGTVALREIRKYQ +KSTELLIRKLPFQRLVREIAQDFKTDLRFQSHAVLALQEAAEAYLVGLFEDTNLCAIHAK +RVTIMPKDIQLARRIRGERA- diff --git a/CURATED_SET/draft_seeds/cH4.fasta b/CURATED_SET/draft_seeds/cH4.fasta new file mode 100644 index 0000000..07eb735 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH4.fasta @@ -0,0 +1,78 @@ +>Drosophila|NP_001027352.1|cH4 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +MTGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTALDVVYALKRQGRTLYGFGG +>Saccharomyces|NP_009563.1|cH4 organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +MSGRGKGGK-GLGKGGAKRH-RKILRDNIQGITKPAIRRLARRGGVKRISGLIYEEVRAV +LKSFLESVIRDSVTYTEHAKRKTVTSLDVVYALKRQGRTLYGFGG +>Xenopus|NP_001087926.1|cH4 organism=Xenopus laevis phylum=Chordata class=Amphibia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Gallus|NP_001032934.1|cH4 organism=Gallus gallus phylum=Chordata class=Aves +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Heterocephalus|XP_012928609.1|cH4 organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Mus|NP_291074.1|cH4 organism=Mus musculus phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV 
+LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Tetrahymena|XP_001016593.1|cH4 organism=Tetrahymena thermophila SB210 phylum=Ciliophora class=Oligohymenophorea +MAG-GKGGK-GMGKVGAKRHSRKSNKASIEGITKPAIRRLARRGGVKRISSFIYDDSRQV +LKSFLENVVRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG +>Strongylocentrotus|NP_999707.1|cH4 organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYCEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Trypanosoma|XP_951561.1|cH4 organism=Trypanosoma brucei brucei TREU927 phylum=Euglenozoa class=Kinetoplastea +---MAKGKRVGESKGAQKRQ-KKVLRDNVRGITRGSIRRLARRAGVKRISGVIYDEVRGV +LKTFVESIVRDAGAYTEYSRKKTVTAAHVVFALRKRGKVLYGYD- +>Caenorhabditis|NP_492641.1|cH4 organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYCEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Arabidopsis|NP_180441.1|cH4 organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKIFLENVIRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG +>Zea|NP_001131585.1|cH4 organism=Zea mays phylum=Streptophyta class=Magnoliopsida +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKIFLENVIRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_001029249.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003486.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003529.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003530.1|cH4_(Homo_sapiens) organism=Homo sapiens 
phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003531.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003532.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003533.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003534.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003535.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003536.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003537.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003539.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_068803.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_778224.1|cH4_(Homo_sapiens) organism=Homo sapiens 
phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG diff --git a/CURATED_SET/draft_seeds/cH4_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cH4_(Homo_sapiens).fasta new file mode 100644 index 0000000..29c2470 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH4_(Homo_sapiens).fasta @@ -0,0 +1,42 @@ +>Homo|NP_001029249.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003486.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003529.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003530.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003531.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003532.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003533.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003534.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG 
+>Homo|NP_003535.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003536.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003537.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_003539.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_068803.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Homo|NP_778224.1|cH4_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLK +VFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG diff --git a/CURATED_SET/draft_seeds/cH4_only.fasta b/CURATED_SET/draft_seeds/cH4_only.fasta new file mode 100644 index 0000000..d903dc3 --- /dev/null +++ b/CURATED_SET/draft_seeds/cH4_only.fasta @@ -0,0 +1,36 @@ +>Drosophila|NP_001027352.1|cH4 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +MTGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTALDVVYALKRQGRTLYGFGG +>Saccharomyces|NP_009563.1|cH4 organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +MSGRGKGGK-GLGKGGAKRH-RKILRDNIQGITKPAIRRLARRGGVKRISGLIYEEVRAV +LKSFLESVIRDSVTYTEHAKRKTVTSLDVVYALKRQGRTLYGFGG +>Xenopus|NP_001087926.1|cH4 organism=Xenopus laevis phylum=Chordata class=Amphibia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV 
+LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Gallus|NP_001032934.1|cH4 organism=Gallus gallus phylum=Chordata class=Aves +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Heterocephalus|XP_012928609.1|cH4 organism=Heterocephalus glaber phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Mus|NP_291074.1|cH4 organism=Mus musculus phylum=Chordata class=Mammalia +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Tetrahymena|XP_001016593.1|cH4 organism=Tetrahymena thermophila SB210 phylum=Ciliophora class=Oligohymenophorea +MAG-GKGGK-GMGKVGAKRHSRKSNKASIEGITKPAIRRLARRGGVKRISSFIYDDSRQV +LKSFLENVVRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG +>Strongylocentrotus|NP_999707.1|cH4 organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYCEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Trypanosoma|XP_951561.1|cH4 organism=Trypanosoma brucei brucei TREU927 phylum=Euglenozoa class=Kinetoplastea +---MAKGKRVGESKGAQKRQ-KKVLRDNVRGITRGSIRRLARRAGVKRISGVIYDEVRGV +LKTFVESIVRDAGAYTEYSRKKTVTAAHVVFALRKRGKVLYGYD- +>Caenorhabditis|NP_492641.1|cH4 organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKVFLENVIRDAVTYCEHAKRKTVTAMDVVYALKRQGRTLYGFGG +>Arabidopsis|NP_180441.1|cH4 organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKIFLENVIRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG +>Zea|NP_001131585.1|cH4 organism=Zea mays phylum=Streptophyta class=Magnoliopsida +MSGRGKGGK-GLGKGGAKRH-RKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGV +LKIFLENVIRDAVTYTEHARRKTVTAMDVVYALKRQGRTLYGFGG diff --git a/CURATED_SET/draft_seeds/cenH3_(Animals).fasta 
b/CURATED_SET/draft_seeds/cenH3_(Animals).fasta new file mode 100644 index 0000000..1f5bcb9 --- /dev/null +++ b/CURATED_SET/draft_seeds/cenH3_(Animals).fasta @@ -0,0 +1,30 @@ +>Drosophila|NP_523730.2|cenH3_(Animals) organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +-----------------------MPRHSRAKRAPRPSANNSKSPNDD-----DTAFRSPE +PEDGTDYGLEFTTSQLTLQDNNRRSSTL-----------RRDAGRRQPAARD-------- +-------------SSTSGEEEDQENRYPTTRSPQTRRMTVQQESKTRAAGPVAAQNQTRR +RKAANPMSRAKRMD------REIRRLQHHPGTLIPKLPFSRLVREFIVKYSD-DEPLRVT +EGALLAMQESCEMYLTQRLADSYMLTKHRNRVTLEVRDMALMAYICDRGRQF- +>Caenorhabditis|NP_499128.1|cenH3_(Animals) organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +MADDTPIIEEIAEQNESVTRIMQRLKHDMQRVTSVPGFNTSAAGVNDLIDILNQYKKELE +DDAANDY-TEAHIHKIRLVTGKRNQYVLKLKQAEDEYHARKEQARRRASSMDFTVGRNST +NLVDYSHGRHHMPSYRRHDSSDEENYSMDGTNGDGNRAGPSNPDRGNRTGP-SSSDRVRM +RAGRNRVTKTRRYRPGQKALEEIRKYQKTEDLLIQKAPFARLVREIMQTSTPFGADCRIR +SDAISALQEAAEAFLVEMFEGSSLISTHAKRVTLMTTDIQLYRRLCL--RHL- +>Mus|NP_031707.1|cenH3_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +-----------------------MGPR---R----------------------------- +---------------------------------------KPQTPRRRPSS---------- +-------------------------------------PAPGPSRQSSSVGS-QTLRR-RQ +KFM---------------WLKEIKTLQKSTDLLFRKKPFSMVVREICEKFSR-GVDFWWQ +AQALLALQEAAEAFLIHLFEDAYLLSLHAGRVTLFPKDIQLTRRIRGFEGGLP +>Homo|NP_001035891.1|cenH3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-----------------------MGPRRRSR----------------------------- +---------------------------------------KPEAPRRRSPSPT-------- +-------------------------------------PTPGPSRRGPSLGA-SSHQHSRR +RQG---------------WLKEIRKLQKSTHLLIRKLPFSRL------------------ +---------AAEAFLVHLFEDAYLLTLHAGRVTLFPKDVQLARRIRGLEEGLG +>Homo|NP_001800.1|cenH3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +-----------------------MGPRRRSR----------------------------- 
+---------------------------------------KPEAPRRRSPSPT-------- +-------------------------------------PTPGPSRRGPSLGA-SSHQHSRR +RQG---------------WLKEIRKLQKSTHLLIRKLPFSRLAREICVKFTR-GVDFNWQ +AQALLALQEAAEAFLVHLFEDAYLLTLHAGRVTLFPKDVQLARRIRGLEEGLG diff --git a/CURATED_SET/draft_seeds/cenH3_(Animals)_only.fasta b/CURATED_SET/draft_seeds/cenH3_(Animals)_only.fasta new file mode 100644 index 0000000..267fa58 --- /dev/null +++ b/CURATED_SET/draft_seeds/cenH3_(Animals)_only.fasta @@ -0,0 +1,12 @@ +>Drosophila|NP_523730.2|cenH3_(Animals) organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +-----------------------MPRHSRAKRAPRPSANNSKSPNDD-----DTAFRSPE +PEDGTDYGLEFTTSQLTLQDNNRRSSTL-----------RRDAGRRQPAARD-------- +-------------SSTSGEEEDQENRYPTTRSPQTRRMTVQQESKTRAAGPVAAQNQTRR +RKAANPMSRAKRMD------REIRRLQHHPGTLIPKLPFSRLVREFIVKYSD-DEPLRVT +EGALLAMQESCEMYLTQRLADSYMLTKHRNRVTLEVRDMALMAYICDRGRQF +>Caenorhabditis|NP_499128.1|cenH3_(Animals) organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +MADDTPIIEEIAEQNESVTRIMQRLKHDMQRVTSVPGFNTSAAGVNDLIDILNQYKKELE +DDAANDY-TEAHIHKIRLVTGKRNQYVLKLKQAEDEYHARKEQARRRASSMDFTVGRNST +NLVDYSHGRHHMPSYRRHDSSDEENYSMDGTNGDGNRAGPSNPDRGNRTGP-SSSDRVRM +RAGRNRVTKTRRYRPGQKALEEIRKYQKTEDLLIQKAPFARLVREIMQTSTPFGADCRIR +SDAISALQEAAEAFLVEMFEGSSLISTHAKRVTLMTTDIQLYRRLCL--RHL diff --git a/CURATED_SET/draft_seeds/cenH3_(Eukarya).fasta b/CURATED_SET/draft_seeds/cenH3_(Eukarya).fasta new file mode 100644 index 0000000..ca5a4a4 --- /dev/null +++ b/CURATED_SET/draft_seeds/cenH3_(Eukarya).fasta @@ -0,0 +1,150 @@ +>Plasmodium|XP_001350068.1|cenH3_(Eukarya) organism=Plasmodium falciparum 3D7 phylum=Apicomplexa class=Aconoidasida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------MVRTKKNIPNHNPLNAF-----------------NRDKSFKTNK 
+TLP-------NRTVHHGISSKTTNINRPSVNRGGINEVAQKNLHRTNIRK---------- +---------PHRYRPGVLALKEIRAYQASTQLLIPKIPFVRVVKEITRLFELP---D-E- +QFRYTPEALLALQTASEAYLVSLFEDAYLCSLHANRVTLMPKDIHLARRIRGRD------ +--------- +>Thalassiosira|XP_002287626.1|cenH3_(Eukarya) organism=Thalassiosira pseudonana CCMP1335 phylum=Bacillariophyta class=Coscinodiscophyceae +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------MRPGEKALREIRQYQSSTSLLLRRLPFARLVREIQ--YGMT---R-Q- +PYRWQGSAILALQEAAEAHLVGLFEDSNLCAIHGKRVTIMPKDMQLARRIRGWVRE---- +--------- +>Tetrahymena|XP_001011273.1|cenH3_(Eukarya) organism=Tetrahymena thermophila SB210 phylum=Ciliophora class=Oligohymenophorea +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------MARKAYQPKRRSNSNQ-----------------NQQRSDSLKK +NKQ-----DNLRSKSAGNQQGNEKNKKDIQDQRNKASTKKKRESSGEKYE---------- +---SARDKVIRRFRPGDNALKQLRQYNQTPSLLIRKLPFQRLIREIS--TRMT---E-ED +SLRWTSFALVLLQTVVEDYMVSFFEDANACALHAKRVTLMSKDLALAARIRGQKNVTGIF +IPTKK---- +>Phytophthora|XP_009526809.1|cenH3_(Eukarya) organism=Phytophthora sojae phylum=Oomycota class= +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------------------MASPRPPALP +ASDASSSSVTDSGTDDSTPPNSPQPQRSPSPPF-----------------AATRGPAAAR +APP--AAPSSVSSASTPSPPPNLPARRPSVAPVGRQAAIHATPTAPRRKRKATPPRRRPS 
+SPASATQTVKKRRKPGEAALREIRLLQRSTKLLLRKLPFARVVREIQ--TEFT---G-V- +GYRWQAEALLALQEAAETYLVRTFEDAYVGG----------------------------- +--------- +>Perkinsus|XP_002767160.1|cenH3_(Eukarya) organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +MVGVENLGVGFDELLTRGGCGVRDDAVEIAFRGVEGLEDVLKDYMVRNKDGKILSVARPV +DAEHSEELLGLAAAIGRSYGSLICAAAHNGGVRLPVGKGDDDGDSNNSSDEEADSGCGGA +AEGDEAGDVGAGAGDVGDGAGDGAAEGDGAGDAGNGAGDVGDVGDGAGDGAAEGDGAGDG +AADDAHGAGDDGEGSRNGGPPLVVQMMVLVMMNGNGNGADDGGNGVDDGEGDGDGHQGNV +EGDGHGDGQDDGDGEGSVDSSGNGGDSEPSLEV-----------------SREGSENRPK +LLPPVEGRTSSSAAAIAAPPVPSAGSHIITGSGGKVPTAGKRPRQFVKKS---------- +---SAK---KGRYRPGTVALREIRRHQEITDPLIEKRCFQALARSLS--REVE---A--- +SMRWQPQSLVALQEASESFIVGMLEASQLLAVHGRRITLMEKDVKMWTRLAAMFGSTTFM +DQEKQVGGT +>Drosophila|NP_523730.2|cenH3_(Animals) organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----MPRHSRAKRAPRPSANNSKSPNDD-----DTAFRSPEPEDGTDYGLEFTTSQLTLQ +DNNRRSSTL-----------RRDAGRRQPAARD---------------------SSTSGE +EEDQENRYPTTRSPQTRRMTVQQESKTRAAGPVAAQNQTRRRKAANPMSR---------- +---------AKRMD------REIRRLQHHPGTLIPKLPFSRLVREFI--VKYS---D-DE +PLRVTEGALLAMQESCEMYLTQRLADSYMLTKHRNRVTLEVRDMALMAYICDRGRQF--- +--------- +>Caenorhabditis|NP_499128.1|cenH3_(Animals) organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------------------MADDTPIIEEIAEQNESVT +RIMQRLKHDMQRVTSVPGFNTSAAGVNDLIDILNQYKKELEDDAANDY-TEAHIHKIRLV +TGKRNQYVLKLKQAEDEYHARKEQARRRASSMDFTVGRNSTNLVDYSHGRHHMPSYRRHD +SSDEENYSMDGTNGDGNRAGPSNPDRGNRTGP-SSSDRVRMRAGRNRVTK---------- +---------TRRYRPGQKALEEIRKYQKTEDLLIQKAPFARLVREIM--QTST---PFGA +DCRIRSDAISALQEAAEAFLVEMFEGSSLISTHAKRVTLMTTDIQLYRRLCL--RHL--- +--------- 
+>Mus|NP_031707.1|cenH3_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----MGPR---R------------------------------------------------ +--------------------KPQTPRRRPSS----------------------------- +------------------PAPGPSRQSSSVGS-QTLRR-RQKFM---------------- +------------------WLKEIKTLQKSTDLLFRKKPFSMVVREIC--EKFS---R-GV +DFWWQAQALLALQEAAEAFLIHLFEDAYLLSLHAGRVTLFPKDIQLTRRIRGFEGGLP-- +--------- +>Homo|NP_001035891.1|cenH3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----MGPRRRSR------------------------------------------------ +--------------------KPEAPRRRSPSPT--------------------------- +------------------PTPGPSRRGPSLGA-SSHQHSRRRQG---------------- +------------------WLKEIRKLQKSTHLLIRKLPFSRL------------------ +--------------AAEAFLVHLFEDAYLLTLHAGRVTLFPKDVQLARRIRGLEEGLG-- +--------- +>Homo|NP_001800.1|cenH3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----MGPRRRSR------------------------------------------------ +--------------------KPEAPRRRSPSPT--------------------------- +------------------PTPGPSRRGPSLGA-SSHQHSRRRQG---------------- +------------------WLKEIRKLQKSTHLLIRKLPFSRLAREIC--VKFT---R-GV +DFNWQAQALLALQEAAEAFLVHLFEDAYLLTLHAGRVTLFPKDVQLARRIRGLEEGLG-- +--------- +>Saccharomyces|NP_012875.2|cenH3_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------MSSKQQWVSSAIQSDSSGRSLSNVNRLA +GDQQSINDRALSLLQRTRATKNLFPRREERRRY----------------ESSKSDLDIET +DYEDQAGNLEIETENEEEAEMETEVPAPVRTHSYALDRYVRQKRREKQRK---------- +---QSLKRVEKKYTPSELALYEIRKYQRSTDLLISKIPFARLVKEVT--DEFT---TKDQ +DLRWQSMAIMALQEASEAYLVGLLEHTNLLALHAKRITIMKKDMQLARRIRGQFI----- +--------- +>Schizosaccharomyces|NP_596473.1|cenH3_(Fungi) organism=Schizosaccharomyces pombe phylum=Ascomycota class=Schizosaccharomycetes +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------MAKKSLM------------------------------------ +------------------AEPGDPIPRPRK------------------------------ +----------KRYRPGTTALREIRKYQRSTDLLIQRLPFSRIVREIS--SEFVANFSTDV +GLRWQSTALQCLQEAAEAFLVHLFEDTNLCAIHAKRVTIMQRDMQLARRIRGA------- +--------- +>Guillardia|AAK39657.1|cenH3_(Plants) organism=Guillardia theta phylum= class=Cryptophyceae +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------------MMKKQNLK------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-----------RFKKSSNSLVDIRKFQKSTDLLIHRLPFARLVKEIS--LKYH---H-S- +-LNWQQVAVEALQFASEDYIIGLLQDANLAAIHAKRVTVMPKDLKLAKIIRGEH------ +--------- +>Cyanidioschyzon|XP_005535607.1|cenH3_(Plants) organism=Cyanidioschyzon merolae strain 10D phylum=Rhodophyta class=Bangiophyceae +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+-MVRVFGNPVGGAHQYRQTQALNRDESLREQLQRTTVAEQDENIEPEPERSSPDEAQSRL +KSLLGSVARSSSLLGRRVAGPPGSSLSNLAETRLLTEEAANRPTGAGRSERVSAGRAEPL +PPGPTTATTNADANTVQRGLSVASAFPPRAPLQAAGFTVARRTTRSGVQR---------- +---------KHRFRPGSRAIMEIRKFQRSTELLLRRLPFARLVREIC--ERLF---G-SS +AFRWQASALEALQTAAEDYLIHLFEDSNLCAIHARRVTIMPRDIALARRIRGYHSDPHGY +L-------- +>Arabidopsis|NP_563627.1|cenH3_(Plants) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------MARTKHRVTRSQPRNQTDA------------------------------- +-----------------------------------------------------AGASSSQ +AAGPTTTPTRRGGEGGDNTQQTNPTTSPATGTRRGAKRSRQAMPRGSQKK---------- +---------SYRYRPGTVALKEIRHFQKQTNLLIPAASFIREVRSIT--HMLA---P-PQ +INRWTAEALVALQEAAEDYLVGLFSDSMLCAIHARRVTLMRKDFELARRLGGKGRPW--- +--------- diff --git a/CURATED_SET/draft_seeds/cenH3_(Eukarya)_only.fasta b/CURATED_SET/draft_seeds/cenH3_(Eukarya)_only.fasta new file mode 100644 index 0000000..543ad9e --- /dev/null +++ b/CURATED_SET/draft_seeds/cenH3_(Eukarya)_only.fasta @@ -0,0 +1,45 @@ +>Plasmodium|XP_001350068.1|cenH3_(Eukarya) organism=Plasmodium falciparum 3D7 phylum=Apicomplexa class=Aconoidasida +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------MVRTKKNIPNHNPLNAFNRDKSFKTNKTLP-------NRTVHHG +ISSKTTNINRPSVNRGGINEVAQKNLHRTNIRK-------------------PHRYRPGV +LALKEIRAYQASTQLLIPKIPFVRVVKEITRLFELPDE-QFRYTPEALLALQTASEAYLV +SLFEDAYLCSLHANRVTLMPKDIHLARRIRGRD--------------- +>Thalassiosira|XP_002287626.1|cenH3_(Eukarya) organism=Thalassiosira pseudonana CCMP1335 phylum=Bacillariophyta class=Coscinodiscophyceae +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------------------MRPGE +KALREIRQYQSSTSLLLRRLPFARLVREIQ--YGMTRQ-PYRWQGSAILALQEAAEAHLV +GLFEDSNLCAIHGKRVTIMPKDMQLARRIRGWVRE------------- +>Tetrahymena|XP_001011273.1|cenH3_(Eukarya) organism=Tetrahymena thermophila SB210 phylum=Ciliophora class=Oligohymenophorea +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------MARKAYQPKRRSNSNQNQQRSDSLKKNKQ-----DNLRSKSAG +NQQGNEKNKKDIQDQRNKASTKKKRESSGEKYE-------------SARDKVIRRFRPGD +NALKQLRQYNQTPSLLIRKLPFQRLIREIS--TRMTEEDSLRWTSFALVLLQTVVEDYMV +SFFEDANACALHAKRVTLMSKDLALAARIRGQKNVTGIFIPTKK---- +>Phytophthora|XP_009526809.1|cenH3_(Eukarya) organism=Phytophthora sojae phylum=Oomycota class= +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------------------MASPRPPALP +ASDASSSSVTDSGTDDSTPPNSPQPQRSPSPPFAATRGPAAARAPP--AAPSSVSSASTP +SPPPNLPARRPSVAPVGRQAAIHATPTAPRRKRKATPPRRRPSSPASATQTVKKRRKPGE +AALREIRLLQRSTKLLLRKLPFARVVREIQ--TEFTGV-GYRWQAEALLALQEAAETYLV +RTFEDAYVGG-------------------------------------- +>Perkinsus|XP_002767160.1|cenH3_(Eukarya) organism=Perkinsus marinus ATCC 50983 phylum=Perkinsozoa class= +MVGVENLGVGFDELLTRGGCGVRDDAVEIAFRGVEGLEDVLKDYMVRNKDGKILSVARPV +DAEHSEELLGLAAAIGRSYGSLICAAAHNGGVRLPVGKGDDDGDSNNSSDEEADSGCGGA +AEGDEAGDVGAGAGDVGDGAGDGAAEGDGAGDAGNGAGDVGDVGDGAGDGAAEGDGAGDG +AADDAHGAGDDGEGSRNGGPPLVVQMMVLVMMNGNGNGADDGGNGVDDGEGDGDGHQGNV 
+EGDGHGDGQDDGDGEGSVDSSGNGGDSEPSLEVSREGSENRPKLLPPVEGRTSSSAAAIA +APPVPSAGSHIITGSGGKVPTAGKRPRQFVKKS-------------SAK---KGRYRPGT +VALREIRRHQEITDPLIEKRCFQALARSLS--REVEA--SMRWQPQSLVALQEASESFIV +GMLEASQLLAVHGRRITLMEKDVKMWTRLAAMFGSTTFMDQEKQVGGT diff --git a/CURATED_SET/draft_seeds/cenH3_(Fungi).fasta b/CURATED_SET/draft_seeds/cenH3_(Fungi).fasta new file mode 100644 index 0000000..ea55f5d --- /dev/null +++ b/CURATED_SET/draft_seeds/cenH3_(Fungi).fasta @@ -0,0 +1,10 @@ +>Saccharomyces|NP_012875.2|cenH3_(Fungi) organism=Saccharomyces cerevisiae S288C phylum=Ascomycota class=Saccharomycetes +MSSKQQWVSSAIQSDSSGRSLSNVNRLAGDQQSINDRALSLLQRTRATKNLFPRREERRR +YESSKSDLDIETDYEDQAGNLEIETENEEEAEMETEVPAPVRTHSYALDRYVRQKRREKQ +RKQSLKRVEKKYTPSELALYEIRKYQRSTDLLISKIPFARLVKEVTDEFT---TKDQDLR +WQSMAIMALQEASEAYLVGLLEHTNLLALHAKRITIMKKDMQLARRIRGQFI +>Schizosaccharomyces|NP_596473.1|cenH3_(Fungi) organism=Schizosaccharomyces pombe phylum=Ascomycota class=Schizosaccharomycetes +---------------------------------------------MAKKSLM-------- +------------------------------AEPGDPIPRPRK------------------ +---------KRYRPGTTALREIRKYQRSTDLLIQRLPFSRIVREISSEFVANFSTDVGLR +WQSTALQCLQEAAEAFLVHLFEDTNLCAIHAKRVTIMQRDMQLARRIRGA-- diff --git a/CURATED_SET/draft_seeds/cenH3_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/cenH3_(Homo_sapiens).fasta new file mode 100644 index 0000000..268ffa5 --- /dev/null +++ b/CURATED_SET/draft_seeds/cenH3_(Homo_sapiens).fasta @@ -0,0 +1,8 @@ +>Homo|NP_001035891.1|cenH3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MGPRRRSRKPEAPRRRSPSPTPTPGPSRRGPSLGASSHQHSRRRQGWLKEIRKLQKSTHL +LIRKLPFSRL--------------------------AAEAFLVHLFEDAYLLTLHAGRVT +LFPKDVQLARRIRGLEEGLG +>Homo|NP_001800.1|cenH3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MGPRRRSRKPEAPRRRSPSPTPTPGPSRRGPSLGASSHQHSRRRQGWLKEIRKLQKSTHL +LIRKLPFSRLAREICVKFTRGVDFNWQAQALLALQEAAEAFLVHLFEDAYLLTLHAGRVT +LFPKDVQLARRIRGLEEGLG diff --git a/CURATED_SET/draft_seeds/cenH3_(Mammalia).fasta 
b/CURATED_SET/draft_seeds/cenH3_(Mammalia).fasta new file mode 100644 index 0000000..3611649 --- /dev/null +++ b/CURATED_SET/draft_seeds/cenH3_(Mammalia).fasta @@ -0,0 +1,12 @@ +>Mus|NP_031707.1|cenH3_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +MGPR---RKPQTPRRRPSS--PAPGPSRQSSSVGSQTLRR-RQKFMWLKEIKTLQKSTDL +LFRKKPFSMVVREICEKFSRGVDFWWQAQALLALQEAAEAFLIHLFEDAYLLSLHAGRVT +LFPKDIQLTRRIRGFEGGLP +>Homo|NP_001035891.1|cenH3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MGPRRRSRKPEAPRRRSPSPTPTPGPSRRGPSLGASSHQHSRRRQGWLKEIRKLQKSTHL +LIRKLPFSRL--------------------------AAEAFLVHLFEDAYLLTLHAGRVT +LFPKDVQLARRIRGLEEGLG +>Homo|NP_001800.1|cenH3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MGPRRRSRKPEAPRRRSPSPTPTPGPSRRGPSLGASSHQHSRRRQGWLKEIRKLQKSTHL +LIRKLPFSRLAREICVKFTRGVDFNWQAQALLALQEAAEAFLVHLFEDAYLLTLHAGRVT +LFPKDVQLARRIRGLEEGLG diff --git a/CURATED_SET/draft_seeds/cenH3_(Mammalia)_only.fasta b/CURATED_SET/draft_seeds/cenH3_(Mammalia)_only.fasta new file mode 100644 index 0000000..1ba2ae2 --- /dev/null +++ b/CURATED_SET/draft_seeds/cenH3_(Mammalia)_only.fasta @@ -0,0 +1,4 @@ +>Mus|NP_031707.1|cenH3_(Mammalia) organism=Mus musculus phylum=Chordata class=Mammalia +MGPRRKPQTPRRRPSSPAPGPSRQSSSVGSQTLRRRQKFMWLKEIKTLQKSTDLLFRKKP +FSMVVREICEKFSRGVDFWWQAQALLALQEAAEAFLIHLFEDAYLLSLHAGRVTLFPKDI +QLTRRIRGFEGGLP diff --git a/CURATED_SET/draft_seeds/cenH3_(Plants).fasta b/CURATED_SET/draft_seeds/cenH3_(Plants).fasta new file mode 100644 index 0000000..a213492 --- /dev/null +++ b/CURATED_SET/draft_seeds/cenH3_(Plants).fasta @@ -0,0 +1,18 @@ +>Guillardia|AAK39657.1|cenH3_(Plants) organism=Guillardia theta phylum= class=Cryptophyceae +--------------------MMKKQNLK-------------------------------- +------------------------------------------------------------ +---------------------------------------------------RFKKSSNSL +VDIRKFQKSTDLLIHRLPFARLVKEISLKYHHS--LNWQQVAVEALQFASEDYIIGLLQD +ANLAAIHAKRVTVMPKDLKLAKIIRGEH------- 
+>Cyanidioschyzon|XP_005535607.1|cenH3_(Plants) organism=Cyanidioschyzon merolae strain 10D phylum=Rhodophyta class=Bangiophyceae +MVRVFGNPVGGAHQYRQTQALNRDESLREQLQRTTVAEQDENIEPEPERSSPDEAQSRLK +SLLGSVARSSSLLGRRVAGPPGSSLSNLAETRLLTEEAANRPTGAGRSERVSAGRAEPLP +PGPTTATTNADANTVQRGLSVASAFPPRAPLQAAGFTVARRTTRSGVQRKHRFRPGSRAI +MEIRKFQRSTELLLRRLPFARLVREICERLFGSSAFRWQASALEALQTAAEDYLIHLFED +SNLCAIHARRVTIMPRDIALARRIRGYHSDPHGYL +>Arabidopsis|NP_563627.1|cenH3_(Plants) organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +---------MARTKHRVTRSQPRNQTDA-------------------------------- +----------------------------------------------------AGASSSQA +AGPTTTPTRRGGEGGDNTQQTNPTTSPATGTRRGAKRSRQAMPRGSQKKSYRYRPGTVAL +KEIRHFQKQTNLLIPAASFIREVRSITHMLAPPQINRWTAEALVALQEAAEDYLVGLFSD +SMLCAIHARRVTLMRKDFELARRLGGKGRPW---- diff --git a/CURATED_SET/draft_seeds/early_H2B_(Echinoidea).fasta b/CURATED_SET/draft_seeds/early_H2B_(Echinoidea).fasta new file mode 100644 index 0000000..e1aaaa0 --- /dev/null +++ b/CURATED_SET/draft_seeds/early_H2B_(Echinoidea).fasta @@ -0,0 +1,4 @@ +>Strongylocentrotus|P02289.2|early_H2B_(Echinoidea) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +MAPTAQVAKKGSKKAVKGTKTAXGGKKRNRKRKESYGIYIYKVLKQVHPDTGISSRAMVI +MNSFVNDIFERIAGESSRLAQYNKKSTISSREIQTAVRLILPGELAKHAVSEGTKAVTKY +TTSK diff --git a/CURATED_SET/draft_seeds/gH2A.fasta b/CURATED_SET/draft_seeds/gH2A.fasta new file mode 100644 index 0000000..636c08d --- /dev/null +++ b/CURATED_SET/draft_seeds/gH2A.fasta @@ -0,0 +1,4 @@ +>Lilium|BAE47493.1|gH2A organism=Lilium longiflorum phylum=Streptophyta class=Magnoliopsida +MISSANNKGAGTSRRKLRSEKAALQFSVSRVEYSLKKGRYCRRLGATAPVYLAAVLENLV +AEVLEMAANVTEKHKRIVIKPRHIMLAVRNDVEVNKLFHGVTISASGVVPKTRKELDRRK +RRSTSQAD diff --git a/CURATED_SET/draft_seeds/gH2B.fasta b/CURATED_SET/draft_seeds/gH2B.fasta new file mode 100644 index 0000000..af1fa0e --- /dev/null +++ b/CURATED_SET/draft_seeds/gH2B.fasta @@ -0,0 +1,40 @@ +>Lilium|CUT18445.1|gH2B organism=Lilium 
davidii var. unicolor phylum=Streptophyta class=Magnoliopsida +MAPKSEKKPAEKKPVAEKPAAEEEKKSAPAPAAAEKKPAEKK------------------ +-----------PKAGKKLPASKGKEGEKKK------------------------------ +------------------------------------------------------------ +----------------------- +>Lilium|CUT18446.1|gH2B organism=Lilium davidii var. unicolor phylum=Streptophyta class=Magnoliopsida +--------------------------------------AEKK------------------ +-----------PKAGKKVPASKEGEKKKKR-SKKSVETYKIYIFKVLKQVHPDIGISSKA +MGIMNSFINDLFEKMAQESARLARYNKKNTITSREIQTSVRLVLPGELAKHAVSEGTKAV +TKFTSA----------------- +>Lilium|CUT18447.1|gH2B organism=Lilium davidii var. unicolor phylum=Streptophyta class=Magnoliopsida +MAPKAEKKPAAKKPAATPPPEEEKE---VVPPP----PAEKK------------------ +-----------PKAGKKLPAAKEGDAKKKKKAKKSIETYKIYIFKVLKQVHPDIGISSKA +MGIMNSFINDIFEKLAQESARLARYNKKPTITSREIQTSVRLVLPGELAKHAVSEGTKAV +TKFTSA----------------- +>Lilium|CUT18448.1|gH2B organism=Lilium davidii var. unicolor phylum=Streptophyta class=Magnoliopsida +--------------AATPPPEEEKE---VVPPPAEKKPAEKK------------------ +-----------PKAGKKLPASKEGDAKKKKKSKKSIETYKIYIFKVLKQVHPDIGISSKA +MGIMNSFINDIFEKLAQESARLARYNKKPTITSREIQTSVRLVLPGELAKHAVSEGTKAV +TKFTSA----------------- +>Lilium|CUT18449.1|gH2B organism=Lilium davidii var. unicolor phylum=Streptophyta class=Magnoliopsida +MPPR-------------------RKKTAAGAAAGGKAAAAAV------------------ +-----------GKAGF-MPPKKPKKGKKKT----PIMRYKRYIYKVLKQVRPELGISEKS +TMIMNNFVAHNFQNIAKEASILAYYSKKRTITVDELKAAVAMVLPNLLADYANRDGEKAV +SNFEGEASAKKSQGRKRGRGQQA +>Lilium|CUT18450.1|gH2B organism=Lilium davidii var. unicolor phylum=Streptophyta class=Magnoliopsida +MAPK--KKPSKLVGTVTKTRKVTETQTLKVSLTKGLKPEDQQTTTNKFEVSVTGKQSKTQ +PLIVSTNTNLVPKKEKEESPTTTLMVKKKRKNRKAGGEYKRYVYMVLKTVHPDMTVSSKA +MMVMEGMMQDMFERLVTEAVRLVQYMKKATLTCREIQTAVMLVLPGELGKHAVSEGAKAI +TNYMAAVGSGNGGAA-------- +>Lilium|CUT18451.1|gH2B organism=Lilium davidii var. 
unicolor phylum=Streptophyta class=Magnoliopsida +MAPKAEKKPAAKKPAATPPPEEEKE---VVPPP----PAEKK------------------ +-----------PKAGKKLPAAKEGDAKKKKKAKKSIETYKIYIFKVLKQVHPDIGISSKA +MGIMNSFINDIFEKLAQESARLARYNKKPTITSREIQTSVRLVLPGELAKHAVSEGTKAV +TKFTSG----------------- +>Lilium|CUT18452.1|gH2B organism=Lilium davidii var. unicolor phylum=Streptophyta class=Magnoliopsida +MAPKSEKKPAEKKPVAEKPAAEEEKKAAPAAAPAEKKAAEKK------------------ +-----------PKA---------------------------------------------- +------------------------------------------------------------ +----------------------- diff --git a/CURATED_SET/draft_seeds/generic_H1.fasta b/CURATED_SET/draft_seeds/generic_H1.fasta new file mode 100644 index 0000000..11d656e --- /dev/null +++ b/CURATED_SET/draft_seeds/generic_H1.fasta @@ -0,0 +1,104 @@ +>Drosophila|AAZ66580.1|generic_H1 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +---------MSDSAVATSASPVAAPPATV------------------EKKVVQKKASGSA +GTKAKKASAT--PSHPP------------------------TQQMVDASIKNLKERGGSS +LLAIKKYITATYK--CDAQKLAPFIKKYLKSAVVNGKLIQ---TKGKGASGSFKLS---A +SAKKEKDPKAKSKVLSAEKKVQSKK-VASKKIGVSSKKTAVGAADKK---PKAKKAVATK +KTAENKKTEKAKA--KDAKKTGIIKS-----------------KPAA---TKAKVTAAKP +KA-----------VVAKASKAKPAVSA-----------KPKKTVKKASVSATAKKPKAKT +TAAKK +>Drosophila|NP_650383.2|generic_H1 organism=Drosophila melanogaster phylum=Arthropoda class=Insecta +MKLKPVERNDGSDDESEEEMPNDHPESEDSNMGEEEELPEEDEEEMEEDEEEDRQDGDEV +ETDNLGADRNPYPTPPPDDGSKLVPPDSDNPKSMVPKPKGTLISLALMAIGKLASRSGSS +VQAIMTYLKDNGQEWKDPKKTARLLHRALKLAEANGEVVM---VKRS-----FKLTDKQK +NSSKAVEKMKAKKQKEKEKKAKVEK-VLKEKIQKKEAKAKMKEKKASKEKSSKPTERKTK +QAVKKKKPEDGTKDNPPASKAASSAAAQAMLETSQTAIPEAGKKPAK---TKVKLQADSS +EAGKTKKSRKSIGTLAQPKAARPKVKAVKKLVAGKGASTPDLSIMEAQATSTPQGATKAK +RKRKV +>Xenopus|NP_001081535.1|generic_H1 organism=Xenopus laevis phylum=Chordata class=Amphibia +---------------MAPKKAVAAPEGGNK-----------------ENAAVKGSSKVKV +KRKSIKLVKT--QSHPP------------------------TLSMVVEVLKKNTERKGTS 
+VQAIRTRILSAHPT-VDPLRLKFLLRTALNKGLEKGILIRPLNSSATGATGRFKLAKPVK +TTKAGKENVASENVDPNAEQETQKK-APKKEKKAKTEKEPKGEKTKAVAKKAKEDSDEKP +KVAKSKKAKEAKEVDKANKEAKEVDKANKEAKEVDKANKEV-KEVDK---APAKKPKAKT +EAAKAE-------GGGKAKKEPPKAKA-----------KDVKAQKDSTDEGAPVKAGKKG +KKVTN +>Gallus|XP_425456.1|generic_H1 organism=Gallus gallus phylum=Chordata class=Aves +---------------MSETAPVAAPAVSAP---------------------GAKAAAKKP +KKAAGGAKARK-PAGPS------------------------VTELITKAVSASKERKGLS +LAALKKALAAGG---YDVEKNNSRIKLGLKSLVSKGTLVQ---TKGTGASGSFKLN---K +KPGETKEKATKKKPAAKPKKPA----AKKPAAAAKKPKKAA---------AVKKSPKKAK +KPAAAA-TKKAAK--SPKKATKAGRP----------------KKTAK---SPAKAKAVKP +KA-------------AKSKAAKPKAA------------KAKKAATKKK------------ +----- +>Bos|XP_010816821.1|generic_H1 organism=Bos taurus phylum=Chordata class=Mammalia +---------------MSEVAL-PAPAASTS-----------------PEKPSAGKKAKKP +AKAAAAAKKK--PAGPS------------------------VSELIVQAVSSSKERSGVS +LAALKKALAAAG---YDVEKNNSRIKLGLKSLVGKGTLVQ---TKGTGASGSFKLN---K +KVASVDAKPTATKVAT----------KTKVTSASKKPKKASGAAAAK---KSVKTPKKAR +KSVLTKKSSK-----SPKKP-KAVKP----------------KKVAK---SPAKAKAVKP +KG-------------AKVKVTKPKTAA-----------KPKKAAPKKK------------ +----- +>Mus|NP_056601.1|generic_H1 organism=Mus musculus phylum=Chordata class=Mammalia +---------------MSEAAP-AAPAAAPP-----------------AEKAPAKKKAAKK +---PAGVRRK--ASGPP------------------------VSELITKAVAASKERSGVS +LAALKKALAAAG---YDVEKNNSRIKLGLKSLVSKGILVQ---TKGTGASGSFKLN---K +KAASGEAKPQAKKAGA----AK----AKKPAGAAKKPKKATGAATPK--KAAKKTPKKAK +KPAAAAVTKKVAK--SPKKA-KVTKP----------------KKVKS---A--------S +KA-------------VKPKAAKPKVA------------KAKKVAAKKK------------ +----- +>Mus|NP_085112.1|generic_H1 organism=Mus musculus phylum=Chordata class=Mammalia +---------------MSETAP-VAQAASTA-----------------TEKPAAAKKTKKP +AK-AAAPRKK--PAGPS------------------------VSELIVQAVSSSKERSGVS +LAALKKSLAAAG---YDVEKNNSRIKLGLKSLVNKGTLVQ---TKGTGAAGSFKLN---- +--KKAESKAITTKVSV----------KAKASGAAKKPKKTAGAAAKK----TVKTPKKPK 
+KPAVSKKTSK-----SPKKP-KVVKA----------------KKVAK---SPAKAKAVKP +KA-------------SKAKVTKPKTPA-----------KPKKAAPKKK------------ +----- +>Rattus|NP_579819.1|generic_H1 organism=Rattus norvegicus phylum=Chordata class=Mammalia +---------------MSETAP-AAPAAPAP-----------------AEKTPIKKKARKA +---AGGAKRK--ASGPP------------------------VSELITKAVAASKERSGVS +LAALKKALAAAG---YDVEKNNSRIKLGLKSLVSKGTLVQ---TKGTGASGSFKLN---K +KAASGEAKPKAKKAGA----AK----AKKPAGAAKKPKKATGTATPK--KSTKKTPKKAK +KPAAAAGAKKAK---SPKKA-KATKA----------------KKAPK---SPAKARAVKP +KA-------------AKPKTSKPKAA------------KPKKTAAKKK------------ +----- +>Strongylocentrotus|NP_999714.1|generic_H1 organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +---------------MAEKNS------------------------------SKKVTTKKP +------------AAHPP------------------------AAEMVATAITELKDRNGSS +LQAIKKYIATNFD--VQMDRQLLFIKRALKSGVEKGKLVQ---TKGKGASGSFKVN---V +QAAKAQASEKAKKEKEKAKLLAQRE-KAKEKGCSEEGETAEGSRPKKVKAAPKKAKKPVK +KTTEKKEKKKTPKK-APKKP-AAKKS-----------------TPKK---TPKKAAAKKP +KT-------------AKPKKPAXKKAA-----------KSK------------------- +----- +>Strongylocentrotus|NP_999720.1|generic_H1 organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +---------------MSAAKP------------------------------KVAKKARVA +------------PAHPP------------------------SSQMVVAAVTALKERGGSS +TQAIKKYIAANYT--VDMTKQGPFIRRALVKGVASGALVQ---TKGKGASGSFKL----G +KKKEGKSDAQKARIAAKKAKLAAKKKEQREKKALKTKARKEKVAAKKAAKKATKKTKKVK +KPAAKKAKKPAAKKPAAKKP-AAKKA----------------KKPAKKVAKPAKKAAAKP +---------------AKKAAKPAKKAA-----------KPAKKAAKPAKK---------- +----- +>Caenorhabditis|O17536.3|generic_H1 organism=Caenorhabditis elegans phylum=Nematoda class=Chromadorea +----MSDVAVAADTTETPAAPTKASKATKA-----------------SKATKASKATKAK +TTKVPMVKAD--AAHPP------------------------FINMVTEAISSIKDRKGPS +RAAILKYITTKYTLGDQANKINAHLRKALNKGLESNAFVQ---ASGNGANGRFRLAE--K +TASVAKSPAAAKKDATGEKKATT---TVAKKAATGEKKATTTVAKKAATGEKKATTTVAK 
+KAAAGDKAKK-----TEVKVKKVKSP----------------KKIAK---SPVNKVTKSP +VKKIA--------KSSSMKAAPKKAAA-----------KPAKKAPAAAPEA--------- +----- +>Arabidopsis|NP_172161.1|generic_H1 organism=Arabidopsis thaliana phylum=Streptophyta class=Magnoliopsida +MSEVEIENAATIEGNTAADAPVTDAAVEKKPAAK-------------GRKTKNVKEVKEK +KTVAAAPKKRTVSSHPT------------------------YEEMIKDAIVTLKERTGSS +QYAIQKFIEEKRK--ELPPTFRKLLLLNLKRLVASGKLVK---VKAS-----FKLPSASA +KASSPKAAAEKSAPAKKKPATVAVT-KAKRKVAAASKAKKTIAVKPKTAAAKKVTAKAKA +KPVPRATAAATKRKAVDAKPKAKARPAKA-------------AKTAK-VTSPAKKAVAAT +KKVA---------TVATKKKTPVKKVV-----------KPKTVKSPAKRASSRVKK---- +----- +>Zea|P23444.2|generic_H1 organism=Zea mays phylum=Streptophyta class=Magnoliopsida +-MATDVTETPAPLVDAAPEAPADAPAAPAA-----------------DANAAKAKKATAP +KKRAS-------PTHLP------------------------YAEMVSEAITSLKERTGSS +SYAIAKFVEDKHKA-KLPPNFRKLLNVQLKKLVAGGKLTK---VKNS-----YKLSSATK +PNPKPKAAPKKPKTGAKKPKAAAKP-KAKTPAKAKPATKPKPAAKPKAVVKPKTPAKPKA +KPAAKAKPKTAGAKPKPLAK-KAGRA-----------------KAAK---TSAKDTPGK- +-------------KAPAKKAAPSKKAA-----------TPVRKAPSRKAKK--------- +----- diff --git a/CURATED_SET/draft_seeds/late_H2B_(Echinoidea).fasta b/CURATED_SET/draft_seeds/late_H2B_(Echinoidea).fasta new file mode 100644 index 0000000..5e5bc57 --- /dev/null +++ b/CURATED_SET/draft_seeds/late_H2B_(Echinoidea).fasta @@ -0,0 +1,8 @@ +>Strongylocentrotus|P16888.2|late_H2B_(Echinoidea) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +MPAKAQPAGKKGSKKAKAPRPSGGKKRRRRRKESYGIYIYKVLKQVHPDTGISSRAMSIM +NSFVNDVFERIAAEASRLAHYNKKSTITSREVQTVVRLLLPGELAKHAVSEGTKAVTKYT +TSK +>Strongylocentrotus|P16889.3|late_H2B_(Echinoidea) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +MPAKAQAAGKKGSKKAKAPKPSGDKKRRRKRKESYGIYIYKVLKQVHPDTGISSRAMSIM +NSFVNDVFERIAAEASRLAHYNKKSTITSREVQTAVRLLLPGELAKHAVSEGTKAVTKYT +TSK diff --git a/CURATED_SET/draft_seeds/macroH2A.1.s1_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/macroH2A.1.s1_(Homo_sapiens).fasta new file mode 
100644 index 0000000..7378744 --- /dev/null +++ b/CURATED_SET/draft_seeds/macroH2A.1.s1_(Homo_sapiens).fasta @@ -0,0 +1,8 @@ +>Homo|NP_613075.1|macroH2A.1.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSSRGGKKKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTAEI +LELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKRGS +KGKLEAIITPPPAKKAKSPSQKKPVSKKAGGKKGARKSKKKQGEVSKAASADSTTEGTPA +DGFTVLSTKSLFLGQKLQVVQADIASIDSDAVVHPTNTDFYIGGEVGNTLEKKGGKEFVE +AVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADKCEELLEKTVKNCLALADD +KKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVLFDSESIGIYV +QEMAKLDAN diff --git a/CURATED_SET/draft_seeds/macroH2A.1.s1_(Mammalia).fasta b/CURATED_SET/draft_seeds/macroH2A.1.s1_(Mammalia).fasta new file mode 100644 index 0000000..7378744 --- /dev/null +++ b/CURATED_SET/draft_seeds/macroH2A.1.s1_(Mammalia).fasta @@ -0,0 +1,8 @@ +>Homo|NP_613075.1|macroH2A.1.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSSRGGKKKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTAEI +LELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKRGS +KGKLEAIITPPPAKKAKSPSQKKPVSKKAGGKKGARKSKKKQGEVSKAASADSTTEGTPA +DGFTVLSTKSLFLGQKLQVVQADIASIDSDAVVHPTNTDFYIGGEVGNTLEKKGGKEFVE +AVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADKCEELLEKTVKNCLALADD +KKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVLFDSESIGIYV +QEMAKLDAN diff --git a/CURATED_SET/draft_seeds/macroH2A.1.s1_(Mammalia)_only.fasta b/CURATED_SET/draft_seeds/macroH2A.1.s1_(Mammalia)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/macroH2A.1.s2_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/macroH2A.1.s2_(Homo_sapiens).fasta new file mode 100644 index 0000000..925aeaf --- /dev/null +++ b/CURATED_SET/draft_seeds/macroH2A.1.s2_(Homo_sapiens).fasta @@ -0,0 +1,16 @@ +>Homo|NP_001035248.1|macroH2A.1.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSSRGGKKKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTAEI 
+LELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKRGS +KGKLEAIITPPPAKKAKSPSQKKPVSKKAGGKKGARKS-KKQGEVSKAASADSTTEGTPA +DGFTVLSTKSLFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGNTLEKKGGKE +FVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADKCEELLEKTVKNCLAL +ADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVLFDSESIG +IYVQEMAKLDAN +>Homo|NP_613258.2|macroH2A.1.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSSRGGKKKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTAEI +LELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKRGS +KGKLEAIITPPPAKKAKSPSQKKPVSKKAGGKKGARKSKKKQGEVSKAASADSTTEGTPA +DGFTVLSTKSLFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGNTLEKKGGKE +FVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADKCEELLEKTVKNCLAL +ADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVLFDSESIG +IYVQEMAKLDAN diff --git a/CURATED_SET/draft_seeds/macroH2A.1.s2_(Mammalia).fasta b/CURATED_SET/draft_seeds/macroH2A.1.s2_(Mammalia).fasta new file mode 100644 index 0000000..925aeaf --- /dev/null +++ b/CURATED_SET/draft_seeds/macroH2A.1.s2_(Mammalia).fasta @@ -0,0 +1,16 @@ +>Homo|NP_001035248.1|macroH2A.1.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSSRGGKKKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTAEI +LELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKRGS +KGKLEAIITPPPAKKAKSPSQKKPVSKKAGGKKGARKS-KKQGEVSKAASADSTTEGTPA +DGFTVLSTKSLFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGNTLEKKGGKE +FVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADKCEELLEKTVKNCLAL +ADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVLFDSESIG +IYVQEMAKLDAN +>Homo|NP_613258.2|macroH2A.1.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSSRGGKKKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTAEI +LELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKRGS +KGKLEAIITPPPAKKAKSPSQKKPVSKKAGGKKGARKSKKKQGEVSKAASADSTTEGTPA +DGFTVLSTKSLFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGNTLEKKGGKE +FVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADKCEELLEKTVKNCLAL 
+ADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVLFDSESIG +IYVQEMAKLDAN diff --git a/CURATED_SET/draft_seeds/macroH2A.1.s2_(Mammalia)_only.fasta b/CURATED_SET/draft_seeds/macroH2A.1.s2_(Mammalia)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/macroH2A.1_(Mammalia).fasta b/CURATED_SET/draft_seeds/macroH2A.1_(Mammalia).fasta new file mode 100644 index 0000000..f61ac9f --- /dev/null +++ b/CURATED_SET/draft_seeds/macroH2A.1_(Mammalia).fasta @@ -0,0 +1,24 @@ +>Homo|NP_613075.1|macroH2A.1.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSSRGGKKKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTAEI +LELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKRGS +KGKLEAIITPPPAKKAKSPSQKKPVSKKAGGKKGARKSKKKQGEVSKAASADSTTEGTPA +DGFTVLSTKSLFLGQKLQVVQADIASI---DSDAVVHPTNTDFYIGGEVGNTLEKKGGKE +FVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADKCEELLEKTVKNCLAL +ADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVLFDSESIG +IYVQEMAKLDAN +>Homo|NP_001035248.1|macroH2A.1.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSSRGGKKKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTAEI +LELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKRGS +KGKLEAIITPPPAKKAKSPSQKKPVSKKAGGKKGARKS-KKQGEVSKAASADSTTEGTPA +DGFTVLSTKSLFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGNTLEKKGGKE +FVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADKCEELLEKTVKNCLAL +ADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVLFDSESIG +IYVQEMAKLDAN +>Homo|NP_613258.2|macroH2A.1.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSSRGGKKKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTAEI +LELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKRGS +KGKLEAIITPPPAKKAKSPSQKKPVSKKAGGKKGARKSKKKQGEVSKAASADSTTEGTPA +DGFTVLSTKSLFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGNTLEKKGGKE +FVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADKCEELLEKTVKNCLAL +ADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVLFDSESIG +IYVQEMAKLDAN diff --git 
a/CURATED_SET/draft_seeds/macroH2A.1_(Mammalia)_only.fasta b/CURATED_SET/draft_seeds/macroH2A.1_(Mammalia)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/macroH2A.2_(Homo_sapiens).fasta b/CURATED_SET/draft_seeds/macroH2A.2_(Homo_sapiens).fasta new file mode 100644 index 0000000..d802eb5 --- /dev/null +++ b/CURATED_SET/draft_seeds/macroH2A.2_(Homo_sapiens).fasta @@ -0,0 +1,8 @@ +>Homo|NP_061119.1|macroH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRSGKKKMSKLSRSARAGVIFPVGRLMRYLKKGTFKYRISVGAPVYMAAVIEYLAAEI +LELAGNAARDNKKARIAPRHILLAVANDEELNQLLKGVTIASGGVLPRIHPELLAKKRGT +KGKSETILSPPPEKRGRKATSGKKGGKKSKAAKPRTSKKSKPKDSDKEGTSNSTSEDGPG +DGFTILSSKSLVLGQKLSLTQSDISHIGSMRVEGIVHPTTAEIDLKEDIGKALEKAGGKE +FLETVKELRKSQGPLEVAEAAVSQSSGLAAKFVIHCHIPQWGSDKCEEQLEETIKNCLSA +AEDKKLKSVAFPPFPSGRNCFPKQTAAQVTLKAISAHFDDSSASSLKNVYFLLFDSESIG +IYVQEMAKLDAK diff --git a/CURATED_SET/draft_seeds/macroH2A.2_(Mammalia).fasta b/CURATED_SET/draft_seeds/macroH2A.2_(Mammalia).fasta new file mode 100644 index 0000000..d802eb5 --- /dev/null +++ b/CURATED_SET/draft_seeds/macroH2A.2_(Mammalia).fasta @@ -0,0 +1,8 @@ +>Homo|NP_061119.1|macroH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +MSGRSGKKKMSKLSRSARAGVIFPVGRLMRYLKKGTFKYRISVGAPVYMAAVIEYLAAEI +LELAGNAARDNKKARIAPRHILLAVANDEELNQLLKGVTIASGGVLPRIHPELLAKKRGT +KGKSETILSPPPEKRGRKATSGKKGGKKSKAAKPRTSKKSKPKDSDKEGTSNSTSEDGPG +DGFTILSSKSLVLGQKLSLTQSDISHIGSMRVEGIVHPTTAEIDLKEDIGKALEKAGGKE +FLETVKELRKSQGPLEVAEAAVSQSSGLAAKFVIHCHIPQWGSDKCEEQLEETIKNCLSA +AEDKKLKSVAFPPFPSGRNCFPKQTAAQVTLKAISAHFDDSSASSLKNVYFLLFDSESIG +IYVQEMAKLDAK diff --git a/CURATED_SET/draft_seeds/macroH2A.2_(Mammalia)_only.fasta b/CURATED_SET/draft_seeds/macroH2A.2_(Mammalia)_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/macroH2A.fasta b/CURATED_SET/draft_seeds/macroH2A.fasta old mode 100755 new mode 100644 index b59b15f..89e28b4 --- 
a/CURATED_SET/draft_seeds/macroH2A.fasta +++ b/CURATED_SET/draft_seeds/macroH2A.fasta @@ -1,4 +1,4 @@ ->macroH2A_Ixodes_scapularis_XP_002403551.1 XP_002403551.1 histone: H2A variant: macroH2A organism: Ixodes scapularis +>Ixodes|XP_002403551.1|macroH2A organism=Ixodes scapularis phylum=Arthropoda class=Arachnida MSARGGK--KRAKVVSKSTKAGVLFPVGRMRRYLKKGTHHFRIGAGAPVYMAAVIEYLSG EAISCQDSSPRDPLLSALTKRKCCSGRPN-LFFLQLLKGVTIASGGVLPRILPELLARRK GGRFKTVALAKKPVAAAIAKEKAVPPKEKAKLAKGKVCRKSSHCHVPLGDRGIILSLWPT @@ -6,90 +6,82 @@ APQGEHFTLVKHKTGLSLSVQLTVIQGDMASV---TADAAIHPTNASLSLSGEVGQVLEK AGGKEFVQEVKDLFSAHGPLESAGAVICPGHQFPAKFVIHCNVPSGSSEP------LEKC VRNCLALADEKNIRVLAVPPLATHSVASQKQQAAQTILKAISNYFVNVMSSSLKQIYFVL SDMESIGIYTSELAKLDS-- ->macroH2A_Trichoplax_adhaerens_XP_002111582.1 XP_002111582.1 histone: H2A variant: macroH2A organism: Trichoplax adhaerens -MSGRGGK--ARKKPSSRSARAGLQFPVGRMHRRLKSSTHHLRIGSGAPVYLAACIEYLTA -EILELAGNAARDNKKLRIIPRHIQLAIGNDEELHKLLSDVTIASGGVLPHVHTELLSKKA -KGGGASVAAAAAP--KKSKVRVSRVGKST--------PAKSNFSKKSG--------SSTK -AFKNSEVTILSEKQ-LFLGQKLIVTKGDITKI---STDGIVHPTSSNFSHAGMIGGALSS -AGGKQYMDGVAKVEQETGSLPVAGVTGSPAANLSAQEVIHVHSPSWGSTD--CQGNLEKA -VRNILDYADKKGMKSVAIPSIGSGSNNFPKLTAAQIILRSIAKYFVGVMSSSLKEVYFVL -WDEESINIYTSELNKLDVSG ->macroH2A_Nematostella_vectensis_XP_001637578.1 XP_001637578.1 histone: H2A variant: macroH2A organism: Nematostella vectensis -MSARGGKAAKRAKAVSRSAKAGLQFPVSRVHRYLRKCTHHYRISAAAPVYQAAVMEYLTA -EILELAGNAARDNKKTRIIPRHILLAVANDEELHKLLKGVTIASGGVLPNIHPELLKKRK -GGK------LVSP--EELKSKKPKPAPPPS-------PKKPVSSKKGR--------GKAD -KGPGDGFSVLSEKT-LFLGQKLTVVQGDIAAI---DADAVVLPTNAKFKLEGEVGEALKK -AGGKEFKDEIKKLSEDNGDLALLDAAICDGHNFPAAYVISLHSPVYSEDSTTASDDLEKA -VKNVLTIADEKNLKILAIPSIGTGSNKYPKDLAAQVTLKAISNYFVSAMASSLKQIYFVL -SDPENIGMYTMELARLDS-- ->macroH2A_Gallus_gallus_NP_990338.1 NP_990338.1 histone: H2A variant: macroH2A organism: Gallus gallus +>Danio|NP_001020673.1|macroH2A organism=Danio rerio phylum=Chordata class=Actinopteri 
+MSARGGK--KKITKLSRSARAGVIFPVGRMMRYLRTGTHKYRIGMGAPVYMAAVIEYLAA +EILELAGNAARDNKKGRITPRHIKLAVANDEELNQLLRGVTISNGGVLPRIHPELLSKKR +GGKVKVESQVAVPEKSAKRKPV----KKPYKKSKGKPGRKPKKSTENDKEADANA---ME +DGPGEGFTILSAKS-LFLGQKLSLTESEISKIGTIKVEGIINPTNAEIDLKEGIGNALEK +TGGKDFLETVKELRKSQGPLEVASVAVSQANGMAARFIIHCHVPQWGSDK--CEDQLEKT +VKNCLSAAEEKKLKSVAFPSLPAGRNGFPKQTAAQLILKAISNHFVSATTSSLKNIYFVL +FDSESIGIYLQEMAKMDAK- +>Gallus|AAC28846.1|macroH2A organism=Gallus gallus phylum=Chordata class=Aves MSSRGGK--KKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTA EILELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKR -GSKGKLEAIITPP--PAKKAKSPSQKKTVSKKTGGKKGARKSKKKQGEVSKSASADSTTE -GTPADGFTVLSTKS-LFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGSTLEK +GSKGKLEAIITPPPAKKAKSPS--QKKTVSKKTGGKKGARKSKKKQGEVSKSASADSTTE +GTPADGFTVLSTKS-LFLGQKLQVVQADIATI---DSDAVVHPTNSDFYTGGEVGSTLEK KGGKEFVEAVIELRKKNGPLDIAGAVVSAGHGLPAKFVIHCNSPGWGSDK--CEELLEKT VKNCLALADEKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVL FDSESIGIYVQEMAKLDAN- ->macroH2A_Homo_sapiens_NP_001035248.1 NP_001035248.1 histone: H2A variant: macroH2A organism: Homo sapiens +>Gallus|NP_001264267.1|macroH2A organism=Gallus gallus phylum=Chordata class=Aves +MSGRSGK--KKMSKLSRSSRAGVIFPVGRMMRYLKKGTYKYRIGVGAPVYMAAVIEYLAA +EILELAGNAARDNKKGRIAPRHILLAVANDEELNQLLKGVTIASGGVLPRIQPELLAKKR +GAKGKSETILSPAPEKKGRKSM--VSKKSGKKAKSNKARTPKKNKQKDSEKEGASNSTSE +DGPGDGFTILSSKS-LVPGQKLSLTQSDISHIGSMKVEGIVHPTTAEIDLKEEIGKALEK +AGGKEFLETVKELRKSQGPLEVAEAALTQSSGLAAKFVIHCHIPQWGSDK--CEEQLEET +IKNCLTAAEDKKLKSVAFPPFPSGRNCFPKQTAAQVTLRAISTHFDGTSSSSLKNIYFLL +FDSESIGIYVQEMAKLDTK- +>Gallus|NP_990338.1|macroH2A organism=Gallus gallus phylum=Chordata class=Aves MSSRGGK--KKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTA EILELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKR -GSKGKLEAIITPP--PAKKAKSPSQKKPVSKKAGGKKGARKSK-KQGEVSKAASADSTTE -GTPADGFTVLSTKS-LFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGNTLEK -KGGKEFVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADK--CEELLEKT 
-VKNCLALADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVL +GSKGKLEAIITPPPAKKAKSPS--QKKTVSKKTGGKKGARKSKKKQGEVSKSASADSTTE +GTPADGFTVLSTKS-LFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGSTLEK +KGGKEFVEAVIELRKKNGPLDIAGAVVSAGHGLPAKFVIHCNSPGWGSDK--CEELLEKT +VKNCLALADEKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVL FDSESIGIYVQEMAKLDAN- ->macroH2A_Homo_sapiens_NP_613258.2 NP_613258.2 histone: H2A variant: macroH2A organism: Homo sapiens +>Nematostella|XP_001637578.1|macroH2A organism=Nematostella vectensis phylum=Cnidaria class=Anthozoa +MSARGGKAAKRAKAVSRSAKAGLQFPVSRVHRYLRKCTHHYRISAAAPVYQAAVMEYLTA +EILELAGNAARDNKKTRIIPRHILLAVANDEELHKLLKGVTIASGGVLPNIHPELLKKRK +GGK------LVSPEELKSKKPKPAPPPSPKKPVSSKKGRGKADK---------------- +-GPGDGFSVLSEKT-LFLGQKLTVVQGDIAAI---DADAVVLPTNAKFKLEGEVGEALKK +AGGKEFKDEIKKLSEDNGDLALLDAAICDGHNFPAAYVISLHSPVYSEDSTTASDDLEKA +VKNVLTIADEKNLKILAIPSIGTGSNKYPKDLAAQVTLKAISNYFVSAMASSLKQIYFVL +SDPENIGMYTMELARLDS-- +>Trichoplax|XP_002111582.1|macroH2A organism=Trichoplax adhaerens phylum=Placozoa class=Uniplacotomia +MSGRGGK--ARKKPSSRSARAGLQFPVGRMHRRLKSSTHHLRIGSGAPVYLAACIEYLTA +EILELAGNAARDNKKLRIIPRHIQLAIGNDEELHKLLSDVTIASGGVLPHVHTELLSKKA +KGGGASVAAAAAPKKSKVRVSR-VGKSTPAKSNFSKKSGSSTKAFKN------------- +----SEVTILSEKQ-LFLGQKLIVTKGDITKI---STDGIVHPTSSNFSHAGMIGGALSS +AGGKQYMDGVAKVEQETGSLPVAGVTGSPAANLSAQEVIHVHSPSWGSTD--CQGNLEKA +VRNILDYADKKGMKSVAIPSIGSGSNNFPKLTAAQIILRSIAKYFVGVMSSSLKEVYFVL +WDEESINIYTSELNKLDVSG +>Homo|NP_613075.1|macroH2A.1.s1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSSRGGK--KKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTA EILELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKR -GSKGKLEAIITPP--PAKKAKSPSQKKPVSKKAGGKKGARKSKKKQGEVSKAASADSTTE -GTPADGFTVLSTKS-LFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGNTLEK +GSKGKLEAIITPPPAKKAKSPS--QKKPVSKKAGGKKGARKSKKKQGEVSKAASADSTTE +GTPADGFTVLSTKS-LFLGQKLQVVQADIASI---DSDAVVHPTNTDFYIGGEVGNTLEK KGGKEFVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADK--CEELLEKT 
VKNCLALADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVL FDSESIGIYVQEMAKLDAN- ->macroH2A_Gallus_gallus_AAC28846.1 AAC28846.1 histone: H2A variant: macroH2A organism: Gallus gallus -MSSRGGK--KKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTA -EILELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKR -GSKGKLEAIITPP--PAKKAKSPSQKKTVSKKTGGKKGARKSKKKQGEVSKSASADSTTE -GTPADGFTVLSTKS-LFLGQKLQVVQADIATI---DSDAVVHPTNSDFYTGGEVGSTLEK -KGGKEFVEAVIELRKKNGPLDIAGAVVSAGHGLPAKFVIHCNSPGWGSDK--CEELLEKT -VKNCLALADEKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVL -FDSESIGIYVQEMAKLDAN- ->macroH2A_Homo_sapiens_NP_613075.1 NP_613075.1 histone: H2A variant: macroH2A organism: Homo sapiens +>Homo|NP_001035248.1|macroH2A.1.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSSRGGK--KKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTA EILELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKR -GSKGKLEAIITPP--PAKKAKSPSQKKPVSKKAGGKKGARKSKKKQGEVSKAASADSTTE -GTPADGFTVLSTKS-LFLGQKLQVVQADIASI---DSDAVVHPTNTDFYIGGEVGNTLEK +GSKGKLEAIITPPPAKKAKSPS--QKKPVSKKAGGKKGARKS-KKQGEVSKAASADSTTE +GTPADGFTVLSTKS-LFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGNTLEK KGGKEFVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADK--CEELLEKT VKNCLALADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVL FDSESIGIYVQEMAKLDAN- ->macroH2A_Homo_sapiens_XP_005272189.1 XP_005272189.1 histone: H2A variant: macroH2A organism: Homo sapiens +>Homo|NP_613258.2|macroH2A.1.s2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSSRGGK--KKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTA EILELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKR -GSKGKLEAIITPP--PAKKAKSPSQKKPVSKKAGGKKGARKSK-KQGEVSKAASADSTTE -GTPADGFTVLSTKS-LFLGQKLQVVQADIASI---DSDAVVHPTNTDFYIGGEVGNTLEK +GSKGKLEAIITPPPAKKAKSPS--QKKPVSKKAGGKKGARKSKKKQGEVSKAASADSTTE +GTPADGFTVLSTKS-LFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGNTLEK KGGKEFVEAVLELRKKNGPLEVAGAAVSAGHGLPAKFVIHCNSPVWGADK--CEELLEKT VKNCLALADDKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVL 
FDSESIGIYVQEMAKLDAN- ->macroH2A_Danio_rerio_NP_001020673.1 NP_001020673.1 histone: H2A variant: macroH2A organism: Danio rerio -MSARGGK--KKITKLSRSARAGVIFPVGRMMRYLRTGTHKYRIGMGAPVYMAAVIEYLAA -EILELAGNAARDNKKGRITPRHIKLAVANDEELNQLLRGVTISNGGVLPRIHPELLSKKR -GGKVKVESQVAVP--EKSAKRKPV--KKPYKKSKGKPGRKPKKSTENDKEADA---NAME -DGPGEGFTILSAKS-LFLGQKLSLTESEISKIGTIKVEGIINPTNAEIDLKEGIGNALEK -TGGKDFLETVKELRKSQGPLEVASVAVSQANGMAARFIIHCHVPQWGSDK--CEDQLEKT -VKNCLSAAEEKKLKSVAFPSLPAGRNGFPKQTAAQLILKAISNHFVSATTSSLKNIYFVL -FDSESIGIYLQEMAKMDAK- ->macroH2A_Gallus_gallus_NP_001264267.1 NP_001264267.1 histone: H2A variant: macroH2A organism: Gallus gallus -MSGRSGK--KKMSKLSRSSRAGVIFPVGRMMRYLKKGTYKYRIGVGAPVYMAAVIEYLAA -EILELAGNAARDNKKGRIAPRHILLAVANDEELNQLLKGVTIASGGVLPRIQPELLAKKR -GAKGKSETILSPA--PEKKGRKSMVSKKSGKKAKSNKARTPKKNKQKDSEKEGASNSTSE -DGPGDGFTILSSKS-LVPGQKLSLTQSDISHIGSMKVEGIVHPTTAEIDLKEEIGKALEK -AGGKEFLETVKELRKSQGPLEVAEAALTQSSGLAAKFVIHCHIPQWGSDK--CEEQLEET -IKNCLTAAEDKKLKSVAFPPFPSGRNCFPKQTAAQVTLRAISTHFDGTSSSSLKNIYFLL -FDSESIGIYVQEMAKLDTK- ->macroH2A_Homo_sapiens_NP_061119.1 NP_061119.1 histone: H2A variant: macroH2A organism: Homo sapiens +>Homo|NP_061119.1|macroH2A.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia MSGRSGK--KKMSKLSRSARAGVIFPVGRLMRYLKKGTFKYRISVGAPVYMAAVIEYLAA EILELAGNAARDNKKARIAPRHILLAVANDEELNQLLKGVTIASGGVLPRIHPELLAKKR -GTKGKSETILSPP--PEKRGRKATSGKKGGKKSKAAKPRTSKKSKPKDSDKEGTSNSTSE +GTKGKSETILSPPPEKRGRKAT--SGKKGGKKSKAAKPRTSKKSKPKDSDKEGTSNSTSE DGPGDGFTILSSKS-LVLGQKLSLTQSDISHIGSMRVEGIVHPTTAEIDLKEDIGKALEK AGGKEFLETVKELRKSQGPLEVAEAAVSQSSGLAAKFVIHCHIPQWGSDK--CEEQLEET IKNCLSAAEDKKLKSVAFPPFPSGRNCFPKQTAAQVTLKAISAHFDDSSASSLKNVYFLL diff --git a/CURATED_SET/draft_seeds/macroH2A_only.fasta b/CURATED_SET/draft_seeds/macroH2A_only.fasta new file mode 100644 index 0000000..dd48c63 --- /dev/null +++ b/CURATED_SET/draft_seeds/macroH2A_only.fasta @@ -0,0 +1,56 @@ +>Ixodes|XP_002403551.1|macroH2A organism=Ixodes scapularis phylum=Arthropoda class=Arachnida 
+MSARGGK--KRAKVVSKSTKAGVLFPVGRMRRYLKKGTHHFRIGAGAPVYMAAVIEYLSG +EAISCQDSSPRDPLLSALTKRKCCSGRPN-LFFLQLLKGVTIASGGVLPRILPELLARRK +GGRFKTVALAKKPVAAAIAKEKAVPPKEKAKLAKGKVCRKSSHCHVPLGDRGIILSLWPT +APQGEHFTLVKHKTGLSLSVQLTVIQGDMASV---TADAAIHPTNASLSLSGEVGQVLEK +AGGKEFVQEVKDLFSAHGPLESAGAVICPGHQFPAKFVIHCNVPSGSSEP------LEKC +VRNCLALADEKNIRVLAVPPLATHSVASQKQQAAQTILKAISNYFVNVMSSSLKQIYFVL +SDMESIGIYTSELAKLDS-- +>Danio|NP_001020673.1|macroH2A organism=Danio rerio phylum=Chordata class=Actinopteri +MSARGGK--KKITKLSRSARAGVIFPVGRMMRYLRTGTHKYRIGMGAPVYMAAVIEYLAA +EILELAGNAARDNKKGRITPRHIKLAVANDEELNQLLRGVTISNGGVLPRIHPELLSKKR +GGKVKVESQVAVPEKSAKRKPV----KKPYKKSKGKPGRKPKKSTENDKEADANA---ME +DGPGEGFTILSAKS-LFLGQKLSLTESEISKIGTIKVEGIINPTNAEIDLKEGIGNALEK +TGGKDFLETVKELRKSQGPLEVASVAVSQANGMAARFIIHCHVPQWGSDK--CEDQLEKT +VKNCLSAAEEKKLKSVAFPSLPAGRNGFPKQTAAQLILKAISNHFVSATTSSLKNIYFVL +FDSESIGIYLQEMAKMDAK- +>Gallus|AAC28846.1|macroH2A organism=Gallus gallus phylum=Chordata class=Aves +MSSRGGK--KKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTA +EILELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKR +GSKGKLEAIITPPPAKKAKSPS--QKKTVSKKTGGKKGARKSKKKQGEVSKSASADSTTE +GTPADGFTVLSTKS-LFLGQKLQVVQADIATI---DSDAVVHPTNSDFYTGGEVGSTLEK +KGGKEFVEAVIELRKKNGPLDIAGAVVSAGHGLPAKFVIHCNSPGWGSDK--CEELLEKT +VKNCLALADEKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVL +FDSESIGIYVQEMAKLDAN- +>Gallus|NP_001264267.1|macroH2A organism=Gallus gallus phylum=Chordata class=Aves +MSGRSGK--KKMSKLSRSSRAGVIFPVGRMMRYLKKGTYKYRIGVGAPVYMAAVIEYLAA +EILELAGNAARDNKKGRIAPRHILLAVANDEELNQLLKGVTIASGGVLPRIQPELLAKKR +GAKGKSETILSPAPEKKGRKSM--VSKKSGKKAKSNKARTPKKNKQKDSEKEGASNSTSE +DGPGDGFTILSSKS-LVPGQKLSLTQSDISHIGSMKVEGIVHPTTAEIDLKEEIGKALEK +AGGKEFLETVKELRKSQGPLEVAEAALTQSSGLAAKFVIHCHIPQWGSDK--CEEQLEET +IKNCLTAAEDKKLKSVAFPPFPSGRNCFPKQTAAQVTLRAISTHFDGTSSSSLKNIYFLL +FDSESIGIYVQEMAKLDTK- +>Gallus|NP_990338.1|macroH2A organism=Gallus gallus phylum=Chordata class=Aves +MSSRGGK--KKSTKTSRSAKAGVIFPVGRMLRYIKKGHPKYRIGVGAPVYMAAVLEYLTA 
+EILELAGNAARDNKKGRVTPRHILLAVANDEELNQLLKGVTIASGGVLPNIHPELLAKKR +GSKGKLEAIITPPPAKKAKSPS--QKKTVSKKTGGKKGARKSKKKQGEVSKSASADSTTE +GTPADGFTVLSTKS-LFLGQKLNLIHSEISNLAGFEVEAIINPTNADIDLKDDLGSTLEK +KGGKEFVEAVIELRKKNGPLDIAGAVVSAGHGLPAKFVIHCNSPGWGSDK--CEELLEKT +VKNCLALADEKKLKSIAFPSIGSGRNGFPKQTAAQLILKAISSYFVSTMSSSIKTVYFVL +FDSESIGIYVQEMAKLDAN- +>Nematostella|XP_001637578.1|macroH2A organism=Nematostella vectensis phylum=Cnidaria class=Anthozoa +MSARGGKAAKRAKAVSRSAKAGLQFPVSRVHRYLRKCTHHYRISAAAPVYQAAVMEYLTA +EILELAGNAARDNKKTRIIPRHILLAVANDEELHKLLKGVTIASGGVLPNIHPELLKKRK +GGK------LVSPEELKSKKPKPAPPPSPKKPVSSKKGRGKADK---------------- +-GPGDGFSVLSEKT-LFLGQKLTVVQGDIAAI---DADAVVLPTNAKFKLEGEVGEALKK +AGGKEFKDEIKKLSEDNGDLALLDAAICDGHNFPAAYVISLHSPVYSEDSTTASDDLEKA +VKNVLTIADEKNLKILAIPSIGTGSNKYPKDLAAQVTLKAISNYFVSAMASSLKQIYFVL +SDPENIGMYTMELARLDS-- +>Trichoplax|XP_002111582.1|macroH2A organism=Trichoplax adhaerens phylum=Placozoa class=Uniplacotomia +MSGRGGK--ARKKPSSRSARAGLQFPVGRMHRRLKSSTHHLRIGSGAPVYLAACIEYLTA +EILELAGNAARDNKKLRIIPRHIQLAIGNDEELHKLLSDVTIASGGVLPHVHTELLSKKA +KGGGASVAAAAAPKKSKVRVSR-VGKSTPAKSNFSKKSGSSTKAFKN------------- +----SEVTILSEKQ-LFLGQKLIVTKGDITKI---STDGIVHPTSSNFSHAGMIGGALSS +AGGKQYMDGVAKVEQETGSLPVAGVTGSPAANLSAQEVIHVHSPSWGSTD--CQGNLEKA +VRNILDYADKKGMKSVAIPSIGSGSNNFPKLTAAQIILRSIAKYFVGVMSSSLKEVYFVL +WDEESINIYTSELNKLDVSG diff --git a/CURATED_SET/draft_seeds/scH1.fasta b/CURATED_SET/draft_seeds/scH1.fasta new file mode 100644 index 0000000..0025098 --- /dev/null +++ b/CURATED_SET/draft_seeds/scH1.fasta @@ -0,0 +1,12 @@ +>Saccharomyces|XP_011105792.1|scH1 organism=Saccharomyces arboricola H-6 phylum=Ascomycota class=Saccharomycetes +MAPKKTSTKTTTTNKGKKPVTSKGKDKPVIKTAVKKNAAKKEEPSSKSYKELIVEGLAAL +KERKGSSRPALKKFIKENYPLVGSTSNFDLYFNNAIKKGVETGDFEQPKGPAGTLKLAKK +KSPELK--KETSPKPKQAAAATTTTTTTTPTSLKAKAKTASKKQAPKKVVKKKVPAVAVI +PKKTSSPSALTYKEMILKSMPELNDGKGSSRIVLKKYVKDTFSSKLKTSSNFDYLFNSAI +KKCVENGELVQPKGPSGIIKINKKKAKLST +>Saccharomyces|NP_015198.1|scH1 organism=Saccharomyces cerevisiae S288C 
phylum=Ascomycota class=Saccharomycetes +MAPKKSTTKTTS--KGKKPATSKGKEKSTSKAAIKKTTAKKEEASSKSYRELIIEGLTAL +KERKGSSRPALKKFIKENYPIVGSASNFDLYFNNAIKKGVEAGDFEQPKGPAGAVKLAKK +KSPEVKKEKEVSPKPKQAATSVSATAS--------KAKAASTKLAPKKVVKKKSPTVTA- +-KKASSPSSLTYKEMILKSMPQLNDGKGSSRIVLKKYVKDTFSSKLKTSSNFDYLFNSAI +KKCVENGELVQPKGPSGIIKLNKKKVKLST diff --git a/CURATED_SET/draft_seeds/short_H2A.fasta b/CURATED_SET/draft_seeds/short_H2A.fasta new file mode 100644 index 0000000..2564875 --- /dev/null +++ b/CURATED_SET/draft_seeds/short_H2A.fasta @@ -0,0 +1,525 @@ +>Ailuropoda|XP_011215272.1|H2A.B organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPGDR--SRRGSSSG--QR-----------RTRSRTARAELSFSVSHVERLLR +EGHYAQRLGSSAPVFLAAVIQYLTAKVLELA--GNEAQNSG-GRRITPQLVDMAVHNHAL +LSGFF--GTTTI-----------------SQ---------VAPAWN-------------- +------------------------------------------------------------ +---------- +>Bos|NP_001069373.1|H2A.B organism=Bos taurus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPKKR--GHQRSSGI-----------------RSRTAQSELSFSVSHMEHLLR +KGHYAQRLSSSAPVFLAAVIQDLTSKVLELA--GNEAQKNG-EKRITPKLVDMAIHNNAL +LSSIF--GMTTI-----------------SL---------VAPGPH-------------- +------------------------------------------------------------ +---------- +>Callithrix|XP_002763866.2|H2A.B organism=Callithrix jacchus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSERR--SRRGSSAAGRRG-----------HTRSRTARAELIFSVSKMERGLW +EGHYAQRLSDNAPVYLAAVIQYLTAKILELA--AKGADNRG-ERIITPRLLDMAVHNDGL +LSTLF--HAITI-----------------SQ---------VGPGPN-------------- +------------------------------------------------------------ +---------- +>Cricetulus|XP_003514308.1|H2A.B organism=Cricetulus griseus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRTRQSSRRGSSSR-----------------RSRTDRAELTFSVSLVEHHLR 
+ESGHARRLSETVPILVTAILEFLTRRLLELA--SNEAQRLGAQRLITPEILDLTIYNNAL +LSEMF--QFTTI-----------------SQ---------TAPAGPRRRRRQI------- +------------------------------------------------------------ +---------- +>Cricetulus|XP_003515491.1|H2A.B organism=Cricetulus griseus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRTRQSSLRGSSSR-----------------RSRTDRAELTFSVSLVEHHLR +ESGHAPRLSETVPILLTAILEFLTRRLLELA--SNEAQRLGAQRLITPEILDLTVYNNTL +LSQLL--QFTTI-----------------SQ---------TAPAGRRRRRRQT------- +------------------------------------------------------------ +---------- +>Heterocephalus|EHB05905.1|H2A.B organism=Heterocephalus glaber phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRQA----------LALT-----------NERPPQGRAEPIFSVSQVERALC +DGRYAQRLSCSASVFLAATLQFLSATVLELA--DREARYRS-RRRITRELLDVATLKDAL +LCTLL--GTTTI-----------------SR---------VAPARP-------------- +------------------------------------------------------------ +---------- +>Loxodonta|XP_003421752.1|H2A.B organism=Loxodonta africana phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGKR--SRRGGGGGGGGGGGGGGGGGSSRRQRRTRSRTELIFSASHVAHLLR +EGHYAQRLSSSAPVFLAAILKCLTAKILELA--GNEAQNSG-RRLVTPELVDMAVHNNAL +LSGFF--LTTTI-----------------SQ---------VAPAR--------------- +------------------------------------------------------------ +---------- +>Macaca|NP_001180843.1|H2A.B organism=Macaca mulatta phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSERR--SHRRSSRAGGRG-----------RTRSRTVRAELSFSVSQVERGLR +EGHYAQRLSPTAPVYLAAVIEYLTAKVLELA--GNEAQNNG-ERNITPLLLDMAVHNNRL +LSTLF--DTTTI-----------------SQ---------VAPGGD-------------- +------------------------------------------------------------ +---------- +>Nomascus|XP_003282204.1|H2A.B organism=Nomascus leucogenys phylum=Chordata class=Mammalia 
+------------------------------------------------------------ +-------MPRRR--SHRGSSGAGGRG-----------RTCSRTVRAELSFSVSQVERGLR +EGHYAQRLSRTAPVYLAAVIEYLTAKVLELA--GNEAQNNG-ERNITPLLLDMVVHNNRL +LSTLF--HTTTI-----------------SR---------VAPGGD-------------- +------------------------------------------------------------ +---------- +>Pan|XP_001145032.1|H2A.B organism=Pan troglodytes phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRRR--RHRGSSGAGGRG-----------RTCSRTVRAELSFSVSQVERSLR +EGQYAQRLSRTAPVYLAAVIEYLTAKVLELA--GNEAQNSG-ARNITPLLLDMVVHNDRL +LSTLF--NTTTI-----------------SQ---------VAPGED-------------- +------------------------------------------------------------ +---------- +>Sus|XP_003135571.1|H2A.B organism=Sus scrofa phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPGKR--SRRKSPGR--QG-----------RTCARTTRAGLSASVSHMERLLR +EGPYAQCLSSSARVFLAATIEYLTARVLELA--GDEAQIVG-RRCITPELVAMAVHNNAL +LSAFF--GTLAI-----------------SQ---------VAPTQE-------------- +------------------------------------------------------------ +---------- +>Homo|NP_001017990.1|H2A.B.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRRR--RRRGSSGAGGRG-----------RTCSRTVRAELSFSVSQVERSLR +EGHYAQRLSRTAPVYLAAVIEYLTAKVPELA--GNEAQNSG-ERNITPLLLDMVVHNDRL +LSTLF--NTTTI-----------------SQ---------VAPGED-------------- +------------------------------------------------------------ +---------- +>Homo|NP_001017991.1|H2A.B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRRR--RRRGSSGAGGRG-----------RTCSRTVRAELSFSVSQVERSLR +EGHYAQRLSRTAPVYLAAVIEYLTAKVLELA--GNEAQNSG-ERNITPLLLDMVVHNDRL +LSTLF--NTTTI-----------------SQ---------VAPGED-------------- +------------------------------------------------------------ +---------- 
+>Homo|NP_542451.1|H2A.B.2_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRRR--RRRGSSGAGGRG-----------RTCSRTVRAELSFSVSQVERSLR +EGHYAQRLSRTAPVYLAAVIEYLTAKVLELA--GNEAQNSG-ERNITPLLLDMVVHNDRL +LSTLF--NTTTI-----------------SQ---------VAPGED-------------- +------------------------------------------------------------ +---------- +>Mus|NP_001268460.1|H2A.B.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRNRENCLRESSGRR--------------HRRSRTSRAELIFAVSLVEQHLR +EVSRARRLSDTVPIFLAAILESLTRRLLELA--GNEAQRRGTERRITPELLDLAVYSNME +LSDVF--QFITI-----------------SQ---------VAPAHR-------------- +------------------------------------------------------------ +---------- +>Mus|NP_001268459.1|H2A.B.2_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRNTENCLQRSSGHR--------------QHHSRTSRGELIFAVSLVEQHLR +EVSRARRLSDMVPVSLVAILEFLTSRLLELA--GNEAQRRGTQRLITPQPLDLEVYSSME +LSDVF--QFITI-----------------SQ---------VAPAHR-------------- +------------------------------------------------------------ +---------- +>Mus|NP_001096135.1|H2A.B.3_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MPRNRENCLRESSGRR--------------HRRSRTSRAELIFAVSLVEQHLR +EISRAWRLSDMVPIFLAAILESLTRRLLELA--GNEAQRRGTERRITPELLDLAVYSNME +LSDVF--QFITI-----------------SQ---------VAPAHR-------------- +------------------------------------------------------------ +---------- +>Ailuropoda|XP_002927211.2|H2A.L organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGRRSPWHSHQHKR---------------HGLSRSMRAELQFPVSRVDRLLR +EGCYAQRLSSSTPVFLTGVLEYLTANILELA--GQEARNHH-KMRITPEHVQRALVNNQH 
+LSCLF--EDITS-----------------PP---------AKGTPQLRKC---------- +------------------------------------------------------------ +---------- +>Bos|NP_001071426.1|H2A.L organism=Bos taurus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSPGRHLWNCRRSRR---------------RSLSRSTRAELQFPVSRVDRLLR +EGQFANRLSSATPVFLTGILEYLIANILDLA--GKEACTNH-RVRISPEHVQTALVNNEN +LRCLF--QPGAF-----------------SQ---------PAASPPAPEN---------- +------------------------------------------------------------ +---------- +>Bos|XP_875023.2|H2A.L organism=Bos taurus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSPRRHLWNCRRSRR---------------HSLSRSTRAELQFPVSRVDRLLR +EGQGAYRLSSATPVFLTAVLEYLIANILDLA--GKEACTNH-RVRISPEHVQTALINNEN +LRRLF--QPGAF-----------------SQ---------PTASPHLPEN---------- +------------------------------------------------------------ +---------- +>Canis|XP_548938.1|H2A.L organism=Canis lupus familiaris phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGERGPGHSRRPRR---------------HGLSRSRRAELQFPVSRVDRLLR +EGHYAHRLSSSTPVFLAGILEYLTSNILELA--GQEARNSH-KMRITPEHLQKALGNNQY +LSQLF--EENTY-----------------SQ---------GDGMVQARKWSGPGTGADSR +I----------------------------------------------------------- +---------- +>Canis|XP_548947.1|H2A.L organism=Canis lupus familiaris phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGERGPGHSRRPRR---------------HGLSRSRRAELQFPVSRVDRLLR +EGCYAHRLSSSTPVFLTGILEYLTSNILELV--GQEACNSH-KMRITPEHMQKALGNNQY +LSQLF--EENTY-----------------SQ---------GDGMVQARKWSGPGTGADSR +I----------------------------------------------------------- +---------- +>Cavia|XP_003469395.1|H2A.L organism=Cavia porcellus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAAKKCKGISSKPRK---------------HPVSRSTRAQLQFPVSRVERYLR 
+ENGYL-RLSACTPVFLAGILEYLTASALHLA--ARVAHRRH-KKRISPEHLARALEKSEQ +LRQVF--GDSTK-----------------AL---------LDEIIQAKKK---------- +------------------------------------------------------------ +---------- +>Cavia|XP_003469399.1|H2A.L organism=Cavia porcellus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MDGKKSNEKSSQLRK---------------HPVSCSRRAELQLPVSRMERYLR +ENSYAPHLPFSTPVFLEGVLEYLTASILDLA--RKEARGKR-KKHILPQHLETAAENNQQ +LGLRF--GDSRK-----------------SM---------LDEMTQNKKK---------- +------------------------------------------------------------ +---------- +>Cricetulus|XP_003508207.1|H2A.L organism=Cricetulus griseus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MEEKK--------RK---------------ETISRITRGQLQFSLDRIERFFR +DGNFSQRLSASAPVFLAGVLEFLTSNILDLA--GREAHANG-TRLITPEHVTQVVQNNDQ +LREVF--KEHED-----------------PV---------VSETPEPEKN---------- +------------------------------------------------------------ +---------- +>Heterocephalus|EHB04253.1|H2A.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGKKRRQNSSKPRK---------------QAVSRSTRAELQFPVSRVEHYLR +EGGYAHRLASSTPVFLAGVLEYLRANILDLA--GKEAQGKR-KKCITPQHLETAMENNQH +LRPLF--QDDPK-----------------SL---------LDETSQPNPRRSDEAWVPEH +GKTFSTSSSSPQNTHR-------------------------------------------- +---------- +>Heterocephalus|EHB10563.1|H2A.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGKKRRQNSSKPRK---------------QAVSRSTRAELQFPVSRVEHYLR +EGGYAHRLGSSTPVFLAGVLKYLRANILDLA--GKEAQGKR-KKCIAPQHLETAMENNQH +LRPLF--QDGPK-----------------SL---------LDETSQPKKK---------- +------------------------------------------------------------ +---------- +>Heterocephalus|EHB17227.1|H2A.L organism=Heterocephalus glaber phylum=Chordata class=Mammalia 
+MDIHRQRPNQSPVDTNDITKPRLSQRQLLTSEKHRETSGWAQALLFGEAQARREIKLGQL +KPNTASIMAGKKRRQNSSKPRK---------------QAVSRSTRAELQFPVSRVERYLR +EGGYAQRLGSSTPVFLAGVLEYLTANILDLA--GKEAEGNC-KKRITPQHLETVMENNQQ +LRALF--QGDTK-----------------SL---------LDETSQLKKKC--------- +------------------------------------------------------------ +---------- +>Oryctolagus|XP_002719866.1|H2A.L organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MRGKKPSKKAGKRRK---------------PNVSRSTRAELQFPVSRVDRHLH +HDRYAQRLSSSTPVFLAGVLEYLTSNILELA--GEEAHKNS-RIRITPEHMRKAIESSEH +LRDLL--EEDPK-----------------PR---------DEDVAQPEEKE--------- +------------------------------------------------------------ +---------- +>Oryctolagus|XP_002720052.1|H2A.L organism=Oryctolagus cuniculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MRGKKPSKKAGKRRK---------------PNVSRSTRAELQFPVSRVDRHLH +HDRYAQRLSSSTPVFLAGVLEYLTSNILELA--GEEAHKNS-RVRITPEHMRKAIESSEH +LRDLL--EEDPK-----------------PR---------DEDVAQPEEKE--------- +------------------------------------------------------------ +---------- +>Sus|XP_003360303.2|H2A.L organism=Sus scrofa phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSRKRNLPQCNRRKK---------------HALSRSSRAELQFPVSRVDRYLR +EGRYAQRLSSQAPVFLAGVLEYLTANILELA--ASEARSNN-KMRIAPEHVQRAASHNQT +LSSLF--QASSV-----------------SRGAEE-----GAEEPLPEAGR--------- +------------------------------------------------------------ +---------- +>Sus|XP_003484147.1|H2A.L organism=Sus scrofa phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSRKRNLPQCSHRKK---------------HALSCSSRAELQFPMSSLDCVLP +EGQYAQRLSSYTPVFLAGVLEHLMAHILELA--AREARSSR-KVRITPEHVQRALNNNET +LSRLF--QASSV-----------------SRGAEEGAEEPGAEEPLPEAGQ--------- +------------------------------------------------------------ +---------- 
+>Homo|HISTDB_H2A_L_0|H2A.L.1_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGNKHSRSSCKPRR---------------QCLSRSRRAELQFPVSHMERCLR +EGQYARHLSSTTPVFLAAVLEYLTANILEQA--GKEAQNSH-RVCITPEHLKRALQKNEQ +LRWILEEEDDIH-----------------SQ---------EEEMPQPEEEEEEDERMEEE +EEEKKEEEEKEEEEDERMEEEEEEKKEEEEKKEEKEKEEEKEKKKKKGGFLSFRAVQDFI +SNLFQLLKFP +>Homo|HISTDB_H2A_L_1|H2A.L.3_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAGNK---MFCRPRR---------------QRLSHSRRAELQFPVSHLERCLR +ESQHARHLSSTTPVFLAGVLEYLTANILEKV--GKEVKNSC-RLCITPEHVKRALQKDEQ +LRWILELEDDTH-----------------SQ---------VEEMPQSEEEEE-------- +-----EEEEKEEEMVVLVVMGGRRRR--------------RRRRRRKDS----------- +---------- +>Mus|NP_083864.1|H2A.L.1_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MAKKM-----QRRRR---------------QKRTRSQRGEL--PLSLVDRFLR +EEFHSSRLSSSALSFLTSVLEYLTSNILELA--GEVAHTTG-RKRVTPEDVRLVVQNNEQ +LRQLF--K---------PGG---------TS---------VNE----DDN---------- +------------------------------------------------------------ +---------- +>Mus|NP_080903.1|H2A.L.2_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MARKR-----QRRRR---------------RKVTRSQRAELQFPVSRVDRFLR +EGNYSRRLSSSAPVFLAGVLEYLTSNILELA--GEVAHTTG-RKRIAPEHVCRVVQNNEQ +LHQLF--K---------QGG---------TS---------VFEPPEPDDN---------- +------------------------------------------------------------ +---------- +>Mus|Q9D4U4|H2A.L.3_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------ME--------DKRQK---------------DSVAPSSGAKLQFPVSEAEHLLQ +ERNLSKCLNSSTPVLFTDMLNYVTSSILELTVKDRDSHTSC-NKLIAPEQKSKPTDNIDE 
+LCQLF--KDSQYMADETPGCYKTPRSNKITG---------LYEAPRPGPK---------- +------------------------------------------------------------ +---------- +>Ailuropoda|XP_002913536.1|H2A.P organism=Ailuropoda melanoleuca phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKKSHESSY-------------------QTQAHLITTELQVPVSYVDRLLQ +ENQYNHPLSSSTTDFLLTMLDYLTDYILDVV--GTEA-NNS-NMPTAPQDVERAVDSSGE +PYHRS--KDTAF-----------------TL---------FDEMPGSRRNG--------- +------------------------------------------------------------ +---------- +>Canis|XP_005641297.1|H2A.P organism=Canis lupus familiaris phylum=Chordata class=Mammalia +-----------------------------------------------------------M +EPNPANIMSGNKNHESSN-------------------QTQAHLVTTELQFPVSYVDRLLQ +EDQRTHCLSSTSTEFLLAMLDSLTDYILERV--GTEA-NNN-NMQTAPQDVERAVGSNRE +PQQCL--KDTAF-----------------TL---------FDEMPRSRRNG--------- +------------------------------------------------------------ +---------- +>Cricetulus|XP_003508203.1|H2A.P organism=Cricetulus griseus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKKNQGKSCSDNK---------------KMEDPSSKPEVQIPVNYVYHLLQ +EEQYTPCLGSTTSDFLLAMLDYITDYILEVV--GSEA-NIN-SQQDIPQDRERQGDNDHD +HSHAF--KNAPF-----------------SL---------FDEMPGPRRNG--------- +------------------------------------------------------------ +---------- +>Heterocephalus|EHB10562.1|H2A.P organism=Heterocephalus glaber phylum=Chordata class=Mammalia +------------------------------------------------------------ +------MMCEQKSQYGSYKDNN---------------QQEDPASRPEQQLPVSDIYCILH +EE-YNPYF-SSTSDLLLAMLESLTDYILTLV--GSEG-NNV-GMPTNPQDGEREMDNNHE +HPPII--PDVSF-----------------SF---------SDEMPGSRKKG--------- +------------------------------------------------------------ +---------- +>Macaca|EHH30639.1|H2A.P organism=Macaca mulatta phylum=Chordata class=Mammalia +------------------------------------------------------------ 
+-------MSEKNNRKNSSANNN---------------QIQDR-SRNELRVPMSFVDRVVQ +DEQDAQSQSSSTINILLTLLDCLADYIMEQV--GLEAINNG-RMRNTSQDGEREGDNHHE +PHRTE--SDGTR-----------------FV---------FDEMPKSGKND--------- +------------------------------------------------------------ +---------- +>Pan|XP_003317470.2|H2A.P organism=Pan troglodytes phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSEKKNCKNSSTNNN---------------QTQDP-SRNELQVPMSFVDRVVQ +DEQDVQSQSSSTINTLLTLLDCLADYIMERV--GLEASNNG-SMRNTSQDREREVDNNRE +PHSAE--SDVTR-----------------FL---------FDEMPKSRKND--------- +------------------------------------------------------------ +---------- +>Rattus|NP_001128070.1|H2A.P organism=Rattus norvegicus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKKSQEKACSDNK---------------QTEDPSSRPEVQVPVNYVYRLLQ +EEQYTPCLGSTTSDFLLAMLDYLTDYILEVV--GSEA-NIN-SQQNISQDRERQRENDRE +PPQAF--KNAPF-----------------SL---------FDEMPGPRRNG--------- +------------------------------------------------------------ +---------- +>Sus|XP_003135058.1|H2A.P organism=Sus scrofa phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKQSPEGSY-------------------EAPTHLTTTEPQVPVSFVDHLLQ +EDQYVHTLSSSTTHFLFSVLEYLTDYILDLV--DTKA-NTG-RMQMTPQDVERAVDSNAE +PHRQV--KDTAF-----------------AL---------FDEMPGSRRNG--------- +------------------------------------------------------------ +---------- +>Homo|NP_036406.1|H2A.P_(Homo_sapiens) organism=Homo sapiens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSEKKNCKNSSTNNN---------------QTQDP-SRNELQVPRSFVDRVVQ +DERDVQSQSSSTINTLLTLLDCLADYIMERV--GLEASNNG-SMRNTSQDREREVDNNRE +PHSAE--SDVTR-----------------FL---------FDEMPKSRKND--------- +------------------------------------------------------------ +---------- +>Mus|NP_080372.1|H2A.P_(Mus_musculus) organism=Mus musculus phylum=Chordata class=Mammalia 
+------------------------------------------------------------ +-------MSEKKSQEKPCSDNN---------------QIEDPSSRPEVQVPVNYVYRILQ +EEQYTPCIGSTTSDFLLAMLDYLTDYILEVV--GSEA-NIN-NQQNISQDRERQRDNDRE +PSRGF--KNAPF-----------------SL---------FDEMPGPRRNG--------- +------------------------------------------------------------ +---------- +>Ammotragus|HISTDB_H2A_Q_2|H2A.Q organism=Ammotragus lervia phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRSYHSSDGFRK---------------HILPCTMKTKLQFSKSHVDHHLQ +ENQDAQNQNLTNQIYLSAILKYLSSNALKLV--SNETQSDC-RIH-------KAMDNHPQ +LHHAF--DKDAK-----------------SQ---------VHEM---------------- +------------------------------------------------------------ +---------- +>Bison|HISTDB_H2A_Q_3|H2A.Q organism=Bison bison bison phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRSYHSSDGLRK---------------RTVPCTMKTKLQFSKSHVDHHLQ +GNQDAHNQNLTTQIYLSAILKNVSSNVLKLV--SDETQSNC-KIH-------RAKHNHPQ +LQHVF--DKDAK-----------------SQ---------VHEMF--------------- +------------------------------------------------------------ +---------- +>Bos|HISTDB_H2A_Q_14|H2A.Q organism=Bos mutus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRSYHSSDGLRK---------------RTVPCTMKTKLQFSKSHVDHHLQ +GNQDAHNQNLTTQIYLSAILKNVSSNVLKLV--SDETQSNC-KIH-------RAKHNHPQ +LQHVF--DKDAK-----------------SQ---------VHEMF--------------- +------------------------------------------------------------ +---------- +>Bos|HISTDB_H2A_Q_16|H2A.Q organism=Bos taurus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRSYHSSDGLKK---------------RTVPCTMKTKLQFSKSHVDHHLQ +GNQDAHNQNLTTQIYLSAILKNVSSNVLKLV--SDETQSNC-KIH-------RAKHNHPQ +LQHVF--DKDAK-----------------SQ---------VHEMF--------------- +------------------------------------------------------------ +---------- +>Camelus|XP_010950849.1|H2A.Q organism=Camelus 
bactrianus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRSCQSSCWLRK---------------QTFSCSTKNKPLFPKSHADHLLQ +ENHFAQQLNLPTQVFLSAILKYVTSNVLEVV--GNKTHSNC-RIQ-------KAVDNDLQ +LSHLF--EEDTN-----------------SQ---------ARETF--------------- +------------------------------------------------------------ +---------- +>Camelus|XP_010995375.1|H2A.Q organism=Camelus dromedarius phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRSCQSSCWLRK---------------QTFSCSTKNKPLFPKSHADHLLQ +ENHFAQQLNLPTQVFLSAILKYVTSNVLEVV--GNKTHSNC-RIQ-------KAVDNDLQ +LSHLF--EEDTN-----------------SQ---------ARETF--------------- +------------------------------------------------------------ +---------- +>Canis|XP_013966888.1|H2A.Q organism=Canis lupus familiaris phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKRSSQNSCRLGK---------------QTLSSSTKTKLKFSVSHEDHLLQ +ENHPAQHLRFSSQVCLSAILKYVATNILELV--GNEAHNDC-RVQ-------RAVNNNMQ +SSHLF--EDDTT-----------------SQ---------VSEMF--------------- +------------------------------------------------------------ +---------- +>Ceratotherium|HISTDB_H2A_Q_13|H2A.Q organism=Ceratotherium simum phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSEKRSCQNSSRLKK---------------QTFSCSTKAKLHFPVSHMDRHQQ +ENHSAQQLSLSTPVFLSAIRKYVTNNILELV--GNESHNNR-RIR-------RAVDNAEQ +LSHLF--EDDNQ-----------------FS---------G------------------- +------------------------------------------------------------ +---------- +>Cercocebus|HISTDB_H2A_Q_11|H2A.Q organism=Cercocebus atys phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRRQRRSYRCRN---------------QTFSYSIKAKQQFPLSCVHCLLW +KNHCPRPE-------LVHYFQILGGNILELM--GNKVHKNY-RMHITPKYVERAVDNNSL +ISHLF--EGDTN------------------------------------------------ 
+------------------------------------------------------------ +---------- +>Cercopithecus|HISTDB_H2A_Q_0|H2A.Q organism=Cercopithecus aethiops phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRRQRRSYRCRN---------------QTFSYSIKAKQQFPSSCVHCLLW +KNHCPRPE-------LVHYFQILGGNILELM--GNKVHKTY-RMHITPKYVERAVDNNSL +ISHLF--EGDTN------------------------------------------------ +------------------------------------------------------------ +---------- +>Equus|HISTDB_H2A_Q_17|H2A.Q organism=Equus caballus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKRSCQN-YKLMK---------------QTFSCSAKTKLQFPVSHVDCLQQ +ENHSAQHLSLSTQVFLPAILKYVTNNILEWV--GNEAHNSC-RIR-------KAVANNQQ +LSHLF--EDDTD-----------------SQ---------VNEMF--------------- +------------------------------------------------------------ +---------- +>Giraffa|HISTDB_H2A_Q_7|H2A.Q organism=Giraffa camelopardalis tippelskirchi phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRSYHSSDGLKK---------------RTLPCTMKTKLQFSKSHVDHLLQ +ENQAAQNQNLTTQIFLSAILRHMSSNILKLM--SNETQSNC-RIH-------RAMDNPTQ +LHHVF--DNDAK-----------------SQ---------VHEMF--------------- +------------------------------------------------------------ +---------- +>Mandrillus|HISTDB_H2A_Q_5|H2A.Q organism=Mandrillus leucophaeus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRRQRRSYRCRN---------------QTFSYSIKAKQQFPLSCVHCLLW +KNHCPRPE-------LVHYFQILGGNILELM--GNKVHKNY-RMHITPKYVERAVDNNSL +ISHLF--EGDTN------------------------------------------------ +------------------------------------------------------------ +---------- +>Mustela|HISTDB_H2A_Q_6|H2A.Q organism=Mustela putorius furo phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKRSSQTSYRLGK---------------QTSSCSTKTKLQVPVSHVEPILQ 
+EHQPVQNLSFSSQVCLSAILKYVATNILELV--GNEAQHNC-RVQ-------RAMNNNMQ +NSHLF--EDDTT-----------------SQ---------L------------------- +------------------------------------------------------------ +---------- +>Nasalis|HISTDB_H2A_Q_21|H2A.Q organism=Nasalis larvatus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRRQRRSYRCRN---------------QTFSYSIKAKQQFPLSCVHCLLW +KNHCPRPE-------LVHYFQILGGNILELM--GNKVHKNY-RMHITPKYVERAVDNNSL +ISHLF--EGDTN-----------------SG----------------------------- +------------------------------------------------------------ +---------- +>Nomascus|HISTDB_H2A_Q_22|H2A.Q organism=Nomascus leucogenys phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRRQRSSYRCRN---------------QTFSYSIRAKQQFPLSCVHCLLW +KNHCPRPE-------LVHYFQILGGNILELM--GNKVHKNY-RMHITPKYVERVVDNNLL +LSHLF--EGDTN-----------------SG----------------------------- +------------------------------------------------------------ +---------- +>Odobenus|HISTDB_H2A_Q_12|H2A.Q organism=Odobenus rosmarus divergens phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKRSGRSSYRLGK---------------QTSSCSTKTKLQFSMSHEEPLLQ +ENHPAQKLSFSRQVCLSAILKYVATNILELV--GNKAHHNC-RVQ-------TAMDHDMQ +ISHLF--EDDTT-----------------SQ---------VSEMF--------------- +------------------------------------------------------------ +---------- +>Okapia|HISTDB_H2A_Q_9|H2A.Q organism=Okapia johnstoni phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRSYHSSDGLKK---------------HTLPCTMKTKLQFSKSHVDHLLR +ENQAAQNQNLTTQIFLSAILKYTSSNILKLV--SNETQSNC-RIH-------RAMDNPTQ +LHHVF--DNGAK-----------------SQ---------VHEIF--------------- +------------------------------------------------------------ +---------- +>Ovis|HISTDB_H2A_Q_8|H2A.Q organism=Ovis aries musimon phylum=Chordata class=Mammalia 
+------------------------------------------------------------ +-------MSGNRSYHSSDGFRK---------------HILPCTMKTKLQFSKSHVDHHLQ +ENQDAQNQNLTNQIYLSAILKYLSSNVLKLV--SNETQSDC-RIH-------KAMDNHPQ +LHHAF--DKDAK-----------------SQ---------VHEM---------------- +------------------------------------------------------------ +---------- +>Pan|HISTDB_H2A_Q_4|H2A.Q organism=Pan paniscus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRRQRSSYRCRN---------------QTFSYSIRAKQQFLLSCVHCLLW +KNHCPRPE-------LVHYFQILGGNILELM--GNKVHKNY-RMHITPKYVERVVDNNPL +LSHLF--EGDTN-----------------SG----------------------------- +------------------------------------------------------------ +---------- +>Pan|HISTDB_H2A_Q_15|H2A.Q organism=Pan troglodytes phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRRQRSSYRCRN---------------QTFSYSIRAKQQFLLSCVHCLLW +KNHCPRPE-------LVHYFQILGGNILELM--GNKVHKNY-RMHITPKYVERVVDNNPL +LSHLF--EGDRN-----------------SG----------------------------- +------------------------------------------------------------ +---------- +>Pantholops|HISTDB_H2A_Q_1|H2A.Q organism=Pantholops hodgsonii phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRSYHSSDGFRK---------------HILPCTMKTKLQFSKSHVDHHLQ +ENQDAQNQNLTNQIYLSAILKYLSSNVLKLV--SNETQSNC-RIH-------KAMDNHPQ +LHHVF--DKDAK-----------------SQ---------VHEM---------------- +------------------------------------------------------------ +---------- +>Papio|HISTDB_H2A_Q_18|H2A.Q organism=Papio anubis phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRRQRRSYRCRN---------------QTFSYSIKAKQQFPLSCVHCLLW +KNHCPRQE-------LVHYFQILGGNILELM--GNKVHKNY-RMHITPKYVERAVDNNSL +ISHLF--EGDTN------------------------------------------------ +------------------------------------------------------------ +---------- +>Pongo|HISTDB_H2A_Q_19|H2A.Q organism=Pongo 
pygmaeus abelii phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MFWKRRQRSSYRCRN---------------QTFSYSIRAKQQFPLCCVHCLLW +KDHCPRPE-------LVHYFQILGGNILELM--GNKVHKNY-RMHITPKYVERVVDNNPL +LSHLL------------------------------------------------------- +------------------------------------------------------------ +---------- +>Pteropus|ELK02218.1|H2A.Q organism=Pteropus alecto phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKRSCLN-YKLKK---------------QTLSCTTKTKLHLPVSHEDSLLQ +GNNSARNLRFSTQVFLSVILKYVTTNILELV--DNEAHNNC-CVQ-------RAVDKNPQ +LGQHF--KNGNN-----------------SQ---------VDEMF--------------- +------------------------------------------------------------ +---------- +>Sus|HISTDB_H2A_Q_20|H2A.Q organism=Sus scrofa phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSANRSCQNSCGLRK---------------HTLSRSTKTKLQFPESQGGPLLQ +ENHSAQHLNLSTRVFLSAILKYVTANVLELV--GNETPSNC-RIQ-------RTVGNNPQ +LSQLF--ENETD-----------------PQ---------VREMF--------------- +------------------------------------------------------------ +---------- +>Ursus|HISTDB_H2A_Q_10|H2A.Q organism=Ursus maritimus phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGKRSGPSSYRLGK---------------QTSSCFPKSKLQFPVSHEEPLLQ +EDHPAQQLSLSSQVCLSAILKYVATNILELV--GNEAQHNC-RVQ-------RAVNNNMQ +SGHLF--KDDAA-----------------SQ---------VSEMF--------------- +------------------------------------------------------------ +---------- +>Vicugna|XP_015107649.1|H2A.Q organism=Vicugna pacos phylum=Chordata class=Mammalia +------------------------------------------------------------ +-------MSGNRSCQSSCWLRK---------------QTFSCSTKNKPLFPKSHADHLLQ +ENHFAQQLNLSTQVFLSAILKYVTSNVLEVV--GNKSHSNC-RIQ-------KAADNDLQ +LSHLF--EEDTN-----------------SQ---------ARETF--------------- +------------------------------------------------------------ +---------- diff 
--git a/CURATED_SET/draft_seeds/short_H2A_only.fasta b/CURATED_SET/draft_seeds/short_H2A_only.fasta new file mode 100644 index 0000000..e69de29 diff --git a/CURATED_SET/draft_seeds/sperm_H2B_(Echinoidea).fasta b/CURATED_SET/draft_seeds/sperm_H2B_(Echinoidea).fasta new file mode 100644 index 0000000..78f2f87 --- /dev/null +++ b/CURATED_SET/draft_seeds/sperm_H2B_(Echinoidea).fasta @@ -0,0 +1,28 @@ +>Parechinus|P02290.2|sperm_H2B_(Echinoidea) organism=Parechinus angulosus phylum=Echinodermata class=Echinoidea +---MPSQKSPTKRSPTKRS-----PTKRSPQKGGKGGKGAKRGGKAGKRRRGVQVKRRRR +RRESYGIYIYKVLKQVHPDTGISSRAMSVMNSFVNDVFERIAAEAGRLTTYNRRSTVSSR +EVQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR- +>Parechinus|P02291.2|sperm_H2B_(Echinoidea) organism=Parechinus angulosus phylum=Echinodermata class=Echinoidea +MPRSPAKTSPRKGSPRKGS-----PSRKASPK--RGGKGAKRAGKGGRRRR--VVKRRRR +RRESYGIYIYKVLKQVHPDTGISSRAMSVMNSFVNDVFERIAGEASRLTSANRRSTVSSR +EIQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR- +>Parechinus|P02292.2|sperm_H2B_(Echinoidea) organism=Parechinus angulosus phylum=Echinodermata class=Echinoidea +MPRSPAKTSPRKGSPRKGSPRKGSPSRKASPK--RGGKGAKRAGKGGRRRR--VVKRRRR +RRESYGIYIYKVLKQVHPDTGISSRAMSVMNSFVNDVFERIASEASRLTSANRRSTVSSR +EIQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR- +>Psammechinus|Q27749.3|sperm_H2B_(Echinoidea) organism=Psammechinus miliaris phylum=Echinodermata class=Echinoidea +---MPSQKSPTKRSPTKRS-----PQ--------KGGKGAKRGGKAGKRRRGVAVKRRRR +RRESYGIYIYKVLKQVHPDTGISSRAMSVMNSFVNDVFERIASEAGRLTTYNRRNTVSSR +EVQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR- +>Psammechinus|Q27750.3|sperm_H2B_(Echinoidea) organism=Psammechinus miliaris phylum=Echinodermata class=Echinoidea +MPKSPSKSSPRKGSPRKGS-----PRKGSPK---RGGKGAKRAGKGGRRN---VVKRRRR +RRESYGIYIYKVLKQVHPDTGISSRGMSVMNSFVNDVFERIAGEASRLTSANRRSTISSR +EIQTAVRLLLPGELAKHAVSEGTKAVTKYTTARR +>Strongylocentrotus|NP_999706.1|sperm_H2B_(Echinoidea) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +---MPSQRSPTKRSPTKRS-----PQKGAG----KGGKGSKRGGKA-RRRGGAAVRRRRR 
+RRESYGIYIYKVLKQVHPDTGISSRGMSIMNSFVNDVFERVAAEASRLTKYNRRSTVSSR +EIQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR- +>Strongylocentrotus|NP_999721.1|sperm_H2B_(Echinoidea) organism=Strongylocentrotus purpuratus phylum=Echinodermata class=Echinoidea +MPRSPSKTSPRKGSPRRGS-----PSRKASPK--RGGKGAKRAGKGGRRRN--VVRRRRR +RRESYGIYIYKVLKQVHPDTGISSRGMSVMNSFVNDIFGRIAGEASRLTRANRRSTISSR +EIQTAVRLLLPGELAKHAVSEGTKAVTKYTTSR-