diff --git a/CURATED_SET/classification.json b/CURATED_SET/classification.json index 4236031..4216945 100644 --- a/CURATED_SET/classification.json +++ b/CURATED_SET/classification.json @@ -271,9 +271,9 @@ "H3.5_(Hominidae)": { "H3.5_(Homo_sapiens)": "null" }, - "H3.6_(Mammals?)?": "null", - "H3.7_(Mammals?)?": "null", - "H3.8_(Mammals?)?": "null", + "H3.6_(Homo_sapiens)": "null", + "H3.7_(Homo_sapiens)": "null", + "H3.8_(Homo_sapiens)": "null", "H3.B_(Giardia?)": "null", "H3.P_(Moneuplotes?)": "null", "H3.V_(Trypanosomatidae)": "null" @@ -4892,10 +4892,10 @@ "alternate_names": [], "publications": [] }, - "H3.6_(Mammals?)?": { + "H3.6_(Homo_sapiens)": { "level": "variant_group", "description": { - "summary": "null", + "summary": "H3.6_(Homo_sapiens) is a human histone variant, similar to histone variant H3.3 in amino acid sequence, encoded by the H3F3AP6 gene. Expression levels of H3F3AP6 are extremely low, as compared to H3F3B gene (encodes histone H3.3) [taguchi_crystal_2017]. H3.6 nucleosomes are substantially unstable due to the Val62 residue, which weakens interactions with H4 [taguchi_crystal_2017].", "taxonomy": "null", "genes": "null", "evolution": "null", @@ -4905,7 +4905,7 @@ "sequence": "null", "localization": "null", "deposition": "null", - "structure": "null", + "structure": "H3.6 nucleosomes are substantially unstable due to the Val62 residue, which weakens interactions with H4 [taguchi_crystal_2017].", "interactions": "null", "disease": "null", "caveats": "null" @@ -4913,12 +4913,14 @@ "taxonomic_span": "null", "taxonomic_span_id": "null", "alternate_names": [], - "publications": [] + "publications": [ + "taguchi_crystal_2017" + ] }, - "H3.7_(Mammals?)?": { + "H3.7_(Homo_sapiens)": { "level": "variant_group", "description": { - "summary": "null", + "summary": "H3.7_(Homo_sapiens) is a human histone variant, similar to histone variant H3.1 in amino acid sequence, encoded by the HIST2H3PS2 gene. Expression levels of HIST2H3PS2 are extremely low, as compared to H3F3B gene (encodes histone H3.3) [taguchi_crystal_2017]. In vitro results showed that H3.7 failed to form nucleosomes [taguchi_crystal_2017].", "taxonomy": "null", "genes": "null", "evolution": "null", @@ -4928,7 +4930,7 @@ "sequence": "null", "localization": "null", "deposition": "null", - "structure": "null", + "structure": "In vitro results showed that H3.7 failed to form nucleosomes [taguchi_crystal_2017].", "interactions": "null", "disease": "null", "caveats": "null" @@ -4936,12 +4938,14 @@ "taxonomic_span": "null", "taxonomic_span_id": "null", "alternate_names": [], - "publications": [] + "publications": [ + "taguchi_crystal_2017" + ] }, - "H3.8_(Mammals?)?": { + "H3.8_(Homo_sapiens)": { "level": "variant_group", "description": { - "summary": "null", + "summary": "H3.8_(Homo_sapiens) is a human histone variant, similar to histone variant H3.3 in amino acid sequence, encoded by the H3F3AP5 gene. Expression levels of H3F3AP5 are extremely low, as compared to H3F3B gene (encodes histone H3.3) [taguchi_crystal_2017]. H3.8 nucleosomes are extremely unstable [taguchi_crystal_2017].", "taxonomy": "null", "genes": "null", "evolution": "null", @@ -4951,7 +4955,7 @@ "sequence": "null", "localization": "null", "deposition": "null", - "structure": "null", + "structure": "H3.8 nucleosomes are extremely unstable [taguchi_crystal_2017].", "interactions": "null", "disease": "null", "caveats": "null" @@ -4959,7 +4963,9 @@ "taxonomic_span": "null", "taxonomic_span_id": "null", "alternate_names": [], - "publications": [] + "publications": [ + "taguchi_crystal_2017" + ] }, "H3.B_(Giardia?)": { "level": "variant_group", diff --git a/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241018.ipynb b/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241018.ipynb index 62ba2c5..3e6e6a6 100644 --- a/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241018.ipynb +++ b/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241018.ipynb @@ -484,7 +484,6 @@ "cell_type": "markdown", "id": "42fed33b-1181-4f30-917a-d835b96d24b9", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ diff --git a/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241113.ipynb b/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241113.ipynb index da32cba..cce6845 100644 --- a/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241113.ipynb +++ b/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241113.ipynb @@ -2364,7 +2364,10 @@ { "cell_type": "markdown", "id": "e8fd8805-c640-415d-94a0-da21a6e9ccf3", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ "# Delete GBG59214.1 and GBG60584.1\n", "\n", diff --git a/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241119.ipynb b/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241119.ipynb new file mode 100644 index 0000000..c2b2194 --- /dev/null +++ b/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241119.ipynb @@ -0,0 +1,2501 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "fb32231a-293e-45b8-af7d-d996c91a4edd", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from Bio import Entrez, SeqIO\n", + "from mysql.connector import connection\n", + "from sshtunnel import SSHTunnelForwarder\n", + "\n", + "Entrez.email = \"l.singh@intbio.org\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d5fa78a7-4901-4e63-9d91-d9ed584d2f62", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "with open(\"db_curated_server_info.txt\", \"r\") as file:\n", + " lines = file.readlines()\n", + "\n", + "config = {}\n", + "\n", + "for line in lines:\n", + " line = line.strip()\n", + " if line and not line.startswith(\"#\"):\n", + " key, value = line.split(\"=\", 1)\n", + " config[key] = value.strip()\n", + "\n", + "server_name = config.get(\"server_name\")\n", + "srever_port = int(config.get(\"srever_port\"))\n", + "ssh_password = config.get(\"ssh_password\")\n", + "ssh_username = config.get(\"ssh_username\")\n", + "db_adress = config.get(\"db_adress\")\n", + "db_port = int(config.get(\"db_port\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ebe3d2e8-9736-4fd9-b3f1-5ec2d255567d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "33727\n" + ] + } + ], + "source": [ + "tunnel = SSHTunnelForwarder(\n", + " (server_name, srever_port),\n", + " ssh_password=ssh_password,\n", + " ssh_username=ssh_username,\n", + " remote_bind_address=(db_adress, db_port),\n", + ")\n", + "tunnel.start()\n", + "print(tunnel.local_bind_port)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0de93630-7502-40f3-ad24-42edf7a11eef", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "conn = connection.MySQLConnection(\n", + " user=\"db_user\",\n", + " password=\"db_password\",\n", + " host=\"localhost\",\n", + " port=tunnel.local_bind_port,\n", + " database=\"db_name\",\n", + ")\n", + "cursor = conn.cursor()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b4767ba0-4ea1-43ae-953f-f2575d9b7a69", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('alternative_name',),\n", + " ('histone',),\n", + " ('histone_description',),\n", + " ('histone_has_publication',),\n", + " ('publication',),\n", + " ('sequence',),\n", + " ('sequence_has_publication',)]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"SHOW TABLES;\"\n", + "cursor.execute(query)\n", + "cursor.fetchall()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "c68d4b8f-c22c-4f8e-bb4f-71f95668656e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# add_histone = (\n", + "# \"INSERT INTO histone \"\n", + "# \"(id, level, taxonomic_span, taxonomic_span_id, description, parent) \"\n", + "# \"VALUES (%(id)s, %(level)s, %(taxonomic_span)s, %(taxonomic_span_id)s, %(description)s, %(parent)s)\"\n", + "# )\n", + "# add_histone_description = (\n", + "# \"INSERT INTO histone_description \"\n", + "# \"(summary, taxonomy, genes, evolution, expression, knock_out, function, sequence, localization, deposition, structure, interactions, disease, caveats) \"\n", + "# \"VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)\"\n", + "# )\n", + "add_publication = (\n", + " \"INSERT INTO publication \"\n", + " \"(id, title, doi, author, year) \"\n", + " \"VALUES (%(id)s, %(title)s, %(doi)s, %(author)s, %(year)s)\"\n", + ")\n", + "# add_sequence_has_publication = (\n", + "# \"INSERT INTO sequence_has_publication \"\n", + "# \"(sequence_accession, publication_id) \"\n", + "# \"VALUES (%s, %s)\"\n", + "# )\n", + "# add_alternate_names = (\n", + "# \"INSERT INTO alternative_name \"\n", + "# \"(name, taxonomy, gene, splice, histone) \"\n", + "# \"VALUES (%(name)s, %(taxonomy)s, %(gene)s, %(splice)s, %(histone)s)\"\n", + "# )\n", + "add_histone_has_publication = (\n", + " \"INSERT INTO histone_has_publication \"\n", + " \"(histone_id, publication_id) \"\n", + " \"VALUES (%s, %s)\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f0e30842-5f18-4d84-9f5d-50d87f0a90fa", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "# Update H3.6_(Homo_sapiens)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e1f735c6-e34f-4475-b74f-667e3ef6dd57", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
362H3.6_(Mammals?)?variant_groupnullnull53H3NoneNone
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "362 H3.6_(Mammals?)? variant_group null null \n", + "\n", + " description parent histone_id publication_id \n", + "362 53 H3 None None " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id\"\n", + ")\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"id\"].str.contains(\"H3.6\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "85bd02ff-d7d2-4054-9294-da04762689c2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "query = (\n", + " f\"UPDATE histone SET id='H3.6_(Homo_sapiens)' \"\n", + " f\"WHERE id='H3.6_(Mammals?)?'\"\n", + ")\n", + "cursor.execute(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "46efc2e0-ef3a-4ae4-ba72-eaf0e3cf5302", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
362H3.6_(Homo_sapiens)variant_groupnullnull53H3NoneNone
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "362 H3.6_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent histone_id publication_id \n", + "362 53 H3 None None " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id\"\n", + ")\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"id\"].str.contains(\"H3.6\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "653b18e7-c10e-4d13-918d-6207bd05b130", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
0Archaealtypenullnull1NoneNoneNone
1cenH3_(Animals)variantHomo sapiens960694cenH3_(Eukarya)NoneNone
2cenH3_(Eukarya)variant_groupnullnull46H3NoneNone
3cenH3_(Fungi)variantHomo sapiens960695cenH3_(Eukarya)NoneNone
4cenH3_(Homo_sapiens)variantHomo sapiens9606158cenH3_(Mammalia)NoneNone
...........................
401TS H3.10variantnullnull139H3.3-like_(Plants)NoneNone
402TS_H1.6variant_groupMammalia4067419H1TS_H1.622650316
403TS_H1.7variant_groupMammalia4067420H1TS_H1.722650316
404TS_H1.9variant_groupMammalia4067421H1TS_H1.922650316
405Viraltypenullnull7NoneNoneNone
\n", + "

406 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "0 Archaeal type null null \n", + "1 cenH3_(Animals) variant Homo sapiens 9606 \n", + "2 cenH3_(Eukarya) variant_group null null \n", + "3 cenH3_(Fungi) variant Homo sapiens 9606 \n", + "4 cenH3_(Homo_sapiens) variant Homo sapiens 9606 \n", + ".. ... ... ... ... \n", + "401 TS H3.10 variant null null \n", + "402 TS_H1.6 variant_group Mammalia 40674 \n", + "403 TS_H1.7 variant_group Mammalia 40674 \n", + "404 TS_H1.9 variant_group Mammalia 40674 \n", + "405 Viral type null null \n", + "\n", + " description parent histone_id publication_id \n", + "0 1 None None None \n", + "1 94 cenH3_(Eukarya) None None \n", + "2 46 H3 None None \n", + "3 95 cenH3_(Eukarya) None None \n", + "4 158 cenH3_(Mammalia) None None \n", + ".. ... ... ... ... \n", + "401 139 H3.3-like_(Plants) None None \n", + "402 19 H1 TS_H1.6 22650316 \n", + "403 20 H1 TS_H1.7 22650316 \n", + "404 21 H1 TS_H1.9 22650316 \n", + "405 7 None None None \n", + "\n", + "[406 rows x 8 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "histone_df" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "730f42c9-6611-4baf-b5dc-0b7454b5aef2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Make sure data is committed to the database\n", + "conn.commit()" + ] + }, + { + "cell_type": "markdown", + "id": "94ee9d13-fc3a-4993-bfa1-201a7e0973f5", + "metadata": {}, + "source": [ + "## Update decription of H3.6_(Homo_sapiens) (id=53)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "bfc9d752-104e-4acd-b2e3-e342f6c95de9", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['id',\n", + " 'summary',\n", + " 'taxonomy',\n", + " 'genes',\n", + " 'evolution',\n", + " 'expression',\n", + " 'knock_out',\n", + " 'function',\n", + " 'sequence',\n", + " 'localization',\n", + " 'deposition',\n", + " 'structure',\n", + " 'interactions',\n", + " 'disease',\n", + " 'caveats']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"SELECT * FROM histone_description\"\n", + "cursor.execute(query)\n", + "cursor.fetchall()\n", + "[i[0] for i in cursor.description]" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "48f3c303-529c-45ea-a1e2-c3441f5f3155", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "summary = \"H3.6_(Homo_sapiens) is a human histone variant, similar to histone variant H3.3 in amino acid sequence, encoded by the H3F3AP6 gene. Expression levels of H3F3AP6 are extremely low, as compared to H3F3B gene (encodes histone H3.3) [taguchi_crystal_2017]. H3.6 nucleosomes are substantially unstable due to the Val62 residue, which weakens interactions with H4 [taguchi_crystal_2017].\"\n", + "structure = \"H3.6 nucleosomes are substantially unstable due to the Val62 residue, which weakens interactions with H4 [taguchi_crystal_2017].\"" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "69c753ec-1aa0-48e0-9660-fa84ec13876a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "query = (\n", + " f\"UPDATE histone_description SET summary='{summary}', structure='{structure}' \"\n", + " f\"WHERE id=53\"\n", + ")\n", + "cursor.execute(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "70064222-b3a1-4023-b75a-4f3d2d175f08", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparentsummarytaxonomygenesevolutionexpressionknock_outfunctionsequencelocalizationdepositionstructureinteractionsdiseasecaveats
172H3.6_(Homo_sapiens)variant_groupnullnull53H3H3.6_(Homo_sapiens) is a human histone variant...nullnullnullnullnullnullnullnullnullH3.6 nucleosomes are substantially unstable du...nullnullnull
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "172 H3.6_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent summary \\\n", + "172 53 H3 H3.6_(Homo_sapiens) is a human histone variant... \n", + "\n", + " taxonomy genes evolution expression knock_out function sequence \\\n", + "172 null null null null null null null \n", + "\n", + " localization deposition \\\n", + "172 null null \n", + "\n", + " structure interactions disease \\\n", + "172 H3.6 nucleosomes are substantially unstable du... null null \n", + "\n", + " caveats \n", + "172 null " + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT h.*, d.summary, d.taxonomy, d.genes, d.evolution, d.expression, d.knock_out, d.function, d.sequence, d.localization, d.deposition, d.structure, d.interactions, d.disease, d.caveats \"\n", + " \"FROM histone h LEFT JOIN histone_description d \"\n", + " \"ON h.description = d.id\"\n", + ")\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"id\"].str.contains(\"H3.6\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "79fe2ee4-02e6-4db1-ab89-82c5a61cc1da", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Make sure data is committed to the database\n", + "conn.commit()" + ] + }, + { + "cell_type": "markdown", + "id": "9e2f15d7-fea0-4359-8c2b-2df85abe983f", + "metadata": {}, + "source": [ + "## Update publications of H3.6_(Homo_sapiens)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "6acf6d9d-41cf-4c13-aa21-ae10b769fd12", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
histone_idpublication_id
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [histone_id, publication_id]\n", + "Index: []" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"SELECT * FROM histone_has_publication\"\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"histone_id\"]==\"H3.6_(Homo_sapiens)\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "348603a9-dc6f-46e3-a9d1-6b1d0a7311ca", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitledoiauthoryear
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [id, title, doi, author, year]\n", + "Index: []" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"SELECT * FROM publication WHERE id='taguchi_crystal_2017'\"\n", + "cursor.execute(query)\n", + "pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "9163251e-e672-4434-b1cf-17ef62e8a372", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "data_publication = {\n", + " \"id\": \"taguchi_crystal_2017\",\n", + " \"title\": None,\n", + " \"doi\": None,\n", + " \"author\": None,\n", + " \"year\": None,\n", + "}\n", + "cursor.execute(add_publication, data_publication)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "6d44338e-0739-435c-ba96-e3b8c981c9d6", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitledoiauthoryear
0taguchi_crystal_2017NoneNoneNoneNone
\n", + "
" + ], + "text/plain": [ + " id title doi author year\n", + "0 taguchi_crystal_2017 None None None None" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"SELECT * FROM publication WHERE id='taguchi_crystal_2017'\"\n", + "cursor.execute(query)\n", + "pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "8e543a1a-4a6d-453b-ade9-9931196ea249", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cursor.execute(add_histone_has_publication, (\"H3.6_(Homo_sapiens)\", \"taguchi_crystal_2017\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "641f0278-876e-4947-9b6f-42b6676e7705", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
histone_idpublication_id
286H3.6_(Homo_sapiens)taguchi_crystal_2017
\n", + "
" + ], + "text/plain": [ + " histone_id publication_id\n", + "286 H3.6_(Homo_sapiens) taguchi_crystal_2017" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"SELECT * FROM histone_has_publication\"\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"histone_id\"]==\"H3.6_(Homo_sapiens)\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "c89356b7-55a0-4006-be22-7661d22c2ae2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Make sure data is committed to the database\n", + "conn.commit()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "915051ec-ae89-4b16-9e45-0f2b1c3a2af9", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
362H3.6_(Homo_sapiens)variant_groupnullnull53H3H3.6_(Homo_sapiens)taguchi_crystal_2017
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "362 H3.6_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent histone_id publication_id \n", + "362 53 H3 H3.6_(Homo_sapiens) taguchi_crystal_2017 " + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id\"\n", + ")\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"id\"].str.contains(\"H3.6\")]" + ] + }, + { + "cell_type": "markdown", + "id": "b4068afa-67e4-47b3-ab53-31cafb6dbc5f", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "# Update H3.7_(Homo_sapiens)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "2e0c23f9-9922-4249-afa6-2f38887b1031", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
363H3.7_(Mammals?)?variant_groupnullnull54H3NoneNone
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "363 H3.7_(Mammals?)? variant_group null null \n", + "\n", + " description parent histone_id publication_id \n", + "363 54 H3 None None " + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id\"\n", + ")\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"id\"].str.contains(\"H3.7\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "fa155850-f2e8-4e0c-bf65-c4a3fe109fec", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "query = (\n", + " f\"UPDATE histone SET id='H3.7_(Homo_sapiens)' \"\n", + " f\"WHERE id='H3.7_(Mammals?)?'\"\n", + ")\n", + "cursor.execute(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "ce62ff9f-b94a-4c05-972a-f4165cb76f64", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
363H3.7_(Homo_sapiens)variant_groupnullnull54H3NoneNone
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "363 H3.7_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent histone_id publication_id \n", + "363 54 H3 None None " + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id\"\n", + ")\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"id\"].str.contains(\"H3.7\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "1a135b4c-a8a5-41d1-8197-b4b2d9a79bfd", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Make sure data is committed to the database\n", + "conn.commit()" + ] + }, + { + "cell_type": "markdown", + "id": "2d124abe-c066-4a3b-b191-91369714ca5e", + "metadata": {}, + "source": [ + "## Update decription of H3.7_(Homo_sapiens) (id=54)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "648e788b-a135-448c-987a-9171823c48e1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "summary = \"H3.7_(Homo_sapiens) is a human histone variant, similar to histone variant H3.1 in amino acid sequence, encoded by the HIST2H3PS2 gene. Expression levels of HIST2H3PS2 are extremely low, as compared to H3F3B gene (encodes histone H3.3) [taguchi_crystal_2017]. In vitro results showed that H3.7 failed to form nucleosomes [taguchi_crystal_2017].\"\n", + "structure = \"In vitro results showed that H3.7 failed to form nucleosomes [taguchi_crystal_2017].\"" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "496e1527-b47e-4aac-8a44-94bbea5ba927", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "query = (\n", + " f\"UPDATE histone_description SET summary='{summary}', structure='{structure}' \"\n", + " f\"WHERE id=54\"\n", + ")\n", + "cursor.execute(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "33ad3aeb-598a-43e0-91e3-cdaf30f7b0dd", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparentsummarytaxonomygenesevolutionexpressionknock_outfunctionsequencelocalizationdepositionstructureinteractionsdiseasecaveats
173H3.7_(Homo_sapiens)variant_groupnullnull54H3H3.7_(Homo_sapiens) is a human histone variant...nullnullnullnullnullnullnullnullnullIn vitro results showed that H3.7 failed to fo...nullnullnull
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "173 H3.7_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent summary \\\n", + "173 54 H3 H3.7_(Homo_sapiens) is a human histone variant... \n", + "\n", + " taxonomy genes evolution expression knock_out function sequence \\\n", + "173 null null null null null null null \n", + "\n", + " localization deposition \\\n", + "173 null null \n", + "\n", + " structure interactions disease \\\n", + "173 In vitro results showed that H3.7 failed to fo... null null \n", + "\n", + " caveats \n", + "173 null " + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT h.*, d.summary, d.taxonomy, d.genes, d.evolution, d.expression, d.knock_out, d.function, d.sequence, d.localization, d.deposition, d.structure, d.interactions, d.disease, d.caveats \"\n", + " \"FROM histone h LEFT JOIN histone_description d \"\n", + " \"ON h.description = d.id\"\n", + ")\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"id\"].str.contains(\"H3.7\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "d3a6f191-1f25-4429-8676-20b2810ccf0a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Make sure data is committed to the database\n", + "conn.commit()" + ] + }, + { + "cell_type": "markdown", + "id": "db916bf7-e50c-4fe7-933b-43037ce535f3", + "metadata": {}, + "source": [ + "## Update publications of H3.7_(Homo_sapiens)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "33f87712-781a-463a-bb6f-3b478a69d80d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
histone_idpublication_id
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [histone_id, publication_id]\n", + "Index: []" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"SELECT * FROM histone_has_publication\"\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"histone_id\"]==\"H3.7_(Homo_sapiens)\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "ea7e05a0-0619-4881-a306-62bab20ab931", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitledoiauthoryear
0taguchi_crystal_2017NoneNoneNoneNone
\n", + "
" + ], + "text/plain": [ + " id title doi author year\n", + "0 taguchi_crystal_2017 None None None None" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"SELECT * FROM publication WHERE id='taguchi_crystal_2017'\"\n", + "cursor.execute(query)\n", + "pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "148c9cab-7aad-4f19-9b37-956fd5994622", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cursor.execute(add_histone_has_publication, (\"H3.7_(Homo_sapiens)\", \"taguchi_crystal_2017\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "1c59644d-e15a-4b4b-aaf6-c149276d7c0a", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
histone_idpublication_id
287H3.7_(Homo_sapiens)taguchi_crystal_2017
\n", + "
" + ], + "text/plain": [ + " histone_id publication_id\n", + "287 H3.7_(Homo_sapiens) taguchi_crystal_2017" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"SELECT * FROM histone_has_publication\"\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"histone_id\"]==\"H3.7_(Homo_sapiens)\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "e7b2fe70-9681-4e53-be4c-3a708a3f4fd4", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Make sure data is committed to the database\n", + "conn.commit()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "dca3cfd2-de03-4208-acdc-1b62d74ac1d3", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
363H3.7_(Homo_sapiens)variant_groupnullnull54H3H3.7_(Homo_sapiens)taguchi_crystal_2017
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "363 H3.7_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent histone_id publication_id \n", + "363 54 H3 H3.7_(Homo_sapiens) taguchi_crystal_2017 " + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id\"\n", + ")\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"id\"].str.contains(\"H3.7\")]" + ] + }, + { + "cell_type": "markdown", + "id": "2a1a364e-39e6-4af2-a724-2b34372fdec0", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "# Update H3.8_(Homo_sapiens)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "69ee2ba3-418e-4426-8a0e-d7ccfcad57bf", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
364H3.8_(Mammals?)?variant_groupnullnull55H3NoneNone
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "364 H3.8_(Mammals?)? variant_group null null \n", + "\n", + " description parent histone_id publication_id \n", + "364 55 H3 None None " + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id\"\n", + ")\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"id\"].str.contains(\"H3.8\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "7f63a24c-7835-49af-b75a-d804a91b854b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "query = (\n", + " f\"UPDATE histone SET id='H3.8_(Homo_sapiens)' \"\n", + " f\"WHERE id='H3.8_(Mammals?)?'\"\n", + ")\n", + "cursor.execute(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "183f43e6-1ee8-47f1-a583-194acb2b7226", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
364H3.8_(Homo_sapiens)variant_groupnullnull55H3NoneNone
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "364 H3.8_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent histone_id publication_id \n", + "364 55 H3 None None " + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id\"\n", + ")\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"id\"].str.contains(\"H3.8\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "d5b649ae-cd9a-45ec-b231-bcbac754254e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Make sure data is committed to the database\n", + "conn.commit()" + ] + }, + { + "cell_type": "markdown", + "id": "c4c00476-b65f-488e-9927-6c0825d9d437", + "metadata": {}, + "source": [ + "## Update decription of H3.8_(Homo_sapiens) (id=55)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "f8b09bbe-1afd-4f1e-b85d-6eb249400955", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "summary = \"H3.8_(Homo_sapiens) is a human histone variant, similar to histone variant H3.3 in amino acid sequence, encoded by the H3F3AP5 gene. Expression levels of H3F3AP5 are extremely low, as compared to H3F3B gene (encodes histone H3.3) [taguchi_crystal_2017]. H3.8 nucleosomes are extremely unstable [taguchi_crystal_2017].\"\n", + "structure = \"H3.8 nucleosomes are extremely unstable [taguchi_crystal_2017].\"" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "9eb2a9c1-1d90-40e2-acb5-e0eaf127830c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "query = (\n", + " f\"UPDATE histone_description SET summary='{summary}', structure='{structure}' \"\n", + " f\"WHERE id=55\"\n", + ")\n", + "cursor.execute(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "55830e81-31ac-4e37-b929-013375c427b7", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparentsummarytaxonomygenesevolutionexpressionknock_outfunctionsequencelocalizationdepositionstructureinteractionsdiseasecaveats
174H3.8_(Homo_sapiens)variant_groupnullnull55H3H3.8_(Homo_sapiens) is a human histone variant...nullnullnullnullnullnullnullnullnullH3.8 nucleosomes are extremely unstable [taguc...nullnullnull
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "174 H3.8_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent summary \\\n", + "174 55 H3 H3.8_(Homo_sapiens) is a human histone variant... \n", + "\n", + " taxonomy genes evolution expression knock_out function sequence \\\n", + "174 null null null null null null null \n", + "\n", + " localization deposition \\\n", + "174 null null \n", + "\n", + " structure interactions disease \\\n", + "174 H3.8 nucleosomes are extremely unstable [taguc... null null \n", + "\n", + " caveats \n", + "174 null " + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT h.*, d.summary, d.taxonomy, d.genes, d.evolution, d.expression, d.knock_out, d.function, d.sequence, d.localization, d.deposition, d.structure, d.interactions, d.disease, d.caveats \"\n", + " \"FROM histone h LEFT JOIN histone_description d \"\n", + " \"ON h.description = d.id\"\n", + ")\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"id\"].str.contains(\"H3.8\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "3c36d6ff-7d4f-4397-ab7a-5b306bbfe4ac", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Make sure data is committed to the database\n", + "conn.commit()" + ] + }, + { + "cell_type": "markdown", + "id": "22d2b8a0-e159-40d8-b700-71640be03115", + "metadata": {}, + "source": [ + "## Update publications of H3.8_(Homo_sapiens)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "863417d2-7455-4f25-aadf-bf56d5d10199", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
histone_idpublication_id
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [histone_id, publication_id]\n", + "Index: []" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"SELECT * FROM histone_has_publication\"\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"histone_id\"]==\"H3.8_(Homo_sapiens)\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "d664aebf-9d4a-4e88-b257-deced042689c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitledoiauthoryear
0taguchi_crystal_2017NoneNoneNoneNone
\n", + "
" + ], + "text/plain": [ + " id title doi author year\n", + "0 taguchi_crystal_2017 None None None None" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"SELECT * FROM publication WHERE id='taguchi_crystal_2017'\"\n", + "cursor.execute(query)\n", + "pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "40ccbd05-7ab9-4160-9a37-fc14743ba700", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cursor.execute(add_histone_has_publication, (\"H3.8_(Homo_sapiens)\", \"taguchi_crystal_2017\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "2e82aab7-f313-45f0-9c80-545d623afa18", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
histone_idpublication_id
288H3.8_(Homo_sapiens)taguchi_crystal_2017
\n", + "
" + ], + "text/plain": [ + " histone_id publication_id\n", + "288 H3.8_(Homo_sapiens) taguchi_crystal_2017" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"SELECT * FROM histone_has_publication\"\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"histone_id\"]==\"H3.8_(Homo_sapiens)\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "a0c6e834-4663-4061-b886-3e9683c3d132", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Make sure data is committed to the database\n", + "conn.commit()" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "929974cc-e775-4bba-bcf9-c6095700ccac", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
364H3.8_(Homo_sapiens)variant_groupnullnull55H3H3.8_(Homo_sapiens)taguchi_crystal_2017
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "364 H3.8_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent histone_id publication_id \n", + "364 55 H3 H3.8_(Homo_sapiens) taguchi_crystal_2017 " + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id\"\n", + ")\n", + "cursor.execute(query)\n", + "histone_df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])\n", + "histone_df[histone_df[\"id\"].str.contains(\"H3.8\")]" + ] + }, + { + "cell_type": "markdown", + "id": "d3c8a219-f508-46e1-936f-7d6040016e39", + "metadata": { + "tags": [] + }, + "source": [ + "# Close connections" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "572059d8-54f9-4ec7-a2c6-1c8066552845", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cursor.close()\n", + "conn.close()\n", + "tunnel.stop()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "790ee020-09af-4dcc-9c36-f1b3f69ea8a7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".conda-histdb_env", + "language": "python", + "name": "conda-env-.conda-histdb_env-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/CURATED_SET/curated_service/curatedDB/generate_draft_seeds.ipynb b/CURATED_SET/curated_service/curatedDB/generate_draft_seeds.ipynb index 82a9b67..21abe34 100644 --- a/CURATED_SET/curated_service/curatedDB/generate_draft_seeds.ipynb +++ b/CURATED_SET/curated_service/curatedDB/generate_draft_seeds.ipynb @@ -1994,9 +1994,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "08671eff-0312-4d18-86f9-495df45db45f", "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, "tags": [] }, "outputs": [ @@ -2105,7 +2109,85 @@ "Building H2A.P\n", "Building H2A.Q\n", "Building short_H2A\n", - "Building H2A\n" + "Building H2A\n", + "Building cH2B.10_(Homo_sapiens)\n", + "Building cH2B.11_(Homo_sapiens)\n", + "Building cH2B.12_(Homo_sapiens)\n", + "Building cH2B.13_(Homo_sapiens)\n", + "Building cH2B.14_(Homo_sapiens)\n", + "Building cH2B.15_(Homo_sapiens)\n", + "Building cH2B.1_(Homo_sapiens)\n", + "Building cH2B.2_(Homo_sapiens)\n", + "Building cH2B.3_(Homo_sapiens)\n", + "Building cH2B.4_(Homo_sapiens)\n", + "Building cH2B.5_(Homo_sapiens)\n", + "Building cH2B.6_(Homo_sapiens)\n", + "Building cH2B.7_(Homo_sapiens)\n", + "Building cH2B.8_(Homo_sapiens)\n", + "Building cH2B.9_(Homo_sapiens)\n", + "Building cH2B_(Homo_sapiens)\n", + "Building cH2B.1_(Mus_musculus)\n", + "Building cH2B.E_(Mus_musculus)\n", + "Building cH2B_(Mus_musculus)\n", + "Building cH2B_(Mammalia)\n", + "Building cH2B_(Vertebrata)\n", + "Building cH2B_(Animals)\n", + "Building cH2B_(Fungi)\n", + "Building cH2B_(Chlorophyta)\n", + "Building cH2B_(Embryophyta)\n", + "Building cH2B_(Plants)\n", + "Building cH2B_(Protists)\n", + "Building cH2B\n", + "Building CS_H2B_(Echinoidea)\n", + "Building early_H2B_(Echinoidea)\n", + "Building gH2B\n", + "Building H2B.K_(Homo_sapiens)\n", + "Building H2B.K\n", + "Building H2B.L\n", + "Building H2B.N_(Homo_sapiens)\n", + "Building H2B.N\n", + "Building H2B.O\n", + "Building H2B.S\n", + "Building H2B.V\n", + "Building H2B.W.1_(Homo_sapiens)\n", + "Building H2B.W.2_(Homo_sapiens)\n", + "Building H2B.W_(Homo_sapiens)\n", + "Building H2B.W\n", + "Building H2B.Z\n", + "Building late_H2B_(Echinoidea)\n", + "Building sperm_H2B_(Echinoidea)\n", + "Building H2B\n", + "Building cenH3_(Homo_sapiens)\n", + "Building cenH3_(Mammalia)\n", + "Building cenH3_(Animals)\n", + "Building cenH3_(Fungi)\n", + "Building cenH3_(Plants)\n", + "Building cenH3_(Eukarya)\n", + "Building cH3.1_(Homo_sapiens)\n", + "Building cH3.1_(Mammalia)\n", + "Building cH3.2_(Homo_sapiens)\n", + "Building cH3.2_(Mammalia)\n", + "Building cH3_(Mammalia)\n", + "Building cH3_(Vertebrata)\n", + "Building cH3_(Animals)\n", + "Building cH3\n", + "Building H3.3_(Homo_sapiens)\n", + "Building H3.3_(Animals)\n", + "Building H3.3\n", + "Building H3.Y.1_(Homo_sapiens)\n", + "Building H3.Y.2_(Homo_sapiens)\n", + "Building H3.Y_(Homo_sapiens)\n", + "Building H3.Y_(Primates?)\n", + "Building H3.3-like_(Animals)\n", + "Building H3.3-like?\n", + "Building H3.4_(Homo_sapiens)\n", + "Building H3.4_(Mammalia)\n", + "Building H3.5_(Homo_sapiens)\n", + "Building H3.5_(Hominidae)\n", + "Building H3\n", + "Building cH4_(Homo_sapiens)\n", + "Building cH4\n", + "Building H4\n" ] } ], diff --git a/CURATED_SET/curated_service/curatedDB/save_curatedDB_toFiles.ipynb b/CURATED_SET/curated_service/curatedDB/save_curatedDB_toFiles.ipynb index f6e15c2..19916c7 100644 --- a/CURATED_SET/curated_service/curatedDB/save_curatedDB_toFiles.ipynb +++ b/CURATED_SET/curated_service/curatedDB/save_curatedDB_toFiles.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 16, + "execution_count": 1, "id": "713ac3f3-2233-4f7e-bce8-490dac1a0f26", "metadata": { "tags": [] @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 2, "id": "b8a89a61-5ae1-4e88-b41f-bd7ba2094d1e", "metadata": {}, "outputs": [], @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 3, "id": "8f76aa83-7659-4283-b61b-d4771339452a", "metadata": {}, "outputs": [ @@ -53,7 +53,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "43369\n" + "39587\n" ] } ], @@ -70,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "id": "194c4be6-c39c-4e6a-bd65-6265aff2b2e3", "metadata": { "tags": [] @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 5, "id": "55e0025e-1aa7-429d-b742-7992a0807386", "metadata": { "tags": [] @@ -107,7 +107,7 @@ " ('sequence_has_publication',)]" ] }, - "execution_count": 20, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -346,7 +346,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 9, "id": "abb4ded5-1cf2-4c0c-923a-8db5b206832c", "metadata": { "tags": [] @@ -363,7 +363,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 10, "id": "4198ddc0-b173-4a0f-977c-968d14c55438", "metadata": { "tags": [] @@ -383,7 +383,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 11, "id": "4396c1b6-21e4-4b03-ad6d-6e0d13238bb9", "metadata": { "tags": [] @@ -410,7 +410,6 @@ "cell_type": "markdown", "id": "614d13e9-62ee-4154-8e1d-f08ed8ee1473", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -419,7 +418,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 12, "id": "d88af2ee-a6d0-4459-bf0b-0f848689bd0b", "metadata": { "tags": [] @@ -429,8 +428,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 7min 36s, sys: 8.55 s, total: 7min 45s\n", - "Wall time: 7min 24s\n" + "CPU times: user 7min 40s, sys: 7.11 s, total: 7min 47s\n", + "Wall time: 7min 29s\n" ] } ],