From 9239b85c62f03125cef984d485209a1e96302cb6 Mon Sep 17 00:00:00 2001 From: l-singh-biomsu Date: Thu, 5 Dec 2024 16:33:42 +0000 Subject: [PATCH] Deleted H3.6 and H3.8 --- CURATED_SET/classification.json | 52 - .../curatedDB/UPD_curatedDB_241205.ipynb | 968 ++++++++++++++++++ .../curatedDB/save_curatedDB_toFiles.ipynb | 6 +- 3 files changed, 971 insertions(+), 55 deletions(-) create mode 100644 CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241205.ipynb diff --git a/CURATED_SET/classification.json b/CURATED_SET/classification.json index 0e153fc..4044452 100644 --- a/CURATED_SET/classification.json +++ b/CURATED_SET/classification.json @@ -271,9 +271,7 @@ "H3.5_(Hominidae)": { "H3.5_(Homo_sapiens)": "null" }, - "H3.6_(Homo_sapiens)": "null", "H3.7_(Homo_sapiens)": "null", - "H3.8_(Homo_sapiens)": "null", "H3.B_(Giardia?)": "null", "H3.P_(Moneuplotes?)": "null", "H3.V_(Trypanosomatidae)": "null" @@ -4892,31 +4890,6 @@ "alternate_names": [], "publications": [] }, - "H3.6_(Homo_sapiens)": { - "level": "variant_group", - "description": { - "summary": "H3.6_(Homo_sapiens) is a human histone variant, similar to histone variant H3.3 in amino acid sequence, encoded by the H3F3AP6 gene. Expression levels of H3F3AP6 are extremely low, as compared to H3F3B gene (encodes histone H3.3) [taguchi_crystal_2017]. H3.6 nucleosomes are substantially unstable due to the Val62 residue, which weakens interactions with H4 [taguchi_crystal_2017].", - "taxonomy": "null", - "genes": "null", - "evolution": "null", - "expression": "null", - "knock-out": "null", - "function": "null", - "sequence": "null", - "localization": "null", - "deposition": "null", - "structure": "H3.6 nucleosomes are substantially unstable due to the Val62 residue, which weakens interactions with H4 [taguchi_crystal_2017].", - "interactions": "null", - "disease": "null", - "caveats": "null" - }, - "taxonomic_span": "null", - "taxonomic_span_id": "null", - "alternate_names": [], - "publications": [ - "taguchi_crystal_2017" - ] - }, "H3.7_(Homo_sapiens)": { "level": "variant_group", "description": { @@ -4942,31 +4915,6 @@ "taguchi_crystal_2017" ] }, - "H3.8_(Homo_sapiens)": { - "level": "variant_group", - "description": { - "summary": "H3.8_(Homo_sapiens) is a human histone variant, similar to histone variant H3.3 in amino acid sequence, encoded by the H3F3AP5 gene. Expression levels of H3F3AP5 are extremely low, as compared to H3F3B gene (encodes histone H3.3) [taguchi_crystal_2017]. H3.8 nucleosomes are extremely unstable [taguchi_crystal_2017].", - "taxonomy": "null", - "genes": "null", - "evolution": "null", - "expression": "null", - "knock-out": "null", - "function": "null", - "sequence": "null", - "localization": "null", - "deposition": "null", - "structure": "H3.8 nucleosomes are extremely unstable [taguchi_crystal_2017].", - "interactions": "null", - "disease": "null", - "caveats": "null" - }, - "taxonomic_span": "null", - "taxonomic_span_id": "null", - "alternate_names": [], - "publications": [ - "taguchi_crystal_2017" - ] - }, "H3.B_(Giardia?)": { "level": "variant_group", "description": { diff --git a/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241205.ipynb b/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241205.ipynb new file mode 100644 index 0000000..802fd0c --- /dev/null +++ b/CURATED_SET/curated_service/curatedDB/UPD_curatedDB_241205.ipynb @@ -0,0 +1,968 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "fb32231a-293e-45b8-af7d-d996c91a4edd", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from Bio import Entrez, SeqIO\n", + "from mysql.connector import connection\n", + "from sshtunnel import SSHTunnelForwarder\n", + "\n", + "Entrez.email = \"l.singh@intbio.org\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d5fa78a7-4901-4e63-9d91-d9ed584d2f62", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "with open(\"db_curated_server_info.txt\", \"r\") as file:\n", + " lines = file.readlines()\n", + "\n", + "config = {}\n", + "\n", + "for line in lines:\n", + " line = line.strip()\n", + " if line and not line.startswith(\"#\"):\n", + " key, value = line.split(\"=\", 1)\n", + " config[key] = value.strip()\n", + "\n", + "server_name = config.get(\"server_name\")\n", + "srever_port = int(config.get(\"srever_port\"))\n", + "ssh_password = config.get(\"ssh_password\")\n", + "ssh_username = config.get(\"ssh_username\")\n", + "db_adress = config.get(\"db_adress\")\n", + "db_port = int(config.get(\"db_port\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ebe3d2e8-9736-4fd9-b3f1-5ec2d255567d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "39737\n" + ] + } + ], + "source": [ + "tunnel = SSHTunnelForwarder(\n", + " (server_name, srever_port),\n", + " ssh_password=ssh_password,\n", + " ssh_username=ssh_username,\n", + " remote_bind_address=(db_adress, db_port),\n", + ")\n", + "tunnel.start()\n", + "print(tunnel.local_bind_port)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0de93630-7502-40f3-ad24-42edf7a11eef", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "conn = connection.MySQLConnection(\n", + " user=\"db_user\",\n", + " password=\"db_password\",\n", + " host=\"localhost\",\n", + " port=tunnel.local_bind_port,\n", + " database=\"db_name\",\n", + ")\n", + "cursor = conn.cursor()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b4767ba0-4ea1-43ae-953f-f2575d9b7a69", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('alternative_name',),\n", + " ('histone',),\n", + " ('histone_description',),\n", + " ('histone_has_publication',),\n", + " ('publication',),\n", + " ('sequence',),\n", + " ('sequence_has_publication',)]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"SHOW TABLES;\"\n", + "cursor.execute(query)\n", + "cursor.fetchall()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "c68d4b8f-c22c-4f8e-bb4f-71f95668656e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# add_histone = (\n", + "# \"INSERT INTO histone \"\n", + "# \"(id, level, taxonomic_span, taxonomic_span_id, description, parent) \"\n", + "# \"VALUES (%(id)s, %(level)s, %(taxonomic_span)s, %(taxonomic_span_id)s, %(description)s, %(parent)s)\"\n", + "# )\n", + "# add_histone_description = (\n", + "# \"INSERT INTO histone_description \"\n", + "# \"(summary, taxonomy, genes, evolution, expression, knock_out, function, sequence, localization, deposition, structure, interactions, disease, caveats) \"\n", + "# \"VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)\"\n", + "# )\n", + "# add_publication = (\n", + "# \"INSERT INTO publication \"\n", + "# \"(id, title, doi, author, year) \"\n", + "# \"VALUES (%(id)s, %(title)s, %(doi)s, %(author)s, %(year)s)\"\n", + "# )\n", + "# add_sequence_has_publication = (\n", + "# \"INSERT INTO sequence_has_publication \"\n", + "# \"(sequence_accession, publication_id) \"\n", + "# \"VALUES (%s, %s)\"\n", + "# )\n", + "# add_alternate_names = (\n", + "# \"INSERT INTO alternative_name \"\n", + "# \"(name, taxonomy, gene, splice, histone) \"\n", + "# \"VALUES (%(name)s, %(taxonomy)s, %(gene)s, %(splice)s, %(histone)s)\"\n", + "# )\n", + "# add_histone_has_publication = (\n", + "# \"INSERT INTO histone_has_publication \"\n", + "# \"(histone_id, publication_id) \"\n", + "# \"VALUES (%s, %s)\"\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "id": "f0e30842-5f18-4d84-9f5d-50d87f0a90fa", + "metadata": { + "tags": [] + }, + "source": [ + "# Delete H3.6_(Homo_sapiens)\n", + "\n", + "This is pseudogene according to [article](https://epigeneticsandchromatin.biomedcentral.com/articles/10.1186/s13072-022-00467-2). " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e1f735c6-e34f-4475-b74f-667e3ef6dd57", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparentidsummarytaxonomygenes...expressionknock_outfunctionsequencelocalizationdepositionstructureinteractionsdiseasecaveats
0H3.6_(Homo_sapiens)variant_groupnullnull53H353H3.6_(Homo_sapiens) is a human histone variant...nullnull...nullnullnullnullnullnullH3.6 nucleosomes are substantially unstable du...nullnullnull
\n", + "

1 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "0 H3.6_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent id summary \\\n", + "0 53 H3 53 H3.6_(Homo_sapiens) is a human histone variant... \n", + "\n", + " taxonomy genes ... expression knock_out function sequence localization \\\n", + "0 null null ... null null null null null \n", + "\n", + " deposition structure interactions \\\n", + "0 null H3.6 nucleosomes are substantially unstable du... null \n", + "\n", + " disease caveats \n", + "0 null null \n", + "\n", + "[1 rows x 21 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_description hd \"\n", + " \"ON h.description = hd.id \"\n", + " \"WHERE h.id='H3.6_(Homo_sapiens)'\"\n", + ")\n", + "cursor.execute(query)\n", + "pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4b8536f7-b5e9-4507-b6e3-95e2dd86dd38", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
0H3.6_(Homo_sapiens)variant_groupnullnull53H3H3.6_(Homo_sapiens)taguchi_crystal_2017
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "0 H3.6_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent histone_id publication_id \n", + "0 53 H3 H3.6_(Homo_sapiens) taguchi_crystal_2017 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id \"\n", + " \"WHERE h.id='H3.6_(Homo_sapiens)'\"\n", + ")\n", + "cursor.execute(query)\n", + "pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])" + ] + }, + { + "cell_type": "markdown", + "id": "ecf3057e-77e4-4c1f-8bb7-ed2f96a47593", + "metadata": {}, + "source": [ + "## Delete relationships first" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "59cb1373", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "query = \"DELETE FROM histone_has_publication WHERE histone_id = 'H3.6_(Homo_sapiens)'\" \n", + "cursor.execute(query) " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "fdc609a7-9949-4b76-bb27-352db00414c1", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "UPDATE histone SET description=null WHERE id = 'H3.6_(Homo_sapiens)'\n" + ] + } + ], + "source": [ + "query = \"UPDATE histone SET description=null WHERE id = 'H3.6_(Homo_sapiens)'\" \n", + "print(query) \n", + "cursor.execute(query) " + ] + }, + { + "cell_type": "markdown", + "id": "6f832021-f290-433e-87eb-82c25b158127", + "metadata": {}, + "source": [ + "## Delete H3.6_(Homo_sapiens)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "020757e0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "query = \"DELETE FROM histone WHERE id = 'H3.6_(Homo_sapiens)'\" \n", + "print(query) \n", + "cursor.execute(query) " + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "e13a0259", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
0H3.8_(Homo_sapiens)variant_groupnullnull55H3H3.8_(Homo_sapiens)taguchi_crystal_2017
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "0 H3.8_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent histone_id publication_id \n", + "0 55 H3 H3.8_(Homo_sapiens) taguchi_crystal_2017 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id \"\n", + " \"WHERE h.id='H3.8_(Homo_sapiens)'\"\n", + ")\n", + "cursor.execute(query)\n", + "pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "730f42c9-6611-4baf-b5dc-0b7454b5aef2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Make sure data is committed to the database\n", + "conn.commit()" + ] + }, + { + "cell_type": "markdown", + "id": "7671923c-f86f-452a-b96c-2d9b15ea7435", + "metadata": { + "tags": [] + }, + "source": [ + "# Delete H3.8_(Homo_sapiens)\n", + "\n", + "This is pseudogene according to [article](https://epigeneticsandchromatin.biomedcentral.com/articles/10.1186/s13072-022-00467-2). " + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "30d5e29a", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
0H3.8_(Homo_sapiens)variant_groupnullnull55H3H3.8_(Homo_sapiens)taguchi_crystal_2017
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "0 H3.8_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent histone_id publication_id \n", + "0 55 H3 H3.8_(Homo_sapiens) taguchi_crystal_2017 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id \"\n", + " \"WHERE h.id='H3.8_(Homo_sapiens)'\"\n", + ")\n", + "cursor.execute(query)\n", + "pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])" + ] + }, + { + "cell_type": "markdown", + "id": "72335286-6997-4b23-b6dd-8e8ccafbeb1d", + "metadata": { + "tags": [] + }, + "source": [ + "## Delete relationships first" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "171cc36b-406f-4e35-b7c6-72b6b2ceebdb", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "query = \"DELETE FROM histone_has_publication WHERE histone_id = 'H3.8_(Homo_sapiens)'\" \n", + "cursor.execute(query) " + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "1d8fa008-9c5e-48b7-b44d-250fba170c41", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "UPDATE histone SET description=null WHERE id = 'H3.8_(Homo_sapiens)'\n" + ] + } + ], + "source": [ + "query = \"UPDATE histone SET description=null WHERE id = 'H3.8_(Homo_sapiens)'\" \n", + "print(query) \n", + "cursor.execute(query) " + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "3ead3172-0ba8-4e4e-b166-a0cc40bbdb45", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
0H3.8_(Homo_sapiens)variant_groupnullnullNoneH3NoneNone
\n", + "
" + ], + "text/plain": [ + " id level taxonomic_span taxonomic_span_id \\\n", + "0 H3.8_(Homo_sapiens) variant_group null null \n", + "\n", + " description parent histone_id publication_id \n", + "0 None H3 None None " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id \"\n", + " \"WHERE h.id='H3.8_(Homo_sapiens)'\"\n", + ")\n", + "cursor.execute(query)\n", + "pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])" + ] + }, + { + "cell_type": "markdown", + "id": "3a40dd2f-ca2e-4cfc-952c-a6e11840d1c8", + "metadata": {}, + "source": [ + "## Delete H3.8_(Homo_sapiens)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "fd66463f-5f2a-4afe-a7a6-abe83f9f5358", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DELETE FROM histone WHERE id = 'H3.8_(Homo_sapiens)'\n" + ] + } + ], + "source": [ + "query = \"DELETE FROM histone WHERE id = 'H3.8_(Homo_sapiens)'\" \n", + "print(query) \n", + "cursor.execute(query) " + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "a1718818-08ad-45bd-a422-beb86c0046a8", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [id, level, taxonomic_span, taxonomic_span_id, description, parent, histone_id, publication_id]\n", + "Index: []" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id \"\n", + " \"WHERE h.id='H3.8_(Homo_sapiens)'\"\n", + ")\n", + "cursor.execute(query)\n", + "pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "d3e3491c-4403-4251-abe8-1d198f3b683b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idleveltaxonomic_spantaxonomic_span_iddescriptionparenthistone_idpublication_id
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [id, level, taxonomic_span, taxonomic_span_id, description, parent, histone_id, publication_id]\n", + "Index: []" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = (\n", + " \"SELECT * FROM histone h LEFT JOIN histone_has_publication hp \"\n", + " \"ON h.id = hp.histone_id \"\n", + " \"WHERE h.id='H3.6_(Homo_sapiens)'\"\n", + ")\n", + "cursor.execute(query)\n", + "pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "ad3e6857-e0ac-4b65-bf0a-5f6f40352b3f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Make sure data is committed to the database\n", + "conn.commit()" + ] + }, + { + "cell_type": "markdown", + "id": "d3c8a219-f508-46e1-936f-7d6040016e39", + "metadata": { + "tags": [] + }, + "source": [ + "# Close connections" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "572059d8-54f9-4ec7-a2c6-1c8066552845", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "cursor.close()\n", + "conn.close()\n", + "tunnel.stop()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "790ee020-09af-4dcc-9c36-f1b3f69ea8a7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".conda-histdb_env", + "language": "python", + "name": "conda-env-.conda-histdb_env-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/CURATED_SET/curated_service/curatedDB/save_curatedDB_toFiles.ipynb b/CURATED_SET/curated_service/curatedDB/save_curatedDB_toFiles.ipynb index e0da09a..1e0289c 100644 --- a/CURATED_SET/curated_service/curatedDB/save_curatedDB_toFiles.ipynb +++ b/CURATED_SET/curated_service/curatedDB/save_curatedDB_toFiles.ipynb @@ -53,7 +53,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "44545\n" + "39083\n" ] } ], @@ -428,8 +428,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 7min 59s, sys: 7.63 s, total: 8min 7s\n", - "Wall time: 7min 50s\n" + "CPU times: user 7min 49s, sys: 7.39 s, total: 7min 56s\n", + "Wall time: 7min 40s\n" ] } ],