From 2ed85378b02a412edbc537b62f26322a952a4167 Mon Sep 17 00:00:00 2001 From: u-khasanova Date: Thu, 11 Jul 2024 05:46:31 +0000 Subject: [PATCH] update index.html --- Untitled.ipynb | 132 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 Untitled.ipynb diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000..9d10d74 --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,132 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 32, + "id": "a60abd40-dccc-4965-8c3e-30f1dfc6694b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "a742ad43-6a73-4dc8-8eaf-7dda7bb997ab", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "uniprot = pd.read_csv('./data/uniprot_loc_with_codes.csv')\n", + "hpa = pd.read_csv('./data/hpa_loc_with_codes.csv')\n", + "opencell = pd.read_csv('./data/opencell_loc_with_codes.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "7853dac9-3cbb-4e4d-b81d-8e04d51882e1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "uniprot_loc = [x for x in set(uniprot['localization']) if x.startswith('Nucl')] + ['PML body', 'Cajal body', 'Gem']\n", + "hpa_loc = [x for x in set(hpa['localization']) if x.startswith('Nucl')] + ['Mitotic chromosome', 'Kinetochore']\n", + "opencell_loc = [x for x in set(opencell['localization']) if x.startswith('Nucl')] + ['Chromosome']" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "7475f0f6-4c29-4952-96e3-4334ea4e5417", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "uniprot_loc_idx=[]\n", + "hpa_loc_idx=[]\n", + "opencell_loc_idx=[]\n", + "for i in range(uniprot.shape[0]):\n", + " if uniprot.loc[i, 'localization'] in uniprot_loc:\n", + " uniprot_loc_idx.append(i)\n", + "for i in range(hpa.shape[0]):\n", + " if hpa.loc[i, 'localization'] in hpa_loc:\n", + " hpa_loc_idx.append(i)\n", + "for i in range(opencell.shape[0]):\n", + " if opencell.loc[i, 'localization'] in opencell_loc:\n", + " opencell_loc_idx.append(i)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "9975b790-17a3-49cd-9ea8-262372483120", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "uniprot_nucl = uniprot.iloc[uniprot_loc_idx,:]\n", + "hpa_nucl = hpa.iloc[hpa_loc_idx, :]\n", + "opencell_nucl = opencell.iloc[opencell_loc_idx, :]\n", + "uniprot_nucl.index = uniprot_nucl['Entry']\n", + "hpa_nucl.index = hpa_nucl['Entry']\n", + "opencell_nucl.index = opencell_nucl['Entry']\n", + "uniprot_nucl.drop('Entry',axis=1,inplace=True)\n", + "hpa_nucl.drop('Entry',axis=1,inplace=True)\n", + "opencell_nucl.drop('Entry',axis=1,inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "ca9f3220-c599-44a4-9476-1fb9eb943edc", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "uniprot_nucl.to_csv('uniprot_only_nucl.csv')\n", + "hpa_nucl.to_csv('hpa_only_nucl.csv')\n", + "opencell_nucl.to_csv('opencell_only_nucl.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d35213dc-ae56-4724-9e78-cabad02c5685", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "moldyn", + "language": "python", + "name": "conda-env-moldyn-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}