diff --git a/README.md b/README.md index 5589419..c5018b1 100644 --- a/README.md +++ b/README.md @@ -25,8 +25,9 @@ With a growing number of aging clocks and biomarkers of aging, comparing and ana ## πŸ“ To-Do List +- [ ] Incorporate more murine DNA methylation clocks - [ ] Integrate scAge and scRNAseq clocks (and datasets) -- [ ] Incorporate murine DNA methylation and proteomic clocks (and datasets) +- [ ] Incorporate proteomic clocks (and datasets) ## ❓ Can't find an aging clock? diff --git a/clocks/notebooks/bitage.ipynb b/clocks/notebooks/bitage.ipynb index f2d39c5..5dba314 100644 --- a/clocks/notebooks/bitage.ipynb +++ b/clocks/notebooks/bitage.ipynb @@ -108,7 +108,7 @@ " 'species': 'C elegans',\n", " 'data_type': 'transcriptomics',\n", " 'year': 2021,\n", - " 'implementation_approved_by_author(s)': 'βŒ›',\n", + " 'implementation_approved_by_author(s)': 'βœ…',\n", " 'preprocessing': weights_dict['preprocessing'], \n", " 'postprocessing': weights_dict['postprocessing'], \n", " 'citation': \"Meyer, David H., and BjΓΆrn Schumacher. 
\\\"BiT age: A transcriptome‐based aging clock near the theoretical limit of accuracy.\\\" Aging cell 20.3 (2021): e13320.\",\n", diff --git a/clocks/notebooks/join_metadata.ipynb b/clocks/notebooks/join_metadata.ipynb index df3362f..c6e7ce7 100644 --- a/clocks/notebooks/join_metadata.ipynb +++ b/clocks/notebooks/join_metadata.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 14, + "execution_count": 7, "id": "59eb29df-0597-4d45-b2e6-8825670effe2", "metadata": {}, "outputs": [], @@ -40,6 +40,14 @@ "\n", "torch.save(combined_dictionary, '../metadata/all_clock_metadata.pt')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "047fee95-c914-4c3b-872b-c108696caa1b", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/clocks/notebooks/meermultitissue.ipynb b/clocks/notebooks/meermultitissue.ipynb new file mode 100644 index 0000000..b6c64f3 --- /dev/null +++ b/clocks/notebooks/meermultitissue.ipynb @@ -0,0 +1,197 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "fb157849-5454-4a60-8548-fff633fff764", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import pandas as pd\n", + "import pyaging as pya\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "46c6fc26-9a6b-4027-bd01-601b70eb401a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.system(\"curl -o coefficients.xlsx https://elifesciences.org/download/aHR0cHM6Ly9jZG4uZWxpZmVzY2llbmNlcy5vcmcvYXJ0aWNsZXMvNDA2NzUvZWxpZmUtNDA2NzUtc3VwcDMtdjIueGxzeA--/elife-40675-supp3-v2.xlsx?_hash=qzOMc4yUFACfDFG%2FlgxkFTHWt%2BSXSmP9zz1BM3oOTRM%3D\")" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "bf89303a-6e7f-4585-a439-655fe0a79b05", + "metadata": {}, + "outputs": [], + "source": [ + "# You have to manually open Excel and 
convert to .csv (multi tissue, whole lifespan)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "b9f484b1-f501-41b7-9565-82e03bfe97dc", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('coefficients.csv')\n", + "\n", + "intercept = df['Position'].iloc[-1]\n", + "\n", + "df = df[0:-2]\n", + "\n", + "df['feature'] = df['Chromosome'].astype(str) + ':' + df['Position'].astype(int).astype(str)\n", + "df['coefficient'] = df['Weight']" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "a284fe99-dc47-4f0c-b2ff-274e136e7020", + "metadata": {}, + "outputs": [], + "source": [ + "features = df['feature'].tolist()\n", + "\n", + "weights = torch.tensor(df['coefficient'].tolist()).unsqueeze(0)\n", + "intercept = torch.tensor([intercept])" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "7b4c3f6b-72af-4e99-84c4-65b8ef58c91d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearModel(\n", + " (linear): Linear(in_features=435, out_features=1, bias=True)\n", + ")" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = pya.models.LinearModel(len(features))\n", + "\n", + "model.linear.weight.data = weights\n", + "model.linear.bias.data = intercept\n", + "\n", + "model" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "e32706f0-ce07-455e-bb17-1993c1c0e152", + "metadata": {}, + "outputs": [], + "source": [ + "weights_dict = {\n", + " 'preprocessing': \"times100\", \n", + " 'preprocessing_helper': None,\n", + " 'postprocessing': None,\n", + " 'postprocessing_helper': None,\n", + " 'features': features,\n", + " 'weight_dict': model.state_dict(),\n", + "}\n", + "\n", + "metadata_dict = {\n", + " 'species': 'Mus musculus',\n", + " 'data_type': 'methylation',\n", + " 'year': 2018,\n", + " 'implementation_approved_by_author(s)': 'βŒ›',\n", + " 'preprocessing': weights_dict['preprocessing'], \n", + " 
'postprocessing': weights_dict['postprocessing'], \n", + " 'citation': \"Meer, Margarita V., et al. \\\"A whole lifespan mouse multi-tissue DNA methylation clock.\\\" Elife 7 (2018): e40675.\",\n", + " 'doi': \"https://doi.org/10.7554/eLife.40675\",\n", + " \"notes\": \"Predicts age in days\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "34136f3c-92b8-4641-a103-381d3a7dd857", + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(weights_dict, '../weights/meermultitissue.pt')\n", + "torch.save(metadata_dict, '../metadata/meermultitissue.pt')" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "303e9b76-993f-4691-af9d-1151b3c7638f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.system(\"rm coefficients.xlsx\")\n", + "os.system(\"rm coefficients.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f111912-501e-4d2c-a592-1cc9829092dd", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/clocks/notebooks/thompsonmultitissue.ipynb b/clocks/notebooks/thompsonmultitissue.ipynb new file mode 100644 index 0000000..24c80b5 --- /dev/null +++ b/clocks/notebooks/thompsonmultitissue.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "fb157849-5454-4a60-8548-fff633fff764", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import pandas as pd\n", + "import 
pyaging as pya\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "46c6fc26-9a6b-4027-bd01-601b70eb401a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.system(\"git clone https://github.com/kerepesi/MouseAgingClocks.git\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b9f484b1-f501-41b7-9565-82e03bfe97dc", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_table('MouseAgingClocks/ClockData/Thompson2018-ElasticNet_aging_clock.txt', skiprows=1)\n", + "\n", + "intercept = df['Coefficient'].iloc[0]\n", + "\n", + "df = df[1:]\n", + "\n", + "df['feature'] = df['Chromosome'].astype(str) + ':' + df['Coordinate'].astype(int).astype(str)\n", + "df['coefficient'] = df['Coefficient']" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a284fe99-dc47-4f0c-b2ff-274e136e7020", + "metadata": {}, + "outputs": [], + "source": [ + "features = df['feature'].tolist()\n", + "\n", + "weights = torch.tensor(df['coefficient'].tolist()).unsqueeze(0)\n", + "intercept = torch.tensor([intercept])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7b4c3f6b-72af-4e99-84c4-65b8ef58c91d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearModel(\n", + " (linear): Linear(in_features=582, out_features=1, bias=True)\n", + ")" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = pya.models.LinearModel(len(features))\n", + "\n", + "model.linear.weight.data = weights\n", + "model.linear.bias.data = intercept\n", + "\n", + "model" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e32706f0-ce07-455e-bb17-1993c1c0e152", + "metadata": {}, + "outputs": [], + "source": [ + "weights_dict = {\n", + " 'preprocessing': None, \n", + " 'preprocessing_helper': 
None,\n", + " 'postprocessing': None,\n", + " 'postprocessing_helper': None,\n", + " 'features': features,\n", + " 'weight_dict': model.state_dict(),\n", + "}\n", + "\n", + "metadata_dict = {\n", + " 'species': 'Mus musculus',\n", + " 'data_type': 'methylation',\n", + " 'year': 2018,\n", + " 'implementation_approved_by_author(s)': 'βŒ›',\n", + " 'preprocessing': weights_dict['preprocessing'], \n", + " 'postprocessing': weights_dict['postprocessing'], \n", + " 'citation': \"Thompson, Michael J., et al. \\\"A multi-tissue full lifespan epigenetic clock for mice.\\\" Aging (Albany NY) 10.10 (2018): 2832.\",\n", + " 'doi': \"https://doi.org/10.18632/aging.101590\",\n", + " \"notes\": None,\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "34136f3c-92b8-4641-a103-381d3a7dd857", + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(weights_dict, '../weights/thompsonmultitissue.pt')\n", + "torch.save(metadata_dict, '../metadata/thompsonmultitissue.pt')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "303e9b76-993f-4691-af9d-1151b3c7638f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.system(\"rm -r MouseAgingClocks\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f111912-501e-4d2c-a592-1cc9829092dd", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyaging/data/_data.py b/pyaging/data/_data.py index 7d1e64a..dedf2d8 
100644 --- a/pyaging/data/_data.py +++ b/pyaging/data/_data.py @@ -22,8 +22,8 @@ def download_example_data( Parameters ---------- data_type : str - The type of data to download. Valid options are 'GSE139307' (human methylation), - 'GSE223748' (mammalian methylation), 'ENCFF386QWG' (histone mark), + The type of data to download. Valid options are 'GSE139307' (human methylation), 'GSE130735' (mouse + methylation), 'GSE223748' (mammalian methylation), 'ENCFF386QWG' (histone mark), 'GSE65765' (C. elegans RNA-seq), 'GSE193140' (ATAC-Seq), 'blood_chemistry_example' (blood chemistry). dir : str @@ -59,6 +59,7 @@ def download_example_data( logger.first_info("Starting download_example_data function") data_type_to_url = { + "GSE130735": "https://pyaging.s3.amazonaws.com/example_data/GSE130735_subset.pkl", "GSE193140": "https://pyaging.s3.amazonaws.com/example_data/GSE193140.pkl", "GSE139307": "https://pyaging.s3.amazonaws.com/example_data/GSE139307.pkl", "GSE223748": "https://pyaging.s3.amazonaws.com/example_data/GSE223748_subset.pkl", diff --git a/pyaging/predict/_pred_utils.py b/pyaging/predict/_pred_utils.py index 186cbaa..9d49436 100644 --- a/pyaging/predict/_pred_utils.py +++ b/pyaging/predict/_pred_utils.py @@ -295,6 +295,8 @@ def initialize_model( "leecpc", "leerpc", "leerefinedrpc", + "meermultitissue", + "thompsonmultitissue", ]: model = LinearModel(len(features)) elif clock_name in [ @@ -412,6 +414,10 @@ def preprocess_data( X = adata.X X = np.log1p(X) adata.X = X + elif preprocessing == "times100": + X = adata.X + X = X * 100 + adata.X = X elif preprocessing == "tpm_norm_log1p": X = adata.X X = tpm_norm_log1p(X, preprocessing_helper) diff --git a/pyaging/preprocess/_preprocess.py b/pyaging/preprocess/_preprocess.py index 70744d0..0aef775 100644 --- a/pyaging/preprocess/_preprocess.py +++ b/pyaging/preprocess/_preprocess.py @@ -133,7 +133,7 @@ def df_to_adata( imputer_strategy : str, optional The strategy for imputing missing values in 'df'. 
Supported strategies include 'mean', - 'median', 'constant', and 'knn'. Defaults to 'knn'. + 'median', 'constant' (0 values), and 'knn'. Defaults to 'knn'. verbose: bool Whether to log the output to console with the logger. Defaults to True. diff --git a/tutorials/tutorial_atacseq.ipynb b/tutorials/tutorial_atacseq.ipynb index e2fff99..7a12196 100644 --- a/tutorials/tutorial_atacseq.ipynb +++ b/tutorials/tutorial_atacseq.ipynb @@ -79,7 +79,7 @@ "text": [ "|-----> πŸ—οΈ Starting download_example_data function\n", "|-----------> Data found in pyaging_data/GSE193140.pkl\n", - "|-----> πŸŽ‰ Done! [0.0007s]\n" + "|-----> πŸŽ‰ Done! [0.0021s]\n" ] } ], @@ -370,22 +370,22 @@ "text": [ "|-----> πŸ—οΈ Starting df_to_adata function\n", "|-----> βš™οΈ Create anndata object started\n", - "|-----> βœ… Create anndata object finished [0.0031s]\n", + "|-----> βœ… Create anndata object finished [0.0025s]\n", "|-----> βš™οΈ Add metadata to anndata started\n", "|-----------? No metadata provided. Leaving adata.obs empty\n", - "|-----> ⚠️ Add metadata to anndata finished [0.0010s]\n", + "|-----> ⚠️ Add metadata to anndata finished [0.0005s]\n", "|-----> βš™οΈ Log data statistics started\n", "|-----------> There are 157 observations\n", "|-----------> There are 80400 features\n", "|-----------> Total missing values: 0\n", "|-----------> Percentage of missing values: 0.00%\n", - "|-----> βœ… Log data statistics finished [0.0054s]\n", + "|-----> βœ… Log data statistics finished [0.0063s]\n", "|-----> βš™οΈ Impute missing values started\n", "|-----------> No missing values found. No imputation necessary\n", - "|-----> βœ… Impute missing values finished [0.0070s]\n", + "|-----> βœ… Impute missing values finished [0.0053s]\n", "|-----> βš™οΈ Add unstructured data to anndata started\n", "|-----> βœ… Add unstructured data to anndata finished [0.0003s]\n", - "|-----> πŸŽ‰ Done! [0.0197s]\n" + "|-----> πŸŽ‰ Done! 
[0.0175s]\n" ] } ], @@ -454,62 +454,64 @@ "|-----> πŸ—οΈ Starting predict_age function\n", "|-----> βš™οΈ Set PyTorch device started\n", "|-----------> Using device: cpu\n", - "|-----> βœ… Set PyTorch device finished [0.0007s]\n", + "|-----> βœ… Set PyTorch device finished [0.0022s]\n", "|-----> πŸ•’ Processing clock: OcampoATAC1\n", "|-----------> βš™οΈ Load clock started\n", - "|-----------------> Data found in pyaging_data/ocampoatac1.pt\n", - "|-----------> βœ… Load clock finished [0.0061s]\n", + "|-----------------> Downloading data to pyaging_data/ocampoatac1.pt\n", + "|-----------------> in progress: 100.0000%\n", + "|-----------> βœ… Load clock finished [1.0218s]\n", "|-----------> βš™οΈ Check features in adata started\n", "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0005s]\n", + "|-----------> βœ… Check features in adata finished [0.0010s]\n", "|-----------> βš™οΈ Preprocess data started\n", "|-----------------> Preprocessing data with function tpm_norm_log1p\n", - "|-----------> βœ… Preprocess data finished [0.1565s]\n", + "|-----------> βœ… Preprocess data finished [0.2021s]\n", "|-----------> βš™οΈ Filter features and extract data matrix started\n", - "|-----------> βœ… Filter features and extract data matrix finished [0.0023s]\n", + "|-----------> βœ… Filter features and extract data matrix finished [0.0025s]\n", "|-----------> βš™οΈ Convert numpy array to tensor started\n", - "|-----------> βœ… Convert numpy array to tensor finished [0.0007s]\n", + "|-----------> βœ… Convert numpy array to tensor finished [0.0008s]\n", "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize model finished [0.0010s]\n", + "|-----------> βœ… Initialize model finished [0.0021s]\n", "|-----------> βš™οΈ Predict ages with model started\n", - "|-----------> βœ… Predict ages with model finished [0.0007s]\n", + "|-----------> βœ… Predict ages with model 
finished [0.0005s]\n", "|-----------> βš™οΈ Convert tensor to numpy array started\n", "|-----------> βœ… Convert tensor to numpy array finished [0.0006s]\n", "|-----------> βš™οΈ Add predicted ages to adata started\n", - "|-----------> βœ… Add predicted ages to adata finished [0.0012s]\n", + "|-----------> βœ… Add predicted ages to adata finished [0.0009s]\n", "|-----------> βš™οΈ Load all clock metadata started\n", "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", - "|-----------> βœ… Load all clock metadata finished [0.0012s]\n", + "|-----------> βœ… Load all clock metadata finished [0.0033s]\n", "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", - "|-----------> βœ… Add clock metadata to adata.uns finished [0.0005s]\n", + "|-----------> βœ… Add clock metadata to adata.uns finished [0.0003s]\n", "|-----> πŸ•’ Processing clock: OcampoATAC2\n", "|-----------> βš™οΈ Load clock started\n", - "|-----------------> Data found in pyaging_data/ocampoatac2.pt\n", - "|-----------> βœ… Load clock finished [0.0090s]\n", + "|-----------------> Downloading data to pyaging_data/ocampoatac2.pt\n", + "|-----------------> in progress: 100.0000%\n", + "|-----------> βœ… Load clock finished [0.9181s]\n", "|-----------> βš™οΈ Check features in adata started\n", "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0010s]\n", + "|-----------> βœ… Check features in adata finished [0.0031s]\n", "|-----------> βš™οΈ Preprocess data started\n", "|-----------------> Layer with tpm_norm_log1p preprocessing is already in adata\n", - "|-----------> βœ… Preprocess data finished [0.0008s]\n", + "|-----------> βœ… Preprocess data finished [0.0041s]\n", "|-----------> βš™οΈ Filter features and extract data matrix started\n", - "|-----------> βœ… Filter features and extract data matrix finished [0.0019s]\n", + "|-----------> βœ… Filter features and extract data matrix finished 
[0.0017s]\n", "|-----------> βš™οΈ Convert numpy array to tensor started\n", - "|-----------> βœ… Convert numpy array to tensor finished [0.0006s]\n", + "|-----------> βœ… Convert numpy array to tensor finished [0.0009s]\n", "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize model finished [0.0013s]\n", + "|-----------> βœ… Initialize model finished [0.0016s]\n", "|-----------> βš™οΈ Predict ages with model started\n", - "|-----------> βœ… Predict ages with model finished [0.0006s]\n", + "|-----------> βœ… Predict ages with model finished [0.0014s]\n", "|-----------> βš™οΈ Convert tensor to numpy array started\n", - "|-----------> βœ… Convert tensor to numpy array finished [0.0004s]\n", + "|-----------> βœ… Convert tensor to numpy array finished [0.0016s]\n", "|-----------> βš™οΈ Add predicted ages to adata started\n", - "|-----------> βœ… Add predicted ages to adata finished [0.0008s]\n", + "|-----------> βœ… Add predicted ages to adata finished [0.0031s]\n", "|-----------> βš™οΈ Load all clock metadata started\n", "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", - "|-----------> βœ… Load all clock metadata finished [0.0025s]\n", + "|-----------> βœ… Load all clock metadata finished [0.0022s]\n", "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", - "|-----------> βœ… Add clock metadata to adata.uns finished [0.0004s]\n", - "|-----> πŸŽ‰ Done! [0.2084s]\n" + "|-----------> βœ… Add clock metadata to adata.uns finished [0.0006s]\n", + "|-----> πŸŽ‰ Done! [2.3205s]\n" ] } ], @@ -757,11 +759,12 @@ "{'species': 'Homo sapiens',\n", " 'data_type': 'atac',\n", " 'year': 2023,\n", - " 'preprocessing': 'log1p',\n", + " 'implementation_approved_by_author(s)': 'βŒ›',\n", + " 'preprocessing': 'tpm_norm_log1p',\n", " 'postprocessing': None,\n", " 'citation': 'Morandini, Francesco, et al. 
\"ATAC-clock: An aging clock based on chromatin accessibility.\" GeroScience (2023): 1-18.',\n", " 'doi': 'https://doi.org/10.1007/s11357-023-00986-0',\n", - " 'notes': 'This is the model trained solely on ATAC data produced by the Ocampo lab'}" + " 'notes': 'This is the model trained on the ATAC data produced by the Ocampo lab plus a public dataset'}" ] }, "execution_count": 12, diff --git a/tutorials/tutorial_bloodchemistry.ipynb b/tutorials/tutorial_bloodchemistry.ipynb index caf5c8a..08c31e7 100644 --- a/tutorials/tutorial_bloodchemistry.ipynb +++ b/tutorials/tutorial_bloodchemistry.ipynb @@ -72,7 +72,7 @@ "|-----> πŸ—οΈ Starting download_example_data function\n", "|-----------> Downloading data to pyaging_data/blood_chemistry_example.pkl\n", "|-----------> in progress: 100.0000%\n", - "|-----> πŸŽ‰ Done! [0.4727s]\n" + "|-----> πŸŽ‰ Done! [0.5078s]\n" ] } ], @@ -259,22 +259,22 @@ "text": [ "|-----> πŸ—οΈ Starting df_to_adata function\n", "|-----> βš™οΈ Create anndata object started\n", - "|-----> βœ… Create anndata object finished [0.0020s]\n", + "|-----> βœ… Create anndata object finished [0.0027s]\n", "|-----> βš™οΈ Add metadata to anndata started\n", "|-----------? No metadata provided. Leaving adata.obs empty\n", - "|-----> ⚠️ Add metadata to anndata finished [0.0010s]\n", + "|-----> ⚠️ Add metadata to anndata finished [0.0006s]\n", "|-----> βš™οΈ Log data statistics started\n", "|-----------> There are 30 observations\n", "|-----------> There are 10 features\n", "|-----------> Total missing values: 0\n", "|-----------> Percentage of missing values: 0.00%\n", - "|-----> βœ… Log data statistics finished [0.0019s]\n", + "|-----> βœ… Log data statistics finished [0.0016s]\n", "|-----> βš™οΈ Impute missing values started\n", "|-----------> No missing values found. 
No imputation necessary\n", - "|-----> βœ… Impute missing values finished [0.0010s]\n", + "|-----> βœ… Impute missing values finished [0.0009s]\n", "|-----> βš™οΈ Add unstructured data to anndata started\n", - "|-----> βœ… Add unstructured data to anndata finished [0.0003s]\n", - "|-----> πŸŽ‰ Done! [0.0102s]\n" + "|-----> βœ… Add unstructured data to anndata finished [0.0009s]\n", + "|-----> πŸŽ‰ Done! [0.0104s]\n" ] } ], @@ -343,36 +343,36 @@ "|-----> πŸ—οΈ Starting predict_age function\n", "|-----> βš™οΈ Set PyTorch device started\n", "|-----------> Using device: cpu\n", - "|-----> βœ… Set PyTorch device finished [0.0006s]\n", + "|-----> βœ… Set PyTorch device finished [0.0018s]\n", "|-----> πŸ•’ Processing clock: PhenoAge\n", "|-----------> βš™οΈ Load clock started\n", "|-----------------> Downloading data to pyaging_data/phenoage.pt\n", "|-----------------> in progress: 100.0000%\n", - "|-----------> βœ… Load clock finished [0.4947s]\n", + "|-----------> βœ… Load clock finished [0.5525s]\n", "|-----------> βš™οΈ Check features in adata started\n", "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0017s]\n", + "|-----------> βœ… Check features in adata finished [0.0009s]\n", "|-----------> βš™οΈ Filter features and extract data matrix started\n", - "|-----------> βœ… Filter features and extract data matrix finished [0.0030s]\n", + "|-----------> βœ… Filter features and extract data matrix finished [0.0022s]\n", "|-----------> βš™οΈ Convert numpy array to tensor started\n", - "|-----------> βœ… Convert numpy array to tensor finished [0.0013s]\n", + "|-----------> βœ… Convert numpy array to tensor finished [0.0011s]\n", "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize model finished [0.0023s]\n", + "|-----------> βœ… Initialize model finished [0.0019s]\n", "|-----------> βš™οΈ Predict ages with model started\n", - "|-----------> βœ… Predict ages 
with model finished [0.0012s]\n", + "|-----------> βœ… Predict ages with model finished [0.0008s]\n", "|-----------> βš™οΈ Convert tensor to numpy array started\n", - "|-----------> βœ… Convert tensor to numpy array finished [0.0008s]\n", + "|-----------> βœ… Convert tensor to numpy array finished [0.0014s]\n", "|-----------> βš™οΈ Postprocess data started\n", "|-----------------> Postprocessing data with function mortality_to_phenoage\n", - "|-----------> βœ… Postprocess data finished [0.0015s]\n", + "|-----------> βœ… Postprocess data finished [0.0016s]\n", "|-----------> βš™οΈ Add predicted ages to adata started\n", - "|-----------> βœ… Add predicted ages to adata finished [0.0012s]\n", + "|-----------> βœ… Add predicted ages to adata finished [0.0014s]\n", "|-----------> βš™οΈ Load all clock metadata started\n", "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", - "|-----------> βœ… Load all clock metadata finished [0.0024s]\n", + "|-----------> βœ… Load all clock metadata finished [0.0018s]\n", "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", - "|-----------> βœ… Add clock metadata to adata.uns finished [0.0006s]\n", - "|-----> πŸŽ‰ Done! [0.5255s]\n" + "|-----------> βœ… Add clock metadata to adata.uns finished [0.0003s]\n", + "|-----> πŸŽ‰ Done! [0.6331s]\n" ] } ], @@ -612,7 +612,8 @@ " 'postprocessing': 'mortality_to_phenoage',\n", " 'citation': 'Levine, Morgan E., et al. 
\"An epigenetic biomarker of aging for lifespan and healthspan.\" Aging (albany NY) 10.4 (2018): 573.',\n", " 'doi': 'https://doi.org/10.18632%2Faging.101414',\n", - " 'notes': 'To check the units for each of the features, please go to the paper in Table 1.'}" + " 'notes': 'To check the units for each of the features, please go to the paper in Table 1.',\n", + " 'implementation_approved_by_author(s)': 'βŒ›'}" ] }, "execution_count": 12, diff --git a/tutorials/tutorial_dnam.ipynb b/tutorials/tutorial_dnam.ipynb index 7e2bb4d..cc7ecad 100644 --- a/tutorials/tutorial_dnam.ipynb +++ b/tutorials/tutorial_dnam.ipynb @@ -84,9 +84,8 @@ "output_type": "stream", "text": [ "|-----> πŸ—οΈ Starting download_example_data function\n", - "|-----------> Downloading data to pyaging_data/GSE139307.pkl\n", - "|-----------> in progress: 100.0000%\n", - "|-----> πŸŽ‰ Done! [22.4135s]\n" + "|-----------> Data found in pyaging_data/GSE139307.pkl\n", + "|-----> πŸŽ‰ Done! [0.0018s]\n" ] } ], @@ -375,7 +374,7 @@ "text": [ "|-----> πŸ—οΈ Starting df_to_adata function\n", "|-----> βš™οΈ Create anndata object started\n", - "|-----> βœ… Create anndata object finished [0.0406s]\n", + "|-----> βœ… Create anndata object finished [0.0465s]\n", "|-----> βš™οΈ Add metadata to anndata started\n", "|-----------> Adding provided metadata to adata.obs\n", "|-----> βœ… Add metadata to anndata finished [0.0007s]\n", @@ -384,13 +383,13 @@ "|-----------> There are 485514 features\n", "|-----------> Total missing values: 526\n", "|-----------> Percentage of missing values: 0.00%\n", - "|-----> βœ… Log data statistics finished [0.0120s]\n", + "|-----> βœ… Log data statistics finished [0.0197s]\n", "|-----> βš™οΈ Impute missing values started\n", "|-----------> Imputing missing values using mean strategy\n", - "|-----> βœ… Impute missing values finished [0.1744s]\n", + "|-----> βœ… Impute missing values finished [0.1813s]\n", "|-----> βš™οΈ Add unstructured data to anndata started\n", "|-----> βœ… Add 
unstructured data to anndata finished [0.0003s]\n", - "|-----> πŸŽ‰ Done! [0.3132s]\n" + "|-----> πŸŽ‰ Done! [0.3691s]\n" ] } ], @@ -460,89 +459,86 @@ "|-----> πŸ—οΈ Starting predict_age function\n", "|-----> βš™οΈ Set PyTorch device started\n", "|-----------> Using device: cpu\n", - "|-----> βœ… Set PyTorch device finished [0.0007s]\n", + "|-----> βœ… Set PyTorch device finished [0.0016s]\n", "|-----> πŸ•’ Processing clock: Horvath2013\n", "|-----------> βš™οΈ Load clock started\n", - "|-----------------> Downloading data to pyaging_data/horvath2013.pt\n", - "|-----------------> in progress: 100.0000%\n", - "|-----------> βœ… Load clock finished [0.5573s]\n", + "|-----------------> Data found in pyaging_data/horvath2013.pt\n", + "|-----------> βœ… Load clock finished [0.0050s]\n", "|-----------> βš™οΈ Check features in adata started\n", "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0031s]\n", + "|-----------> βœ… Check features in adata finished [0.0025s]\n", "|-----------> βš™οΈ Filter features and extract data matrix started\n", - "|-----------> βœ… Filter features and extract data matrix finished [0.0033s]\n", + "|-----------> βœ… Filter features and extract data matrix finished [0.0035s]\n", "|-----------> βš™οΈ Convert numpy array to tensor started\n", - "|-----------> βœ… Convert numpy array to tensor finished [0.0013s]\n", + "|-----------> βœ… Convert numpy array to tensor finished [0.0022s]\n", "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize model finished [0.0024s]\n", + "|-----------> βœ… Initialize model finished [0.0019s]\n", "|-----------> βš™οΈ Predict ages with model started\n", - "|-----------> βœ… Predict ages with model finished [0.0012s]\n", + "|-----------> βœ… Predict ages with model finished [0.0010s]\n", "|-----------> βš™οΈ Convert tensor to numpy array started\n", - "|-----------> βœ… Convert tensor to numpy array 
finished [0.0011s]\n", + "|-----------> βœ… Convert tensor to numpy array finished [0.0013s]\n", "|-----------> βš™οΈ Postprocess data started\n", "|-----------------> Postprocessing data with function anti_log_linear\n", - "|-----------> βœ… Postprocess data finished [0.0015s]\n", + "|-----------> βœ… Postprocess data finished [0.0008s]\n", "|-----------> βš™οΈ Add predicted ages to adata started\n", - "|-----------> βœ… Add predicted ages to adata finished [0.0019s]\n", + "|-----------> βœ… Add predicted ages to adata finished [0.0006s]\n", "|-----------> βš™οΈ Load all clock metadata started\n", "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", - "|-----------> βœ… Load all clock metadata finished [0.0028s]\n", + "|-----------> βœ… Load all clock metadata finished [0.0023s]\n", "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", - "|-----------> βœ… Add clock metadata to adata.uns finished [0.0006s]\n", + "|-----------> βœ… Add clock metadata to adata.uns finished [0.0004s]\n", "|-----> πŸ•’ Processing clock: AltumAge\n", "|-----------> βš™οΈ Load clock started\n", - "|-----------------> Downloading data to pyaging_data/altumage.pt\n", - "|-----------------> in progress: 100.0000%\n", - "|-----------> βœ… Load clock finished [1.6789s]\n", + "|-----------------> Data found in pyaging_data/altumage.pt\n", + "|-----------> βœ… Load clock finished [0.0106s]\n", "|-----------> βš™οΈ Check features in adata started\n", "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0207s]\n", + "|-----------> βœ… Check features in adata finished [0.0147s]\n", "|-----------> βš™οΈ Preprocess data started\n", "|-----------------> Preprocessing data with function scale\n", - "|-----------> βœ… Preprocess data finished [0.1290s]\n", + "|-----------> βœ… Preprocess data finished [0.1357s]\n", "|-----------> βš™οΈ Filter features and extract data matrix started\n", - 
"|-----------> βœ… Filter features and extract data matrix finished [0.0108s]\n", + "|-----------> βœ… Filter features and extract data matrix finished [0.0132s]\n", "|-----------> βš™οΈ Convert numpy array to tensor started\n", - "|-----------> βœ… Convert numpy array to tensor finished [0.0009s]\n", + "|-----------> βœ… Convert numpy array to tensor finished [0.0012s]\n", "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize model finished [0.0044s]\n", + "|-----------> βœ… Initialize model finished [0.0057s]\n", "|-----------> βš™οΈ Predict ages with model started\n", "|-----------> βœ… Predict ages with model finished [0.0026s]\n", "|-----------> βš™οΈ Convert tensor to numpy array started\n", - "|-----------> βœ… Convert tensor to numpy array finished [0.0006s]\n", + "|-----------> βœ… Convert tensor to numpy array finished [0.0004s]\n", "|-----------> βš™οΈ Add predicted ages to adata started\n", - "|-----------> βœ… Add predicted ages to adata finished [0.0005s]\n", + "|-----------> βœ… Add predicted ages to adata finished [0.0006s]\n", "|-----------> βš™οΈ Load all clock metadata started\n", "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", - "|-----------> βœ… Load all clock metadata finished [0.0013s]\n", + "|-----------> βœ… Load all clock metadata finished [0.0012s]\n", "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", - "|-----------> βœ… Add clock metadata to adata.uns finished [0.0003s]\n", + "|-----------> βœ… Add clock metadata to adata.uns finished [0.0005s]\n", "|-----> πŸ•’ Processing clock: PCGrimAge\n", "|-----------> βš™οΈ Load clock started\n", - "|-----------------> Downloading data to pyaging_data/pcgrimage.pt\n", - "|-----------------> in progress: 100.0000%\n", - "|-----------> βœ… Load clock finished [97.5974s]\n", + "|-----------------> Data found in pyaging_data/pcgrimage.pt\n", + "|-----------> βœ… Load clock finished [0.1976s]\n", "|-----------> βš™οΈ Check 
features in adata started\n", "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0395s]\n", + "|-----------> βœ… Check features in adata finished [0.0413s]\n", "|-----------> βš™οΈ Filter features and extract data matrix started\n", - "|-----------> βœ… Filter features and extract data matrix finished [0.0313s]\n", + "|-----------> βœ… Filter features and extract data matrix finished [0.0363s]\n", "|-----------> βš™οΈ Convert numpy array to tensor started\n", - "|-----------> βœ… Convert numpy array to tensor finished [0.0017s]\n", + "|-----------> βœ… Convert numpy array to tensor finished [0.0015s]\n", "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize model finished [0.1665s]\n", + "|-----------> βœ… Initialize model finished [0.0429s]\n", "|-----------> βš™οΈ Predict ages with model started\n", - "|-----------> βœ… Predict ages with model finished [0.0887s]\n", + "|-----------> βœ… Predict ages with model finished [0.0638s]\n", "|-----------> βš™οΈ Convert tensor to numpy array started\n", - "|-----------> βœ… Convert tensor to numpy array finished [0.0003s]\n", + "|-----------> βœ… Convert tensor to numpy array finished [0.0004s]\n", "|-----------> βš™οΈ Add predicted ages to adata started\n", "|-----------> βœ… Add predicted ages to adata finished [0.0005s]\n", "|-----------> βš™οΈ Load all clock metadata started\n", "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", - "|-----------> βœ… Load all clock metadata finished [0.0014s]\n", + "|-----------> βœ… Load all clock metadata finished [0.0013s]\n", "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", - "|-----------> βœ… Add clock metadata to adata.uns finished [0.0002s]\n", - "|-----> πŸŽ‰ Done! [100.3793s]\n" + "|-----------> βœ… Add clock metadata to adata.uns finished [0.0003s]\n", + "|-----> πŸŽ‰ Done! 
[0.7939s]\n" ] } ], @@ -843,7 +839,8 @@ " 'postprocessing': 'anti_log_linear',\n", " 'citation': 'Horvath, Steve. \"DNA methylation age of human tissues and cell types.\" Genome biology 14.10 (2013): 1-20.',\n", " 'doi': 'https://doi.org/10.1186/gb-2013-14-10-r115',\n", - " 'notes': None}" + " 'notes': None,\n", + " 'implementation_approved_by_author(s)': 'βŒ›'}" ] }, "execution_count": 13, @@ -867,6 +864,7 @@ "{'species': 'Homo sapiens',\n", " 'data_type': 'methylation',\n", " 'year': 2022,\n", + " 'implementation_approved_by_author(s)': 'βœ…',\n", " 'preprocessing': 'scale',\n", " 'postprocessing': None,\n", " 'citation': 'de Lima Camillo, Lucas Paulo, Louis R. Lapierre, and Ritambhara Singh. \"A pan-tissue DNA-methylation epigenetic clock based on deep learning.\" npj Aging 8.1 (2022): 4.',\n", @@ -899,7 +897,8 @@ " 'postprocessing': None,\n", " 'citation': 'Higgins-Chen, Albert T., et al. \"A computational solution for bolstering reliability of epigenetic clocks: Implications for clinical trials and longitudinal tracking.\" Nature aging 2.7 (2022): 644-661.',\n", " 'doi': 'https://doi.org/10.1038/s43587-022-00248-2',\n", - " 'notes': None}" + " 'notes': None,\n", + " 'implementation_approved_by_author(s)': 'βŒ›'}" ] }, "execution_count": 15, @@ -911,6 +910,727 @@ "adata.uns['pcgrimage_metadata']" ] }, + { + "cell_type": "markdown", + "id": "49080a8a-1957-4eee-ac38-2ae495f100bd", + "metadata": {}, + "source": [ + "## Mus musculus" + ] + }, + { + "cell_type": "markdown", + "id": "5e620ab9-4837-4a7a-83f1-726be9c9f7bf", + "metadata": {}, + "source": [ + "### Download and load example data" + ] + }, + { + "cell_type": "markdown", + "id": "5ff79235-46fb-4c59-a629-1f479f9f13a3", + "metadata": {}, + "source": [ + "Let's download the publicly available dataset GSE130735 with RRBS samples from mouse. Given it is RRBS, there are millions of CpG sites." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "148f7ae7-8a5d-4fd2-a159-e3b8e576bbce", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "|-----> πŸ—οΈ Starting download_example_data function\n", + "|-----------> Data found in pyaging_data/GSE130735_subset.pkl\n", + "|-----> πŸŽ‰ Done! [0.0015s]\n" + ] + } + ], + "source": [ + "pya.data.download_example_data('GSE130735')" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "2a6fcd90-8ded-40d5-a606-e32e21816ebf", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_pickle('pyaging_data/GSE130735_subset.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "c4d8245b-8d04-4ae0-945d-3aed4956a3bb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
chr1:3020814chr1:3020842chr1:3020877chr1:3020891chr1:3020945chr1:3020971chr1:3020987chr1:3021012chr1:3037802chr1:3037820...chrY:1825397chrY:4682362chrY:32122892chrY:85867071chrY:85867083chrY:85867117chrY:85867137chrY:85867139chrY:85867178chrY:88224179
GSM37526310.6090.250.4080.1890.0680.3730.5710.2520.3330.158...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
GSM3752625NaNNaN0.9730.9840.9120.9150.9870.9740.9910.932...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
GSM3752634NaNNaN0.5260.1310.0000.0380.4690.7690.7720.146...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
GSM37526200.9310.920.9880.9490.8970.9210.9070.9581.0000.867...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
GSM3752622NaNNaN0.2050.3820.0910.1320.1740.2270.1080.053...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

5 rows Γ— 1778324 columns

\n", + "
" + ], + "text/plain": [ + " chr1:3020814 chr1:3020842 chr1:3020877 chr1:3020891 \\\n", + "GSM3752631 0.609 0.25 0.408 0.189 \n", + "GSM3752625 NaN NaN 0.973 0.984 \n", + "GSM3752634 NaN NaN 0.526 0.131 \n", + "GSM3752620 0.931 0.92 0.988 0.949 \n", + "GSM3752622 NaN NaN 0.205 0.382 \n", + "\n", + " chr1:3020945 chr1:3020971 chr1:3020987 chr1:3021012 \\\n", + "GSM3752631 0.068 0.373 0.571 0.252 \n", + "GSM3752625 0.912 0.915 0.987 0.974 \n", + "GSM3752634 0.000 0.038 0.469 0.769 \n", + "GSM3752620 0.897 0.921 0.907 0.958 \n", + "GSM3752622 0.091 0.132 0.174 0.227 \n", + "\n", + " chr1:3037802 chr1:3037820 ... chrY:1825397 chrY:4682362 \\\n", + "GSM3752631 0.333 0.158 ... NaN NaN \n", + "GSM3752625 0.991 0.932 ... NaN NaN \n", + "GSM3752634 0.772 0.146 ... NaN NaN \n", + "GSM3752620 1.000 0.867 ... NaN NaN \n", + "GSM3752622 0.108 0.053 ... NaN NaN \n", + "\n", + " chrY:32122892 chrY:85867071 chrY:85867083 chrY:85867117 \\\n", + "GSM3752631 NaN NaN NaN NaN \n", + "GSM3752625 NaN NaN NaN NaN \n", + "GSM3752634 NaN NaN NaN NaN \n", + "GSM3752620 NaN NaN NaN NaN \n", + "GSM3752622 NaN NaN NaN NaN \n", + "\n", + " chrY:85867137 chrY:85867139 chrY:85867178 chrY:88224179 \n", + "GSM3752631 NaN NaN NaN NaN \n", + "GSM3752625 NaN NaN NaN NaN \n", + "GSM3752634 NaN NaN NaN NaN \n", + "GSM3752620 NaN NaN NaN NaN \n", + "GSM3752622 NaN NaN NaN NaN \n", + "\n", + "[5 rows x 1778324 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "ea7c44d7-73c3-4cd7-844d-bab34aa2dcee", + "metadata": {}, + "source": [ + "### Convert data to AnnData object" + ] + }, + { + "cell_type": "markdown", + "id": "04f2758f-fb8c-4a52-983a-29ec826dba6c", + "metadata": {}, + "source": [ + "AnnData objects are highly flexible and are thus our preferred method of organizing data for age prediction." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "c261c2dc-8245-47d7-82d8-7731f7c94f1e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "|-----> πŸ—οΈ Starting df_to_adata function\n", + "|-----> βš™οΈ Create anndata object started\n", + "|-----> βœ… Create anndata object finished [0.0218s]\n", + "|-----> βš™οΈ Add metadata to anndata started\n", + "|-----------? No metadata provided. Leaving adata.obs empty\n", + "|-----> ⚠️ Add metadata to anndata finished [0.0008s]\n", + "|-----> βš™οΈ Log data statistics started\n", + "|-----------> There are 14 observations\n", + "|-----------> There are 1778324 features\n", + "|-----------> Total missing values: 6322346\n", + "|-----------> Percentage of missing values: 25.39%\n", + "|-----> βœ… Log data statistics finished [0.0214s]\n", + "|-----> βš™οΈ Impute missing values started\n", + "|-----------> Imputing missing values using mean strategy\n", + "|-----> βœ… Impute missing values finished [0.3441s]\n", + "|-----> βš™οΈ Add unstructured data to anndata started\n", + "|-----> βœ… Add unstructured data to anndata finished [0.0003s]\n", + "|-----> πŸŽ‰ Done! [0.3931s]\n" + ] + } + ], + "source": [ + "adata = pya.pp.df_to_adata(df, imputer_strategy='mean')" + ] + }, + { + "cell_type": "markdown", + "id": "7349164c-f28b-4222-bf41-6f80d8b79c3b", + "metadata": {}, + "source": [ + "This is what the `adata` object looks like:" + ] + }, + { + "cell_type": "markdown", + "id": "4b5ff1ef-e724-407a-b6d4-9907558f21ba", + "metadata": {}, + "source": [ + "### Predict age" + ] + }, + { + "cell_type": "markdown", + "id": "eb197ded-91dd-4319-8dbb-a635d09c8367", + "metadata": {}, + "source": [ + "We can either predict one clock at a time or all at the same time. For convenience, let's simply run both mouse clocks (ThompsonMultiTissue and MeerMultiTissue) at once. The function is invariant to the capitalization of the clock name." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "29ebd120-1c4a-4847-a3a3-5be7ffc5f730", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "|-----> πŸ—οΈ Starting predict_age function\n", + "|-----> βš™οΈ Set PyTorch device started\n", + "|-----------> Using device: cpu\n", + "|-----> βœ… Set PyTorch device finished [0.0028s]\n", + "|-----> πŸ•’ Processing clock: ThompsonMultiTissue\n", + "|-----------> βš™οΈ Load clock started\n", + "|-----------------> Data found in pyaging_data/thompsonmultitissue.pt\n", + "|-----------> βœ… Load clock finished [0.0050s]\n", + "|-----------> βš™οΈ Check features in adata started\n", + "|-----------------? 1 out of 582 features (0.17%) are missing and will be added with default value 0: ['chr4:91376687'], etc.\n", + "|-----------------> Expanded adata with 1 missing features.\n", + "|-----------> ⚠️ Check features in adata finished [0.3974s]\n", + "|-----------> βš™οΈ Filter features and extract data matrix started\n", + "|-----------> βœ… Filter features and extract data matrix finished [0.0175s]\n", + "|-----------> βš™οΈ Convert numpy array to tensor started\n", + "|-----------> βœ… Convert numpy array to tensor finished [0.0009s]\n", + "|-----------> βš™οΈ Initialize model started\n", + "|-----------> βœ… Initialize model finished [0.0019s]\n", + "|-----------> βš™οΈ Predict ages with model started\n", + "|-----------> βœ… Predict ages with model finished [0.0013s]\n", + "|-----------> βš™οΈ Convert tensor to numpy array started\n", + "|-----------> βœ… Convert tensor to numpy array finished [0.0007s]\n", + "|-----------> βš™οΈ Add predicted ages to adata started\n", + "|-----------> βœ… Add predicted ages to adata finished [0.0007s]\n", + "|-----------> βš™οΈ Load all clock metadata started\n", + "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", + "|-----------> βœ… Load all clock metadata finished [0.0028s]\n", + 
"|-----------> βš™οΈ Add clock metadata to adata.uns started\n", + "|-----------> βœ… Add clock metadata to adata.uns finished [0.0007s]\n", + "|-----> πŸ•’ Processing clock: MeerMultiTissue\n", + "|-----------> βš™οΈ Load clock started\n", + "|-----------------> Data found in pyaging_data/meermultitissue.pt\n", + "|-----------> βœ… Load clock finished [0.0019s]\n", + "|-----------> βš™οΈ Check features in adata started\n", + "|-----------------? 225 out of 435 features (51.72%) are missing and will be added with default value 0: ['chr10:111559529', 'chr10:115250413', 'chr10:127620127'], etc.\n", + "|-----------------> Expanded adata with 225 missing features.\n", + "|-----------> ⚠️ Check features in adata finished [0.4286s]\n", + "|-----------> βš™οΈ Preprocess data started\n", + "|-----------------> Preprocessing data with function times100\n", + "|-----------> βœ… Preprocess data finished [0.0724s]\n", + "|-----------> βš™οΈ Filter features and extract data matrix started\n", + "|-----------> βœ… Filter features and extract data matrix finished [0.0105s]\n", + "|-----------> βš™οΈ Convert numpy array to tensor started\n", + "|-----------> βœ… Convert numpy array to tensor finished [0.0015s]\n", + "|-----------> βš™οΈ Initialize model started\n", + "|-----------> βœ… Initialize model finished [0.0014s]\n", + "|-----------> βš™οΈ Predict ages with model started\n", + "|-----------> βœ… Predict ages with model finished [0.0012s]\n", + "|-----------> βš™οΈ Convert tensor to numpy array started\n", + "|-----------> βœ… Convert tensor to numpy array finished [0.0016s]\n", + "|-----------> βš™οΈ Add predicted ages to adata started\n", + "|-----------> βœ… Add predicted ages to adata finished [0.0007s]\n", + "|-----------> βš™οΈ Load all clock metadata started\n", + "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", + "|-----------> βœ… Load all clock metadata finished [0.0016s]\n", + "|-----------> βš™οΈ Add clock metadata to 
adata.uns started\n", + "|-----------> βœ… Add clock metadata to adata.uns finished [0.0005s]\n", + "|-----> πŸŽ‰ Done! [1.1162s]\n" + ] + } + ], + "source": [ + "adata = pya.pred.predict_age(adata, ['ThompsonMultiTissue', 'MeerMultiTissue',])" + ] + }, + { + "cell_type": "markdown", + "id": "dfce0265-b647-42ae-8693-0e3c05d480d3", + "metadata": {}, + "source": [ + "Note that the Meer clock predicts age in days rather than months." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "4bb259c5-2cba-4dc1-b123-2387a5bb7749", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
thompsonmultitissuemeermultitissue
GSM375263119.634113223.113083
GSM3752625-1.4104670.860733
GSM375263461.058792650.326477
GSM3752620-2.66381149.164429
GSM375262220.594114231.560425
\n", + "
" + ], + "text/plain": [ + " thompsonmultitissue meermultitissue\n", + "GSM3752631 19.634113 223.113083\n", + "GSM3752625 -1.410467 0.860733\n", + "GSM3752634 61.058792 650.326477\n", + "GSM3752620 -2.663811 49.164429\n", + "GSM3752622 20.594114 231.560425" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adata.obs.head()" + ] + }, + { + "cell_type": "markdown", + "id": "8519affc-ffc8-4904-ad7a-bd6a6d6458cf", + "metadata": {}, + "source": [ + "Having so much information printed can be overwhelming, particularly when running several clocks at once. In such cases, just set verbose to False." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "18b44cfa-36d5-49c9-badf-7ba9e189bbc0", + "metadata": {}, + "outputs": [], + "source": [ + "pya.data.download_example_data('GSE130735', verbose=False)\n", + "df = pd.read_pickle('pyaging_data/GSE130735_subset.pkl')\n", + "adata = pya.preprocess.df_to_adata(df, imputer_strategy='mean', verbose=False)\n", + "adata = pya.pred.predict_age(adata, ['ThompsonMultiTissue', 'MeerMultiTissue',], verbose=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "a2520978-b693-474f-88cf-91bcde1a5d95", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
thompsonmultitissuemeermultitissue
GSM375263119.634113223.113083
GSM3752625-1.4104670.860733
GSM375263461.058792650.326477
GSM3752620-2.66381149.164429
GSM375262220.594114231.560425
\n", + "
" + ], + "text/plain": [ + " thompsonmultitissue meermultitissue\n", + "GSM3752631 19.634113 223.113083\n", + "GSM3752625 -1.410467 0.860733\n", + "GSM3752634 61.058792 650.326477\n", + "GSM3752620 -2.663811 49.164429\n", + "GSM3752622 20.594114 231.560425" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adata.obs.head()" + ] + }, + { + "cell_type": "markdown", + "id": "33119798-f1b3-4c4c-9f18-e4e4b7ca21e8", + "metadata": {}, + "source": [ + "After age prediction, the clocks are added to `adata.obs`. Moreover, the percent of missing values for each clock and other metadata are included in `adata.uns`." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "a4585982-32bf-49c3-93ca-85ed26af4199", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AnnData object with n_obs Γ— n_vars = 14 Γ— 1778550\n", + " obs: 'thompsonmultitissue', 'meermultitissue'\n", + " uns: 'meermultitissue_metadata', 'thompsonmultitissue_percent_na', 'thompsonmultitissue_metadata', 'meermultitissue_percent_na'\n", + " layers: 'X_original', 'X_imputed', 'X_times100'" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adata" + ] + }, + { + "cell_type": "markdown", + "id": "615f8fbf-f5e1-4af9-a2a0-5f4f781001fe", + "metadata": {}, + "source": [ + "### Get citation" + ] + }, + { + "cell_type": "markdown", + "id": "0fe55edd-9271-4b41-857d-ef3fceafc2a6", + "metadata": {}, + "source": [ + "The doi, citation, and some metadata are automatically added to the AnnData object under `adata.uns[CLOCKNAME_metadata]`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "44fa2b51-7c04-4806-843f-06a0b385c0ec", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'species': 'Mus musculus',\n", + " 'data_type': 'methylation',\n", + " 'year': 2018,\n", + " 'implementation_approved_by_author(s)': 'βŒ›',\n", + " 'preprocessing': None,\n", + " 'postprocessing': None,\n", + " 'citation': 'Thompson, Michael J., et al. \"A multi-tissue full lifespan epigenetic clock for mice.\" Aging (Albany NY) 10.10 (2018): 2832.',\n", + " 'doi': 'https://doi.org/10.18632/aging.101590',\n", + " 'notes': None}" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adata.uns['thompsonmultitissue_metadata']" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "cdf4c609-7a24-4c3f-a891-647315b77d54", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'species': 'Mus musculus',\n", + " 'data_type': 'methylation',\n", + " 'year': 2018,\n", + " 'implementation_approved_by_author(s)': 'βŒ›',\n", + " 'preprocessing': 'times100',\n", + " 'postprocessing': None,\n", + " 'citation': 'Meer, Margarita V., et al. 
\"A whole lifespan mouse multi-tissue DNA methylation clock.\" Elife 7 (2018): e40675.',\n", + " 'doi': 'https://doi.org/10.7554/eLife.40675',\n", + " 'notes': 'Predicts age in days'}" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adata.uns['meermultitissue_metadata']" + ] + }, { "cell_type": "markdown", "id": "21b470d4-eef3-4825-9899-fcb0068c9c1c", @@ -937,7 +1657,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 29, "id": "5d07743e-516c-4cac-a733-a05c2ed55d53", "metadata": {}, "outputs": [ @@ -948,7 +1668,7 @@ "|-----> πŸ—οΈ Starting download_example_data function\n", "|-----------> Downloading data to pyaging_data/GSE223748_subset.pkl\n", "|-----------> in progress: 100.0000%\n", - "|-----> πŸŽ‰ Done! [5.1092s]\n" + "|-----> πŸŽ‰ Done! [4.6242s]\n" ] } ], @@ -958,7 +1678,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 30, "id": "6e929219-e691-4171-911e-46143ae94898", "metadata": {}, "outputs": [], @@ -968,7 +1688,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 31, "id": "03141fc7-f175-4ad3-86ed-22d51db5cadd", "metadata": {}, "outputs": [ @@ -1202,7 +1922,7 @@ "[5 rows x 37554 columns]" ] }, - "execution_count": 18, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1229,7 +1949,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 32, "id": "6437e2df-a1f5-4d66-949c-e0fc706b574b", "metadata": {}, "outputs": [ @@ -1239,22 +1959,22 @@ "text": [ "|-----> πŸ—οΈ Starting df_to_adata function\n", "|-----> βš™οΈ Create anndata object started\n", - "|-----> βœ… Create anndata object finished [0.0019s]\n", + "|-----> βœ… Create anndata object finished [0.0013s]\n", "|-----> βš™οΈ Add metadata to anndata started\n", "|-----------? No metadata provided. 
Leaving adata.obs empty\n", - "|-----> ⚠️ Add metadata to anndata finished [0.0007s]\n", + "|-----> ⚠️ Add metadata to anndata finished [0.0005s]\n", "|-----> βš™οΈ Log data statistics started\n", "|-----------> There are 100 observations\n", "|-----------> There are 37554 features\n", "|-----------> Total missing values: 0\n", "|-----------> Percentage of missing values: 0.00%\n", - "|-----> βœ… Log data statistics finished [0.0058s]\n", + "|-----> βœ… Log data statistics finished [0.0047s]\n", "|-----> βš™οΈ Impute missing values started\n", "|-----------> No missing values found. No imputation necessary\n", - "|-----> βœ… Impute missing values finished [0.0067s]\n", + "|-----> βœ… Impute missing values finished [0.0075s]\n", "|-----> βš™οΈ Add unstructured data to anndata started\n", - "|-----> βœ… Add unstructured data to anndata finished [0.0004s]\n", - "|-----> πŸŽ‰ Done! [0.0190s]\n" + "|-----> βœ… Add unstructured data to anndata finished [0.0008s]\n", + "|-----> πŸŽ‰ Done! [0.0188s]\n" ] } ], @@ -1272,7 +1992,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 33, "id": "e26459d3-276d-4cda-b3bf-a4147c397667", "metadata": {}, "outputs": [ @@ -1285,7 +2005,7 @@ " layers: 'X_original'" ] }, - "execution_count": 20, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1312,7 +2032,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 34, "id": "1e55a74c-4bbf-41a6-bbe3-0faf295ef33c", "metadata": {}, "outputs": [ @@ -1323,117 +2043,117 @@ "|-----> πŸ—οΈ Starting predict_age function\n", "|-----> βš™οΈ Set PyTorch device started\n", "|-----------> Using device: cpu\n", - "|-----> βœ… Set PyTorch device finished [0.0012s]\n", + "|-----> βœ… Set PyTorch device finished [0.0009s]\n", "|-----> πŸ•’ Processing clock: Mammalian1\n", "|-----------> βš™οΈ Load clock started\n", "|-----------------> Downloading data to pyaging_data/mammalian1.pt\n", "|-----------------> in progress: 
100.0000%\n", - "|-----------> βœ… Load clock finished [0.4849s]\n", + "|-----------> βœ… Load clock finished [0.4437s]\n", "|-----------> βš™οΈ Check features in adata started\n", "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0018s]\n", + "|-----------> βœ… Check features in adata finished [0.0015s]\n", "|-----------> βš™οΈ Filter features and extract data matrix started\n", - "|-----------> βœ… Filter features and extract data matrix finished [0.0033s]\n", + "|-----------> βœ… Filter features and extract data matrix finished [0.0043s]\n", "|-----------> βš™οΈ Convert numpy array to tensor started\n", - "|-----------> βœ… Convert numpy array to tensor finished [0.0008s]\n", + "|-----------> βœ… Convert numpy array to tensor finished [0.0009s]\n", "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize model finished [0.0024s]\n", + "|-----------> βœ… Initialize model finished [0.0017s]\n", "|-----------> βš™οΈ Predict ages with model started\n", - "|-----------> βœ… Predict ages with model finished [0.0011s]\n", + "|-----------> βœ… Predict ages with model finished [0.0012s]\n", "|-----------> βš™οΈ Convert tensor to numpy array started\n", - "|-----------> βœ… Convert tensor to numpy array finished [0.0012s]\n", + "|-----------> βœ… Convert tensor to numpy array finished [0.0010s]\n", "|-----------> βš™οΈ Postprocess data started\n", "|-----------------> Postprocessing data with function anti_logp2\n", - "|-----------> βœ… Postprocess data finished [0.0012s]\n", + "|-----------> βœ… Postprocess data finished [0.0011s]\n", "|-----------> βš™οΈ Add predicted ages to adata started\n", - "|-----------> βœ… Add predicted ages to adata finished [0.0011s]\n", + "|-----------> βœ… Add predicted ages to adata finished [0.0012s]\n", "|-----------> βš™οΈ Load all clock metadata started\n", "|-----------------> Data found in 
pyaging_data/all_clock_metadata.pt\n", - "|-----------> βœ… Load all clock metadata finished [0.0031s]\n", + "|-----------> βœ… Load all clock metadata finished [0.0041s]\n", "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", - "|-----------> βœ… Add clock metadata to adata.uns finished [0.0008s]\n", + "|-----------> βœ… Add clock metadata to adata.uns finished [0.0007s]\n", "|-----> πŸ•’ Processing clock: Mammalian2\n", "|-----------> βš™οΈ Load clock started\n", "|-----------------> Downloading data to pyaging_data/mammalian2.pt\n", "|-----------------> in progress: 100.0000%\n", - "|-----------> βœ… Load clock finished [0.6213s]\n", + "|-----------> βœ… Load clock finished [0.6232s]\n", "|-----------> βš™οΈ Check features in adata started\n", "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0024s]\n", + "|-----------> βœ… Check features in adata finished [0.0029s]\n", "|-----------> βš™οΈ Filter features and extract data matrix started\n", - "|-----------> βœ… Filter features and extract data matrix finished [0.0039s]\n", + "|-----------> βœ… Filter features and extract data matrix finished [0.0042s]\n", "|-----------> βš™οΈ Convert numpy array to tensor started\n", - "|-----------> βœ… Convert numpy array to tensor finished [0.0007s]\n", + "|-----------> βœ… Convert numpy array to tensor finished [0.0010s]\n", "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize model finished [0.0017s]\n", + "|-----------> βœ… Initialize model finished [0.0011s]\n", "|-----------> βš™οΈ Predict ages with model started\n", - "|-----------> βœ… Predict ages with model finished [0.0012s]\n", + "|-----------> βœ… Predict ages with model finished [0.0009s]\n", "|-----------> βš™οΈ Convert tensor to numpy array started\n", - "|-----------> βœ… Convert tensor to numpy array finished [0.0010s]\n", + "|-----------> βœ… Convert tensor to numpy array finished 
[0.0008s]\n", "|-----------> βš™οΈ Postprocess data started\n", "|-----------------> Postprocessing data with function anti_log_log\n", - "|-----------> βœ… Postprocess data finished [0.0013s]\n", + "|-----------> βœ… Postprocess data finished [0.0016s]\n", "|-----------> βš™οΈ Add predicted ages to adata started\n", - "|-----------> βœ… Add predicted ages to adata finished [0.0011s]\n", + "|-----------> βœ… Add predicted ages to adata finished [0.0016s]\n", "|-----------> βš™οΈ Load all clock metadata started\n", "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", - "|-----------> βœ… Load all clock metadata finished [0.0021s]\n", + "|-----------> βœ… Load all clock metadata finished [0.0019s]\n", "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", "|-----------> βœ… Add clock metadata to adata.uns finished [0.0006s]\n", "|-----> πŸ•’ Processing clock: Mammalian3\n", "|-----------> βš™οΈ Load clock started\n", "|-----------------> Downloading data to pyaging_data/mammalian3.pt\n", "|-----------------> in progress: 100.0000%\n", - "|-----------> βœ… Load clock finished [0.5057s]\n", + "|-----------> βœ… Load clock finished [0.5402s]\n", "|-----------> βš™οΈ Check features in adata started\n", "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0037s]\n", + "|-----------> βœ… Check features in adata finished [0.0028s]\n", "|-----------> βš™οΈ Filter features and extract data matrix started\n", - "|-----------> βœ… Filter features and extract data matrix finished [0.0057s]\n", + "|-----------> βœ… Filter features and extract data matrix finished [0.0038s]\n", "|-----------> βš™οΈ Convert numpy array to tensor started\n", - "|-----------> βœ… Convert numpy array to tensor finished [0.0026s]\n", + "|-----------> βœ… Convert numpy array to tensor finished [0.0006s]\n", "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize 
model finished [0.0049s]\n", + "|-----------> βœ… Initialize model finished [0.0009s]\n", "|-----------> βš™οΈ Predict ages with model started\n", - "|-----------> βœ… Predict ages with model finished [0.0023s]\n", + "|-----------> βœ… Predict ages with model finished [0.0006s]\n", "|-----------> βš™οΈ Convert tensor to numpy array started\n", - "|-----------> βœ… Convert tensor to numpy array finished [0.0010s]\n", + "|-----------> βœ… Convert tensor to numpy array finished [0.0003s]\n", "|-----------> βš™οΈ Add predicted ages to adata started\n", "|-----------> βœ… Add predicted ages to adata finished [0.0006s]\n", "|-----------> βš™οΈ Load all clock metadata started\n", "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", - "|-----------> βœ… Load all clock metadata finished [0.0019s]\n", + "|-----------> βœ… Load all clock metadata finished [0.0011s]\n", "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", - "|-----------> βœ… Add clock metadata to adata.uns finished [0.0008s]\n", + "|-----------> βœ… Add clock metadata to adata.uns finished [0.0012s]\n", "|-----> πŸ•’ Processing clock: MammalianLifespan\n", "|-----------> βš™οΈ Load clock started\n", "|-----------------> Downloading data to pyaging_data/mammalianlifespan.pt\n", "|-----------------> in progress: 100.0000%\n", - "|-----------> βœ… Load clock finished [0.4236s]\n", + "|-----------> βœ… Load clock finished [0.4707s]\n", "|-----------> βš™οΈ Check features in adata started\n", "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0019s]\n", + "|-----------> βœ… Check features in adata finished [0.0015s]\n", "|-----------> βš™οΈ Filter features and extract data matrix started\n", - "|-----------> βœ… Filter features and extract data matrix finished [0.0030s]\n", + "|-----------> βœ… Filter features and extract data matrix finished [0.0021s]\n", "|-----------> βš™οΈ Convert numpy array 
to tensor started\n", "|-----------> βœ… Convert numpy array to tensor finished [0.0008s]\n", "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize model finished [0.0015s]\n", + "|-----------> βœ… Initialize model finished [0.0010s]\n", "|-----------> βš™οΈ Predict ages with model started\n", - "|-----------> βœ… Predict ages with model finished [0.0007s]\n", + "|-----------> βœ… Predict ages with model finished [0.0011s]\n", "|-----------> βš™οΈ Convert tensor to numpy array started\n", "|-----------> βœ… Convert tensor to numpy array finished [0.0011s]\n", "|-----------> βš™οΈ Postprocess data started\n", "|-----------------> Postprocessing data with function anti_log\n", - "|-----------> βœ… Postprocess data finished [0.0017s]\n", + "|-----------> βœ… Postprocess data finished [0.0013s]\n", "|-----------> βš™οΈ Add predicted ages to adata started\n", - "|-----------> βœ… Add predicted ages to adata finished [0.0012s]\n", + "|-----------> βœ… Add predicted ages to adata finished [0.0016s]\n", "|-----------> βš™οΈ Load all clock metadata started\n", "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", - "|-----------> βœ… Load all clock metadata finished [0.0022s]\n", + "|-----------> βœ… Load all clock metadata finished [0.0016s]\n", "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", - "|-----------> βœ… Add clock metadata to adata.uns finished [0.0006s]\n", - "|-----> πŸŽ‰ Done! [2.1646s]\n" + "|-----------> βœ… Add clock metadata to adata.uns finished [0.0004s]\n", + "|-----> πŸŽ‰ Done! 
[2.4322s]\n" ] } ], @@ -1451,7 +2171,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 35, "id": "98fbdf4c-57c2-4885-bc4a-96b4771a638b", "metadata": {}, "outputs": [ @@ -1531,7 +2251,7 @@ "203531420070_R05C02 10.371320 0.142493 -0.559288 68.409359" ] }, - "execution_count": 22, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1550,7 +2270,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 36, "id": "64035819-8dd0-4917-96ed-55c36ef34a66", "metadata": {}, "outputs": [], @@ -1563,7 +2283,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 37, "id": "9414fe4c-a83a-4ac2-9dbb-a2a9d333af06", "metadata": {}, "outputs": [ @@ -1643,7 +2363,7 @@ "203531420070_R05C02 10.371320 0.142493 -0.559288 68.409359" ] }, - "execution_count": 24, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -1662,7 +2382,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 38, "id": "0d069189-1750-42f6-89d9-73039dd07a00", "metadata": {}, "outputs": [ @@ -1676,7 +2396,7 @@ " layers: 'X_original'" ] }, - "execution_count": 25, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1703,7 +2423,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 39, "id": "300b40ad-68e7-49b3-a7f8-66c64436c80f", "metadata": {}, "outputs": [ @@ -1717,10 +2437,11 @@ " 'postprocessing': 'anti_logp2',\n", " 'citation': 'Lu, A. T., et al. 
\"Universal DNA methylation age across mammalian tissues.\" Nature aging 3.9 (2023): 1144-1166.',\n", " 'doi': 'https://doi.org/10.1038/s43587-023-00462-6',\n", - " 'notes': 'This is the DNAm age predictor from the paper in which there is no adjustment for species'}" + " 'notes': 'This is the DNAm age predictor from the paper in which there is no adjustment for species',\n", + " 'implementation_approved_by_author(s)': 'βŒ›'}" ] }, - "execution_count": 26, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } diff --git a/tutorials/tutorial_histonemarkchipseq.ipynb b/tutorials/tutorial_histonemarkchipseq.ipynb index 88ecb4d..1aaf544 100644 --- a/tutorials/tutorial_histonemarkchipseq.ipynb +++ b/tutorials/tutorial_histonemarkchipseq.ipynb @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "7b3988dd-3e39-4e13-8ad8-a06447137c52", "metadata": {}, "outputs": [ @@ -71,8 +71,7 @@ "text": [ "|-----> πŸ—οΈ Starting download_example_data function\n", "|-----------> Downloading data to pyaging_data/ENCFF386QWG.bigWig\n", - "|-----------> in progress: 100.0000%\n", - "|-----> πŸŽ‰ Done! 
[178.9496s]\n" + "|-----------> in progress: 79.0189%" ] } ], @@ -90,186 +89,20 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "e56f4080-e968-44cb-8e31-4bd27092f4b2", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "|-----> πŸ—οΈ Starting bigwig_to_df function\n", - "|-----> βš™οΈ Load Ensembl genome metadata started\n", - "|-----------> Downloading data to pyaging_data/Ensembl-105-EnsDb-for-Homo-sapiens-genes.csv\n", - "|-----------> in progress: 100.0000%\n", - "|-----> βœ… Load Ensembl genome metadata finished [3.0124s]\n", - "|-----> βš™οΈ Processing bigWig files started\n", - "|-----------> Processing file: pyaging_data/ENCFF386QWG.bigWig\n", - "|-----------> in progress: 100.0000%\n", - "|-----------> Processing file: pyaging_data/ENCFF386QWG.bigWig\n", - "|-----------> in progress: 100.0000%\n", - "|-----> βœ… Processing bigWig files finished [16.4875s]\n", - "|-----> πŸŽ‰ Done! [36.6192s]\n" - ] - } - ], + "outputs": [], "source": [ "df = pya.pp.bigwig_to_df(['pyaging_data/ENCFF386QWG.bigWig', 'pyaging_data/ENCFF386QWG.bigWig'])" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "769858ac-9d6d-43f8-9c53-0f4a88c5484c", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ENSG00000223972ENSG00000227232ENSG00000278267ENSG00000243485ENSG00000284332ENSG00000237613ENSG00000268020ENSG00000240361ENSG00000186092ENSG00000238009...ENSG00000237801ENSG00000237040ENSG00000124333ENSG00000228410ENSG00000223484ENSG00000124334ENSG00000270726ENSG00000185203ENSG00000182484ENSG00000227159
pyaging_data/ENCFF386QWG.bigWig0.0286160.0304150.0277830.0286160.0286160.0286160.0441710.0364740.0307840.03181...0.0344350.0068221.4131190.0294240.1400050.0497860.0692960.3321260.0285960.028616
pyaging_data/ENCFF386QWG.bigWig0.0286160.0304150.0277830.0286160.0286160.0286160.0441710.0364740.0307840.03181...0.0344350.0068221.4131190.0294240.1400050.0497860.0692960.3321260.0285960.028616
\n", - "

2 rows Γ— 62241 columns

\n", - "
" - ], - "text/plain": [ - " ENSG00000223972 ENSG00000227232 \\\n", - "pyaging_data/ENCFF386QWG.bigWig 0.028616 0.030415 \n", - "pyaging_data/ENCFF386QWG.bigWig 0.028616 0.030415 \n", - "\n", - " ENSG00000278267 ENSG00000243485 \\\n", - "pyaging_data/ENCFF386QWG.bigWig 0.027783 0.028616 \n", - "pyaging_data/ENCFF386QWG.bigWig 0.027783 0.028616 \n", - "\n", - " ENSG00000284332 ENSG00000237613 \\\n", - "pyaging_data/ENCFF386QWG.bigWig 0.028616 0.028616 \n", - "pyaging_data/ENCFF386QWG.bigWig 0.028616 0.028616 \n", - "\n", - " ENSG00000268020 ENSG00000240361 \\\n", - "pyaging_data/ENCFF386QWG.bigWig 0.044171 0.036474 \n", - "pyaging_data/ENCFF386QWG.bigWig 0.044171 0.036474 \n", - "\n", - " ENSG00000186092 ENSG00000238009 ... \\\n", - "pyaging_data/ENCFF386QWG.bigWig 0.030784 0.03181 ... \n", - "pyaging_data/ENCFF386QWG.bigWig 0.030784 0.03181 ... \n", - "\n", - " ENSG00000237801 ENSG00000237040 \\\n", - "pyaging_data/ENCFF386QWG.bigWig 0.034435 0.006822 \n", - "pyaging_data/ENCFF386QWG.bigWig 0.034435 0.006822 \n", - "\n", - " ENSG00000124333 ENSG00000228410 \\\n", - "pyaging_data/ENCFF386QWG.bigWig 1.413119 0.029424 \n", - "pyaging_data/ENCFF386QWG.bigWig 1.413119 0.029424 \n", - "\n", - " ENSG00000223484 ENSG00000124334 \\\n", - "pyaging_data/ENCFF386QWG.bigWig 0.140005 0.049786 \n", - "pyaging_data/ENCFF386QWG.bigWig 0.140005 0.049786 \n", - "\n", - " ENSG00000270726 ENSG00000185203 \\\n", - "pyaging_data/ENCFF386QWG.bigWig 0.069296 0.332126 \n", - "pyaging_data/ENCFF386QWG.bigWig 0.069296 0.332126 \n", - "\n", - " ENSG00000182484 ENSG00000227159 \n", - "pyaging_data/ENCFF386QWG.bigWig 0.028596 0.028616 \n", - "pyaging_data/ENCFF386QWG.bigWig 0.028596 0.028616 \n", - "\n", - "[2 rows x 62241 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.head()" ] @@ -292,43 +125,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": 
"c501ed4c-f711-44be-bda4-669fbbae88f1", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "|-----> πŸ—οΈ Starting df_to_adata function\n", - "|-----> βš™οΈ Create anndata object started\n", - "|-----> βœ… Create anndata object finished [0.0166s]\n", - "|-----> βš™οΈ Add metadata to anndata started\n", - "|-----------? No metadata provided. Leaving adata.obs empty\n", - "|-----> ⚠️ Add metadata to anndata finished [0.0005s]\n", - "|-----> βš™οΈ Log data statistics started\n", - "|-----------> There are 2 observations\n", - "|-----------> There are 62241 features\n", - "|-----------> Total missing values: 0\n", - "|-----------> Percentage of missing values: 0.00%\n", - "|-----> βœ… Log data statistics finished [0.0025s]\n", - "|-----> βš™οΈ Impute missing values started\n", - "|-----------> No missing values found. No imputation necessary\n", - "|-----> βœ… Impute missing values finished [0.0026s]\n", - "|-----> βš™οΈ Add unstructured data to anndata started\n", - "|-----> βœ… Add unstructured data to anndata finished [0.0003s]\n", - "|-----> πŸŽ‰ Done! [0.0258s]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/lucascamillo/mambaforge/envs/brain/lib/python3.9/site-packages/anndata/_core/anndata.py:1897: UserWarning: Observation names are not unique. 
To make them unique, call `.obs_names_make_unique`.\n", - " utils.warn_names_duplicates(\"obs\")\n" - ] - } - ], + "outputs": [], "source": [ "adata = pya.preprocess.df_to_adata(df)" ] @@ -343,24 +143,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "641a61a6-46fc-4d47-b176-eb39524ce94f", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AnnData object with n_obs Γ— n_vars = 2 Γ— 62241\n", - " var: 'percent_na'\n", - " uns: 'imputer_strategy', 'data_type'\n", - " layers: 'X_original'" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "adata" ] @@ -383,161 +169,20 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "13e7d357-c6d5-474b-b13f-30ec5edc0d19", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "|-----> πŸ—οΈ Starting predict_age function\n", - "|-----> βš™οΈ Set PyTorch device started\n", - "|-----------> Using device: cpu\n", - "|-----> βœ… Set PyTorch device finished [0.0027s]\n", - "|-----> πŸ•’ Processing clock: H3K4me3\n", - "|-----------> βš™οΈ Load clock started\n", - "|-----------------> Downloading data to pyaging_data/h3k4me3.pt\n", - "|-----------------> in progress: 100.0000%\n", - "|-----------> βœ… Load clock finished [1.4404s]\n", - "|-----------> βš™οΈ Check features in adata started\n", - "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0019s]\n", - "|-----------> βš™οΈ Filter features and extract data matrix started\n", - "|-----------> βœ… Filter features and extract data matrix finished [0.0040s]\n", - "|-----------> βš™οΈ Convert numpy array to tensor started\n", - "|-----------> βœ… Convert numpy array to tensor finished [0.0012s]\n", - "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize model finished 
[0.0157s]\n", - "|-----------> βš™οΈ Predict ages with model started\n", - "|-----------> βœ… Predict ages with model finished [0.0026s]\n", - "|-----------> βš™οΈ Convert tensor to numpy array started\n", - "|-----------> βœ… Convert tensor to numpy array finished [0.0004s]\n", - "|-----------> βš™οΈ Add predicted ages to adata started\n", - "|-----------> βœ… Add predicted ages to adata finished [0.0006s]\n", - "|-----------> βš™οΈ Load all clock metadata started\n", - "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", - "|-----------> βœ… Load all clock metadata finished [0.0024s]\n", - "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", - "|-----------> βœ… Add clock metadata to adata.uns finished [0.0003s]\n", - "|-----> πŸ•’ Processing clock: H3K9me3\n", - "|-----------> βš™οΈ Load clock started\n", - "|-----------------> Downloading data to pyaging_data/h3k9me3.pt\n", - "|-----------------> in progress: 100.0000%\n", - "|-----------> βœ… Load clock finished [0.8886s]\n", - "|-----------> βš™οΈ Check features in adata started\n", - "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0008s]\n", - "|-----------> βš™οΈ Filter features and extract data matrix started\n", - "|-----------> βœ… Filter features and extract data matrix finished [0.0013s]\n", - "|-----------> βš™οΈ Convert numpy array to tensor started\n", - "|-----------> βœ… Convert numpy array to tensor finished [0.0004s]\n", - "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize model finished [0.0013s]\n", - "|-----------> βš™οΈ Predict ages with model started\n", - "|-----------> βœ… Predict ages with model finished [0.0006s]\n", - "|-----------> βš™οΈ Convert tensor to numpy array started\n", - "|-----------> βœ… Convert tensor to numpy array finished [0.0004s]\n", - "|-----------> βš™οΈ Add predicted ages to adata started\n", - "|-----------> 
βœ… Add predicted ages to adata finished [0.0007s]\n", - "|-----------> βš™οΈ Load all clock metadata started\n", - "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", - "|-----------> βœ… Load all clock metadata finished [0.0014s]\n", - "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", - "|-----------> βœ… Add clock metadata to adata.uns finished [0.0005s]\n", - "|-----> πŸ•’ Processing clock: PanHistone\n", - "|-----------> βš™οΈ Load clock started\n", - "|-----------------> Downloading data to pyaging_data/panhistone.pt\n", - "|-----------------> in progress: 100.0000%\n", - "|-----------> βœ… Load clock finished [7.0801s]\n", - "|-----------> βš™οΈ Check features in adata started\n", - "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0142s]\n", - "|-----------> βš™οΈ Filter features and extract data matrix started\n", - "|-----------> βœ… Filter features and extract data matrix finished [0.0071s]\n", - "|-----------> βš™οΈ Convert numpy array to tensor started\n", - "|-----------> βœ… Convert numpy array to tensor finished [0.0004s]\n", - "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize model finished [0.0202s]\n", - "|-----------> βš™οΈ Predict ages with model started\n", - "|-----------> βœ… Predict ages with model finished [0.0083s]\n", - "|-----------> βš™οΈ Convert tensor to numpy array started\n", - "|-----------> βœ… Convert tensor to numpy array finished [0.0005s]\n", - "|-----------> βš™οΈ Add predicted ages to adata started\n", - "|-----------> βœ… Add predicted ages to adata finished [0.0017s]\n", - "|-----------> βš™οΈ Load all clock metadata started\n", - "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", - "|-----------> βœ… Load all clock metadata finished [0.0041s]\n", - "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", - "|-----------> βœ… Add clock 
metadata to adata.uns finished [0.0009s]\n", - "|-----> πŸŽ‰ Done! [9.5315s]\n" - ] - } - ], + "outputs": [], "source": [ "adata = pya.pred.predict_age(adata, ['H3K4me3', 'H3K9me3', 'PanHistone'])" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "f64fb182-937b-4f67-b58e-5fffb0e2fad0", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
h3k4me3h3k9me3panhistone
pyaging_data/ENCFF386QWG.bigWig53.99856644.32288754.021847
pyaging_data/ENCFF386QWG.bigWig53.99856644.32288754.021847
\n", - "
" - ], - "text/plain": [ - " h3k4me3 h3k9me3 panhistone\n", - "pyaging_data/ENCFF386QWG.bigWig 53.998566 44.322887 54.021847\n", - "pyaging_data/ENCFF386QWG.bigWig 53.998566 44.322887 54.021847" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "adata.obs.head()" ] @@ -552,19 +197,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "e8dd3457-8983-41a4-aaab-41563b91a866", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/lucascamillo/mambaforge/envs/brain/lib/python3.9/site-packages/anndata/_core/anndata.py:1897: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.\n", - " utils.warn_names_duplicates(\"obs\")\n" - ] - } - ], + "outputs": [], "source": [ "pya.data.download_example_data('ENCFF386QWG', verbose=False)\n", "df = pya.pp.bigwig_to_df(['pyaging_data/ENCFF386QWG.bigWig', 'pyaging_data/ENCFF386QWG.bigWig'], verbose=False)\n", @@ -574,64 +210,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "8192ab67-a1cc-4728-8ca0-f81a56940fbf", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
h3k4me3h3k9me3panhistone
pyaging_data/ENCFF386QWG.bigWig53.99856644.32288754.021847
pyaging_data/ENCFF386QWG.bigWig53.99856644.32288754.021847
\n", - "
" - ], - "text/plain": [ - " h3k4me3 h3k9me3 panhistone\n", - "pyaging_data/ENCFF386QWG.bigWig 53.998566 44.322887 54.021847\n", - "pyaging_data/ENCFF386QWG.bigWig 53.998566 44.322887 54.021847" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "adata.obs.head()" ] @@ -646,25 +228,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "a4b22bf1-116f-456f-82d2-58b300f863f1", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AnnData object with n_obs Γ— n_vars = 2 Γ— 62241\n", - " obs: 'h3k4me3', 'h3k9me3', 'panhistone'\n", - " var: 'percent_na'\n", - " uns: 'imputer_strategy', 'data_type', 'h3k4me3_percent_na', 'h3k4me3_metadata', 'h3k9me3_percent_na', 'h3k9me3_metadata', 'panhistone_percent_na', 'panhistone_metadata'\n", - " layers: 'X_original'" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "adata" ] @@ -687,28 +254,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "2946393e-a199-46ba-a9dd-80bc8fa88787", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'species': 'Homo sapiens',\n", - " 'data_type': 'histone_mark',\n", - " 'year': 2023,\n", - " 'preprocessing': None,\n", - " 'postprocessing': None,\n", - " 'citation': 'de Lima Camillo, Lucas Paulo, et al. 
\"Histone mark age of human tissues and cells.\" bioRxiv (2023): 2023-08.',\n", - " 'doi': 'https://doi.org/10.1101/2023.08.21.554165',\n", - " 'notes': 'This is still a preprint, so the model might change'}" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "adata.uns['h3k4me3_metadata']" ] diff --git a/tutorials/tutorial_rnaseq.ipynb b/tutorials/tutorial_rnaseq.ipynb index 3f5be3c..bd420da 100644 --- a/tutorials/tutorial_rnaseq.ipynb +++ b/tutorials/tutorial_rnaseq.ipynb @@ -70,9 +70,8 @@ "output_type": "stream", "text": [ "|-----> πŸ—οΈ Starting download_example_data function\n", - "|-----------> Downloading data to pyaging_data/GSE65765_CPM.pkl\n", - "|-----------> in progress: 100.0000%\n", - "|-----> πŸŽ‰ Done! [1.3594s]\n" + "|-----------> Data found in pyaging_data/GSE65765_CPM.pkl\n", + "|-----> πŸŽ‰ Done! [0.0008s]\n" ] } ], @@ -319,22 +318,22 @@ "text": [ "|-----> πŸ—οΈ Starting df_to_adata function\n", "|-----> βš™οΈ Create anndata object started\n", - "|-----> βœ… Create anndata object finished [0.0026s]\n", + "|-----> βœ… Create anndata object finished [0.0028s]\n", "|-----> βš™οΈ Add metadata to anndata started\n", "|-----------? No metadata provided. Leaving adata.obs empty\n", - "|-----> ⚠️ Add metadata to anndata finished [0.0005s]\n", + "|-----> ⚠️ Add metadata to anndata finished [0.0004s]\n", "|-----> βš™οΈ Log data statistics started\n", "|-----------> There are 4 observations\n", "|-----------> There are 46755 features\n", "|-----------> Total missing values: 0\n", "|-----------> Percentage of missing values: 0.00%\n", - "|-----> βœ… Log data statistics finished [0.0013s]\n", + "|-----> βœ… Log data statistics finished [0.0021s]\n", "|-----> βš™οΈ Impute missing values started\n", "|-----------> No missing values found. 
No imputation necessary\n", - "|-----> βœ… Impute missing values finished [0.0017s]\n", + "|-----> βœ… Impute missing values finished [0.0013s]\n", "|-----> βš™οΈ Add unstructured data to anndata started\n", - "|-----> βœ… Add unstructured data to anndata finished [0.0003s]\n", - "|-----> πŸŽ‰ Done! [0.0096s]\n" + "|-----> βœ… Add unstructured data to anndata finished [0.0002s]\n", + "|-----> πŸŽ‰ Done! [0.0098s]\n" ] } ], @@ -403,37 +402,36 @@ "|-----> πŸ—οΈ Starting predict_age function\n", "|-----> βš™οΈ Set PyTorch device started\n", "|-----------> Using device: cpu\n", - "|-----> βœ… Set PyTorch device finished [0.0005s]\n", + "|-----> βœ… Set PyTorch device finished [0.0007s]\n", "|-----> πŸ•’ Processing clock: BiTAge\n", "|-----------> βš™οΈ Load clock started\n", "|-----------------> Downloading data to pyaging_data/bitage.pt\n", "|-----------------> in progress: 100.0000%\n", - "|-----------> βœ… Load clock finished [0.4104s]\n", + "|-----------> βœ… Load clock finished [0.5873s]\n", "|-----------> βš™οΈ Check features in adata started\n", "|-----------------> All features are present in adata.var_names.\n", - "|-----------> βœ… Check features in adata finished [0.0008s]\n", + "|-----------> βœ… Check features in adata finished [0.0025s]\n", "|-----------> βš™οΈ Preprocess data started\n", "|-----------------> Preprocessing data with function binarize\n", - "|-----------> βœ… Preprocess data finished [0.0028s]\n", + "|-----------> βœ… Preprocess data finished [0.0077s]\n", "|-----------> βš™οΈ Filter features and extract data matrix started\n", - "|-----------> βœ… Filter features and extract data matrix finished [0.0014s]\n", + "|-----------> βœ… Filter features and extract data matrix finished [0.0035s]\n", "|-----------> βš™οΈ Convert numpy array to tensor started\n", "|-----------> βœ… Convert numpy array to tensor finished [0.0007s]\n", "|-----------> βš™οΈ Initialize model started\n", - "|-----------> βœ… Initialize model finished 
[0.0017s]\n", + "|-----------> βœ… Initialize model finished [0.0024s]\n", "|-----------> βš™οΈ Predict ages with model started\n", - "|-----------> βœ… Predict ages with model finished [0.0008s]\n", + "|-----------> βœ… Predict ages with model finished [0.0010s]\n", "|-----------> βš™οΈ Convert tensor to numpy array started\n", - "|-----------> βœ… Convert tensor to numpy array finished [0.0007s]\n", + "|-----------> βœ… Convert tensor to numpy array finished [0.0006s]\n", "|-----------> βš™οΈ Add predicted ages to adata started\n", - "|-----------> βœ… Add predicted ages to adata finished [0.0007s]\n", + "|-----------> βœ… Add predicted ages to adata finished [0.0009s]\n", "|-----------> βš™οΈ Load all clock metadata started\n", - "|-----------------> Downloading data to pyaging_data/all_clock_metadata.pt\n", - "|-----------------> in progress: 100.0000%\n", - "|-----------> βœ… Load all clock metadata finished [0.5127s]\n", + "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n", + "|-----------> βœ… Load all clock metadata finished [0.0024s]\n", "|-----------> βš™οΈ Add clock metadata to adata.uns started\n", - "|-----------> βœ… Add clock metadata to adata.uns finished [0.0003s]\n", - "|-----> πŸŽ‰ Done! [0.9400s]\n" + "|-----------> βœ… Add clock metadata to adata.uns finished [0.0009s]\n", + "|-----> πŸŽ‰ Done! [0.6801s]\n" ] } ], @@ -663,7 +661,8 @@ " 'postprocessing': None,\n", " 'citation': 'Meyer, David H., and BjΓΆrn Schumacher. \"BiT age: A transcriptome‐based aging clock near the theoretical limit of accuracy.\" Aging cell 20.3 (2021): e13320.',\n", " 'doi': 'https://doi.org/10.1111/acel.13320',\n", - " 'notes': None}" + " 'notes': None,\n", + " 'implementation_approved_by_author(s)': 'βŒ›'}" ] }, "execution_count": 12,