diff --git a/README.md b/README.md
index 5589419..c5018b1 100644
--- a/README.md
+++ b/README.md
@@ -25,8 +25,9 @@ With a growing number of aging clocks and biomarkers of aging, comparing and ana
## π To-Do List
+- [ ] Incorporate more murine DNA methylation clocks
- [ ] Integrate scAge and scRNAseq clocks (and datasets)
-- [ ] Incorporate murine DNA methylation and proteomic clocks (and datasets)
+- [ ] Incorporate proteomic clocks (and datasets)
## β Can't find an aging clock?
diff --git a/clocks/notebooks/bitage.ipynb b/clocks/notebooks/bitage.ipynb
index f2d39c5..5dba314 100644
--- a/clocks/notebooks/bitage.ipynb
+++ b/clocks/notebooks/bitage.ipynb
@@ -108,7 +108,7 @@
" 'species': 'C elegans',\n",
" 'data_type': 'transcriptomics',\n",
" 'year': 2021,\n",
- " 'implementation_approved_by_author(s)': 'β',\n",
+ " 'implementation_approved_by_author(s)': '✅',\n",
" 'preprocessing': weights_dict['preprocessing'], \n",
" 'postprocessing': weights_dict['postprocessing'], \n",
" 'citation': \"Meyer, David H., and BjΓΆrn Schumacher. \\\"BiT age: A transcriptomeβbased aging clock near the theoretical limit of accuracy.\\\" Aging cell 20.3 (2021): e13320.\",\n",
diff --git a/clocks/notebooks/join_metadata.ipynb b/clocks/notebooks/join_metadata.ipynb
index df3362f..c6e7ce7 100644
--- a/clocks/notebooks/join_metadata.ipynb
+++ b/clocks/notebooks/join_metadata.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 7,
"id": "59eb29df-0597-4d45-b2e6-8825670effe2",
"metadata": {},
"outputs": [],
@@ -40,6 +40,14 @@
"\n",
"torch.save(combined_dictionary, '../metadata/all_clock_metadata.pt')"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "047fee95-c914-4c3b-872b-c108696caa1b",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
diff --git a/clocks/notebooks/meermultitissue.ipynb b/clocks/notebooks/meermultitissue.ipynb
new file mode 100644
index 0000000..b6c64f3
--- /dev/null
+++ b/clocks/notebooks/meermultitissue.ipynb
@@ -0,0 +1,197 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "fb157849-5454-4a60-8548-fff633fff764",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "import pandas as pd\n",
+ "import pyaging as pya\n",
+ "import os"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "46c6fc26-9a6b-4027-bd01-601b70eb401a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "os.system(\"curl -o coefficients.xlsx https://elifesciences.org/download/aHR0cHM6Ly9jZG4uZWxpZmVzY2llbmNlcy5vcmcvYXJ0aWNsZXMvNDA2NzUvZWxpZmUtNDA2NzUtc3VwcDMtdjIueGxzeA--/elife-40675-supp3-v2.xlsx?_hash=qzOMc4yUFACfDFG%2FlgxkFTHWt%2BSXSmP9zz1BM3oOTRM%3D\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "bf89303a-6e7f-4585-a439-655fe0a79b05",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Manual step: open coefficients.xlsx in Excel and save the multi tissue, whole lifespan coefficients as coefficients.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "b9f484b1-f501-41b7-9565-82e03bfe97dc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv('coefficients.csv')\n",
+ "\n",
+ "intercept = df['Position'].iloc[-1]\n",
+ "\n",
+ "df = df[0:-2]\n",
+ "\n",
+ "df['feature'] = df['Chromosome'].astype(str) + ':' + df['Position'].astype(int).astype(str)\n",
+ "df['coefficient'] = df['Weight']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "a284fe99-dc47-4f0c-b2ff-274e136e7020",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "features = df['feature'].tolist()\n",
+ "\n",
+ "weights = torch.tensor(df['coefficient'].tolist()).unsqueeze(0)\n",
+ "intercept = torch.tensor([intercept])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "7b4c3f6b-72af-4e99-84c4-65b8ef58c91d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "LinearModel(\n",
+ " (linear): Linear(in_features=435, out_features=1, bias=True)\n",
+ ")"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model = pya.models.LinearModel(len(features))\n",
+ "\n",
+ "model.linear.weight.data = weights\n",
+ "model.linear.bias.data = intercept\n",
+ "\n",
+ "model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "e32706f0-ce07-455e-bb17-1993c1c0e152",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "weights_dict = {\n",
+ " 'preprocessing': \"times100\", \n",
+ " 'preprocessing_helper': None,\n",
+ " 'postprocessing': None,\n",
+ " 'postprocessing_helper': None,\n",
+ " 'features': features,\n",
+ " 'weight_dict': model.state_dict(),\n",
+ "}\n",
+ "\n",
+ "metadata_dict = {\n",
+ " 'species': 'Mus musculus',\n",
+ " 'data_type': 'methylation',\n",
+ " 'year': 2018,\n",
+ " 'implementation_approved_by_author(s)': 'β',\n",
+ " 'preprocessing': weights_dict['preprocessing'], \n",
+ " 'postprocessing': weights_dict['postprocessing'], \n",
+ " 'citation': \"Meer, Margarita V., et al. \\\"A whole lifespan mouse multi-tissue DNA methylation clock.\\\" Elife 7 (2018): e40675.\",\n",
+ " 'doi': \"https://doi.org/10.7554/eLife.40675\",\n",
+ " \"notes\": \"Predicts age in days\",\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "34136f3c-92b8-4641-a103-381d3a7dd857",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "torch.save(weights_dict, '../weights/meermultitissue.pt')\n",
+ "torch.save(metadata_dict, '../metadata/meermultitissue.pt')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "303e9b76-993f-4691-af9d-1151b3c7638f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "os.system(\"rm coefficients.xlsx\")\n",
+ "os.system(\"rm coefficients.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6f111912-501e-4d2c-a592-1cc9829092dd",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.17"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/clocks/notebooks/thompsonmultitissue.ipynb b/clocks/notebooks/thompsonmultitissue.ipynb
new file mode 100644
index 0000000..24c80b5
--- /dev/null
+++ b/clocks/notebooks/thompsonmultitissue.ipynb
@@ -0,0 +1,186 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "fb157849-5454-4a60-8548-fff633fff764",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "import pandas as pd\n",
+ "import pyaging as pya\n",
+ "import os"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "46c6fc26-9a6b-4027-bd01-601b70eb401a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "os.system(\"git clone https://github.com/kerepesi/MouseAgingClocks.git\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "b9f484b1-f501-41b7-9565-82e03bfe97dc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_table('MouseAgingClocks/ClockData/Thompson2018-ElasticNet_aging_clock.txt', skiprows=1)\n",
+ "\n",
+ "intercept = df['Coefficient'].iloc[0]\n",
+ "\n",
+ "df = df[1:]\n",
+ "\n",
+ "df['feature'] = df['Chromosome'].astype(str) + ':' + df['Coordinate'].astype(int).astype(str)\n",
+ "df['coefficient'] = df['Coefficient']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "a284fe99-dc47-4f0c-b2ff-274e136e7020",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "features = df['feature'].tolist()\n",
+ "\n",
+ "weights = torch.tensor(df['coefficient'].tolist()).unsqueeze(0)\n",
+ "intercept = torch.tensor([intercept])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "7b4c3f6b-72af-4e99-84c4-65b8ef58c91d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "LinearModel(\n",
+ " (linear): Linear(in_features=582, out_features=1, bias=True)\n",
+ ")"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model = pya.models.LinearModel(len(features))\n",
+ "\n",
+ "model.linear.weight.data = weights\n",
+ "model.linear.bias.data = intercept\n",
+ "\n",
+ "model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "e32706f0-ce07-455e-bb17-1993c1c0e152",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "weights_dict = {\n",
+ " 'preprocessing': None, \n",
+ " 'preprocessing_helper': None,\n",
+ " 'postprocessing': None,\n",
+ " 'postprocessing_helper': None,\n",
+ " 'features': features,\n",
+ " 'weight_dict': model.state_dict(),\n",
+ "}\n",
+ "\n",
+ "metadata_dict = {\n",
+ " 'species': 'Mus musculus',\n",
+ " 'data_type': 'methylation',\n",
+ " 'year': 2018,\n",
+ " 'implementation_approved_by_author(s)': 'β',\n",
+ " 'preprocessing': weights_dict['preprocessing'], \n",
+ " 'postprocessing': weights_dict['postprocessing'], \n",
+ " 'citation': \"Thompson, Michael J., et al. \\\"A multi-tissue full lifespan epigenetic clock for mice.\\\" Aging (Albany NY) 10.10 (2018): 2832.\",\n",
+ " 'doi': \"https://doi.org/10.18632/aging.101590\",\n",
+ " \"notes\": None,\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "34136f3c-92b8-4641-a103-381d3a7dd857",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "torch.save(weights_dict, '../weights/thompsonmultitissue.pt')\n",
+ "torch.save(metadata_dict, '../metadata/thompsonmultitissue.pt')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "303e9b76-993f-4691-af9d-1151b3c7638f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "os.system(\"rm -r MouseAgingClocks\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6f111912-501e-4d2c-a592-1cc9829092dd",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.17"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/pyaging/data/_data.py b/pyaging/data/_data.py
index 7d1e64a..dedf2d8 100644
--- a/pyaging/data/_data.py
+++ b/pyaging/data/_data.py
@@ -22,8 +22,8 @@ def download_example_data(
Parameters
----------
data_type : str
- The type of data to download. Valid options are 'GSE139307' (human methylation),
- 'GSE223748' (mammalian methylation), 'ENCFF386QWG' (histone mark),
+ The type of data to download. Valid options are 'GSE139307' (human methylation), 'GSE130735' (mouse
+ methylation), 'GSE223748' (mammalian methylation), 'ENCFF386QWG' (histone mark),
'GSE65765' (C. elegans RNA-seq), 'GSE193140' (ATAC-Seq), 'blood_chemistry_example' (blood chemistry).
dir : str
@@ -59,6 +59,7 @@ def download_example_data(
logger.first_info("Starting download_example_data function")
data_type_to_url = {
+ "GSE130735": "https://pyaging.s3.amazonaws.com/example_data/GSE130735_subset.pkl",
"GSE193140": "https://pyaging.s3.amazonaws.com/example_data/GSE193140.pkl",
"GSE139307": "https://pyaging.s3.amazonaws.com/example_data/GSE139307.pkl",
"GSE223748": "https://pyaging.s3.amazonaws.com/example_data/GSE223748_subset.pkl",
diff --git a/pyaging/predict/_pred_utils.py b/pyaging/predict/_pred_utils.py
index 186cbaa..9d49436 100644
--- a/pyaging/predict/_pred_utils.py
+++ b/pyaging/predict/_pred_utils.py
@@ -295,6 +295,8 @@ def initialize_model(
"leecpc",
"leerpc",
"leerefinedrpc",
+ "meermultitissue",
+ "thompsonmultitissue",
]:
model = LinearModel(len(features))
elif clock_name in [
@@ -412,6 +414,10 @@ def preprocess_data(
X = adata.X
X = np.log1p(X)
adata.X = X
+ elif preprocessing == "times100":
+ X = adata.X
+ X = X * 100
+ adata.X = X
elif preprocessing == "tpm_norm_log1p":
X = adata.X
X = tpm_norm_log1p(X, preprocessing_helper)
diff --git a/pyaging/preprocess/_preprocess.py b/pyaging/preprocess/_preprocess.py
index 70744d0..0aef775 100644
--- a/pyaging/preprocess/_preprocess.py
+++ b/pyaging/preprocess/_preprocess.py
@@ -133,7 +133,7 @@ def df_to_adata(
imputer_strategy : str, optional
The strategy for imputing missing values in 'df'. Supported strategies include 'mean',
- 'median', 'constant', and 'knn'. Defaults to 'knn'.
+ 'median', 'constant' (missing values set to 0), and 'knn'. Defaults to 'knn'.
verbose: bool
Whether to log the output to console with the logger. Defaults to True.
diff --git a/tutorials/tutorial_atacseq.ipynb b/tutorials/tutorial_atacseq.ipynb
index e2fff99..7a12196 100644
--- a/tutorials/tutorial_atacseq.ipynb
+++ b/tutorials/tutorial_atacseq.ipynb
@@ -79,7 +79,7 @@
"text": [
"|-----> ποΈ Starting download_example_data function\n",
"|-----------> Data found in pyaging_data/GSE193140.pkl\n",
- "|-----> π Done! [0.0007s]\n"
+ "|-----> π Done! [0.0021s]\n"
]
}
],
@@ -370,22 +370,22 @@
"text": [
"|-----> ποΈ Starting df_to_adata function\n",
"|-----> βοΈ Create anndata object started\n",
- "|-----> β
Create anndata object finished [0.0031s]\n",
+ "|-----> β
Create anndata object finished [0.0025s]\n",
"|-----> βοΈ Add metadata to anndata started\n",
"|-----------? No metadata provided. Leaving adata.obs empty\n",
- "|-----> β οΈ Add metadata to anndata finished [0.0010s]\n",
+ "|-----> β οΈ Add metadata to anndata finished [0.0005s]\n",
"|-----> βοΈ Log data statistics started\n",
"|-----------> There are 157 observations\n",
"|-----------> There are 80400 features\n",
"|-----------> Total missing values: 0\n",
"|-----------> Percentage of missing values: 0.00%\n",
- "|-----> β
Log data statistics finished [0.0054s]\n",
+ "|-----> β
Log data statistics finished [0.0063s]\n",
"|-----> βοΈ Impute missing values started\n",
"|-----------> No missing values found. No imputation necessary\n",
- "|-----> β
Impute missing values finished [0.0070s]\n",
+ "|-----> β
Impute missing values finished [0.0053s]\n",
"|-----> βοΈ Add unstructured data to anndata started\n",
"|-----> β
Add unstructured data to anndata finished [0.0003s]\n",
- "|-----> π Done! [0.0197s]\n"
+ "|-----> π Done! [0.0175s]\n"
]
}
],
@@ -454,62 +454,64 @@
"|-----> ποΈ Starting predict_age function\n",
"|-----> βοΈ Set PyTorch device started\n",
"|-----------> Using device: cpu\n",
- "|-----> β
Set PyTorch device finished [0.0007s]\n",
+ "|-----> β
Set PyTorch device finished [0.0022s]\n",
"|-----> π Processing clock: OcampoATAC1\n",
"|-----------> βοΈ Load clock started\n",
- "|-----------------> Data found in pyaging_data/ocampoatac1.pt\n",
- "|-----------> β
Load clock finished [0.0061s]\n",
+ "|-----------------> Downloading data to pyaging_data/ocampoatac1.pt\n",
+ "|-----------------> in progress: 100.0000%\n",
+ "|-----------> β
Load clock finished [1.0218s]\n",
"|-----------> βοΈ Check features in adata started\n",
"|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0005s]\n",
+ "|-----------> β
Check features in adata finished [0.0010s]\n",
"|-----------> βοΈ Preprocess data started\n",
"|-----------------> Preprocessing data with function tpm_norm_log1p\n",
- "|-----------> β
Preprocess data finished [0.1565s]\n",
+ "|-----------> β
Preprocess data finished [0.2021s]\n",
"|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0023s]\n",
+ "|-----------> β
Filter features and extract data matrix finished [0.0025s]\n",
"|-----------> βοΈ Convert numpy array to tensor started\n",
- "|-----------> β
Convert numpy array to tensor finished [0.0007s]\n",
+ "|-----------> β
Convert numpy array to tensor finished [0.0008s]\n",
"|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.0010s]\n",
+ "|-----------> β
Initialize model finished [0.0021s]\n",
"|-----------> βοΈ Predict ages with model started\n",
- "|-----------> β
Predict ages with model finished [0.0007s]\n",
+ "|-----------> β
Predict ages with model finished [0.0005s]\n",
"|-----------> βοΈ Convert tensor to numpy array started\n",
"|-----------> β
Convert tensor to numpy array finished [0.0006s]\n",
"|-----------> βοΈ Add predicted ages to adata started\n",
- "|-----------> β
Add predicted ages to adata finished [0.0012s]\n",
+ "|-----------> β
Add predicted ages to adata finished [0.0009s]\n",
"|-----------> βοΈ Load all clock metadata started\n",
"|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
- "|-----------> β
Load all clock metadata finished [0.0012s]\n",
+ "|-----------> β
Load all clock metadata finished [0.0033s]\n",
"|-----------> βοΈ Add clock metadata to adata.uns started\n",
- "|-----------> β
Add clock metadata to adata.uns finished [0.0005s]\n",
+ "|-----------> β
Add clock metadata to adata.uns finished [0.0003s]\n",
"|-----> π Processing clock: OcampoATAC2\n",
"|-----------> βοΈ Load clock started\n",
- "|-----------------> Data found in pyaging_data/ocampoatac2.pt\n",
- "|-----------> β
Load clock finished [0.0090s]\n",
+ "|-----------------> Downloading data to pyaging_data/ocampoatac2.pt\n",
+ "|-----------------> in progress: 100.0000%\n",
+ "|-----------> β
Load clock finished [0.9181s]\n",
"|-----------> βοΈ Check features in adata started\n",
"|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0010s]\n",
+ "|-----------> β
Check features in adata finished [0.0031s]\n",
"|-----------> βοΈ Preprocess data started\n",
"|-----------------> Layer with tpm_norm_log1p preprocessing is already in adata\n",
- "|-----------> β
Preprocess data finished [0.0008s]\n",
+ "|-----------> β
Preprocess data finished [0.0041s]\n",
"|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0019s]\n",
+ "|-----------> β
Filter features and extract data matrix finished [0.0017s]\n",
"|-----------> βοΈ Convert numpy array to tensor started\n",
- "|-----------> β
Convert numpy array to tensor finished [0.0006s]\n",
+ "|-----------> β
Convert numpy array to tensor finished [0.0009s]\n",
"|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.0013s]\n",
+ "|-----------> β
Initialize model finished [0.0016s]\n",
"|-----------> βοΈ Predict ages with model started\n",
- "|-----------> β
Predict ages with model finished [0.0006s]\n",
+ "|-----------> β
Predict ages with model finished [0.0014s]\n",
"|-----------> βοΈ Convert tensor to numpy array started\n",
- "|-----------> β
Convert tensor to numpy array finished [0.0004s]\n",
+ "|-----------> β
Convert tensor to numpy array finished [0.0016s]\n",
"|-----------> βοΈ Add predicted ages to adata started\n",
- "|-----------> β
Add predicted ages to adata finished [0.0008s]\n",
+ "|-----------> β
Add predicted ages to adata finished [0.0031s]\n",
"|-----------> βοΈ Load all clock metadata started\n",
"|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
- "|-----------> β
Load all clock metadata finished [0.0025s]\n",
+ "|-----------> β
Load all clock metadata finished [0.0022s]\n",
"|-----------> βοΈ Add clock metadata to adata.uns started\n",
- "|-----------> β
Add clock metadata to adata.uns finished [0.0004s]\n",
- "|-----> π Done! [0.2084s]\n"
+ "|-----------> β
Add clock metadata to adata.uns finished [0.0006s]\n",
+ "|-----> π Done! [2.3205s]\n"
]
}
],
@@ -757,11 +759,12 @@
"{'species': 'Homo sapiens',\n",
" 'data_type': 'atac',\n",
" 'year': 2023,\n",
- " 'preprocessing': 'log1p',\n",
+ " 'implementation_approved_by_author(s)': 'β',\n",
+ " 'preprocessing': 'tpm_norm_log1p',\n",
" 'postprocessing': None,\n",
" 'citation': 'Morandini, Francesco, et al. \"ATAC-clock: An aging clock based on chromatin accessibility.\" GeroScience (2023): 1-18.',\n",
" 'doi': 'https://doi.org/10.1007/s11357-023-00986-0',\n",
- " 'notes': 'This is the model trained solely on ATAC data produced by the Ocampo lab'}"
+ " 'notes': 'This is the model trained on the ATAC data produced by the Ocampo lab plus a public dataset'}"
]
},
"execution_count": 12,
diff --git a/tutorials/tutorial_bloodchemistry.ipynb b/tutorials/tutorial_bloodchemistry.ipynb
index caf5c8a..08c31e7 100644
--- a/tutorials/tutorial_bloodchemistry.ipynb
+++ b/tutorials/tutorial_bloodchemistry.ipynb
@@ -72,7 +72,7 @@
"|-----> ποΈ Starting download_example_data function\n",
"|-----------> Downloading data to pyaging_data/blood_chemistry_example.pkl\n",
"|-----------> in progress: 100.0000%\n",
- "|-----> π Done! [0.4727s]\n"
+ "|-----> π Done! [0.5078s]\n"
]
}
],
@@ -259,22 +259,22 @@
"text": [
"|-----> ποΈ Starting df_to_adata function\n",
"|-----> βοΈ Create anndata object started\n",
- "|-----> β
Create anndata object finished [0.0020s]\n",
+ "|-----> β
Create anndata object finished [0.0027s]\n",
"|-----> βοΈ Add metadata to anndata started\n",
"|-----------? No metadata provided. Leaving adata.obs empty\n",
- "|-----> β οΈ Add metadata to anndata finished [0.0010s]\n",
+ "|-----> β οΈ Add metadata to anndata finished [0.0006s]\n",
"|-----> βοΈ Log data statistics started\n",
"|-----------> There are 30 observations\n",
"|-----------> There are 10 features\n",
"|-----------> Total missing values: 0\n",
"|-----------> Percentage of missing values: 0.00%\n",
- "|-----> β
Log data statistics finished [0.0019s]\n",
+ "|-----> β
Log data statistics finished [0.0016s]\n",
"|-----> βοΈ Impute missing values started\n",
"|-----------> No missing values found. No imputation necessary\n",
- "|-----> β
Impute missing values finished [0.0010s]\n",
+ "|-----> β
Impute missing values finished [0.0009s]\n",
"|-----> βοΈ Add unstructured data to anndata started\n",
- "|-----> β
Add unstructured data to anndata finished [0.0003s]\n",
- "|-----> π Done! [0.0102s]\n"
+ "|-----> β
Add unstructured data to anndata finished [0.0009s]\n",
+ "|-----> π Done! [0.0104s]\n"
]
}
],
@@ -343,36 +343,36 @@
"|-----> ποΈ Starting predict_age function\n",
"|-----> βοΈ Set PyTorch device started\n",
"|-----------> Using device: cpu\n",
- "|-----> β
Set PyTorch device finished [0.0006s]\n",
+ "|-----> β
Set PyTorch device finished [0.0018s]\n",
"|-----> π Processing clock: PhenoAge\n",
"|-----------> βοΈ Load clock started\n",
"|-----------------> Downloading data to pyaging_data/phenoage.pt\n",
"|-----------------> in progress: 100.0000%\n",
- "|-----------> β
Load clock finished [0.4947s]\n",
+ "|-----------> β
Load clock finished [0.5525s]\n",
"|-----------> βοΈ Check features in adata started\n",
"|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0017s]\n",
+ "|-----------> β
Check features in adata finished [0.0009s]\n",
"|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0030s]\n",
+ "|-----------> β
Filter features and extract data matrix finished [0.0022s]\n",
"|-----------> βοΈ Convert numpy array to tensor started\n",
- "|-----------> β
Convert numpy array to tensor finished [0.0013s]\n",
+ "|-----------> β
Convert numpy array to tensor finished [0.0011s]\n",
"|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.0023s]\n",
+ "|-----------> β
Initialize model finished [0.0019s]\n",
"|-----------> βοΈ Predict ages with model started\n",
- "|-----------> β
Predict ages with model finished [0.0012s]\n",
+ "|-----------> β
Predict ages with model finished [0.0008s]\n",
"|-----------> βοΈ Convert tensor to numpy array started\n",
- "|-----------> β
Convert tensor to numpy array finished [0.0008s]\n",
+ "|-----------> β
Convert tensor to numpy array finished [0.0014s]\n",
"|-----------> βοΈ Postprocess data started\n",
"|-----------------> Postprocessing data with function mortality_to_phenoage\n",
- "|-----------> β
Postprocess data finished [0.0015s]\n",
+ "|-----------> β
Postprocess data finished [0.0016s]\n",
"|-----------> βοΈ Add predicted ages to adata started\n",
- "|-----------> β
Add predicted ages to adata finished [0.0012s]\n",
+ "|-----------> β
Add predicted ages to adata finished [0.0014s]\n",
"|-----------> βοΈ Load all clock metadata started\n",
"|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
- "|-----------> β
Load all clock metadata finished [0.0024s]\n",
+ "|-----------> β
Load all clock metadata finished [0.0018s]\n",
"|-----------> βοΈ Add clock metadata to adata.uns started\n",
- "|-----------> β
Add clock metadata to adata.uns finished [0.0006s]\n",
- "|-----> π Done! [0.5255s]\n"
+ "|-----------> β
Add clock metadata to adata.uns finished [0.0003s]\n",
+ "|-----> π Done! [0.6331s]\n"
]
}
],
@@ -612,7 +612,8 @@
" 'postprocessing': 'mortality_to_phenoage',\n",
" 'citation': 'Levine, Morgan E., et al. \"An epigenetic biomarker of aging for lifespan and healthspan.\" Aging (albany NY) 10.4 (2018): 573.',\n",
" 'doi': 'https://doi.org/10.18632%2Faging.101414',\n",
- " 'notes': 'To check the units for each of the features, please go to the paper in Table 1.'}"
+ " 'notes': 'To check the units for each of the features, please go to the paper in Table 1.',\n",
+ " 'implementation_approved_by_author(s)': 'β'}"
]
},
"execution_count": 12,
diff --git a/tutorials/tutorial_dnam.ipynb b/tutorials/tutorial_dnam.ipynb
index 7e2bb4d..cc7ecad 100644
--- a/tutorials/tutorial_dnam.ipynb
+++ b/tutorials/tutorial_dnam.ipynb
@@ -84,9 +84,8 @@
"output_type": "stream",
"text": [
"|-----> ποΈ Starting download_example_data function\n",
- "|-----------> Downloading data to pyaging_data/GSE139307.pkl\n",
- "|-----------> in progress: 100.0000%\n",
- "|-----> π Done! [22.4135s]\n"
+ "|-----------> Data found in pyaging_data/GSE139307.pkl\n",
+ "|-----> π Done! [0.0018s]\n"
]
}
],
@@ -375,7 +374,7 @@
"text": [
"|-----> ποΈ Starting df_to_adata function\n",
"|-----> βοΈ Create anndata object started\n",
- "|-----> β
Create anndata object finished [0.0406s]\n",
+ "|-----> β
Create anndata object finished [0.0465s]\n",
"|-----> βοΈ Add metadata to anndata started\n",
"|-----------> Adding provided metadata to adata.obs\n",
"|-----> β
Add metadata to anndata finished [0.0007s]\n",
@@ -384,13 +383,13 @@
"|-----------> There are 485514 features\n",
"|-----------> Total missing values: 526\n",
"|-----------> Percentage of missing values: 0.00%\n",
- "|-----> β
Log data statistics finished [0.0120s]\n",
+ "|-----> β
Log data statistics finished [0.0197s]\n",
"|-----> βοΈ Impute missing values started\n",
"|-----------> Imputing missing values using mean strategy\n",
- "|-----> β
Impute missing values finished [0.1744s]\n",
+ "|-----> β
Impute missing values finished [0.1813s]\n",
"|-----> βοΈ Add unstructured data to anndata started\n",
"|-----> β
Add unstructured data to anndata finished [0.0003s]\n",
- "|-----> π Done! [0.3132s]\n"
+ "|-----> π Done! [0.3691s]\n"
]
}
],
@@ -460,89 +459,86 @@
"|-----> ποΈ Starting predict_age function\n",
"|-----> βοΈ Set PyTorch device started\n",
"|-----------> Using device: cpu\n",
- "|-----> β
Set PyTorch device finished [0.0007s]\n",
+ "|-----> β
Set PyTorch device finished [0.0016s]\n",
"|-----> π Processing clock: Horvath2013\n",
"|-----------> βοΈ Load clock started\n",
- "|-----------------> Downloading data to pyaging_data/horvath2013.pt\n",
- "|-----------------> in progress: 100.0000%\n",
- "|-----------> β
Load clock finished [0.5573s]\n",
+ "|-----------------> Data found in pyaging_data/horvath2013.pt\n",
+ "|-----------> β
Load clock finished [0.0050s]\n",
"|-----------> βοΈ Check features in adata started\n",
"|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0031s]\n",
+ "|-----------> β
Check features in adata finished [0.0025s]\n",
"|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0033s]\n",
+ "|-----------> β
Filter features and extract data matrix finished [0.0035s]\n",
"|-----------> βοΈ Convert numpy array to tensor started\n",
- "|-----------> β
Convert numpy array to tensor finished [0.0013s]\n",
+ "|-----------> β
Convert numpy array to tensor finished [0.0022s]\n",
"|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.0024s]\n",
+ "|-----------> β
Initialize model finished [0.0019s]\n",
"|-----------> βοΈ Predict ages with model started\n",
- "|-----------> β
Predict ages with model finished [0.0012s]\n",
+ "|-----------> β
Predict ages with model finished [0.0010s]\n",
"|-----------> βοΈ Convert tensor to numpy array started\n",
- "|-----------> β
Convert tensor to numpy array finished [0.0011s]\n",
+ "|-----------> β
Convert tensor to numpy array finished [0.0013s]\n",
"|-----------> βοΈ Postprocess data started\n",
"|-----------------> Postprocessing data with function anti_log_linear\n",
- "|-----------> β
Postprocess data finished [0.0015s]\n",
+ "|-----------> β
Postprocess data finished [0.0008s]\n",
"|-----------> βοΈ Add predicted ages to adata started\n",
- "|-----------> β
Add predicted ages to adata finished [0.0019s]\n",
+ "|-----------> β
Add predicted ages to adata finished [0.0006s]\n",
"|-----------> βοΈ Load all clock metadata started\n",
"|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
- "|-----------> β
Load all clock metadata finished [0.0028s]\n",
+ "|-----------> β
Load all clock metadata finished [0.0023s]\n",
"|-----------> βοΈ Add clock metadata to adata.uns started\n",
- "|-----------> β
Add clock metadata to adata.uns finished [0.0006s]\n",
+ "|-----------> β
Add clock metadata to adata.uns finished [0.0004s]\n",
"|-----> π Processing clock: AltumAge\n",
"|-----------> βοΈ Load clock started\n",
- "|-----------------> Downloading data to pyaging_data/altumage.pt\n",
- "|-----------------> in progress: 100.0000%\n",
- "|-----------> β
Load clock finished [1.6789s]\n",
+ "|-----------------> Data found in pyaging_data/altumage.pt\n",
+ "|-----------> β
Load clock finished [0.0106s]\n",
"|-----------> βοΈ Check features in adata started\n",
"|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0207s]\n",
+ "|-----------> β
Check features in adata finished [0.0147s]\n",
"|-----------> βοΈ Preprocess data started\n",
"|-----------------> Preprocessing data with function scale\n",
- "|-----------> β
Preprocess data finished [0.1290s]\n",
+ "|-----------> β
Preprocess data finished [0.1357s]\n",
"|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0108s]\n",
+ "|-----------> β
Filter features and extract data matrix finished [0.0132s]\n",
"|-----------> βοΈ Convert numpy array to tensor started\n",
- "|-----------> β
Convert numpy array to tensor finished [0.0009s]\n",
+ "|-----------> β
Convert numpy array to tensor finished [0.0012s]\n",
"|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.0044s]\n",
+ "|-----------> β
Initialize model finished [0.0057s]\n",
"|-----------> βοΈ Predict ages with model started\n",
"|-----------> β
Predict ages with model finished [0.0026s]\n",
"|-----------> βοΈ Convert tensor to numpy array started\n",
- "|-----------> β
Convert tensor to numpy array finished [0.0006s]\n",
+ "|-----------> β
Convert tensor to numpy array finished [0.0004s]\n",
"|-----------> βοΈ Add predicted ages to adata started\n",
- "|-----------> β
Add predicted ages to adata finished [0.0005s]\n",
+ "|-----------> β
Add predicted ages to adata finished [0.0006s]\n",
"|-----------> βοΈ Load all clock metadata started\n",
"|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
- "|-----------> β
Load all clock metadata finished [0.0013s]\n",
+ "|-----------> β
Load all clock metadata finished [0.0012s]\n",
"|-----------> βοΈ Add clock metadata to adata.uns started\n",
- "|-----------> β
Add clock metadata to adata.uns finished [0.0003s]\n",
+ "|-----------> β
Add clock metadata to adata.uns finished [0.0005s]\n",
"|-----> π Processing clock: PCGrimAge\n",
"|-----------> βοΈ Load clock started\n",
- "|-----------------> Downloading data to pyaging_data/pcgrimage.pt\n",
- "|-----------------> in progress: 100.0000%\n",
- "|-----------> β
Load clock finished [97.5974s]\n",
+ "|-----------------> Data found in pyaging_data/pcgrimage.pt\n",
+ "|-----------> β
Load clock finished [0.1976s]\n",
"|-----------> βοΈ Check features in adata started\n",
"|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0395s]\n",
+ "|-----------> β
Check features in adata finished [0.0413s]\n",
"|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0313s]\n",
+ "|-----------> β
Filter features and extract data matrix finished [0.0363s]\n",
"|-----------> βοΈ Convert numpy array to tensor started\n",
- "|-----------> β
Convert numpy array to tensor finished [0.0017s]\n",
+ "|-----------> β
Convert numpy array to tensor finished [0.0015s]\n",
"|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.1665s]\n",
+ "|-----------> β
Initialize model finished [0.0429s]\n",
"|-----------> βοΈ Predict ages with model started\n",
- "|-----------> β
Predict ages with model finished [0.0887s]\n",
+ "|-----------> β
Predict ages with model finished [0.0638s]\n",
"|-----------> βοΈ Convert tensor to numpy array started\n",
- "|-----------> β
Convert tensor to numpy array finished [0.0003s]\n",
+ "|-----------> β
Convert tensor to numpy array finished [0.0004s]\n",
"|-----------> βοΈ Add predicted ages to adata started\n",
"|-----------> β
Add predicted ages to adata finished [0.0005s]\n",
"|-----------> βοΈ Load all clock metadata started\n",
"|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
- "|-----------> β
Load all clock metadata finished [0.0014s]\n",
+ "|-----------> β
Load all clock metadata finished [0.0013s]\n",
"|-----------> βοΈ Add clock metadata to adata.uns started\n",
- "|-----------> β
Add clock metadata to adata.uns finished [0.0002s]\n",
- "|-----> π Done! [100.3793s]\n"
+ "|-----------> β
Add clock metadata to adata.uns finished [0.0003s]\n",
+ "|-----> π Done! [0.7939s]\n"
]
}
],
@@ -843,7 +839,8 @@
" 'postprocessing': 'anti_log_linear',\n",
" 'citation': 'Horvath, Steve. \"DNA methylation age of human tissues and cell types.\" Genome biology 14.10 (2013): 1-20.',\n",
" 'doi': 'https://doi.org/10.1186/gb-2013-14-10-r115',\n",
- " 'notes': None}"
+ " 'notes': None,\n",
+ " 'implementation_approved_by_author(s)': 'β'}"
]
},
"execution_count": 13,
@@ -867,6 +864,7 @@
"{'species': 'Homo sapiens',\n",
" 'data_type': 'methylation',\n",
" 'year': 2022,\n",
+ " 'implementation_approved_by_author(s)': 'β
',\n",
" 'preprocessing': 'scale',\n",
" 'postprocessing': None,\n",
" 'citation': 'de Lima Camillo, Lucas Paulo, Louis R. Lapierre, and Ritambhara Singh. \"A pan-tissue DNA-methylation epigenetic clock based on deep learning.\" npj Aging 8.1 (2022): 4.',\n",
@@ -899,7 +897,8 @@
" 'postprocessing': None,\n",
" 'citation': 'Higgins-Chen, Albert T., et al. \"A computational solution for bolstering reliability of epigenetic clocks: Implications for clinical trials and longitudinal tracking.\" Nature aging 2.7 (2022): 644-661.',\n",
" 'doi': 'https://doi.org/10.1038/s43587-022-00248-2',\n",
- " 'notes': None}"
+ " 'notes': None,\n",
+ " 'implementation_approved_by_author(s)': 'β'}"
]
},
"execution_count": 15,
@@ -911,6 +910,727 @@
"adata.uns['pcgrimage_metadata']"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "49080a8a-1957-4eee-ac38-2ae495f100bd",
+ "metadata": {},
+ "source": [
+ "## Mus musculus"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5e620ab9-4837-4a7a-83f1-726be9c9f7bf",
+ "metadata": {},
+ "source": [
+ "### Download and load example data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5ff79235-46fb-4c59-a629-1f479f9f13a3",
+ "metadata": {},
+ "source": [
+ "Let's download the publicly available dataset GSE130735 with RRBS samples from mouse. Given it is RRBS, there are millions of CpG sites."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "148f7ae7-8a5d-4fd2-a159-e3b8e576bbce",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "|-----> ποΈ Starting download_example_data function\n",
+ "|-----------> Data found in pyaging_data/GSE130735_subset.pkl\n",
+ "|-----> π Done! [0.0015s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "pya.data.download_example_data('GSE130735')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "2a6fcd90-8ded-40d5-a606-e32e21816ebf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_pickle('pyaging_data/GSE130735_subset.pkl')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "c4d8245b-8d04-4ae0-945d-3aed4956a3bb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " chr1:3020814 | \n",
+ " chr1:3020842 | \n",
+ " chr1:3020877 | \n",
+ " chr1:3020891 | \n",
+ " chr1:3020945 | \n",
+ " chr1:3020971 | \n",
+ " chr1:3020987 | \n",
+ " chr1:3021012 | \n",
+ " chr1:3037802 | \n",
+ " chr1:3037820 | \n",
+ " ... | \n",
+ " chrY:1825397 | \n",
+ " chrY:4682362 | \n",
+ " chrY:32122892 | \n",
+ " chrY:85867071 | \n",
+ " chrY:85867083 | \n",
+ " chrY:85867117 | \n",
+ " chrY:85867137 | \n",
+ " chrY:85867139 | \n",
+ " chrY:85867178 | \n",
+ " chrY:88224179 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " GSM3752631 | \n",
+ " 0.609 | \n",
+ " 0.25 | \n",
+ " 0.408 | \n",
+ " 0.189 | \n",
+ " 0.068 | \n",
+ " 0.373 | \n",
+ " 0.571 | \n",
+ " 0.252 | \n",
+ " 0.333 | \n",
+ " 0.158 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " GSM3752625 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.973 | \n",
+ " 0.984 | \n",
+ " 0.912 | \n",
+ " 0.915 | \n",
+ " 0.987 | \n",
+ " 0.974 | \n",
+ " 0.991 | \n",
+ " 0.932 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " GSM3752634 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.526 | \n",
+ " 0.131 | \n",
+ " 0.000 | \n",
+ " 0.038 | \n",
+ " 0.469 | \n",
+ " 0.769 | \n",
+ " 0.772 | \n",
+ " 0.146 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " GSM3752620 | \n",
+ " 0.931 | \n",
+ " 0.92 | \n",
+ " 0.988 | \n",
+ " 0.949 | \n",
+ " 0.897 | \n",
+ " 0.921 | \n",
+ " 0.907 | \n",
+ " 0.958 | \n",
+ " 1.000 | \n",
+ " 0.867 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " GSM3752622 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.205 | \n",
+ " 0.382 | \n",
+ " 0.091 | \n",
+ " 0.132 | \n",
+ " 0.174 | \n",
+ " 0.227 | \n",
+ " 0.108 | \n",
+ " 0.053 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows Γ 1778324 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " chr1:3020814 chr1:3020842 chr1:3020877 chr1:3020891 \\\n",
+ "GSM3752631 0.609 0.25 0.408 0.189 \n",
+ "GSM3752625 NaN NaN 0.973 0.984 \n",
+ "GSM3752634 NaN NaN 0.526 0.131 \n",
+ "GSM3752620 0.931 0.92 0.988 0.949 \n",
+ "GSM3752622 NaN NaN 0.205 0.382 \n",
+ "\n",
+ " chr1:3020945 chr1:3020971 chr1:3020987 chr1:3021012 \\\n",
+ "GSM3752631 0.068 0.373 0.571 0.252 \n",
+ "GSM3752625 0.912 0.915 0.987 0.974 \n",
+ "GSM3752634 0.000 0.038 0.469 0.769 \n",
+ "GSM3752620 0.897 0.921 0.907 0.958 \n",
+ "GSM3752622 0.091 0.132 0.174 0.227 \n",
+ "\n",
+ " chr1:3037802 chr1:3037820 ... chrY:1825397 chrY:4682362 \\\n",
+ "GSM3752631 0.333 0.158 ... NaN NaN \n",
+ "GSM3752625 0.991 0.932 ... NaN NaN \n",
+ "GSM3752634 0.772 0.146 ... NaN NaN \n",
+ "GSM3752620 1.000 0.867 ... NaN NaN \n",
+ "GSM3752622 0.108 0.053 ... NaN NaN \n",
+ "\n",
+ " chrY:32122892 chrY:85867071 chrY:85867083 chrY:85867117 \\\n",
+ "GSM3752631 NaN NaN NaN NaN \n",
+ "GSM3752625 NaN NaN NaN NaN \n",
+ "GSM3752634 NaN NaN NaN NaN \n",
+ "GSM3752620 NaN NaN NaN NaN \n",
+ "GSM3752622 NaN NaN NaN NaN \n",
+ "\n",
+ " chrY:85867137 chrY:85867139 chrY:85867178 chrY:88224179 \n",
+ "GSM3752631 NaN NaN NaN NaN \n",
+ "GSM3752625 NaN NaN NaN NaN \n",
+ "GSM3752634 NaN NaN NaN NaN \n",
+ "GSM3752620 NaN NaN NaN NaN \n",
+ "GSM3752622 NaN NaN NaN NaN \n",
+ "\n",
+ "[5 rows x 1778324 columns]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ea7c44d7-73c3-4cd7-844d-bab34aa2dcee",
+ "metadata": {},
+ "source": [
+ "### Convert data to AnnData object"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "04f2758f-fb8c-4a52-983a-29ec826dba6c",
+ "metadata": {},
+ "source": [
+ "AnnData objects are highly flexible and are thus our preferred method of organizing data for age prediction."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "c261c2dc-8245-47d7-82d8-7731f7c94f1e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "|-----> ποΈ Starting df_to_adata function\n",
+ "|-----> βοΈ Create anndata object started\n",
+ "|-----> β
Create anndata object finished [0.0218s]\n",
+ "|-----> βοΈ Add metadata to anndata started\n",
+ "|-----------? No metadata provided. Leaving adata.obs empty\n",
+ "|-----> β οΈ Add metadata to anndata finished [0.0008s]\n",
+ "|-----> βοΈ Log data statistics started\n",
+ "|-----------> There are 14 observations\n",
+ "|-----------> There are 1778324 features\n",
+ "|-----------> Total missing values: 6322346\n",
+ "|-----------> Percentage of missing values: 25.39%\n",
+ "|-----> β
Log data statistics finished [0.0214s]\n",
+ "|-----> βοΈ Impute missing values started\n",
+ "|-----------> Imputing missing values using mean strategy\n",
+ "|-----> β
Impute missing values finished [0.3441s]\n",
+ "|-----> βοΈ Add unstructured data to anndata started\n",
+ "|-----> β
Add unstructured data to anndata finished [0.0003s]\n",
+ "|-----> π Done! [0.3931s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "adata = pya.pp.df_to_adata(df, imputer_strategy='mean')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7349164c-f28b-4222-bf41-6f80d8b79c3b",
+ "metadata": {},
+ "source": [
+ "The resulting `adata` object, with the predicted ages added, is displayed further below."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4b5ff1ef-e724-407a-b6d4-9907558f21ba",
+ "metadata": {},
+ "source": [
+ "### Predict age"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eb197ded-91dd-4319-8dbb-a635d09c8367",
+ "metadata": {},
+ "source": [
+ "We can either predict one clock at a time or several at once. For convenience, let's simply input two mouse clocks, ThompsonMultiTissue and MeerMultiTissue, at once. The function is invariant to the capitalization of the clock name."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "29ebd120-1c4a-4847-a3a3-5be7ffc5f730",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "|-----> ποΈ Starting predict_age function\n",
+ "|-----> βοΈ Set PyTorch device started\n",
+ "|-----------> Using device: cpu\n",
+ "|-----> β
Set PyTorch device finished [0.0028s]\n",
+ "|-----> π Processing clock: ThompsonMultiTissue\n",
+ "|-----------> βοΈ Load clock started\n",
+ "|-----------------> Data found in pyaging_data/thompsonmultitissue.pt\n",
+ "|-----------> β
Load clock finished [0.0050s]\n",
+ "|-----------> βοΈ Check features in adata started\n",
+ "|-----------------? 1 out of 582 features (0.17%) are missing and will be added with default value 0: ['chr4:91376687'], etc.\n",
+ "|-----------------> Expanded adata with 1 missing features.\n",
+ "|-----------> β οΈ Check features in adata finished [0.3974s]\n",
+ "|-----------> βοΈ Filter features and extract data matrix started\n",
+ "|-----------> β
Filter features and extract data matrix finished [0.0175s]\n",
+ "|-----------> βοΈ Convert numpy array to tensor started\n",
+ "|-----------> β
Convert numpy array to tensor finished [0.0009s]\n",
+ "|-----------> βοΈ Initialize model started\n",
+ "|-----------> β
Initialize model finished [0.0019s]\n",
+ "|-----------> βοΈ Predict ages with model started\n",
+ "|-----------> β
Predict ages with model finished [0.0013s]\n",
+ "|-----------> βοΈ Convert tensor to numpy array started\n",
+ "|-----------> β
Convert tensor to numpy array finished [0.0007s]\n",
+ "|-----------> βοΈ Add predicted ages to adata started\n",
+ "|-----------> β
Add predicted ages to adata finished [0.0007s]\n",
+ "|-----------> βοΈ Load all clock metadata started\n",
+ "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
+ "|-----------> β
Load all clock metadata finished [0.0028s]\n",
+ "|-----------> βοΈ Add clock metadata to adata.uns started\n",
+ "|-----------> β
Add clock metadata to adata.uns finished [0.0007s]\n",
+ "|-----> π Processing clock: MeerMultiTissue\n",
+ "|-----------> βοΈ Load clock started\n",
+ "|-----------------> Data found in pyaging_data/meermultitissue.pt\n",
+ "|-----------> β
Load clock finished [0.0019s]\n",
+ "|-----------> βοΈ Check features in adata started\n",
+ "|-----------------? 225 out of 435 features (51.72%) are missing and will be added with default value 0: ['chr10:111559529', 'chr10:115250413', 'chr10:127620127'], etc.\n",
+ "|-----------------> Expanded adata with 225 missing features.\n",
+ "|-----------> β οΈ Check features in adata finished [0.4286s]\n",
+ "|-----------> βοΈ Preprocess data started\n",
+ "|-----------------> Preprocessing data with function times100\n",
+ "|-----------> β
Preprocess data finished [0.0724s]\n",
+ "|-----------> βοΈ Filter features and extract data matrix started\n",
+ "|-----------> β
Filter features and extract data matrix finished [0.0105s]\n",
+ "|-----------> βοΈ Convert numpy array to tensor started\n",
+ "|-----------> β
Convert numpy array to tensor finished [0.0015s]\n",
+ "|-----------> βοΈ Initialize model started\n",
+ "|-----------> β
Initialize model finished [0.0014s]\n",
+ "|-----------> βοΈ Predict ages with model started\n",
+ "|-----------> β
Predict ages with model finished [0.0012s]\n",
+ "|-----------> βοΈ Convert tensor to numpy array started\n",
+ "|-----------> β
Convert tensor to numpy array finished [0.0016s]\n",
+ "|-----------> βοΈ Add predicted ages to adata started\n",
+ "|-----------> β
Add predicted ages to adata finished [0.0007s]\n",
+ "|-----------> βοΈ Load all clock metadata started\n",
+ "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
+ "|-----------> β
Load all clock metadata finished [0.0016s]\n",
+ "|-----------> βοΈ Add clock metadata to adata.uns started\n",
+ "|-----------> β
Add clock metadata to adata.uns finished [0.0005s]\n",
+ "|-----> π Done! [1.1162s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "adata = pya.pred.predict_age(adata, ['ThompsonMultiTissue', 'MeerMultiTissue',])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dfce0265-b647-42ae-8693-0e3c05d480d3",
+ "metadata": {},
+ "source": [
+ "Note that the Meer clock predicts age in days rather than months."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "4bb259c5-2cba-4dc1-b123-2387a5bb7749",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " thompsonmultitissue | \n",
+ " meermultitissue | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " GSM3752631 | \n",
+ " 19.634113 | \n",
+ " 223.113083 | \n",
+ "
\n",
+ " \n",
+ " GSM3752625 | \n",
+ " -1.410467 | \n",
+ " 0.860733 | \n",
+ "
\n",
+ " \n",
+ " GSM3752634 | \n",
+ " 61.058792 | \n",
+ " 650.326477 | \n",
+ "
\n",
+ " \n",
+ " GSM3752620 | \n",
+ " -2.663811 | \n",
+ " 49.164429 | \n",
+ "
\n",
+ " \n",
+ " GSM3752622 | \n",
+ " 20.594114 | \n",
+ " 231.560425 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " thompsonmultitissue meermultitissue\n",
+ "GSM3752631 19.634113 223.113083\n",
+ "GSM3752625 -1.410467 0.860733\n",
+ "GSM3752634 61.058792 650.326477\n",
+ "GSM3752620 -2.663811 49.164429\n",
+ "GSM3752622 20.594114 231.560425"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "adata.obs.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8519affc-ffc8-4904-ad7a-bd6a6d6458cf",
+ "metadata": {},
+ "source": [
+ "Having so much information printed can be overwhelming, particularly when running several clocks at once. In such cases, just set verbose to False."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "18b44cfa-36d5-49c9-badf-7ba9e189bbc0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pya.data.download_example_data('GSE130735', verbose=False)\n",
+ "df = pd.read_pickle('pyaging_data/GSE130735_subset.pkl')\n",
+ "adata = pya.preprocess.df_to_adata(df, imputer_strategy='mean', verbose=False)\n",
+ "adata = pya.pred.predict_age(adata, ['ThompsonMultiTissue', 'MeerMultiTissue',], verbose=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "a2520978-b693-474f-88cf-91bcde1a5d95",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " thompsonmultitissue | \n",
+ " meermultitissue | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " GSM3752631 | \n",
+ " 19.634113 | \n",
+ " 223.113083 | \n",
+ "
\n",
+ " \n",
+ " GSM3752625 | \n",
+ " -1.410467 | \n",
+ " 0.860733 | \n",
+ "
\n",
+ " \n",
+ " GSM3752634 | \n",
+ " 61.058792 | \n",
+ " 650.326477 | \n",
+ "
\n",
+ " \n",
+ " GSM3752620 | \n",
+ " -2.663811 | \n",
+ " 49.164429 | \n",
+ "
\n",
+ " \n",
+ " GSM3752622 | \n",
+ " 20.594114 | \n",
+ " 231.560425 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " thompsonmultitissue meermultitissue\n",
+ "GSM3752631 19.634113 223.113083\n",
+ "GSM3752625 -1.410467 0.860733\n",
+ "GSM3752634 61.058792 650.326477\n",
+ "GSM3752620 -2.663811 49.164429\n",
+ "GSM3752622 20.594114 231.560425"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "adata.obs.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "33119798-f1b3-4c4c-9f18-e4e4b7ca21e8",
+ "metadata": {},
+ "source": [
+ "After age prediction, the clocks are added to `adata.obs`. Moreover, the percent of missing values for each clock and other metadata are included in `adata.uns`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "a4585982-32bf-49c3-93ca-85ed26af4199",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "AnnData object with n_obs × n_vars = 14 × 1778550\n",
+ " obs: 'thompsonmultitissue', 'meermultitissue'\n",
+ " uns: 'meermultitissue_metadata', 'thompsonmultitissue_percent_na', 'thompsonmultitissue_metadata', 'meermultitissue_percent_na'\n",
+ " layers: 'X_original', 'X_imputed', 'X_times100'"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "adata"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "615f8fbf-f5e1-4af9-a2a0-5f4f781001fe",
+ "metadata": {},
+ "source": [
+ "### Get citation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0fe55edd-9271-4b41-857d-ef3fceafc2a6",
+ "metadata": {},
+ "source": [
+ "The DOI, citation, and some metadata are automatically added to the AnnData object under `adata.uns['CLOCKNAME_metadata']`, where `CLOCKNAME` is the lowercase clock name."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "44fa2b51-7c04-4806-843f-06a0b385c0ec",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'species': 'Mus musculus',\n",
+ " 'data_type': 'methylation',\n",
+ " 'year': 2018,\n",
+ " 'implementation_approved_by_author(s)': 'β',\n",
+ " 'preprocessing': None,\n",
+ " 'postprocessing': None,\n",
+ " 'citation': 'Thompson, Michael J., et al. \"A multi-tissue full lifespan epigenetic clock for mice.\" Aging (Albany NY) 10.10 (2018): 2832.',\n",
+ " 'doi': 'https://doi.org/10.18632/aging.101590',\n",
+ " 'notes': None}"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "adata.uns['thompsonmultitissue_metadata']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "cdf4c609-7a24-4c3f-a891-647315b77d54",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'species': 'Mus musculus',\n",
+ " 'data_type': 'methylation',\n",
+ " 'year': 2018,\n",
+ " 'implementation_approved_by_author(s)': 'β',\n",
+ " 'preprocessing': 'times100',\n",
+ " 'postprocessing': None,\n",
+ " 'citation': 'Meer, Margarita V., et al. \"A whole lifespan mouse multi-tissue DNA methylation clock.\" Elife 7 (2018): e40675.',\n",
+ " 'doi': 'https://doi.org/10.7554/eLife.40675',\n",
+ " 'notes': 'Predicts age in days'}"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "adata.uns['meermultitissue_metadata']"
+ ]
+ },
{
"cell_type": "markdown",
"id": "21b470d4-eef3-4825-9899-fcb0068c9c1c",
@@ -937,7 +1657,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 29,
"id": "5d07743e-516c-4cac-a733-a05c2ed55d53",
"metadata": {},
"outputs": [
@@ -948,7 +1668,7 @@
"|-----> ποΈ Starting download_example_data function\n",
"|-----------> Downloading data to pyaging_data/GSE223748_subset.pkl\n",
"|-----------> in progress: 100.0000%\n",
- "|-----> π Done! [5.1092s]\n"
+ "|-----> π Done! [4.6242s]\n"
]
}
],
@@ -958,7 +1678,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 30,
"id": "6e929219-e691-4171-911e-46143ae94898",
"metadata": {},
"outputs": [],
@@ -968,7 +1688,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 31,
"id": "03141fc7-f175-4ad3-86ed-22d51db5cadd",
"metadata": {},
"outputs": [
@@ -1202,7 +1922,7 @@
"[5 rows x 37554 columns]"
]
},
- "execution_count": 18,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@@ -1229,7 +1949,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 32,
"id": "6437e2df-a1f5-4d66-949c-e0fc706b574b",
"metadata": {},
"outputs": [
@@ -1239,22 +1959,22 @@
"text": [
"|-----> ποΈ Starting df_to_adata function\n",
"|-----> βοΈ Create anndata object started\n",
- "|-----> β
Create anndata object finished [0.0019s]\n",
+ "|-----> β
Create anndata object finished [0.0013s]\n",
"|-----> βοΈ Add metadata to anndata started\n",
"|-----------? No metadata provided. Leaving adata.obs empty\n",
- "|-----> β οΈ Add metadata to anndata finished [0.0007s]\n",
+ "|-----> β οΈ Add metadata to anndata finished [0.0005s]\n",
"|-----> βοΈ Log data statistics started\n",
"|-----------> There are 100 observations\n",
"|-----------> There are 37554 features\n",
"|-----------> Total missing values: 0\n",
"|-----------> Percentage of missing values: 0.00%\n",
- "|-----> β
Log data statistics finished [0.0058s]\n",
+ "|-----> β
Log data statistics finished [0.0047s]\n",
"|-----> βοΈ Impute missing values started\n",
"|-----------> No missing values found. No imputation necessary\n",
- "|-----> β
Impute missing values finished [0.0067s]\n",
+ "|-----> β
Impute missing values finished [0.0075s]\n",
"|-----> βοΈ Add unstructured data to anndata started\n",
- "|-----> β
Add unstructured data to anndata finished [0.0004s]\n",
- "|-----> π Done! [0.0190s]\n"
+ "|-----> β
Add unstructured data to anndata finished [0.0008s]\n",
+ "|-----> π Done! [0.0188s]\n"
]
}
],
@@ -1272,7 +1992,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 33,
"id": "e26459d3-276d-4cda-b3bf-a4147c397667",
"metadata": {},
"outputs": [
@@ -1285,7 +2005,7 @@
" layers: 'X_original'"
]
},
- "execution_count": 20,
+ "execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
@@ -1312,7 +2032,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 34,
"id": "1e55a74c-4bbf-41a6-bbe3-0faf295ef33c",
"metadata": {},
"outputs": [
@@ -1323,117 +2043,117 @@
"|-----> ποΈ Starting predict_age function\n",
"|-----> βοΈ Set PyTorch device started\n",
"|-----------> Using device: cpu\n",
- "|-----> β
Set PyTorch device finished [0.0012s]\n",
+ "|-----> β
Set PyTorch device finished [0.0009s]\n",
"|-----> π Processing clock: Mammalian1\n",
"|-----------> βοΈ Load clock started\n",
"|-----------------> Downloading data to pyaging_data/mammalian1.pt\n",
"|-----------------> in progress: 100.0000%\n",
- "|-----------> β
Load clock finished [0.4849s]\n",
+ "|-----------> β
Load clock finished [0.4437s]\n",
"|-----------> βοΈ Check features in adata started\n",
"|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0018s]\n",
+ "|-----------> β
Check features in adata finished [0.0015s]\n",
"|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0033s]\n",
+ "|-----------> β
Filter features and extract data matrix finished [0.0043s]\n",
"|-----------> βοΈ Convert numpy array to tensor started\n",
- "|-----------> β
Convert numpy array to tensor finished [0.0008s]\n",
+ "|-----------> β
Convert numpy array to tensor finished [0.0009s]\n",
"|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.0024s]\n",
+ "|-----------> β
Initialize model finished [0.0017s]\n",
"|-----------> βοΈ Predict ages with model started\n",
- "|-----------> β
Predict ages with model finished [0.0011s]\n",
+ "|-----------> β
Predict ages with model finished [0.0012s]\n",
"|-----------> βοΈ Convert tensor to numpy array started\n",
- "|-----------> β
Convert tensor to numpy array finished [0.0012s]\n",
+ "|-----------> β
Convert tensor to numpy array finished [0.0010s]\n",
"|-----------> βοΈ Postprocess data started\n",
"|-----------------> Postprocessing data with function anti_logp2\n",
- "|-----------> β
Postprocess data finished [0.0012s]\n",
+ "|-----------> β
Postprocess data finished [0.0011s]\n",
"|-----------> βοΈ Add predicted ages to adata started\n",
- "|-----------> β
Add predicted ages to adata finished [0.0011s]\n",
+ "|-----------> β
Add predicted ages to adata finished [0.0012s]\n",
"|-----------> βοΈ Load all clock metadata started\n",
"|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
- "|-----------> β
Load all clock metadata finished [0.0031s]\n",
+ "|-----------> β
Load all clock metadata finished [0.0041s]\n",
"|-----------> βοΈ Add clock metadata to adata.uns started\n",
- "|-----------> β
Add clock metadata to adata.uns finished [0.0008s]\n",
+ "|-----------> β
Add clock metadata to adata.uns finished [0.0007s]\n",
"|-----> π Processing clock: Mammalian2\n",
"|-----------> βοΈ Load clock started\n",
"|-----------------> Downloading data to pyaging_data/mammalian2.pt\n",
"|-----------------> in progress: 100.0000%\n",
- "|-----------> β
Load clock finished [0.6213s]\n",
+ "|-----------> β
Load clock finished [0.6232s]\n",
"|-----------> βοΈ Check features in adata started\n",
"|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0024s]\n",
+ "|-----------> β
Check features in adata finished [0.0029s]\n",
"|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0039s]\n",
+ "|-----------> β
Filter features and extract data matrix finished [0.0042s]\n",
"|-----------> βοΈ Convert numpy array to tensor started\n",
- "|-----------> β
Convert numpy array to tensor finished [0.0007s]\n",
+ "|-----------> β
Convert numpy array to tensor finished [0.0010s]\n",
"|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.0017s]\n",
+ "|-----------> β
Initialize model finished [0.0011s]\n",
"|-----------> βοΈ Predict ages with model started\n",
- "|-----------> β
Predict ages with model finished [0.0012s]\n",
+ "|-----------> β
Predict ages with model finished [0.0009s]\n",
"|-----------> βοΈ Convert tensor to numpy array started\n",
- "|-----------> β
Convert tensor to numpy array finished [0.0010s]\n",
+ "|-----------> β
Convert tensor to numpy array finished [0.0008s]\n",
"|-----------> βοΈ Postprocess data started\n",
"|-----------------> Postprocessing data with function anti_log_log\n",
- "|-----------> β
Postprocess data finished [0.0013s]\n",
+ "|-----------> β
Postprocess data finished [0.0016s]\n",
"|-----------> βοΈ Add predicted ages to adata started\n",
- "|-----------> β
Add predicted ages to adata finished [0.0011s]\n",
+ "|-----------> β
Add predicted ages to adata finished [0.0016s]\n",
"|-----------> βοΈ Load all clock metadata started\n",
"|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
- "|-----------> β
Load all clock metadata finished [0.0021s]\n",
+ "|-----------> β
Load all clock metadata finished [0.0019s]\n",
"|-----------> βοΈ Add clock metadata to adata.uns started\n",
"|-----------> β
Add clock metadata to adata.uns finished [0.0006s]\n",
"|-----> π Processing clock: Mammalian3\n",
"|-----------> βοΈ Load clock started\n",
"|-----------------> Downloading data to pyaging_data/mammalian3.pt\n",
"|-----------------> in progress: 100.0000%\n",
- "|-----------> β
Load clock finished [0.5057s]\n",
+ "|-----------> β
Load clock finished [0.5402s]\n",
"|-----------> βοΈ Check features in adata started\n",
"|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0037s]\n",
+ "|-----------> β
Check features in adata finished [0.0028s]\n",
"|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0057s]\n",
+ "|-----------> β
Filter features and extract data matrix finished [0.0038s]\n",
"|-----------> βοΈ Convert numpy array to tensor started\n",
- "|-----------> β
Convert numpy array to tensor finished [0.0026s]\n",
+ "|-----------> β
Convert numpy array to tensor finished [0.0006s]\n",
"|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.0049s]\n",
+ "|-----------> β
Initialize model finished [0.0009s]\n",
"|-----------> βοΈ Predict ages with model started\n",
- "|-----------> β
Predict ages with model finished [0.0023s]\n",
+ "|-----------> β
Predict ages with model finished [0.0006s]\n",
"|-----------> βοΈ Convert tensor to numpy array started\n",
- "|-----------> β
Convert tensor to numpy array finished [0.0010s]\n",
+ "|-----------> β
Convert tensor to numpy array finished [0.0003s]\n",
"|-----------> βοΈ Add predicted ages to adata started\n",
"|-----------> β
Add predicted ages to adata finished [0.0006s]\n",
"|-----------> βοΈ Load all clock metadata started\n",
"|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
- "|-----------> β
Load all clock metadata finished [0.0019s]\n",
+ "|-----------> β
Load all clock metadata finished [0.0011s]\n",
"|-----------> βοΈ Add clock metadata to adata.uns started\n",
- "|-----------> β
Add clock metadata to adata.uns finished [0.0008s]\n",
+ "|-----------> β
Add clock metadata to adata.uns finished [0.0012s]\n",
"|-----> π Processing clock: MammalianLifespan\n",
"|-----------> βοΈ Load clock started\n",
"|-----------------> Downloading data to pyaging_data/mammalianlifespan.pt\n",
"|-----------------> in progress: 100.0000%\n",
- "|-----------> β
Load clock finished [0.4236s]\n",
+ "|-----------> β
Load clock finished [0.4707s]\n",
"|-----------> βοΈ Check features in adata started\n",
"|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0019s]\n",
+ "|-----------> β
Check features in adata finished [0.0015s]\n",
"|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0030s]\n",
+ "|-----------> β
Filter features and extract data matrix finished [0.0021s]\n",
"|-----------> βοΈ Convert numpy array to tensor started\n",
"|-----------> β
Convert numpy array to tensor finished [0.0008s]\n",
"|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.0015s]\n",
+ "|-----------> β
Initialize model finished [0.0010s]\n",
"|-----------> βοΈ Predict ages with model started\n",
- "|-----------> β
Predict ages with model finished [0.0007s]\n",
+ "|-----------> β
Predict ages with model finished [0.0011s]\n",
"|-----------> βοΈ Convert tensor to numpy array started\n",
"|-----------> β
Convert tensor to numpy array finished [0.0011s]\n",
"|-----------> βοΈ Postprocess data started\n",
"|-----------------> Postprocessing data with function anti_log\n",
- "|-----------> β
Postprocess data finished [0.0017s]\n",
+ "|-----------> β
Postprocess data finished [0.0013s]\n",
"|-----------> βοΈ Add predicted ages to adata started\n",
- "|-----------> β
Add predicted ages to adata finished [0.0012s]\n",
+ "|-----------> β
Add predicted ages to adata finished [0.0016s]\n",
"|-----------> βοΈ Load all clock metadata started\n",
"|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
- "|-----------> β
Load all clock metadata finished [0.0022s]\n",
+ "|-----------> β
Load all clock metadata finished [0.0016s]\n",
"|-----------> βοΈ Add clock metadata to adata.uns started\n",
- "|-----------> β
Add clock metadata to adata.uns finished [0.0006s]\n",
- "|-----> π Done! [2.1646s]\n"
+ "|-----------> β
Add clock metadata to adata.uns finished [0.0004s]\n",
+ "|-----> π Done! [2.4322s]\n"
]
}
],
@@ -1451,7 +2171,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 35,
"id": "98fbdf4c-57c2-4885-bc4a-96b4771a638b",
"metadata": {},
"outputs": [
@@ -1531,7 +2251,7 @@
"203531420070_R05C02 10.371320 0.142493 -0.559288 68.409359"
]
},
- "execution_count": 22,
+ "execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
@@ -1550,7 +2270,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 36,
"id": "64035819-8dd0-4917-96ed-55c36ef34a66",
"metadata": {},
"outputs": [],
@@ -1563,7 +2283,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 37,
"id": "9414fe4c-a83a-4ac2-9dbb-a2a9d333af06",
"metadata": {},
"outputs": [
@@ -1643,7 +2363,7 @@
"203531420070_R05C02 10.371320 0.142493 -0.559288 68.409359"
]
},
- "execution_count": 24,
+ "execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
@@ -1662,7 +2382,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 38,
"id": "0d069189-1750-42f6-89d9-73039dd07a00",
"metadata": {},
"outputs": [
@@ -1676,7 +2396,7 @@
" layers: 'X_original'"
]
},
- "execution_count": 25,
+ "execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
@@ -1703,7 +2423,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 39,
"id": "300b40ad-68e7-49b3-a7f8-66c64436c80f",
"metadata": {},
"outputs": [
@@ -1717,10 +2437,11 @@
" 'postprocessing': 'anti_logp2',\n",
" 'citation': 'Lu, A. T., et al. \"Universal DNA methylation age across mammalian tissues.\" Nature aging 3.9 (2023): 1144-1166.',\n",
" 'doi': 'https://doi.org/10.1038/s43587-023-00462-6',\n",
- " 'notes': 'This is the DNAm age predictor from the paper in which there is no adjustment for species'}"
+ " 'notes': 'This is the DNAm age predictor from the paper in which there is no adjustment for species',\n",
+ " 'implementation_approved_by_author(s)': 'β'}"
]
},
- "execution_count": 26,
+ "execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
diff --git a/tutorials/tutorial_histonemarkchipseq.ipynb b/tutorials/tutorial_histonemarkchipseq.ipynb
index 88ecb4d..1aaf544 100644
--- a/tutorials/tutorial_histonemarkchipseq.ipynb
+++ b/tutorials/tutorial_histonemarkchipseq.ipynb
@@ -61,7 +61,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "7b3988dd-3e39-4e13-8ad8-a06447137c52",
"metadata": {},
"outputs": [
@@ -71,8 +71,7 @@
"text": [
"|-----> ποΈ Starting download_example_data function\n",
"|-----------> Downloading data to pyaging_data/ENCFF386QWG.bigWig\n",
- "|-----------> in progress: 100.0000%\n",
- "|-----> π Done! [178.9496s]\n"
+ "|-----------> in progress: 79.0189%"
]
}
],
@@ -90,186 +89,20 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "e56f4080-e968-44cb-8e31-4bd27092f4b2",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "|-----> ποΈ Starting bigwig_to_df function\n",
- "|-----> βοΈ Load Ensembl genome metadata started\n",
- "|-----------> Downloading data to pyaging_data/Ensembl-105-EnsDb-for-Homo-sapiens-genes.csv\n",
- "|-----------> in progress: 100.0000%\n",
- "|-----> β
Load Ensembl genome metadata finished [3.0124s]\n",
- "|-----> βοΈ Processing bigWig files started\n",
- "|-----------> Processing file: pyaging_data/ENCFF386QWG.bigWig\n",
- "|-----------> in progress: 100.0000%\n",
- "|-----------> Processing file: pyaging_data/ENCFF386QWG.bigWig\n",
- "|-----------> in progress: 100.0000%\n",
- "|-----> β
Processing bigWig files finished [16.4875s]\n",
- "|-----> π Done! [36.6192s]\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"df = pya.pp.bigwig_to_df(['pyaging_data/ENCFF386QWG.bigWig', 'pyaging_data/ENCFF386QWG.bigWig'])"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "769858ac-9d6d-43f8-9c53-0f4a88c5484c",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " ENSG00000223972 | \n",
- " ENSG00000227232 | \n",
- " ENSG00000278267 | \n",
- " ENSG00000243485 | \n",
- " ENSG00000284332 | \n",
- " ENSG00000237613 | \n",
- " ENSG00000268020 | \n",
- " ENSG00000240361 | \n",
- " ENSG00000186092 | \n",
- " ENSG00000238009 | \n",
- " ... | \n",
- " ENSG00000237801 | \n",
- " ENSG00000237040 | \n",
- " ENSG00000124333 | \n",
- " ENSG00000228410 | \n",
- " ENSG00000223484 | \n",
- " ENSG00000124334 | \n",
- " ENSG00000270726 | \n",
- " ENSG00000185203 | \n",
- " ENSG00000182484 | \n",
- " ENSG00000227159 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " pyaging_data/ENCFF386QWG.bigWig | \n",
- " 0.028616 | \n",
- " 0.030415 | \n",
- " 0.027783 | \n",
- " 0.028616 | \n",
- " 0.028616 | \n",
- " 0.028616 | \n",
- " 0.044171 | \n",
- " 0.036474 | \n",
- " 0.030784 | \n",
- " 0.03181 | \n",
- " ... | \n",
- " 0.034435 | \n",
- " 0.006822 | \n",
- " 1.413119 | \n",
- " 0.029424 | \n",
- " 0.140005 | \n",
- " 0.049786 | \n",
- " 0.069296 | \n",
- " 0.332126 | \n",
- " 0.028596 | \n",
- " 0.028616 | \n",
- "
\n",
- " \n",
- " pyaging_data/ENCFF386QWG.bigWig | \n",
- " 0.028616 | \n",
- " 0.030415 | \n",
- " 0.027783 | \n",
- " 0.028616 | \n",
- " 0.028616 | \n",
- " 0.028616 | \n",
- " 0.044171 | \n",
- " 0.036474 | \n",
- " 0.030784 | \n",
- " 0.03181 | \n",
- " ... | \n",
- " 0.034435 | \n",
- " 0.006822 | \n",
- " 1.413119 | \n",
- " 0.029424 | \n",
- " 0.140005 | \n",
- " 0.049786 | \n",
- " 0.069296 | \n",
- " 0.332126 | \n",
- " 0.028596 | \n",
- " 0.028616 | \n",
- "
\n",
- " \n",
- "
\n",
- "
2 rows Γ 62241 columns
\n",
- "
"
- ],
- "text/plain": [
- " ENSG00000223972 ENSG00000227232 \\\n",
- "pyaging_data/ENCFF386QWG.bigWig 0.028616 0.030415 \n",
- "pyaging_data/ENCFF386QWG.bigWig 0.028616 0.030415 \n",
- "\n",
- " ENSG00000278267 ENSG00000243485 \\\n",
- "pyaging_data/ENCFF386QWG.bigWig 0.027783 0.028616 \n",
- "pyaging_data/ENCFF386QWG.bigWig 0.027783 0.028616 \n",
- "\n",
- " ENSG00000284332 ENSG00000237613 \\\n",
- "pyaging_data/ENCFF386QWG.bigWig 0.028616 0.028616 \n",
- "pyaging_data/ENCFF386QWG.bigWig 0.028616 0.028616 \n",
- "\n",
- " ENSG00000268020 ENSG00000240361 \\\n",
- "pyaging_data/ENCFF386QWG.bigWig 0.044171 0.036474 \n",
- "pyaging_data/ENCFF386QWG.bigWig 0.044171 0.036474 \n",
- "\n",
- " ENSG00000186092 ENSG00000238009 ... \\\n",
- "pyaging_data/ENCFF386QWG.bigWig 0.030784 0.03181 ... \n",
- "pyaging_data/ENCFF386QWG.bigWig 0.030784 0.03181 ... \n",
- "\n",
- " ENSG00000237801 ENSG00000237040 \\\n",
- "pyaging_data/ENCFF386QWG.bigWig 0.034435 0.006822 \n",
- "pyaging_data/ENCFF386QWG.bigWig 0.034435 0.006822 \n",
- "\n",
- " ENSG00000124333 ENSG00000228410 \\\n",
- "pyaging_data/ENCFF386QWG.bigWig 1.413119 0.029424 \n",
- "pyaging_data/ENCFF386QWG.bigWig 1.413119 0.029424 \n",
- "\n",
- " ENSG00000223484 ENSG00000124334 \\\n",
- "pyaging_data/ENCFF386QWG.bigWig 0.140005 0.049786 \n",
- "pyaging_data/ENCFF386QWG.bigWig 0.140005 0.049786 \n",
- "\n",
- " ENSG00000270726 ENSG00000185203 \\\n",
- "pyaging_data/ENCFF386QWG.bigWig 0.069296 0.332126 \n",
- "pyaging_data/ENCFF386QWG.bigWig 0.069296 0.332126 \n",
- "\n",
- " ENSG00000182484 ENSG00000227159 \n",
- "pyaging_data/ENCFF386QWG.bigWig 0.028596 0.028616 \n",
- "pyaging_data/ENCFF386QWG.bigWig 0.028596 0.028616 \n",
- "\n",
- "[2 rows x 62241 columns]"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df.head()"
]
@@ -292,43 +125,10 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "c501ed4c-f711-44be-bda4-669fbbae88f1",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "|-----> ποΈ Starting df_to_adata function\n",
- "|-----> βοΈ Create anndata object started\n",
- "|-----> β
Create anndata object finished [0.0166s]\n",
- "|-----> βοΈ Add metadata to anndata started\n",
- "|-----------? No metadata provided. Leaving adata.obs empty\n",
- "|-----> β οΈ Add metadata to anndata finished [0.0005s]\n",
- "|-----> βοΈ Log data statistics started\n",
- "|-----------> There are 2 observations\n",
- "|-----------> There are 62241 features\n",
- "|-----------> Total missing values: 0\n",
- "|-----------> Percentage of missing values: 0.00%\n",
- "|-----> β
Log data statistics finished [0.0025s]\n",
- "|-----> βοΈ Impute missing values started\n",
- "|-----------> No missing values found. No imputation necessary\n",
- "|-----> β
Impute missing values finished [0.0026s]\n",
- "|-----> βοΈ Add unstructured data to anndata started\n",
- "|-----> β
Add unstructured data to anndata finished [0.0003s]\n",
- "|-----> π Done! [0.0258s]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/lucascamillo/mambaforge/envs/brain/lib/python3.9/site-packages/anndata/_core/anndata.py:1897: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.\n",
- " utils.warn_names_duplicates(\"obs\")\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"adata = pya.preprocess.df_to_adata(df)"
]
@@ -343,24 +143,10 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "641a61a6-46fc-4d47-b176-eb39524ce94f",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "AnnData object with n_obs Γ n_vars = 2 Γ 62241\n",
- " var: 'percent_na'\n",
- " uns: 'imputer_strategy', 'data_type'\n",
- " layers: 'X_original'"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"adata"
]
@@ -383,161 +169,20 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "13e7d357-c6d5-474b-b13f-30ec5edc0d19",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "|-----> ποΈ Starting predict_age function\n",
- "|-----> βοΈ Set PyTorch device started\n",
- "|-----------> Using device: cpu\n",
- "|-----> β
Set PyTorch device finished [0.0027s]\n",
- "|-----> π Processing clock: H3K4me3\n",
- "|-----------> βοΈ Load clock started\n",
- "|-----------------> Downloading data to pyaging_data/h3k4me3.pt\n",
- "|-----------------> in progress: 100.0000%\n",
- "|-----------> β
Load clock finished [1.4404s]\n",
- "|-----------> βοΈ Check features in adata started\n",
- "|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0019s]\n",
- "|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0040s]\n",
- "|-----------> βοΈ Convert numpy array to tensor started\n",
- "|-----------> β
Convert numpy array to tensor finished [0.0012s]\n",
- "|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.0157s]\n",
- "|-----------> βοΈ Predict ages with model started\n",
- "|-----------> β
Predict ages with model finished [0.0026s]\n",
- "|-----------> βοΈ Convert tensor to numpy array started\n",
- "|-----------> β
Convert tensor to numpy array finished [0.0004s]\n",
- "|-----------> βοΈ Add predicted ages to adata started\n",
- "|-----------> β
Add predicted ages to adata finished [0.0006s]\n",
- "|-----------> βοΈ Load all clock metadata started\n",
- "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
- "|-----------> β
Load all clock metadata finished [0.0024s]\n",
- "|-----------> βοΈ Add clock metadata to adata.uns started\n",
- "|-----------> β
Add clock metadata to adata.uns finished [0.0003s]\n",
- "|-----> π Processing clock: H3K9me3\n",
- "|-----------> βοΈ Load clock started\n",
- "|-----------------> Downloading data to pyaging_data/h3k9me3.pt\n",
- "|-----------------> in progress: 100.0000%\n",
- "|-----------> β
Load clock finished [0.8886s]\n",
- "|-----------> βοΈ Check features in adata started\n",
- "|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0008s]\n",
- "|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0013s]\n",
- "|-----------> βοΈ Convert numpy array to tensor started\n",
- "|-----------> β
Convert numpy array to tensor finished [0.0004s]\n",
- "|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.0013s]\n",
- "|-----------> βοΈ Predict ages with model started\n",
- "|-----------> β
Predict ages with model finished [0.0006s]\n",
- "|-----------> βοΈ Convert tensor to numpy array started\n",
- "|-----------> β
Convert tensor to numpy array finished [0.0004s]\n",
- "|-----------> βοΈ Add predicted ages to adata started\n",
- "|-----------> β
Add predicted ages to adata finished [0.0007s]\n",
- "|-----------> βοΈ Load all clock metadata started\n",
- "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
- "|-----------> β
Load all clock metadata finished [0.0014s]\n",
- "|-----------> βοΈ Add clock metadata to adata.uns started\n",
- "|-----------> β
Add clock metadata to adata.uns finished [0.0005s]\n",
- "|-----> π Processing clock: PanHistone\n",
- "|-----------> βοΈ Load clock started\n",
- "|-----------------> Downloading data to pyaging_data/panhistone.pt\n",
- "|-----------------> in progress: 100.0000%\n",
- "|-----------> β
Load clock finished [7.0801s]\n",
- "|-----------> βοΈ Check features in adata started\n",
- "|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0142s]\n",
- "|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0071s]\n",
- "|-----------> βοΈ Convert numpy array to tensor started\n",
- "|-----------> β
Convert numpy array to tensor finished [0.0004s]\n",
- "|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.0202s]\n",
- "|-----------> βοΈ Predict ages with model started\n",
- "|-----------> β
Predict ages with model finished [0.0083s]\n",
- "|-----------> βοΈ Convert tensor to numpy array started\n",
- "|-----------> β
Convert tensor to numpy array finished [0.0005s]\n",
- "|-----------> βοΈ Add predicted ages to adata started\n",
- "|-----------> β
Add predicted ages to adata finished [0.0017s]\n",
- "|-----------> βοΈ Load all clock metadata started\n",
- "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
- "|-----------> β
Load all clock metadata finished [0.0041s]\n",
- "|-----------> βοΈ Add clock metadata to adata.uns started\n",
- "|-----------> β
Add clock metadata to adata.uns finished [0.0009s]\n",
- "|-----> π Done! [9.5315s]\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"adata = pya.pred.predict_age(adata, ['H3K4me3', 'H3K9me3', 'PanHistone'])"
]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"id": "f64fb182-937b-4f67-b58e-5fffb0e2fad0",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " h3k4me3 | \n",
- " h3k9me3 | \n",
- " panhistone | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " pyaging_data/ENCFF386QWG.bigWig | \n",
- " 53.998566 | \n",
- " 44.322887 | \n",
- " 54.021847 | \n",
- "
\n",
- " \n",
- " pyaging_data/ENCFF386QWG.bigWig | \n",
- " 53.998566 | \n",
- " 44.322887 | \n",
- " 54.021847 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " h3k4me3 h3k9me3 panhistone\n",
- "pyaging_data/ENCFF386QWG.bigWig 53.998566 44.322887 54.021847\n",
- "pyaging_data/ENCFF386QWG.bigWig 53.998566 44.322887 54.021847"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"adata.obs.head()"
]
@@ -552,19 +197,10 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"id": "e8dd3457-8983-41a4-aaab-41563b91a866",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/lucascamillo/mambaforge/envs/brain/lib/python3.9/site-packages/anndata/_core/anndata.py:1897: UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.\n",
- " utils.warn_names_duplicates(\"obs\")\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"pya.data.download_example_data('ENCFF386QWG', verbose=False)\n",
"df = pya.pp.bigwig_to_df(['pyaging_data/ENCFF386QWG.bigWig', 'pyaging_data/ENCFF386QWG.bigWig'], verbose=False)\n",
@@ -574,64 +210,10 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"id": "8192ab67-a1cc-4728-8ca0-f81a56940fbf",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " h3k4me3 | \n",
- " h3k9me3 | \n",
- " panhistone | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " pyaging_data/ENCFF386QWG.bigWig | \n",
- " 53.998566 | \n",
- " 44.322887 | \n",
- " 54.021847 | \n",
- "
\n",
- " \n",
- " pyaging_data/ENCFF386QWG.bigWig | \n",
- " 53.998566 | \n",
- " 44.322887 | \n",
- " 54.021847 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " h3k4me3 h3k9me3 panhistone\n",
- "pyaging_data/ENCFF386QWG.bigWig 53.998566 44.322887 54.021847\n",
- "pyaging_data/ENCFF386QWG.bigWig 53.998566 44.322887 54.021847"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"adata.obs.head()"
]
@@ -646,25 +228,10 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "a4b22bf1-116f-456f-82d2-58b300f863f1",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "AnnData object with n_obs Γ n_vars = 2 Γ 62241\n",
- " obs: 'h3k4me3', 'h3k9me3', 'panhistone'\n",
- " var: 'percent_na'\n",
- " uns: 'imputer_strategy', 'data_type', 'h3k4me3_percent_na', 'h3k4me3_metadata', 'h3k9me3_percent_na', 'h3k9me3_metadata', 'panhistone_percent_na', 'panhistone_metadata'\n",
- " layers: 'X_original'"
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"adata"
]
@@ -687,28 +254,10 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"id": "2946393e-a199-46ba-a9dd-80bc8fa88787",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'species': 'Homo sapiens',\n",
- " 'data_type': 'histone_mark',\n",
- " 'year': 2023,\n",
- " 'preprocessing': None,\n",
- " 'postprocessing': None,\n",
- " 'citation': 'de Lima Camillo, Lucas Paulo, et al. \"Histone mark age of human tissues and cells.\" bioRxiv (2023): 2023-08.',\n",
- " 'doi': 'https://doi.org/10.1101/2023.08.21.554165',\n",
- " 'notes': 'This is still a preprint, so the model might change'}"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"adata.uns['h3k4me3_metadata']"
]
diff --git a/tutorials/tutorial_rnaseq.ipynb b/tutorials/tutorial_rnaseq.ipynb
index 3f5be3c..bd420da 100644
--- a/tutorials/tutorial_rnaseq.ipynb
+++ b/tutorials/tutorial_rnaseq.ipynb
@@ -70,9 +70,8 @@
"output_type": "stream",
"text": [
"|-----> ποΈ Starting download_example_data function\n",
- "|-----------> Downloading data to pyaging_data/GSE65765_CPM.pkl\n",
- "|-----------> in progress: 100.0000%\n",
- "|-----> π Done! [1.3594s]\n"
+ "|-----------> Data found in pyaging_data/GSE65765_CPM.pkl\n",
+ "|-----> π Done! [0.0008s]\n"
]
}
],
@@ -319,22 +318,22 @@
"text": [
"|-----> ποΈ Starting df_to_adata function\n",
"|-----> βοΈ Create anndata object started\n",
- "|-----> β
Create anndata object finished [0.0026s]\n",
+ "|-----> β
Create anndata object finished [0.0028s]\n",
"|-----> βοΈ Add metadata to anndata started\n",
"|-----------? No metadata provided. Leaving adata.obs empty\n",
- "|-----> β οΈ Add metadata to anndata finished [0.0005s]\n",
+ "|-----> β οΈ Add metadata to anndata finished [0.0004s]\n",
"|-----> βοΈ Log data statistics started\n",
"|-----------> There are 4 observations\n",
"|-----------> There are 46755 features\n",
"|-----------> Total missing values: 0\n",
"|-----------> Percentage of missing values: 0.00%\n",
- "|-----> β
Log data statistics finished [0.0013s]\n",
+ "|-----> β
Log data statistics finished [0.0021s]\n",
"|-----> βοΈ Impute missing values started\n",
"|-----------> No missing values found. No imputation necessary\n",
- "|-----> β
Impute missing values finished [0.0017s]\n",
+ "|-----> β
Impute missing values finished [0.0013s]\n",
"|-----> βοΈ Add unstructured data to anndata started\n",
- "|-----> β
Add unstructured data to anndata finished [0.0003s]\n",
- "|-----> π Done! [0.0096s]\n"
+ "|-----> β
Add unstructured data to anndata finished [0.0002s]\n",
+ "|-----> π Done! [0.0098s]\n"
]
}
],
@@ -403,37 +402,36 @@
"|-----> ποΈ Starting predict_age function\n",
"|-----> βοΈ Set PyTorch device started\n",
"|-----------> Using device: cpu\n",
- "|-----> β
Set PyTorch device finished [0.0005s]\n",
+ "|-----> β
Set PyTorch device finished [0.0007s]\n",
"|-----> π Processing clock: BiTAge\n",
"|-----------> βοΈ Load clock started\n",
"|-----------------> Downloading data to pyaging_data/bitage.pt\n",
"|-----------------> in progress: 100.0000%\n",
- "|-----------> β
Load clock finished [0.4104s]\n",
+ "|-----------> β
Load clock finished [0.5873s]\n",
"|-----------> βοΈ Check features in adata started\n",
"|-----------------> All features are present in adata.var_names.\n",
- "|-----------> β
Check features in adata finished [0.0008s]\n",
+ "|-----------> β
Check features in adata finished [0.0025s]\n",
"|-----------> βοΈ Preprocess data started\n",
"|-----------------> Preprocessing data with function binarize\n",
- "|-----------> β
Preprocess data finished [0.0028s]\n",
+ "|-----------> β
Preprocess data finished [0.0077s]\n",
"|-----------> βοΈ Filter features and extract data matrix started\n",
- "|-----------> β
Filter features and extract data matrix finished [0.0014s]\n",
+ "|-----------> β
Filter features and extract data matrix finished [0.0035s]\n",
"|-----------> βοΈ Convert numpy array to tensor started\n",
"|-----------> β
Convert numpy array to tensor finished [0.0007s]\n",
"|-----------> βοΈ Initialize model started\n",
- "|-----------> β
Initialize model finished [0.0017s]\n",
+ "|-----------> β
Initialize model finished [0.0024s]\n",
"|-----------> βοΈ Predict ages with model started\n",
- "|-----------> β
Predict ages with model finished [0.0008s]\n",
+ "|-----------> β
Predict ages with model finished [0.0010s]\n",
"|-----------> βοΈ Convert tensor to numpy array started\n",
- "|-----------> β
Convert tensor to numpy array finished [0.0007s]\n",
+ "|-----------> β
Convert tensor to numpy array finished [0.0006s]\n",
"|-----------> βοΈ Add predicted ages to adata started\n",
- "|-----------> β
Add predicted ages to adata finished [0.0007s]\n",
+ "|-----------> β
Add predicted ages to adata finished [0.0009s]\n",
"|-----------> βοΈ Load all clock metadata started\n",
- "|-----------------> Downloading data to pyaging_data/all_clock_metadata.pt\n",
- "|-----------------> in progress: 100.0000%\n",
- "|-----------> β
Load all clock metadata finished [0.5127s]\n",
+ "|-----------------> Data found in pyaging_data/all_clock_metadata.pt\n",
+ "|-----------> β
Load all clock metadata finished [0.0024s]\n",
"|-----------> βοΈ Add clock metadata to adata.uns started\n",
- "|-----------> β
Add clock metadata to adata.uns finished [0.0003s]\n",
- "|-----> π Done! [0.9400s]\n"
+ "|-----------> β
Add clock metadata to adata.uns finished [0.0009s]\n",
+ "|-----> π Done! [0.6801s]\n"
]
}
],
@@ -663,7 +661,8 @@
" 'postprocessing': None,\n",
" 'citation': 'Meyer, David H., and BjΓΆrn Schumacher. \"BiT age: A transcriptomeβbased aging clock near the theoretical limit of accuracy.\" Aging cell 20.3 (2021): e13320.',\n",
" 'doi': 'https://doi.org/10.1111/acel.13320',\n",
- " 'notes': None}"
+ " 'notes': None,\n",
+ " 'implementation_approved_by_author(s)': 'β'}"
]
},
"execution_count": 12,