
Commit

reduce sparse problem size for docs
apoorvalal committed Aug 27, 2024
1 parent 6a77952 commit 8282381
Showing 1 changed file with 32 additions and 20 deletions.
52 changes: 32 additions & 20 deletions docs/examples/example_sparse_inputs.ipynb
@@ -110,7 +110,7 @@
"source": [
"def generate_causal_data(\n",
" n_samples=100_000,\n",
" n_categories=1000,\n",
" n_categories=500,\n",
" n_features=100,\n",
" tau_magnitude=1.0,\n",
"):\n",
@@ -168,8 +168,9 @@
"\n",
"\n",
"X, W, Y, tau, propensity_score = generate_causal_data(\n",
" n_samples=10000, tau_magnitude=1.0\n",
")\n"
" n_samples=1000, tau_magnitude=1.0\n",
")\n",
"Xdf = pd.DataFrame(X)"
]
},
{
@@ -179,10 +180,14 @@
"outputs": [],
"source": [
"# sparse and dense X matrices\n",
"e1 = OneHotEncoder(sparse_output=True) # onehot encoder generates sparse output automatically\n",
"Xdf = pd.DataFrame(X)\n",
"e1 = OneHotEncoder(\n",
" sparse_output=True\n",
") # onehot encoder generates sparse output automatically\n",
"\n",
"X_csr = e1.fit_transform(X)\n",
"X_np = pd.get_dummies(Xdf, columns=Xdf.columns).values # dense onehot encoding with pandas"
"X_np = pd.get_dummies(\n",
" Xdf, columns=Xdf.columns\n",
").values # dense onehot encoding with pandas"
]
},
{
@@ -195,8 +200,8 @@
"output_type": "stream",
"text": [
"\n",
"Sparse data memory: 7.63MB\n",
"Dense data memory: 953.66MB\n"
"Sparse data memory: 0.76MB\n",
"Dense data memory: 41.28MB\n"
]
}
],
@@ -266,8 +271,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Sparse data - Runtime: 13.27s, Memory used: 345.56MB\n",
"(array([1.0007226]), array([0.02021719]))\n"
"Sparse data - Runtime: 3.06s, Memory used: 115.93MB\n",
"(array([1.0161235]), array([0.06374022]))\n"
]
}
],
@@ -291,26 +296,33 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Sparse data - Runtime: 149.10s, Memory used: 70.43MB\n",
"(array([1.00067664]), array([0.02021555]))\n"
"Sparse data - Runtime: 6.91s, Memory used: 131.66MB\n",
"(array([1.01609547]), array([0.06384197]))\n"
]
}
],
"source": [
"fit_drlearner_wrapper(X_np)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this (admittedly somewhat contrived) example, we that solving the DRLearner problem with sparse inputs takes around 1/8 of the time compared to dense inputs at the cost of some more memory usage in estimation. "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "py311",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
},
"mystnb": {
"execution_timeout": 120
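As an aside, for readers who want to reproduce the sparse-versus-dense storage comparison from the changed cells outside the notebook, here is a minimal standalone sketch. The array sizes and the 50-level categorical design below are illustrative choices made here, not taken from the notebook, and the helper names are hypothetical; it assumes scikit-learn (>= 1.2, for the sparse_output argument the notebook itself uses), pandas, and numpy are installed.

import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

rng = np.random.default_rng(0)
# Illustrative categorical design: 1,000 rows, 10 columns, 50 levels each
# (deliberately smaller than the notebook's n_samples=1000, n_categories=500 setup).
X = rng.integers(0, 50, size=(1_000, 10))
Xdf = pd.DataFrame(X)

# Sparse one-hot encoding (CSR) via scikit-learn.
X_csr = OneHotEncoder(sparse_output=True).fit_transform(X)
sparse_mb = (X_csr.data.nbytes + X_csr.indices.nbytes + X_csr.indptr.nbytes) / 1e6

# Dense one-hot encoding via pandas.
X_np = pd.get_dummies(Xdf, columns=Xdf.columns).values
dense_mb = X_np.nbytes / 1e6

print(f"Sparse data memory: {sparse_mb:.2f}MB")
print(f"Dense data memory: {dense_mb:.2f}MB")

Because only one entry per categorical column is nonzero in each row, the CSR representation stores roughly n_rows * n_columns values regardless of how many levels each column has, while the dense one-hot matrix grows with the total number of levels.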
