diff --git a/hiselstudy.yml b/hiselstudy.yml
new file mode 100644
index 0000000..48d8fbe
--- /dev/null
+++ b/hiselstudy.yml
@@ -0,0 +1,17 @@
+name: hiselstudy  # same string as the kernelspec name stored in the study notebooks
+channels:
+  - conda-forge
+  - nodefaults  # do not fall back to the default Anaconda channels
+dependencies:
+  - python=3.9
+  - ipython
+  - ipykernel
+  - numpy
+  - pandas
+  - scipy
+  - scikit-learn
+  - shap
+  - lightgbm
+  - tqdm
+  - matplotlib
+  - pip  # NOTE(review): notebooks also import seaborn, arfs, hisel (not listed) - presumably pip-installed; confirm
diff --git a/notebooks/study/ensemble.ipynb b/notebooks/study/ensemble.ipynb
new file mode 100644
index 0000000..20d1837
--- /dev/null
+++ b/notebooks/study/ensemble.ipynb
@@ -0,0 +1,404 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "802e8c73",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import seaborn as sns\n",
+    "import itertools\n",
+    "from sklearn.metrics import adjusted_mutual_info_score\n",
+    "\n",
+    "\n",
+    "from hisel import select, hsic\n",
+    "from hisel.select import FeatureType, HSICSelector as Selector"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "798f7c6d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "k = 5\n",
+    "n = 10000\n",
+    "d = 30"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "50b99be8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x0 = np.random.randint(k, size=(n, 1))\n",
+    "x1 = np.random.randint(k, size=(n, 1))\n",
+    "ms = np.random.randint(low=2, high=20, size = d-2)\n",
+    "others = [np.random.choice(m, size=(n, 1)) for m in ms]\n",
+    "all_ = np.concatenate(\n",
+    "    [x0, x1] + others,\n",
+    "    axis=1\n",
+    ")\n",
+    "y = np.asarray(x0 == x1, dtype=int) # k + x0 - x1 # np.asarray(x0 == x1, dtype=int)\n",
+    "permuter =  np.random.permutation(np.eye(d, dtype=int).T).T\n",
+    "x = np.array(all_ @ permuter, dtype=int)\n",
+    "expected_features = [np.argmax(permuter[0, :]), np.argmax(permuter[1, :])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e6236e9e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert np.all(x[:, expected_features[0]] == x0[:, 0])\n",
+    "assert np.all(x[:, expected_features[1]] == x1[:, 0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f83edaef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.scatterplot(x = x0[:, 0] - x1[:, 0], y = y[:, 0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "140b9f88",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "xdf = pd.DataFrame(x, columns = [f'x{i}' for i in range(d)])\n",
+    "ydf = pd.Series(y[:, 0], name='y')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e37502d7",
+   "metadata": {},
+   "source": [
+    "### Selection with marginal 1D ksg mutual info"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "139b18ff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ksgselection, mis = select.ksgmi(xdf, ydf, threshold=0.01)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5ffca204",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'Expected features: {sorted(expected_features)}')\n",
+    "print(f'Marginal KSG selection: {sorted(ksgselection)}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c8906000",
+   "metadata": {},
+   "source": [
+    "### Selection with HSIC Lasso"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1487ff0e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "selector = Selector(x, y, xfeattype=FeatureType.DISCR, yfeattype=FeatureType.DISCR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "afab6f16",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = n // 10\n",
+    "minibatch_size = 200\n",
+    "number_of_epochs = 3\n",
+    "threshold = .0\n",
+    "device = None # run on CPU"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "01efe57c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hsiclasso_selection = selector.select(\n",
+    "    number_of_features=2,\n",
+    "    batch_size=batch_size,\n",
+    "    minibatch_size=minibatch_size,\n",
+    "    number_of_epochs=number_of_epochs,\n",
+    "    device=device\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "97929ada",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'Expected features: {sorted(expected_features)}')\n",
+    "print(f'HSIC Lasso selection: {sorted(hsiclasso_selection)}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d88d85c5",
+   "metadata": {},
+   "source": [
+    "### Confirm that HSIC_b correctly assigns highest dependence to the correct selection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "38056f04",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "correct_dependence = n * n * hsic.hsic_b(\n",
+    "    x[:, list(expected_features)],\n",
+    "    y\n",
+    ")\n",
+    "nsel = np.random.randint(low=1, high=d)\n",
+    "random_selection = np.random.choice(list(range(d)), replace=False, size=nsel)\n",
+    "random_dependence = n * n * hsic.hsic_b(\n",
+    "    x[:, list(random_selection)],\n",
+    "    y\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "92bc809f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'HSIC-estimated dependence between correct selection and target: {correct_dependence}')\n",
+    "print(f'HSIC-estimated dependence between random selection and target: {random_dependence}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "beb34ecd",
+   "metadata": {},
+   "source": [
+    "### Selection with 2D discrete mutual information"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3d1459fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def onedimlabel(x):\n",
+    "    # Encode every row of a 2-D array of non-negative ints as one integer label.\n",
+    "    # Column i takes values 0..ns[i], so its place value must be prod_{j<i} (1 + ns[j]);\n",
+    "    # the former 1 + prod_{j<i} ns[j] only gave unique labels for up to two columns.\n",
+    "    assert x.ndim == 2\n",
+    "    ns = np.amax(x, axis=0)\n",
+    "    # place[0] = 1 and place[i] = place[i-1] * (1 + ns[i-1])  (mixed-radix weights)\n",
+    "    place = np.cumprod(np.concatenate(([1], 1 + ns[:-1])))\n",
+    "    return x @ place"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16a8e7f5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "l = 2\n",
+    "miscores = {subset: \n",
+    "           adjusted_mutual_info_score(onedimlabel(x[:, list(subset)]), y[:, 0])\n",
+    "            for subset in itertools.combinations(list(range(d)), l)\n",
+    "            \n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "168eb38b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep the pair of columns with the highest adjusted MI; (0, 1) is the fallback when\n",
+    "# no score is positive. Loop names avoid overwriting the global `k` used to build x0/x1.\n",
+    "best_subset, best_score = (0, 1), 0\n",
+    "for subset, score in miscores.items():\n",
+    "    if score > best_score:\n",
+    "        best_subset, best_score = subset, score\n",
+    "twod_mi_selection = best_subset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a14eb4e9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'Expected features: {sorted(expected_features)}')\n",
+    "print(f'2D discrete MI selection: {sorted(twod_mi_selection)}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6776b78e",
+   "metadata": {},
+   "source": [
+    "### Selection with Boruta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "511abe4a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from arfs.feature_selection import allrelevant\n",
+    "from arfs.feature_selection.allrelevant import Leshy\n",
+    "from sklearn.ensemble import RandomForestClassifier"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a02901e1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n_estimators = 'auto'\n",
+    "perc = 95\n",
+    "alpha = 0.05\n",
+    "importance = \"shap\"\n",
+    "two_step = True\n",
+    "max_iter = 100\n",
+    "random_state = None\n",
+    "verbose = 0\n",
+    "keep_weak = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "00081320",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "xdf = pd.DataFrame(x, columns = [f'f{i}' for i in range(d)])\n",
+    "yser = pd.Series(y[:, 0], name='y')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3f456422",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rf = RandomForestClassifier(n_jobs=-1, max_depth=8)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a36700a6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "leshy = Leshy(\n",
+    "    rf,\n",
+    "    n_estimators=n_estimators,\n",
+    "    perc=perc,\n",
+    "    alpha=alpha,\n",
+    "    importance=importance,\n",
+    "    two_step=two_step,\n",
+    "    max_iter=max_iter,\n",
+    "    random_state=random_state,\n",
+    "    verbose=verbose,\n",
+    "    keep_weak=keep_weak,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9613c874",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "leshy.fit(xdf, yser)\n",
+    "leshy_selection = [int(col.replace('f', '')) for col in leshy.selected_features_]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c9056e56",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'Expected features: {sorted(expected_features)}')\n",
+    "print(f'Boruta selection: {sorted(leshy_selection)}')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "hiselstudy",
+   "language": "python",
+   "name": "hiselstudy"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/study/nonlinear.ipynb b/notebooks/study/nonlinear.ipynb
new file mode 100644
index 0000000..a9e5e8b
--- /dev/null
+++ b/notebooks/study/nonlinear.ipynb
@@ -0,0 +1,758 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fd61a5c3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datetime\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "from scipy.stats import special_ortho_group\n",
+    "from hisel.select import HSICSelector as Selector"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c2559eae",
+   "metadata": {},
+   "source": [
+    "# Sin transform "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4b761492",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dim_x = 10\n",
+    "dim_y = 1 # has to be one-dimensional\n",
+    "dim_z = 1\n",
+    "\n",
+    "batch_size = int(1e+4)\n",
+    "minibatch_size = 250\n",
+    "num_of_samples = int(1e+4)\n",
+    "number_of_epochs = 3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0d4f2545",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transform_tilde = np.eye(dim_z)[:dim_y]\n",
+    "A = np.random.permutation(np.concatenate((np.eye(dim_z), np.zeros((dim_z, dim_x - dim_z))), axis=1).T).T\n",
+    "transform = transform_tilde @ A"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bde18951",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x_samples = np.random.uniform(size=(num_of_samples, dim_x))\n",
+    "tt = np.repeat(np.expand_dims(transform, axis=0), repeats=num_of_samples, axis=0)\n",
+    "prey = (tt @ np.expand_dims(x_samples, axis=2))[:, :, 0]\n",
+    "y_samples = np.random.normal(0, 3e-1, size=prey.shape) \n",
+    "y_samples[:, 0] += np.sin(2*np.pi*prey[:, 0])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1d0b9a75",
+   "metadata": {},
+   "source": [
+    "### Viz of relations between target and features"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9f2e819f",
+   "metadata": {},
+   "source": [
+    "Relation between $y$ and the correct feature"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9fc8c15a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expected_features = np.argsort(np.sum(A, axis=0))[::-1][:dim_z]\n",
+    "sns.scatterplot(x=x_samples[:, expected_features[0]], y=y_samples[:, 0])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f4305414",
+   "metadata": {},
+   "source": [
+    "Relation between $y$ and a wrong feature"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f0661e9a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nonrelevant = set(range(dim_x)).difference(set(expected_features))\n",
+    "featureidx = np.random.choice(list(nonrelevant))\n",
+    "sns.scatterplot(x=x_samples[:, featureidx], y=y_samples[:, 0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4c8caf8a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "projector = Selector(x_samples, y_samples)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e198c95",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "curve = projector.regularization_curve(\n",
+    "    batch_size=batch_size,\n",
+    "    minibatch_size=minibatch_size,\n",
+    "    number_of_epochs=number_of_epochs\n",
+    ")\n",
+    "paths = projector.lasso_path()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6551e522",
+   "metadata": {},
+   "source": [
+    "#### Sorted features by decreasing importance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a503fa32",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'Sorted features by decreasing importance: {projector.ordered_features}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3b6679bf",
+   "metadata": {},
+   "source": [
+    "### Test selection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "65f990cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expected_features = np.argsort(np.sum(A, axis=0))[::-1][:dim_z]\n",
+    "noise_features = set(range(dim_x)).difference(set(expected_features))\n",
+    "selected_features = np.argsort(paths.iloc[-1, :])[::-1][:dim_z]\n",
+    "print(f'Expected features: {sorted(list(expected_features))}')\n",
+    "print(f'Selected features: {sorted(list(selected_features))}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a8bf88af",
+   "metadata": {},
+   "source": [
+    "## Comparison with sklearn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "332ba768",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.feature_selection import f_regression, mutual_info_regression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b76ba28",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fstats, _ = f_regression(x_samples, np.linalg.norm(y_samples, axis=1))\n",
+    "fstats /= np.max(fstats)\n",
+    "f_selection = np.argmax(fstats)\n",
+    "print(f'f_selection: {f_selection}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ceec08f5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mi = mutual_info_regression(x_samples, np.linalg.norm(y_samples, axis=1))\n",
+    "mi /= np.max(mi)\n",
+    "mi_selection = np.argmax(mi)\n",
+    "print(f'mi_selection: {mi_selection}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b455ad3a",
+   "metadata": {},
+   "source": [
+    "## Comparison with Boruta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "96b8a9cb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from arfs.feature_selection import allrelevant\n",
+    "from arfs.feature_selection.allrelevant import Leshy\n",
+    "from sklearn.ensemble import RandomForestRegressor"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "221af1e1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n_estimators = 'auto'\n",
+    "perc = 90\n",
+    "alpha = 0.05\n",
+    "importance = \"shap\"\n",
+    "two_step = True\n",
+    "max_iter = 100\n",
+    "random_state = None\n",
+    "verbose = 0\n",
+    "keep_weak = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b585adf9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "xdf = pd.DataFrame(x_samples, columns = [f'f{i}' for i in range(dim_x)])\n",
+    "yser = pd.Series(y_samples[:, 0], name='y')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2ce26143",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rf = RandomForestRegressor(n_jobs=-1, max_depth=8)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a996e76c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "leshy = Leshy(\n",
+    "    rf,\n",
+    "    n_estimators=n_estimators,\n",
+    "    perc=perc,\n",
+    "    alpha=alpha,\n",
+    "    importance=importance,\n",
+    "    two_step=two_step,\n",
+    "    max_iter=max_iter,\n",
+    "    random_state=random_state,\n",
+    "    verbose=verbose,\n",
+    "    keep_weak=keep_weak,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "70b33240",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "leshy.fit(xdf, yser)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fb4528e0",
+   "metadata": {},
+   "source": [
+    "### Test selection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "93b73a26",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expected_features = np.argsort(np.sum(A, axis=0))[::-1][:dim_z]\n",
+    "leshy_selection = [int(col.replace('f', '')) for col in leshy.selected_features_]\n",
+    "print(f'Expected features: {sorted(list(expected_features))}')\n",
+    "print(f'Leshy-selected features: {sorted(list(leshy_selection))}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7771b83e",
+   "metadata": {},
+   "source": [
+    "# Linear and non-linear transformation in high dimension"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "10edf512",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dim_x = 20\n",
+    "dim_y = 1  # has to be one-dimensional\n",
+    "dim_z = 5\n",
+    "\n",
+    "batch_size = int(1e+4)\n",
+    "minibatch_size = 250\n",
+    "num_of_samples = int(1e+4)\n",
+    "number_of_epochs = 3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9ac11521",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transform_tilde = special_ortho_group.rvs(dim_z)[:dim_y]\n",
+    "A = np.random.permutation(np.concatenate((np.eye(dim_z), np.zeros((dim_z, dim_x - dim_z))), axis=1).T).T\n",
+    "transform = transform_tilde @ A"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15eb3b4c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x_samples = np.random.uniform(size=(num_of_samples, dim_x))\n",
+    "tt = np.repeat(np.expand_dims(transform, axis=0), repeats=num_of_samples, axis=0)\n",
+    "prey = (tt @ np.expand_dims(x_samples, axis=2))[:, :, 0]\n",
+    "y_samples = np.random.normal(0, 1e-2, size=prey.shape)  # small Gaussian observation noise\n",
+    "y_samples[:, 0] += np.sin(2*np.pi*prey[:, 0])  # add the signal; plain `=` discarded the noise"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "742b8c90",
+   "metadata": {},
+   "source": [
+    "### Viz of relations between target and features"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "286a7886",
+   "metadata": {},
+   "source": [
+    "Because of the initial rotation, visual inspection of the relation between features and target does not give insights on which features should be selected"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dbc33430",
+   "metadata": {},
+   "source": [
+    "Relation between $y$ and a correct feature"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cbb24917",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expected_features = np.argsort(np.sum(A, axis=0))[::-1][:dim_z]\n",
+    "sns.scatterplot(x=x_samples[:, expected_features[0]], y=y_samples[:, 0])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dfad4947",
+   "metadata": {},
+   "source": [
+    "Relation between $y$ and a wrong feature"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12f86383",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nonrelevant = set(range(dim_x)).difference(set(expected_features))\n",
+    "featureidx = np.random.choice(list(nonrelevant))\n",
+    "sns.scatterplot(x=x_samples[:, featureidx], y=y_samples[:, 0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "49701075",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "projector = Selector(x_samples, y_samples)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1af2d5e4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "curve = projector.regularization_curve(\n",
+    "    batch_size=batch_size,\n",
+    "    minibatch_size=minibatch_size,\n",
+    "    number_of_epochs=number_of_epochs\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c96e444f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "paths = projector.lasso_path()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bd1a2150",
+   "metadata": {},
+   "source": [
+    "#### Sorted features by decreasing importance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "59656d81",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'Sorted features by decreasing importance: {projector.ordered_features}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "85ff9d3c",
+   "metadata": {},
+   "source": [
+    "### Test selection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "55d268d9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expected_features = np.argsort(np.sum(A, axis=0))[::-1][:dim_z]\n",
+    "noise_features = set(range(dim_x)).difference(set(expected_features))\n",
+    "selected_features = np.argsort(paths.iloc[-1, :])[::-1][:dim_z]\n",
+    "print(f'Expected features: {sorted(list(expected_features))}')\n",
+    "print(f'Selected features: {sorted(list(selected_features))}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4e5280cf",
+   "metadata": {},
+   "source": [
+    "## Regularisation curve"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b578c3c6",
+   "metadata": {},
+   "source": [
+    "#### Cumulative beta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "71c90034",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.plot(np.arange(1, 1+len(curve)), curve)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "64d50369",
+   "metadata": {},
+   "source": [
+    "#### Absolute beta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4ae8aab",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.plot(np.arange(1, len(curve)), np.abs(np.diff(curve)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "791889d0",
+   "metadata": {},
+   "source": [
+    "## Lasso paths"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "df408f64",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "paths.plot(figsize=(10, 5))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "87b7675f",
+   "metadata": {},
+   "source": [
+    "## Comparison with sklearn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "30f6f83b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.feature_selection import f_regression, mutual_info_regression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "24df734c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fstats, _ = f_regression(x_samples, np.linalg.norm(y_samples, axis=1))\n",
+    "fstats /= np.max(fstats)\n",
+    "f_selection = np.argsort(fstats)[::-1][:dim_z]\n",
+    "print(f'Expected features: {sorted(list(expected_features))}')\n",
+    "print(f'f_selection: {sorted(f_selection)}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d84477ee",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mi = mutual_info_regression(x_samples, y_samples[:, 0])  # pass the single target column as 1-D\n",
+    "mi /= np.max(mi)\n",
+    "mi_selection = np.argsort(mi)[::-1][:dim_z]\n",
+    "print(f'Expected features: {sorted(list(expected_features))}')\n",
+    "print(f'mi_selection: {sorted(mi_selection)}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f5ada8be",
+   "metadata": {},
+   "source": [
+    "## Comparison with Boruta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "511abe4a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from arfs.feature_selection import allrelevant\n",
+    "from arfs.feature_selection.allrelevant import Leshy\n",
+    "from sklearn.ensemble import RandomForestRegressor"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a02901e1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n_estimators = 'auto'\n",
+    "perc = 90\n",
+    "alpha = 0.05\n",
+    "importance = \"shap\"\n",
+    "two_step = True\n",
+    "max_iter = 100\n",
+    "random_state = None\n",
+    "verbose = 0\n",
+    "keep_weak = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "00081320",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "xdf = pd.DataFrame(x_samples, columns = [f'f{i}' for i in range(dim_x)])\n",
+    "yser = pd.Series(y_samples[:, 0], name='y')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3f456422",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rf = RandomForestRegressor(n_jobs=-1, max_depth=8)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a36700a6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "leshy = Leshy(\n",
+    "    rf,\n",
+    "    n_estimators=n_estimators,\n",
+    "    perc=perc,\n",
+    "    alpha=alpha,\n",
+    "    importance=importance,\n",
+    "    two_step=two_step,\n",
+    "    max_iter=max_iter,\n",
+    "    random_state=random_state,\n",
+    "    verbose=verbose,\n",
+    "    keep_weak=keep_weak,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9613c874",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "leshy.fit(xdf, yser)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7b37b316",
+   "metadata": {},
+   "source": [
+    "### Test selection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cf586f09",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expected_features = np.argsort(np.sum(A, axis=0))[::-1][:dim_z]\n",
+    "leshy_selection = [int(col.replace('f', '')) for col in leshy.selected_features_]\n",
+    "print(f'Expected features: {sorted(list(expected_features))}')\n",
+    "print(f'Leshy-selected features: {sorted(list(leshy_selection))}')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "hiselstudy",
+   "language": "python",
+   "name": "hiselstudy"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}