diff --git a/Projects-ML/Reg-models/Supervised-ML-project.ipynb b/Projects-ML/Reg-models/Supervised-ML-project.ipynb index 1ef5cef..45d6ef7 100644 --- a/Projects-ML/Reg-models/Supervised-ML-project.ipynb +++ b/Projects-ML/Reg-models/Supervised-ML-project.ipynb @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -994,12 +994,575 @@ "print(f'Lasso when large number of alpha {lasso_regressor1.best_score_}')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Now let's use train test method:**" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "from sklearn.exceptions import ConvergenceWarning\n", + "\n", + "# Suppress convergence warnings\n", + "warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Linear regression = -25.18787473928514\n", + "Ridge regression = {'alpha': 0.01}\n", + "Ridge regression = -25.18689936738697\n", + "Lasso regression = {'alpha': 1e-15}\n", + "Lasso regression = -25.18787473928503\n" + ] + } + ], + "source": [ + "lin_rag2 = LinearRegression() \n", + "mse = cross_val_score(lin_rag, X_train,y_train,scoring='neg_mean_squared_error', cv=5)\n", + "mean_mse = np.mean(mse)\n", + "print(f'Linear regression = {mean_mse}')\n", + "\n", + "ridge2 =Ridge()\n", + "params = {'alpha':[1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,35,40,50,55,60,80,90,100]}\n", + "ridge_regressor2 = GridSearchCV(ridge2, params, scoring='neg_mean_squared_error', cv=5)\n", + "ridge_regressor2.fit(X_train,y_train)\n", + "\n", + "print(f'Ridge regression = {ridge_regressor2.best_params_}')\n", + "print(f'Ridge regression = {ridge_regressor2.best_score_}')\n", + "\n", + "lasso2 =Lasso()\n", + "params = {'alpha':[1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,35,40,50,55,60,80,90,100]}\n", + "lasso_regressor2 = GridSearchCV(lasso2, params, scoring='neg_mean_squared_error', cv=5)\n", + "lasso_regressor2.fit(X_train,y_train)\n", + "\n", + "print(f'Lasso regression = {lasso_regressor2.best_params_}')\n", + "print(f'Lasso regression = {lasso_regressor2.best_score_}')" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.6832260852266521\n" + ] + } + ], + "source": [ + "y_pred = lin_rag.predict(X_test)\n", + "from sklearn.metrics import r2_score\n", + "\n", + "r2_score_lin = r2_score(y_pred, y_test)\n", + "\n", + "print(r2_score_lin)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.6708743257533069\n" + ] + } + ], + "source": [ + "y_pred = ridge_regressor2.predict(X_test)\n", + "from sklearn.metrics import r2_score\n", + "\n", + "r2_score_ridge = r2_score(y_pred, y_test)\n", + "\n", + "print(r2_score_ridge)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.670955897674443\n" + ] + } + ], + "source": [ + "y_pred = lasso_regressor2.predict(X_test)\n", + "from sklearn.metrics import r2_score\n", + "\n", + "r2_score_lasso = r2_score(y_pred, y_test)\n", + "\n", + "print(r2_score_lasso)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **Logistic regression**\n", + "\n", + "https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html\n", + "\n", + "`class sklearn.linear_model.LogisticRegression(penalty='l2', *, dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression \n", + "from sklearn.datasets import load_breast_cancer\n", + "\n", + "df = load_breast_cancer()\n", + "\n", + "# Independent features\n", + "X = pd.DataFrame(df['data'], columns=df['feature_names'])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | mean radius | \n", + "mean texture | \n", + "mean perimeter | \n", + "mean area | \n", + "mean smoothness | \n", + "mean compactness | \n", + "mean concavity | \n", + "mean concave points | \n", + "mean symmetry | \n", + "mean fractal dimension | \n", + "... | \n", + "worst radius | \n", + "worst texture | \n", + "worst perimeter | \n", + "worst area | \n", + "worst smoothness | \n", + "worst compactness | \n", + "worst concavity | \n", + "worst concave points | \n", + "worst symmetry | \n", + "worst fractal dimension | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "17.99 | \n", + "10.38 | \n", + "122.80 | \n", + "1001.0 | \n", + "0.11840 | \n", + "0.27760 | \n", + "0.3001 | \n", + "0.14710 | \n", + "0.2419 | \n", + "0.07871 | \n", + "... | \n", + "25.38 | \n", + "17.33 | \n", + "184.60 | \n", + "2019.0 | \n", + "0.1622 | \n", + "0.6656 | \n", + "0.7119 | \n", + "0.2654 | \n", + "0.4601 | \n", + "0.11890 | \n", + "
1 | \n", + "20.57 | \n", + "17.77 | \n", + "132.90 | \n", + "1326.0 | \n", + "0.08474 | \n", + "0.07864 | \n", + "0.0869 | \n", + "0.07017 | \n", + "0.1812 | \n", + "0.05667 | \n", + "... | \n", + "24.99 | \n", + "23.41 | \n", + "158.80 | \n", + "1956.0 | \n", + "0.1238 | \n", + "0.1866 | \n", + "0.2416 | \n", + "0.1860 | \n", + "0.2750 | \n", + "0.08902 | \n", + "
2 | \n", + "19.69 | \n", + "21.25 | \n", + "130.00 | \n", + "1203.0 | \n", + "0.10960 | \n", + "0.15990 | \n", + "0.1974 | \n", + "0.12790 | \n", + "0.2069 | \n", + "0.05999 | \n", + "... | \n", + "23.57 | \n", + "25.53 | \n", + "152.50 | \n", + "1709.0 | \n", + "0.1444 | \n", + "0.4245 | \n", + "0.4504 | \n", + "0.2430 | \n", + "0.3613 | \n", + "0.08758 | \n", + "
3 | \n", + "11.42 | \n", + "20.38 | \n", + "77.58 | \n", + "386.1 | \n", + "0.14250 | \n", + "0.28390 | \n", + "0.2414 | \n", + "0.10520 | \n", + "0.2597 | \n", + "0.09744 | \n", + "... | \n", + "14.91 | \n", + "26.50 | \n", + "98.87 | \n", + "567.7 | \n", + "0.2098 | \n", + "0.8663 | \n", + "0.6869 | \n", + "0.2575 | \n", + "0.6638 | \n", + "0.17300 | \n", + "
4 | \n", + "20.29 | \n", + "14.34 | \n", + "135.10 | \n", + "1297.0 | \n", + "0.10030 | \n", + "0.13280 | \n", + "0.1980 | \n", + "0.10430 | \n", + "0.1809 | \n", + "0.05883 | \n", + "... | \n", + "22.54 | \n", + "16.67 | \n", + "152.20 | \n", + "1575.0 | \n", + "0.1374 | \n", + "0.2050 | \n", + "0.4000 | \n", + "0.1625 | \n", + "0.2364 | \n", + "0.07678 | \n", + "
5 rows × 30 columns
\n", + "\n", + " | Target | \n", + "
---|---|
0 | \n", + "0 | \n", + "
1 | \n", + "0 | \n", + "
2 | \n", + "0 | \n", + "
3 | \n", + "0 | \n", + "
4 | \n", + "0 | \n", + "
... | \n", + "... | \n", + "
564 | \n", + "0 | \n", + "
565 | \n", + "0 | \n", + "
566 | \n", + "0 | \n", + "
567 | \n", + "0 | \n", + "
568 | \n", + "1 | \n", + "
569 rows × 1 columns
\n", + "