Skip to content

sssqweeeed/handmade-ml

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

8 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "## handmade-ml: Welcome notebook\n",
    "\n",
    "### Models\n",
    "* Linear Regression\n",
    "* Decision Tree\n",
    "* Random Forest (Classifier)\n",
    "* Gradient Boosted Trees (GBT)\n",
    "\n",
    "You can read more about these models inside their folders."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [],
   "source": [
    "# plotting and simple example datasets\n",
    "import seaborn as sns\n",
    "# plotting: `plt` must alias the pyplot interface, not the top-level matplotlib package\n",
    "import matplotlib.pyplot as plt\n",
    "# tabular data (DataFrames)\n",
    "import pandas as pd\n",
    "# math, vectorized operations\n",
    "import numpy as np\n",
    "# mse / error are passed to the boosting model below as its metric / derivative\n",
    "from metrics import mse, error\n",
    "\n",
    "from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error\n",
    "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# pick up edits to imported modules without restarting the kernel\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Linear regression"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [],
   "source": [
    "wine_quality_dataset = pd.read_csv('datasets/winequality-red.csv')\n",
    "\n",
    "# Every column except the target is used as a feature.\n",
    "pred_column = 'quality'\n",
    "feature_columns = list(wine_quality_dataset.columns)\n",
    "feature_columns.remove(pred_column)\n",
    "\n",
    "X = wine_quality_dataset[feature_columns].astype(float)\n",
    "y = wine_quality_dataset[pred_column].astype(float)\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)\n",
    "\n",
    "# Fit the scaler on the training split only and reuse it for the test split;\n",
    "# fit_transform already fits, so no separate fit() call is needed.\n",
    "prep = MinMaxScaler()\n",
    "X_train = prep.fit_transform(X_train)\n",
    "X_test = prep.transform(X_test)\n"
   ],
   "metadata": {
    "collapsed": false
   }
  },
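  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [],
   "source": [
    "# Disabled experiment: would restrict the feature list to a single column.\n",
    "# NOTE: X is built from feature_columns in the data-loading cell above, so this\n",
    "# override has no effect unless it is applied before X is constructed.\n",
    "# feature_columns = ['fixed acidity']"
   ],
   "metadata": {
    "collapsed": false
   }
  },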
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "data": {
      "text/plain": "0.6518541092800736"
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from LinearModels.LinearReg import LinearRegression\n",
    "\n",
    "# NOTE(review): reg_lmb=0.0 presumably switches the 'lasso' penalty off - confirm against LinearRegression.\n",
    "model = LinearRegression(\n",
    "    num_iters=1000,\n",
    "    regularization='lasso',\n",
    "    learning_rate=1.0,\n",
    "    reg_lmb=0.0\n",
    ")\n",
    "# y_train is a pandas Series, so it is converted to a plain array before fitting.\n",
    "model.fit(np.array(X_train), np.array(y_train))\n",
    "y_pred = model.predict(np.array(X_test))\n",
    "# sklearn metrics take (y_true, y_pred), in that order.\n",
    "mean_absolute_error(y_test, y_pred)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Decision tree"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "data": {
      "text/plain": "0.5021684698676662"
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from Trees.Tree import TreeRegressor\n",
    "\n",
    "model = TreeRegressor(\n",
    "    metric=mean_absolute_error,\n",
    "    criterion='entropy',\n",
    "    max_depth=5,\n",
    "    minimize=True,\n",
    "    debug=False,\n",
    ")\n",
    "model.fit(X_train, y_train)\n",
    "# Predictions are read back from the column named by predict_col.\n",
    "pred = model.predict(X_test, predict_col='target')\n",
    "# sklearn metrics take (y_true, y_pred), in that order.\n",
    "mean_absolute_error(y_test, pred['target'])"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### GBT"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iteration: 3, Loss: 0.5548046304394244\n",
      "\n",
      "Iteration: 4, Loss: 0.517176783565545\n",
      "\n",
      "Iteration: 5, Loss: 0.4930375806110396\n",
      "\n",
      "Iteration: 6, Loss: 0.48312167424749897\n",
      "\n",
      "Iteration: 7, Loss: 0.4598877124507926\n",
      "\n",
      "Iteration: 8, Loss: 0.4586910957712947\n",
      "\n",
      "Iteration: 9, Loss: 0.4335972170195566\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from Boosting.TreeBoost import SimpleTreeBoostRegressor\n",
    "\n",
    "model = SimpleTreeBoostRegressor(\n",
    "    n_estimators=10,\n",
    "    lr=0.1,\n",
    "    metric=mse,\n",
    "    derivative=error,\n",
    "    max_depth=3,\n",
    "    colsample_bytree=0.8,\n",
    "    criterion='entropy',\n",
    "    subsample=0.8,\n",
    "    minimize=True,\n",
    "    debug=False,\n",
    ")\n",
    "\n",
    "# fit() takes a single frame holding the features and the target column together.\n",
    "# The target is assigned as a plain list so values are matched by position,\n",
    "# not by the index labels y_train carries over from the original dataset.\n",
    "train_df = pd.DataFrame(X_train)\n",
    "train_df['target'] = list(y_train)\n",
    "\n",
    "model.fit(train_df, target='target')"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [
    {
     "data": {
      "text/plain": "0.5697732394729527"
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Score the boosted model fitted in the previous cell on the test split.\n",
    "test_df = pd.DataFrame(X_test)\n",
    "test_df['target'] = list(y_test)\n",
    "# The return value of predict() is unused: the 'pred' column is read from test_df afterwards,\n",
    "# so predict() is relied on to add that column to the frame it is given.\n",
    "model.predict(test_df, predict_col='pred')\n",
    "mean_absolute_error(test_df['target'], test_df['pred'])"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published