From eb96bb533d7e41c2bb97a935df80ff999da1a7f3 Mon Sep 17 00:00:00 2001 From: Mani9550 <93199718+Mani9550@users.noreply.github.com> Date: Wed, 27 Oct 2021 15:41:34 +0530 Subject: [PATCH 01/11] Added all the 4 tasks --- Classification_Task3_203174002.ipynb | 1 + KNN_Task4 _203174002.ipynb | 1 + Linear_Regression_Task2_203174002.ipynb | 1 + 3 files changed, 3 insertions(+) create mode 100644 Classification_Task3_203174002.ipynb create mode 100644 KNN_Task4 _203174002.ipynb create mode 100644 Linear_Regression_Task2_203174002.ipynb diff --git a/Classification_Task3_203174002.ipynb b/Classification_Task3_203174002.ipynb new file mode 100644 index 0000000..1d8640a --- /dev/null +++ b/Classification_Task3_203174002.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Classification_Task3_203174002.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.8"}},"cells":[{"cell_type":"markdown","metadata":{"id":"7C5rAxwPGDQf"},"source":["# Importing useful libraries"]},{"cell_type":"code","metadata":{"id":"8qvrslgsF4Mn"},"source":["import numpy as np\n","import pandas as pd \n","from pandas import Series, DataFrame\n","\n","import seaborn as sns\n","import matplotlib.pyplot as plt\n","%matplotlib inline"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"6aYOLI2BHF6m"},"source":["## Loading the dataset.\n","The dataset can be found [here](https://github.com/shreedharmalpani/Intro-To-ML-Hello-FOSS/blob/main/iris.csv)"]},{"cell_type":"code","metadata":{"id":"_hccks2pF4Mq"},"source":["df = 
pd.read_csv(\"iris.csv\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"um0L09IOF4Ms","colab":{"base_uri":"https://localhost:8080/","height":205},"executionInfo":{"status":"ok","timestamp":1635010648975,"user_tz":-330,"elapsed":513,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"5adfbd69-3916-4a7d-9b21-496cfc4ab498"},"source":["df.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n","
"],"text/plain":[" sepal_length sepal_width petal_length petal_width species\n","0 5.1 3.5 1.4 0.2 setosa\n","1 4.9 3.0 1.4 0.2 setosa\n","2 4.7 3.2 1.3 0.2 setosa\n","3 4.6 3.1 1.5 0.2 setosa\n","4 5.0 3.6 1.4 0.2 setosa"]},"metadata":{},"execution_count":5}]},{"cell_type":"code","metadata":{"id":"cbx8gP4zF4Mt","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635010651537,"user_tz":-330,"elapsed":11,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"6d8d789e-43c0-499a-9f8d-7f501f9b58bc"},"source":["df.info() "],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","RangeIndex: 150 entries, 0 to 149\n","Data columns (total 5 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 sepal_length 150 non-null float64\n"," 1 sepal_width 150 non-null float64\n"," 2 petal_length 150 non-null float64\n"," 3 petal_width 150 non-null float64\n"," 4 species 150 non-null object \n","dtypes: float64(4), object(1)\n","memory usage: 6.0+ KB\n"]}]},{"cell_type":"markdown","metadata":{"id":"vjH1pAqoJna2"},"source":["# Data Cleaning & Data Visualization"]},{"cell_type":"markdown","metadata":{"id":"PNfSyZF1F4Mu"},"source":["### 1) Remove unneeded columns\n","### 2) Check for duplicate rows \n","### 2) Check for rows with missing values\n"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"HKFsVhubF4Mx"},"source":["df.isna().sum()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Rm7rlDcPR0aI"},"source":["df.isnull()"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"L1cnZS5XF4M2"},"source":["### EDA (Exploratory Data Analysis) with 
Iris"]},{"cell_type":"code","metadata":{"id":"DjsnhAraF4M7","colab":{"base_uri":"https://localhost:8080/","height":458},"executionInfo":{"status":"ok","timestamp":1635011702579,"user_tz":-330,"elapsed":523,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"a7bf481b-9840-450b-b8f2-49a114bba973"},"source":["##Displaying a scatter plot to show the distribution of Sepal Length vs width the dataset\n","\n","fig = df[df.species == 'Iris-setosa'].plot(kind='scatter', x='petal_length', y='petal_width', color='orange', label='Setosa')\n","df[df.species == 'Iris-versicolor'].plot(kind='scatter', x='petal_length', y='petal_width', color='blue', label='Versicolor', ax=fig)\n","df[df.species == 'Iris-virginica'].plot(kind='scatter', x='petal_length', y='petal_width', color='green', label='Virginica', ax=fig)\n","\n","fig.set_xlabel('Petal Length')\n","fig.set_ylabel('Petal Width')\n","fig.set_title('Petal Length Vs Width')\n","\n","fig=plt.gcf()\n","fig.set_size_inches(10, 7)\n","plt.show()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"O_eYByFvF4M5","colab":{"base_uri":"https://localhost:8080/","height":235},"executionInfo":{"status":"error","timestamp":1635011142444,"user_tz":-330,"elapsed":515,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"fa78777e-0d87-449b-d9b7-a3995609ba6a"},"source":["#Display a scatter plot to show the distribution of Sepal Length vs width the dataset (Like previous Petal lenght vs width scatter plot)\n","\n","\n","#code\n","\n","fig.set_xlabel('Sepal Length')\n","fig.set_ylabel('Sepal Width')\n","fig.set_title('Sepal Length Vs Width')\n","\n","\n","fig=plt.gcf()\n","fig.set_size_inches(10, 7)\n","plt.show()\n","#example plot"],"execution_count":null,"outputs":[{"output_type":"error","ename":"AttributeError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m#code\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_xlabel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Sepal Length'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_ylabel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Sepal Width'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_title\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Sepal Length Vs 
Width'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mAttributeError\u001b[0m: 'Figure' object has no attribute 'set_xlabel'"]}]},{"cell_type":"code","metadata":{"id":"EIh_yKQAF4M6","outputId":"91706df0-4dd1-4a8b-92e7-41e04e6e602d"},"source":["#plot the FacetGrid plot using the seaborn library\n","\n","#sns.FacetGrid(...)\\\n","# .map(...)\\\n","# .add_legend()\n"],"execution_count":null,"outputs":[{"data":{"text/plain":[""]},"execution_count":12,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAboAAAFgCAYAAADNUrzMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X+cXHV97/HXzszubJKZ7O4kGxMSfod8pRF0A5VaUJAH\nKL1UsFJ+XGzVUq8Fr9dbKz4o/gJUqlyx2lZFhaJitSCtFhRbBREtaLGQyI8o3xBBICFxN/tzJrs7\nszOz949zdjPZPXNmdmbO/Mr7+XjkITPfPed85uy63z3f8z3vb8fs7CwiIiLtKtToAkRERIKkjk5E\nRNqaOjoREWlr6uhERKStqaMTEZG2Fml0AeUaGkrWfXpoX99yRkcn633YkpqxLtVUHtVUnlatqb8/\n3lGncmQJAu3ojDFrgEeBs621TxW8/x7g7cCQ+9ZfWGttkLVUIhIJN7oET81Yl2oqj2oqj2qSWgqs\nozPGdAJfBKY8mk8C3mKtfTSo44uIiECw9+huBL4AvOjRdhJwtTHmQWPM1QHWICIih7iOIJJRjDFv\nAzZYaz9mjHkAuHzB0OU1wOeACeDbwE3W2u/67TObzc1q6EBEmpzu0TWhoDq6nwCz7r9XADuA86y1\ne40xHcBKa+24+7XvBFZZaz/qt89GTEbp748zNJSs92FLasa6VFN5VFN5WrUmTUZpToHco7PWvmbu\nvwuu6Pa6b60EnjTGHA/sB84Ebg2iDhERkbo9XmCMuRSIWWu/ZIx5P/AjIA380Fr7vXrVISIih5bA\nOzpr7Rnufz5V8N7XgK8FfWwRERElo4iISFtTRyciIm1NHZ2IiLQ1dXTSMOmZHIOjk6Rnco0uRUTa\nWMuEOkv7yOXz3HH/TrbtGGJkIk1iZZSBTf1cfOZGwiH97SUitaWOTurujvt3ct8ju+ZfD0+k519f\netamRpUlIm1Kfz5LXaVncmzbMeTZtm3HPg1jikjNqaOTuhpPpRmZSHu2jSanGU95t4mIVEodndRV\nTyxKYmXUs60v3k1PzLtNRKRS6uikrqKdYQY29Xu2DWxaTbRTK1SISG1pMorU3cVnbgSce3KjyWn6\n4t0MbFo9/76ISC2po5O6C4dCXHrWJi44/VjGU2l6YlFdyYlIYNTRScNEO8Os6Vve6DJEpM3pHp2I\niLQ1dXQiItLW1NGJiEhbU0cnIiJtTR2diIi0NXV0IiLS1tTRiYhIW1NHJyIibU0dnYiItDV1dCIi\n0tbU0YmISFtTRyciIm1NHZ2IiLQ1dXRStfRMjj379pOeyTW6FBGRRbRMj1Qsl89zx/072bZjiJFk\nmkQ8ysCmfi4+cyPhkP6GEpHmoI5OKnbH/Tu575Fd86+HJ9Lzry89a1OjyhIROYj+7JaKpGdybNsx\n5Nm2bcc+DWOKSNNQRycVGU+lGZ
lIe7aNJqcZT3m3iYjUmzo6qUhPLEpiZdSzrS/eTU/Mu01EpN7U\n0UlFop1hBjb1e7YNbFpNtDNc54pERLxpMopU7OIzNwLOPbnR5DR98W4GNq2ef19EpBmoo5OKhUMh\nLj1rExecfizhrk5ymRldyYlI09HQpVQt2hlm3eoV6uREpCmpoxMRkbamju4QkZ7JMTg6qefbROSQ\no3t0be6gmK6JNImViukSkUOLOro2p5guETnU6U/6NqaYLhERdXRtTTFdIiLq6NqaYrpERNTRtTXF\ndImIaDJK21NMl4gc6tTRtbnCmK7xVJqeWFRXciJySFFHd4iIdoZZ07e80WWIiNSd7tGJiEhbU0cn\nIiJtTR2dNC3lc4pILQR6j84YswZ4FDjbWvtUwftvAD4MZIFbrbU3B1mHtBblc4pILQX2W8MY0wl8\nEZjyeP/TwOuA04F3GGNeElQd0nrm8jmHJ9LMciCf8477dza6NBFpQUH+eXwj8AXgxQXvHw/stNaO\nWmszwIPAawKsQ1qI8jlFpNYCGbo0xrwNGLLWft8Yc/WC5pXAeMHrJNBTap99fcuJROr//Fd/f7zu\nxyxHM9ZVi5r27NvPSLJ4Pme4q5P+1SvqWlOtqabyqCaplaDu0V0GzBpjzgJeAdxmjDnPWrsXmAAK\nf1riwFipHY6OTgZSqJ/+/jhDQ8m6H7eUZqyrVjXlZnIk4lGGPcKo++Ld5DIzZR+nnc9TLamm8pRT\nkzrC5hRIR2etnR+KNMY8AFzudnIAvwKOM8YkgBTOsOWNQdQhrWcun7NwDb05yucUkUrULRnFGHMp\nELPWfskY81fA93HuEd5qrd1drzqk+SmfU0RqKfCOzlp7hvufTxW89x3gO0EfW1qT8jlFpJaUdSlN\nS/mcIlILevpWRETamjo6KSk5meFXvxkhOZlpdCkiIkumoUspKpPNcv1tW9k9lCI/C6EOWN8f4wNv\n2UJXRD86ItIadEUnRV1/21ZeGHQ6OYD8LLwwmOL627Y2tjARkSVQRyeekpMZdg+lPNt2D6U0jCki\nLUMdnXjaVXAlt1B+1mkXEWkF6ujE04Y1MUId3m2hDqddRKQVqKMTT/HlXazv9+7M1vfHiC/vqnNF\nIiKVUUcnRX3gLVs4vODKLtQBh69xZl2KiLQKzRGXoroiEa677JUkJzPsGkyxYY2u5ESk9aijk5Li\ny7s4/qhEo8sQEamIhi5FRKStqaMTEZG2po6ujQyPT/HTJ/YwPD7V6FLKkp7JMTg6SXom1+hSpE3k\n02kyg4Pk04tXqA9yW2luukfXBqYyM1x1089ITWXn34sti3DDFa9iWVdnAyvzlsvnueP+nWzbMcTI\nRJrEyigDm/q5+MyNhEP620uWbjaXY+jO20lt20p2ZIRIIkFsYAv9F15CR9h/LcNqtpXWoN8qbWBh\nJweQmspy1U0/a1BF/u64fyf3PbKL4Yk0s8DwRJr7HtnFHffvbHRp0qKG7rydsfvuJTs8DLOzZIeH\nGbvvXobuvD3QbaU1qKNrccPjU4s6uTmpqWzTDWOmZ3Js2zHk2bZtxz4NY8qS5dNpUtu8g8ZT27b5\nDkVWs620DnV0Lc4+P1ZVe72Np9KMTHj/8hhNTjOe0i8WWZrs+DjZkRHvttERsuPjgWwrrUMdXYsz\nR/RW1V5vPbEoiZVRz7a+eDc9Me82kWIiPT1EEt7PeUb6EkR6egLZVlqHOroWt6pnGbFl3nOKYssi\nrOpZVueK/EU7wwxs6vdsG9i0mminbv7L0oSiUWID3rF0sYEBQtHifzxVs620Ds26bAM3XPGqorMu\nm9HFZ24EnHtyo8lp+uLdDGxaPf++yFL1X3gJ4NxXy46OEOlLEBsYmH8/qG2lNXTMzhZZdKzJDA0l\n615of3+coaFkvQ9bUrG6hsensM+PYY7orfuVXCXnKj2TYzyVpicWDeRKrhm/f6qpPJXWlE+nyY6P
\nE+npWfLVWKlty6mpvz9eZHEraSRd0bWRVT3L+P0Tmmuo0k+0M8yavuWNLkPaSCgapWvNmrpvK81N\n9+hERKStqaNrI9VEavltq6guEWllGrpsA9VEavltCyiqS0Ranjq6NjAXqTVnLlIL4NKzNlW8LVDx\nfkVEmoX+LG9x1URq+W87xFY7WNF+RUSaiTq6FldNpJbftiPJNCPJTEX7FRFpJuroWlw1kVp+2ybi\nURLxror2KyLSTNTRtbhqIrX8t+1ni/F+pkhRXSLSSjQZpQ1UE6lVzraK6hKRVqYIMB/NGI0Exeuq\nJlLLb9ty9tuM50o1lUc1lUcRYK1LV3RtpJpILb9tFdUlIq1M9+hERKStqaMTEZG2po6uQo3Kf0zP\n5Nizb78e2JaGy6fTZAYHyaf1TKU0N92jW6JqciVrdtxkmkRcuZPSGLO5HEN33k5q21ayIyNEEgli\nA1vov/ASOsJ67ESajzq6JaomV7IVjyuy0NCdtzN2373zr7PDw/Ov11zy5kaVJVKULgWWoJpcyVY8\nrshC+XSa1Latnm2pbds0jClNSR3dElSTK9mKxxVZKDs+TnZkxLttdITs+HidKxIpTR3dElSTK9mK\nxxVZKNLTQySR8G7rSxDp6alzRSKlqaNbgmpyJVvxuCILhaJRYgNbPNtiAwOEovqjS5qPJqMsUTW5\nkq14XJGF+i+8BHDuyWVHR4j0JYgNDMy/L9JslHXpwy/brppcyWqkZ3KEuzrJZWaa6kquVbMJ662d\nasqn02THx4n09NT8Sq5Vz5OyLpuThi4rNJf/WO/OJtoZZt3qFU3VycmhKRSN0rVmjYYrpempoxMR\nkbamjq7FJCczPPb0EMnJjGfbr34z4tkWZGSZYslEpJlpMkqLyGSzXH/bVnYPpcjPQqgD1vfH+MBb\nnBlwxdrCoVBgkWWKJRORVhBYR2eMCQM3AwaYBS631j5Z0P4e4O3AXOTHX1hrbVD1tLrrb9vKC4Op\n+df5WXhhMMX1tzkpFcXazBG9gUWHKZZMRFpBkH92vwHAWnsq8EHg+gXtJwFvsdae4f5TJ1dEcjLD\n7qGUZ9vuodRBnVyhXYMpHrXBRIcplkxEWkVgV3TW2n8zxnzXfXkkMLbgS04CrjbGrAXusdZ+3G9/\nfX3LiUTqP9Owvz9e92Mu9OLTQ+SLPFxR7H1wLqPHksWjw8JdnfSvXlFRTXv27WckoH3XUjN8/xZS\nTeVRTVIrgd6js9ZmjTFfBf4I+OMFzbcDnwMmgG8bY/7QWvvdhfuYMzo6GVyhRTTLszzxrhChDu9O\nrdj7AB1AbzzKqEeH1BfvJpeZqfjz5WZyJOJRhj0yOKvdd600y/evkGoqT6vWpI6wOZU1dGmM+Utj\nzIgxJuf+yxtjyhqbsta+FdgE3GyMWeHurwP4jLV2n7U2A9wDDFT4GdpefHkX6/tjnm3r+2Mcvsa7\nbcOaGCeZYKLDFEsmIq2i3Cu6vwReYa19vtwdG2P+FNjgDklOAnn3H8BK4EljzPHAfuBM4Nayqz4E\nfeAtWyqedQnBRIcplkxEWkFZEWDGmO8Bf2StLXs9GPfq7cvAWqAT+ASwAohZa7/kdoTvBtLAD621\n1/jtr9kiwBolOZkhmckT7woRX961qG3XYIoNa2KL2oKMLFMsWflUU3latSZFgDWncq/o/h54whjz\nX0B27k1r7WXFNrDW7gcu8mn/GvC1Mo8vrvjyLo450vv/cPHlXRx/lPcSKnORZUGIdobpX72i6X4x\niYjA0jq6fwKeC7AWERGRmiu3o5u21n4k0EpEREQCUG5Hd58x5lPAvwPzQYrW2p8EUlULqOael9+9\ntHK2ffHpIc97dH41NareRsnkMuxNDZHLhegKL61mv+VnglyaRkSCUW5HNzf1v3Bp4Vmc2ZKHlIPy\nHZeYHemXV9kV8f9W+G3rl2cJNKTeRsnlc3xr5z08PrSd0fQY
fdFeTuzfzJs2nks45N/Bz+ZyDN15\nO6ltW8mOjBBJJIgNbJlfULRYW0e4eSbgiMhiWnjVh9csq2/ct+OgfMc5Z528oWS+4zW3/twzruvw\nNTGuu+yVFW+7MM+ysCagLvU2yyy5O3fczQO7Hlz0/hkbTuPCTef5bjt4+9cZu+/eRe/3nnU2QNG2\nNZe8uez6muU8FVJN5dGsy9bl+ye9MabDGHOdMebMgve+aoy5LvjSmk81+Y6l8iq9ltYpZ1v/PMsh\nttrButfbKJlchseHtnu2PbFvO5lc8Zrz6TSpbVs921LbtpLc+miRtm3k02U/dSMiDVAqGeU64BXA\nUwXvfQzYYoz5cGBVNanxVJoRj8grcPIdx1PFf+HtGkz55lXuKhLMXGrbWfCM+AIYSaYZSXr/cg+y\n3kYZTycZTS+MVHWMTI8xni7+13h2fJzsyIh328gIuWJtoyNkx8eXXqyI1E2pju6NwEXW2hfn3rDW\nPg38T3yekWtXPbEoiZXeExD64t30xIpPTtiwJkaoyKBGqMNpr2TbDqAv7n3cRDxKIu49ESPIehul\nJxqnL9rr2Zbo7qUnWjyHMNLTQyTh/QxiJJEgXKytL0Gkp2fpxYpI3ZTq6HJeaSjW2hQwE0xJzaua\nfMdSeZV+sxn9tvXPs+xni1lT93obpSvcxYn9mz3bTli92Xf2ZSgaJTawxbMtNrCF+JaTirQNaPal\nSJMrNXVuvzHmWGvtrwvfNMZs5EBu5SGlmnzHUnmVlW5bTp5lvettlDdtPBdw7smNTo/R193LCas3\nz7/vZ252ZWrbNrKjI0T6EsQGBubfL9UmIs3Jd9alMeZcnFSUa4Gf44yUnQxcA3zAWvvNOtQINM+s\nyzmNfI6uWNZlI5+ja7ZZcplchnAsTy7VXM/RNdt5AtVULs26bF0lHy8wxpwDvB/nWbo88N/AJ621\n3w++vAOaraNrpGasSzWVRzWVp1VrUkfXnEo+9Wut/Q/gP+pQi4iISM2VFW9hjPld4EpgNc7wJQDW\n2kMuGaXR0jM59uzbT24mV9PhSSnP9FSSscEX6V1zGN3L6readDaZJL1rF9ENG4jEtYq1yFKUm+N0\nG/BZYDvOo1tSZwdFjyXTJOK1ifmS8mRnMjx8yw102+dYkcqyOxZh2hzJKW+/ikhncDNQ85kMz3/8\nY2R274J8HkIhutZv4IirP0ioq/lmvoo0o3I7uilr7ecCrUR83XH/zoOivIYn0ge9LtZWKuZLyvPw\nLTfQ/+iBycfxVJb4o7/mYW7g1Cs+FNhxn//4x8i88PyBN/J5Mi88z/Mf/xhHXaMFRUTK4dvRGWOO\ncP9zmzHmPcBdHLzw6vOeG0pN+UePDVFsQtG2Hfu44PRjNYxZpempJN3WeynGbvuc0x7AMGY2mXSu\n5Dxkdu8im0xqGFOkDKWu6H6MM1TZgbNSwbsL2maBYwKqSwr4RY+NJNMUmzg7F/MV1Mrih4qxwRdZ\nkcp6ti1PZRkbfJG1R5qaHze9yx2u9JLPk961i8jxx9f8uCLtxrejs9YeDWCMSVhrDwr7M8YcFWBd\nUmAuemzYo7NLxKPMzs56ZlqWivmS8vSuOYzdsQhxj85uMhbh6DWHBXLc6IYNEAp5d3ahkNMuIiWV\nGro8HOdq7nvGmD/gwIzLCPA94KXBlidwIHrMa7mduUgy7zb/mC8pT/eyONPmSOKP/npR27Q5MrDZ\nl5F4nK71Gw6+R+fqWq/ZlyLlKjV0eR3wWuAwoHA18Szw3aCKksXKiR6rJOZLynPK26/iYZxZl8tT\nWSYLZl0G6YirP1h01qWIF2NMGCfRahOwDNgBXOGVW7yEfd5mrX1LjUqsu7IWXjXGXGWtvaEO9RSl\nZBRHeiZHuKuTXGamqZ6ja8ZzFURN1T5HV2lNQT5Hd6h876rVKskobnTjudbad7qv/x/wjLX2C42t\nrHFKDV1+2Ou/51hrNb+5
zqKdYfpXr/D8P1y0M6yJJwHrXhYPZOJJKZF4XBNPpFy7gdcYY84D7seJ\ncDzCGPNTIAWsAa631t5pjHk9TnbxLHC3tfYGY8xm4EtAJ86z038O/NJa+1JjzEnAp93j/MJa+25j\nzMXAX+KshvM1a+1n6/dRy1PqaeIO998pwAU4WZcZ4FzAez0UERFpGGvtL3CSrC4DngP+DViLcwvq\njcDpwEeNMSHgU8A5wGnAacaY44FPAu+x1r4S2AasK9j9Z4E3W2tfA4TdLOSLgPcBpwL7g/+ES1dq\n1uV1AMaYh4BXWWsn3defAX4UfHkiIrIUxpgTgG3W2jcaYyLAXwPXA48U/A4fA/qB9cDd7qa9wLHA\nUTjh/Vhr/979+vndA19zX8eAJ3A61fcDH6VJ526Umw/Vz8HRX52A95LLLSQ9k2NwdJL0TK6m2yYn\nM/zqNyMkJxdP+a/WXNZlJTW3k0wuw9DkMJlc7c9xpaankux9zjI95X0fJ59OM7VnL/l0xXMCiu43\nMzjouV+/NnDO497UUEXnsdS+pWHOxhmOxFqbBR4H0sDLjDERY0wvsBLYBzwDvN5aewbwRZyhyqeB\nVwAYYz5pjClcgPKXwAXu138SZ/m2y4D3W2tfC7zFGOO9YnMDlRsBdjPwiDHme0AYZ+jy7wKrKmAH\n5UYuMRvSb9tcPl90odKuSLmnuozjLsi6PJTyLHP5HN/aeQ+PD21nND1GX7SXE/udhVXDocY8SlEq\nB3M2l2PozttJbdtKdmSESCJBbGAL/RdeQke48pr99gv4HrOa8xjU55Ga+Szwd8aYX+AMJQ4BH8bJ\nLL4HWAW811qbM8ZcA/zQGNMFPInzu/4q4Cb3avCXOMOXc64EvuV+/W+BPwF+gfMIWgq411qbqseH\nXIqyZl0CuDchz8C5svuhtfaxAOtapJazLr9x3w7P587OOnnDQdmQXrOs/La1z4/xwuDi7/Hha2Jc\nd9kr61Jzo9RrltydO+7mgV0PLnr/jA2nceGm8xpS00M3ffSgHMw5Qycdy6lXfIjB27/O2H33Lmrv\nPets1lzy5oqP67dfwPeYSzmPSzluNZ+nkGZd1pYb8PEFa+05ja6lEXwvBYwxf+j+71twJp8M4Vzu\nvtx9r+X450bu8x0S9Nt2644hdnl0cgC7h1JVDWNWU3M7yeQyPD603bPtiX3bGzKMWSoHc3J8mNS2\nrZ7tqW3bKh72y6fTPvvdSnLro0WPOT2VrPg8+h+38s8jEqRSY14nu//7Wo9/ZwRXVnD8ciPnsiEr\n2nYiXXT9ovwsRTvBclRTczsZTycZTY95to1MjzGerv8VQMkczGeeJjsy4tmeHR0hOz5e0XGz4+PF\n9zsyQs7nmGODL1Z8Hn2PW8XnkWBZa39zqF7NQel7dM8aY9ZZa/+sLtXUgV9uZKlsSN9tV0aLdnah\nDtiwpvL7s9XU3E56onH6or2MpEcXtSW6e+mJ1j8Sq1QO5pHHHMd0IkF2eHhRe6QvQaSnp6LjRnp6\niBTbbyLB7OysZ2cX6UvQu+Yw+nZVdh59j1vF5xEJUqkrutcDPzfGPGmM+VtjzB8YY5bVo7CgzOVG\neimVDem37ZZN/UU7s/X9MeLLK18ks5qa20lXuIsT+70f3zxh9Wa6wvVfiHQuB9PLtDmS5T2riA1s\n8WyPDQwQilb2R0ooGvXZ7xbiW04qeszuZfGKz6P/cSv/PCJBKvUc3aUwfyPzNcCbgE8YY/YBP2h0\nLFilysmNrGRbv1mXjay5nbxp47mAcy9pZHqMRHcvJ6zePP9+I5TKwZybBZnato3s6AiRvgSxgYH5\n9ytVzn6LtRWex9HpMfqWcB6D+jwiQVnKrMtenCfqT8F5vGDaWntKgLUdJIisy1LZkH6zrPy2TU5m\n2DWYYsOa6q7kih23WNZlI9V7llwml2E8naQnGi96BVLvmkrlYObTaVaGs0zkIjW98smn02
THx4n0\n9Czar18bOOcxHMuTS4WWfEVcat/VONRmXb7hvXctx0kg2fOdT50/Wck+pDjfjs4Y8yqc4ctzgNU4\nuWk/AO5fuD5d0BTqfEAz1qWayqOaytOqNS21o3vDe++K4Dx4/UbgcOAFnMiu933nU+d7z3IqgzsK\nd7u19vcK3jsHOMJa+6VK91vkWH8EPGytfbGW+62lUpNRHgK+D/xva633fGUREanUJ3ECkeccVfD6\nPbU8kLX2P2q5vwL/F7gcaNmO7jzgdcBtc/flcO7N/XfglTW5apbE8dt2eHwK+/wY5oheVvW09Lyf\nQ06QQ3kTqRH27NnJunUbWRkrP31vcnyYsWeepveY41jes6qmNUl13OHKNxZpPv8N773rA9UOYxpj\nHgAGcSIb/xk4DrgW+CbQAywHPmCt/cGC7d6Ek5Ayg9OBXQLEgX/ESVYBeDdwBE5c2G3GmNOA/+N+\nbRb4ibX2KmPMqTjh0TPAJPDHOIsF3IKTr3kY8Dlr7U3VfFY/pSajfBc3pNO9FH49cJUx5mU4SzQc\ncnefg4oPy2RzXHXTz0hNHRitiC2LcMMVr2JZV2fQH0uqEGQkVjo9xb03fYiXPDtCfH8euyLEb49O\ncPYVHyUaLf6H0Mz0FNs/chXRoQlCszDeAen+lWz+8A10dusPqCaxDme40svhbvviyJ2l+2dr7beN\nMW9zXx+LcyvqHJwle7yilf4n8Elr7b+44SArgatxUrFuMsYcB3zZWnuaGzV2OU7g80XA7+N0dP/q\nho6cjtOxfgbn4qnPPf7t1tpvGWMOA34MBNbRlRWSaIzpBo50i+vGWaqn7vfMmsEd9+/kvkd2Mew+\nMzc8kea+R3Zxx/07q9p2YScHkJrKctVNPwvmg0jNDN15O2P33es8WzY7S3Z4mLH77mXoztur3ve9\nN32ITU/uo2d/nhDQsz/Ppif3ce9NH/LdbvtHrmL54AThWedP5/AsLB+cYPtHgl0RXZZkD849OS8v\nuO21YA96Ye12nADnfwY+D4SMMacZYx5w/50L/BVwpjHmxzgdVx44AbjMvUq8mcXB/i8F/staO2Ot\nnQX+EydR629wrtp+iHM1N4OTk/lGY8w/AR/EWSggMKUiwD5jjPk5sAvng+8HrrbWnmit/Z9BFtaM\ngooPe/Sp3y7q5OakprIMj08tvVipiyAjsSZSI7zkWe85Xy95doSJlHfb5Pgw0aEJz7bo0AST44sf\n9pb6c4cl/61I8101nH2ZL3zhLuMTt9aeC7wV+Adr7YPW2jPcf/cA7wCutdaejvO30h8BTwGfdlcu\nuAj4p4L9h9z2U9wVEjpwHknbgRP8/BV3dYPt7r7fC/zMWvsnwJ3uMQJT6h7dIPAu4FFr7aERqOij\nnCiuYit8+26bmvE9rn1+jN8/QcNNzaicSKyuNWsq2veePTuJ7897tsX259mzZycrj1scFj72zNOE\nioy3hGad9uUDul/XJN7n/u/5HJh1eVfB+0F4GrjGGHMRTgf1YY+v+TnwXWNMEmdV8rnbWP9ojHkH\nzlDmte7X/hRnZYTX4QxRPuTu90GcjvyVwC3GmP04neI7cCbd/IMx5hJgDMgaY6LW2kDyDEt1dJ04\n47jnFCy8N89a+5EgimpWgcWHxTp9OztzRG9lBUvggozEWrduI3ZFiB6Pzi61IoRZ5x0W0HvMcYx3\nOMOVC+U7nHZpDu4jBO95w3vv+gA1fI7OWvsb4PcWvPeVgpd/XGL77wDf8WhaNHnGWvtBnOFHgL91\n/xV6eGEtwLPAy/xqqKVy7tF1+Pw7pAQVH3bSS19CbJn33xyxZRHNvmxiQUZirYwl+O3R3jMsf3t0\noujsy+UIXWLcAAAgAElEQVQ9q0j3r/RsS/ev1OzLJvSdT50/+Z1Pnf9rPSwejFKzLq/zet8dfz06\nkIqaXFDxYX/0mqOLzrqU5hZkJNbZV3x0ftZlbH+eVM
GsSz+bP3zDQbMu8wWzLkUONWVFgBlj3oUz\nc2ZFwdvPWmvrFrTYbMkojXyOrlVTI+qt3jWV8xxdpTUF+RydvnflaeWFVw91pe7RzXkv8HLgeuD9\nOGvRnR1QTS0h2hkuOvGkmm1X9SzTxJMWFYpGK554UsrKWMJz4kkpy3tWaeKJHPLKeo4OGLTWPgs8\nDpzg3tRcPDtFRESkyZR7RbffGPNanI7ujcaY/8Z5ul1ERKp00R1XzK9e8M2Lb9KElBor94ru/+BE\nt/wHTs6ZBf4hqKJqKT2TY3B00vNhbr+2ICUnM/zqNyMkJzNLrik9k2PPvv11rTmTyzA0OUwmt7je\nUlKZFHZkJ6lMqqb7nZ5Ksvc5y/SU9z2TfDrN1J69ng9sl9o2KJnhYQYf+DEZj0cRSp0Lv5rz6TSZ\nwcGqHk73Umq/QR231WqqxkV3XBG56I4rPo3zILUFtl90xxWfvuiOK8q9CPFkjDnKGPNfC947x30G\nrq6MMX9tjFnSuLub0PLSWtVQ1sm01m43xrwPJ7zzOuBCa633k6wuY0wYJybG4MSFXW6tfbKg/Q04\nDypmgVuttTdX9hG8+eVKAhXnVVYjk80WXZg1HAr51nTQ50mmScSDrzmXz/Gtnffw+NB2RtNj9EV7\nObHfWZwzHPKfgJPJZrhx6+fZk9pLnjwhQqyLreXKLe8kHApXvN/sTIaHb3EWOV2RyrK7YJHTSGeX\nb+5kLp/z3TYouakpnr36feRTBzr7UCzG0R//JES7fM+F3+cNh8KBZGyWyu4MMtuzlWqqkXZYvaDU\ncT/RiOMWKnfW5dnAV3FSrMM4idMX+a1iYIx5I3CetfYyY8wZwHustee7bZ3Ar4DfxYkVewj4Q2vt\nb4vtb6mzLr9x3w7ue2TXovfPOnkDQNG2S886kG9a65lf19z6c14YXHxlc/iaGOaIXt+a/D5PYc21\ndOeOu3lg14OL3j9jw2lcuOm8g95beK7+5uefYXdq8aod62OHcVzvMWXvd6GHbvoo/Y8uzrkdOulY\nTr3iQwze/nXG7rt3UXvvWWfz9OgzvtsGZedfvuugTm5OKBZj2ztf53su/D7vcX3HFP2say55c9n1\nLfze+Z3DNZe8uWR7LbRCTUW+puxZl+5w5Xaczm2hZ4GXVTqMObceHTDNElcvKPj9/HJr7X5jzJVA\nDvgX4EvAMmAKJ+EkjPNg+TDwPZwUlbfiJKD8t7X23caYr7i1/Bj4Mk5uchdO6tYj7nvHuPv6W2vt\nHW6e5uXAXpyosZU4F2YftNbeb4x5EideLFPO4gLlXgp8GvgDa+3J1toB4EJKJE1ba//NPRG4H2ys\noPl4YKe1dtRam8GJinlNmbWU5JcrudUOVZxXWY3kZIbdQ4t/2QHsHkrx6FPeffy2HftITmbqXnMm\nl+Hxoe2ebU/s2+473JjKpNiT2uvZ9mJqD48NPVHRfqenknTb5zzbuu1zTI4PF82dTG59lO6nflN0\n26CGMTPDw56dHEA+leLpZ7d5tj2xbzsTqRHfzzux1XuJyGoyNktld2aTycCyPVupphopZ/WCWvhn\na+1ZOJ0VHFi94A04qxQcNLJnrZ0B/hW4wH3rUpyIrxuBv3ezLm8E5q7U1gKvs9b+P+DPgHdZa18F\n/MoYU7jvy4HfuG2XAKcAfwEMWWt/HzgL+JgxZnXBNh8E7rXWvgan3/lH9znuGPDRclfQKXccOG2t\nfWzuhbX2Efdgvqy1WWPMV3ECQQsjZ1YC4wWvkzh/XRTV17ecSKS8IYg9+/YzkiyWSVn8h340OU24\nq5P+1QceF+zvj5d1zFJefHqIfJFr0vxs8bzL0eQ0yUze5/MsrrkW9qaGGE2PebaNTo8RjuXpjx18\nbubO1d7fvkge75HtWWYZTY97thXb75wXfv0CK1Le4dfLU1lyw7uK5k7mRkdZUWT0YnkqCzNj9B9x\nmGd7NQa3e/8Cnh
N/YR8cs/hxktHpMVKp3UU/74pUljzFMzZXhrMs61/t2e5l7ns3tWe/b3bnstSw\nb/tSj9uqNdXI3OoFR3m0Bbp6gTFmbvWCTuDv3bXkPuZ+ySdx1oq7yRjzlLOJHXbDoN9vjLkKJxlr\n7pfWs+4FCzgd3ZXGmKOBn3FwgpYB/t2t4WngM8aYzwH3ue8ljTG/xOmI5xwPfN1t322MmcBZWmjR\n5/JTbkf3sDHmFpx7blmc3vg3xpjXuAX8pNiG1tq3uifmYWPM71hr9wMTOIv4zYlz8BXfIqOj5V/B\n52ZyJOLFMimjdHRQNK8yl5mZH56o5dBlvCtEqAPPzi7UAT0rvPMu++LdxLtCPp/n4JprJZcL0Rft\nZSQ9uviY3b3kUiGGCq6CCs9VLLuSECHPzq6DDnqjKz07O6/9HqSzl/2xCHGPX/6TsQjhVRuK5k6G\n+/pIpseJ71989TsZi0BnbyAPKM+sPdK3PXn4apzR+4P1dfcSi61nT5HPuz8WoadrJXmPX/CRvgQT\nuQipMj9P4fcun4v4ZndOxVb5ti/luK1ck9/XlOubF980edEdV/wbB9+jm3NXDWdfFl29wBizDvip\ntfZonOejC7+uAydcem707ingRmvtT92JIqd77P9/4czHmDbGfB9niZ85c7er7jLGHIPTsf4UeDXw\nbWNMHGcpoGcXbPNqYJsxZj3ObP+5b7TvPJFC5Q5dHo/Ty34C55L1ZJwx3+s4kGB9EGPMnxpjrnZf\nTrpFzRX2K+A4Y0zCGNOFM2xZs4XX/HIlt5j+ivMqqxFf3sX6/phn2/r+GCe99CVFa4ov76p7zV3h\nLk7s3+zZdsLqzXSFi0/eiHXFWBdb69l2WGwdL+8/oaL9di+LM228O45pcyTLe1YVzZ2MbzmJ6Zce\nVXTb7mU1/Ut9XteqVYRi3t/3UCzGcUcPeLadsHozK2MJ38+7cstJnm3VZGyWyu6MxOOBZXu2Uk01\n9D6cBUmfxbmIeNZ9HfTqBWcYY36Cs0SO1+oF4KwmPgD8yH19Jc6qBz/GGcp83GObJ4D/NMbcj3Nv\n8OGCti8CxxRs/7c49/xWGWMeBB4ArrPWDhZs8zc46+L9BGclhHdYa72HOXyUNRmlEsaYFTg3Gdfi\nXB5/AidCLGat/VLBrMsQzqzLz/ntb6mTUQ7MUvTOpCzWVjiDsdaTUcqbdeldk9/nCXrW5RP7tjMy\nPUaiu5cTVnvPjlx4rsqZdVnOfhcqnIW4PJVlsuisy8W5k4WzLr22DUo5sy6LnQu/z3tg1uXiz7qU\nmYYLv3d+5/DgGY7VHbfVayryNRVFgOk5umCVO+vySJwx26NwLiO/AVzmLgVRF5VmXfrlSpbKqwwq\nby85mWHXYIoNa2LElx/8C7ZUTemZHOGuTnKZmcCuPhfK5DKMp5P0RONFr7iKnatUJsXu1F7Wx9YS\n6zr4yqac/RYzPZVkbPBFetcc5nk1lk+nWRnOMpGLLPqLvtS2QckMD9O59zlm1h5J16qDY7lKnQu/\nmsvJ2PRT7HtXar/VHrddalrwNcq6bELldnT/gXOZeQOwBXg78KfuTJi6aLZQ50ZqxrpUU3lUU3la\ntSZ1dM2p3DGv1XPPWVhrZ92Hu70XvBIREWki5XZ0U8aYDTgJJ7hTUZv24ZRyNSoC7FDiF21VTQRY\nOcfdmxqq6Lh+7X4RU9V8niDPRSO0WhSXtLdyHy94D/Bd4FhjzC9wZlxeGFhVAfOLBwsyAuxQ4hcf\nBlQcARbkcf22Dc1SNGIq31H556kmZq0ZtXAUl7Sxkh2dMeYPgV/iPP/w18BrgXsA71iGFnDH/TsP\nitMankjPvw4qTutQ862d9xwUbTWSHj3odbG2UhFgQR7Xb9vTtyYPipjKDg/Pv/7xlnjFn8fvmNWe\ni0YYuvP2ouepVlFc7eih8y+Yn3V56l3/qlmXNeZ7+eJmnF0DdOM8S/fXODMul+E8
T9dy/OLBgowA\nO5T4xYc9PvQkjw096dlWKgKsuuNu57HB4sdNZVJFt/3l3idIFosW27aV7XsqizSrJmatGZWK6tIw\n5mIPnX9B5KHzLzho9YKHzr/g0w+df0HTrV6wlFUISh3LGPM2Y0zd/pIrdTL/FHiVtXbSGPMJ4G5r\n7S3uE/O/DL682htPpRnxSBgBJ05rPJWueOVwcYynk0Xjw0aKvA8wMj3GeDpJ//LKVsT2O+5oeoxZ\nvCfujkyPsTu1t+i2mdERcsWixUZGmBkD4ouH5Up9Ht/zVOW5aITs+LhvFFd2fDywFdhbWMusXrCU\nVQhKHctdvLtuSnV0s9baucvo1wKfB2fmpTGtucB4TyxKYmXxOK2eWFOnKLSEnmi8aHxYItrLLHj+\ngk9099ITrfy5Nr/j9kV7mZ2dZTTjfdz1sbVFt+3qSxBOQM4rWiyRoLM3AbnFkWalPo/vearyXDRC\npKfHN4or0uMbZ3vIcYcr31ik+fyHzr/gA9UOY7qrANRq9YKX46xCsBa4DGdE8BqczvldwAiQAe5w\nd/VS4AvucV/ASdf6ubX2CmPMtTgrE3wRZ23TV+KsaHANznyQL3Ig2Ppua+0HqzkPpWZeZI0xve6M\nywHgB+6JOBInrqbl+MWDBRkBdijxiw87sf9lvLz/ZZ5tpSLAqjvuZl6+pvhxY12xotv+ztoTiBeL\nFhvYwuZ1lUWaVROz1oxKRXU1eRRXI7Ta6gWFRq21pwGPAVcBpwKvw0m/WmgT8Oc4ndn/MMYU5gO+\nEefxtVfiXEydjPPZ/8ta+3p3m8uX+oEXKnVF9wngF+7X3WKt3WOMuQgnf+y6ag/eKHMxYMXiwaR6\nc7McvaKt5vi11eK4o9Nj9C3huH41h9w8da+IqTd1VP55yjlPraT/QmfVFK/zJIu02uoFXvvcCPxy\nbuTPGPNTj+PvtNYm3fY9OHM+5hjcnGNr7SjwIWPMSuB3jTGvxVkAoOq/kEomoxhjDsPpcR93X/8P\nYNJa+0C1B1+KIJJRGhUBVq1mrKtYTX7RVtVEgJWSyWUIx/LkUqElH9ev3S9iqpqotCDPRSlB/DwF\nFUvWSEEko7gTUbxWL/jMqXf9a8X36BYsvHq5tfYpY8zbcIYTvw6cbq397ILVCxbu44c4w5E3uYud\nfoUDQ5cvtdb+tbt23IM4I35pnCV35q7+5oYub7fW/p67z//CWf3mbThDl3uAC621f2KM6cEZUr0H\nWG+tvcoYsxGnU41YayvuA0rO7LHWvoizsvjc6+9VerBmE+0Ma+JJwLrCXUUnVPi11eS4sbjnkj+l\njuvXHopGi06oqObzBHkuGsHvPMlB5lYpOB9nyO4F4C6CX73gGnd0LoT/6gUf4cDqBYtYa/cZY24A\n/hOnU1yGs05dZ5m13A2c5a5eEMEZKXwe+IYx5lU4nefTwGHA7jL3uUhgqxfUmrIuD2jGulRTeVRT\neVq1pkqzLlv1OTp3BfGrrLXXu7Pxf4IzuaXoGqWNUNWzGiIiUj23c/t1o+tYKmtt1hizwhizFWfG\n5cM4V3dNRR2dBMrvXo3fEj6ltq1GNcf1a8smk6R37SK6YQOR+OJHA+byN3O5xfcNq9HI+3uVCup7\nG+QyPeLNWvt+4P2NrsOPOjoJhF/m4cxsruiirF2RrsDyEv0Wgy11XCiedTmby/H8xz9GZvcuyOch\nFKJr/QaOuPqDhLq6AsuzbMWczKC+t8rYFD/ha6+9ttE1lGVyMnNtvY+5YkWUycnmi2FqxroW1jT0\nzX9m7L57yU9NAZCfmmL6mWfIT0/x2ekfszv14nxSySyzJDNJnhx+ilev/z3fbVe87MSKa7rhkX+o\n+LiTv3yyaNu+f/s2mReeh7n73bOz5CbGST3+GL1nvJZ/ffq7PLDrQaZy0wBM5ab5zcTzTGXTbF5V\nefBCrfZbz5+ncr+3S62pVj8zfsqpacWKaMs+
dtXOFNUvNeeXeTixbStDY96PB+1J7WUiNRJIXmIq\nk2JPam9Fx01u3Upqa5Gatj5KZtcLnm2Z3buYHB8OJM+yFXMyg8rCVMamlKKOTmrOL/MwNzLCsqkZ\nz7Y8efbs2VkyL7ESu93hykqOmxsdITtarKbRA1dyi3acZ+yZp0vmWVainJzMZlNOFmYz7Vfahzo6\nqbm5zEMv4USCqWXej9iECLFu3cai21aTl7g+tpZQkR/3UscN9yWI9BWrqQ86iswoD4XoPeY4+qK9\nns3V5FnO5WTWer9B8vu5qOZ7G9R+pX2oo5Oa88s8XDmwhf5e7wi/dbG1rIwlAslLjHXFWBdb69lW\n6rjxLVuIbSlS05aT6NrgHVfYtX4Dy3tWBZJn2Yo5mUFlYSpjU0rRZBQfzTjpA5qzroU1LT9+M/np\nKbLjE+TT00QSq1h56qn0X3gJv7fuZJ4cfor9mf3MMkuIEIfF1nHllncSDoV9t+1YwgrwC2s65SVb\nKj7uis0vK9rWc+qrST3+GLlU0hnGDIXo2nA4R1z9QTrCYV7at5GpbJpkJkk6mybR3ccpa092MjQ7\nKv9bs3C/09k0qyrcbz1/nsr93i61plr9zPjRZJTWpWQUH82YzgDNWVexmhr5HF2xmhr5HF2x/M1q\nVPscXSN+nkp9byutKcjn6IJMRpFg6Tk6CZRf5mGsK4ZJFF8xIqi8xGqO69cWiceJHH980f365W9W\noxVzMoP63ipjU7zoHp2IiLQ1dXSHiEwuw9DkcN2fr6rmuOMje7E/v5fxEe/n30odd29qqOafN59O\nkxkc9Hw2y69NRBpHQ5dtrlExUdUcd3oyxWPXXsnKkWlCOGtEPZXo5uXX3kj38sX31Gp1XD+VxoMp\nfkqk8dTRtblv7byHB3Y9OP96JD06//rCTec15XEfu/ZK+kam51+Hgb6RaR679kpO+X9fCOy4fobu\nvJ2x++6df50dHj7odbG2NZe8ueJjikhtaOiyjTUqJqqa446P7GVlQSdXaOXotO8wZlCf1z9iaivJ\nrY8WaVP8lEgzUEfXxhoVE1XNcffufKLoD2Vo1mkP4rh+fCOmRkbIKX5KpKmpo2tjjYqJqua4azee\nUCSREvIdTnsQx/XjGzGVSBBW/JRIU1NH18YaFRNVzXF7EmuZSHR7tk30ddOT8I7xqva4fvwjprYQ\n33JSkTbFT4k0A01GaXNv2ngu4NyjGpkeI9HdywmrN8+/34zHffm1NzqzLkenCc06V3ITfc6sy6Uc\nd3R6jL4afd652ZWpbdvIjo4Q6UsQGxiYf79Um4g0jiLAfDRj1BZUVle1MVGV1lTNccdH9rJ35xOs\n3XiC75Wcl6DitiqNB5vTjD9Tqqk8igBrXbqiO0Q0KiaqmuP2JNbS88qldXAHHTeAuK1K48FEpHF0\nj05ERNqaOjoREWlr6uikapXmSpbKwQwqn1OZlO1L31vxont0UrFKcyVLbdeIvEplUrY2fW/Fjzo6\nqViluZKltmtEXqUyKVubvrfiR0OXUpFKcyVLbZfKpBqQV6lMylam762Uoo5OKlJprmSp7Xan9tY/\nr1KZlC1N31spRR2dVKTSXMlS262Pra1/XqUyKVuavrdSijo6qUiluZKltot1xRqQV6lMylam762U\noskoUrFKcyVL5WAGlc9ZTl6ltCZ9b8WPsi59NGPeHjRfXZXmSpbKwaw2n7PYeSonkzIozfa9g/aq\nKcjvrbIuW5eu6KRqleZKlsrBDCqfU5mU7UvfW/Gie3QiItLW1NE1mWoijIKKzCrnuMUiwPxqasW4\npumpJHufs0zXeFWEUlrxXIk0Cw1dNolqIoyCiswqxe+4QNG20CwtF9eUncnw8C030G2fY0Uqy+5Y\nhGlzJKe8/SoincGs1A6KthKphcA6OmNMJ3ArcBQQBT5mrb27oP09wNuBIfetv7DW2qDqaXbVRBgF\nFZlVit9x
gaJtp29Ntlxc08O33ED/o7+efx1PZYk/+mse5gZOveJDgR1X0VYi1Qty6PJPgGFr7auB\nc4DPLmg/CXiLtfYM998h28lVE2FUaRRXtfyO+/jQdh4bfNKz7Zd7nyDZYnFN01NJuu1znm3d9rnA\nhjEVbSVSG0EOXd4J/Iv73x1AdkH7ScDVxpi1wD3W2o/77ayvbzmRSP2Havr7K0viWIqpPft9I4xW\nhrMs61/tWdfe1FDRyKzR6THCsTz9sdp/Bt/jpseYxftpkMzoCLklftZq1OL798KvX2BFauGPr2N5\nKgszY/QfcVjNa6rk56JS9fg5XyrVJLUSWEdnrU0BGGPiOB3eBxd8ye3A54AJ4NvGmD+01n632P5G\nRyeDKrWoej1flM9FiCQSZIeHF7VF+hJM5CKkCuoorCuXC9EX7WUkPbpo277uXnKp0JKn/ZfD97jR\nXmZnZxnNLO4Iu/oShBOQK/OzVqNm37/OXvbHIsQ9OrvJWAQ6e8s+zlJqWurPRaXa6Tm6IJX5HF2d\nqpGlCHTWpTHmcOBHwNestd8oeL8D+Iy1dp+1NgPcAwwEWUszqybCqNIormr5HffE/s28fM3LPNt+\nZ+0JxFssrql7WZxpc6Rn27Q5ku5lwfxyU7SVSG0EORnlJcAPgHdZa3+4oHkl8KQx5nhgP3AmzsSV\nQ1Y1EUZBRWYt5bjFIsC8agod67S1UlzTKW+/iodxZl0uT2WZLJh1GSRFW4lUL7AIMGPM3wEXA08V\nvH0zsMJa+yVjzJ8C7wbSwA+ttdf47e9QiQArJ8KoWF3VRmZVyi8CzK+moKO4gvj+TU8lGRt8kd41\nh1V0Jdeq0Vb11qo1KQKsOSnr0kcz/p8NmrMu1VQe1VSeVq1JHV1zUjKKiIi0NXV0IiLS1tTRNZlG\n5VVWI5VJ8cRvLalMqtGliIgsoqzLJtGovMpqZLIZbtz6efak9pInT4gQ62JruXLLO+mK1G8yjIiI\nH13RNYm53MiR9CizzM5nQ35r5z2NLq2oG7d+nt2pF8mTByBPnt2pF7lx6+cbXJmIyAHq6JpAo/Iq\nq5HKpNiT2uvZtie1V8OYItI01NE1gfF0smhu5Mj0GOPp5ppmDbDbHa704lzZeXeCIiL1po6uCfRE\n4/RFez3bEt299ESbLz9vfWwtoSI/PiFCrI+trXNFIiLe1NE1gUblVVYj1hVjXZHObF1sLbGuWJ0r\nEhHxpo6uSbxp47mcseE0VnX30UEHq7r7OGPDaYHnVVbjyi3vZH3ssPkrO+dK7jCu3PLOBlcmInKA\nHi9oEuFQmAs3ncf5x57TkLzKSnRFunj/K/+SVCZFKjJBLLtSV3Ii0nTU0TWZrnAX/ctXNbqMJYl1\nxTi6f13TZROKiICGLkVEpM2poysiPZNjz779pGdyjS7lIJlchr2poaZ6tq4ZawpKK0a0iRzqNHS5\nQC6f5477d7JtxxAjyTSJeJSBTf1cfOZGwqHG/V3QjBFhzVhTUA6lzyrSbtTRLXDH/Tu575Fd86+H\nJ9Lzry89a1OjypqPCJszFxEGcOGm81RTwA6lzyrSbjR0WSA9k2PbjiHPtm079jVsGLMZI8Kasaag\nHEqfVaQdqaMrMJ5KMzKR9mwbTU4znvJuC1ozRoQ1Y01BOZQ+q0g7UkdXoCcWJbEy6tnWF++mJ+bd\nFrRmjAhrxpqCcih9VpF2pI6uQLQzzMCmfs+2gU2riXY2ZtJBM0aENWNNQTmUPqtIO9JklAUuPnMj\n4NyTG01O0xfvZmDT6vn3G2UuCuyJfdsZnR6jr7uXE1ZvbmhEWDPWFJTCzzoyPUaijT+rSLvpmJ2d\nbXQNZRkaSta10PRMjnBXJ7nMTMOu5LxkchnCsTy5VKhpriSasSaA/v54zdNaMrlMVRFtQdRULdVU\nnnJq6u+Pd9SpHFkCDV0WEe0Ms271iqbq5MAZRlsb62+qDqUZawrKXETbof
BZRdqFOjoREWlr6uhE\nRKStqaOTtjQ9leSFX/+S6aml3+dRnqVIe9GsS2kr2ZkMD99yA932OVaksuyPRZg2R3LK268i0ul/\nX015liLtSVd00lYevuUG+h/9NfFUlhAQT2Xpf/TXPHzLDSW3ncuzHEmPMsvsfJ7lt3beE3zhIhIY\ndXTSNqanknTb5zzbuu1zvsOYyrMUaV/q6KRtjA2+yIpU1rNteSrL2OCLRbdVnqVI+1JHJ22jd81h\n7I9533aejEXoXXNY0W2VZynSvtTRSdvoXhZn2hzp2TZtjqR7WfHOSnmWIu1LHZ20lVPefhVDJx1L\nMhYhByRjEYZOOpZT3n5VyW3ftPFczthwGqu6++igg1XdfZyx4TTlWYq0OD1eIG0l0tnFqVd8yJl4\nMjMGnb2+V3KFwqEwF246j/OPPaeqPEsRaS7q6KQtdS+L03/EYRUFA8/lWYpIe9DQpYiItDV1dCIi\n0tbU0YmISFtTRyciIm1NHZ2IiLQ1dXQiItLW1NGJiEhbU0cnIiJtTR2diIi0NXV0IiLS1tTRiYhI\nW1NHJyIibU0dnYiItDV1dC0mk8uwNzVEJpdpdCkiIi0hsGV6jDGdwK3AUUAU+Ji19u6C9jcAHway\nwK3W2puDqqUd5PI5vrXzHh4f2s5oeoy+aC8n9m/mTRvPJRwKN7o8EZGmFeQV3Z8Aw9baVwPnAJ+d\na3A7wU8DrwNOB95hjHlJgLW0vG/tvIcHdj3ISHqUWWYZSY/ywK4H+dbOexpdmohIUwuyo7sT+JD7\n3x04V25zjgd2WmtHrbUZ4EHgNQHW0tIyuQyPD233bHti33YNY4qI+Ahs6NJamwIwxsSBfwE+WNC8\nEhgveJ0Eevz219e3nEik/kN0/f3xuh9zob2pIUbTY55to9NjhGN5+mONr7MZztVCqqk8qqk8zViT\nlBZYRwdgjDkc+DbweWvtNwqaJoDCn5g44P2b3DU6Oln7Akvo748zNJSs+3EXyuVC9EV7GUmPLmrr\n6+4llwoxNNXYOpvlXBVSTeVRTeUppyZ1hM0psKFL957bD4CrrLW3Lmj+FXCcMSZhjOnCGbb8WVC1\ntIu0vYkAAAhCSURBVLqucBcn9m/2bDth9Wa6wl11rkhEpHUEeUX3fqAP+JAxZu5e3c3ACmvtl4wx\nfwV8H6ezvdVauzvAWlremzaeCzj35Eanx+jr7uWE1Zvn3xcREW8ds7Ozja6hLENDyboX2ozDJ5lc\nhnAsTy4VaqoruWY8V6qpPKqpPGUOXXbUqRxZAj0w3mK6wl2sjfU3VScnItLM1NGJiEhbU0cnIiJt\nTR2diIi0NXV0IiLS1tTRiYhIW1NHJyIibU0dnYiItDV1dCIi0tbU0YmISFtTRyciIm1NHZ2IiLS1\nlgl1FhERqYSu6EREpK2poxMRkbamjk5ERNqaOjoREWlr6uhERKStqaMTEZG2po5ORETaWqTRBTQT\nY8wa4FHgbGvtUwXvvwd4OzDkvvUX1lpbh3q2AhPuy2ettX9W0PYG4MNAFrjVWntz0PWUUVOjztPV\nwHlAF/B5a+0/FrQ16jz51VT382SMeRvwNvdlN/AKYK21dsxtr/t5KqOmRpynTuCrwFFADvhfC34X\nNOTnSaqjjs7l/oB/EZjyaD4JeIu19tE61tMNdFhrz/Bo6wQ+DfwusB94yBhzt7X2t42qydWI83QG\n8PvAqcBy4MqCtkadp6I1uep+nqy1XwG+4tb3OZxf0nMdSkPOk19NrrqfJ+B/ABFr7e8bY84Grgcu\ncGtsyHmS6mno8oAbgS8AL3q0nQRcbYx50P1LvR5eDiw3xvzAGHO/Meb3CtqOB3Zaa0ettRngQeA1\nDa4JGnOeXg88AXwb+A7w3YK2Rp0nv5qgMecJAGPMycBma+2XCt5u1Hnyqwkac552ABFjTAhYCcwU\ntDX0PEnl1NExP4QyZK39fpEvuR24HD
gTOM0Y84d1KGsSp/N9vXvsrxtj5q7AVwLjBV+bBHoaXBM0\n5jytBk4GLiyoqcNta9R58qsJGnOe5rwfuG7Be406T3O8aoLGnKcUzrDlU8DNwN8XtDX6PEmF1NE5\nLgPONsY8gHOf4DZjzFoA9xfUZ6y1+9y/4u4BBupQ0w7gn6y1s9baHcAwsM5tmwDiBV8bB8YIXtGa\nGniehoHvW2sz7v2baaDfbWvUeSpaUwPPE8aYXsBYa3+0oKlR56loTQ08T+/B+d5twhnB+Ko7ZA8N\nPE9SHd2jA6y188MPbmd3ubV2r/vWSuBJY8zxOOPyZwK31qGsy4ATgHcaYw5z69jjtv0KOM4Yk8D5\nC/Q1OFdajaypUefpQeD/GmP+FqfTXYHT0UDjzpNfTY06T+B8/h96vN+o8+RXU6PO0ygHhitHgE4g\n7L5u5HmSKuiKrghjzKXGmHdYa8dxhlZ+BPwnsN1a+706lPCPQK8x5kHgDpxO5iK3phngr4DvAz/D\nuYm/u8E1NeQ8WWu/C2wDfo5zP+x/Axc38jyVqKlRP08ABnhm/sWBn/FG/Tz51dSo8/RpYIsx5j+B\n+90azm+C8yRV0DI9IiLS1nRFJyIibU0dnYiItDV1dCIi0tbU0YmISFtTRyciIm1Nz9FJ4Iwxfwxc\njfPzFgJus9Z+sob7vxbAWnutMWbWWttRYpNqjvUG4Dhr7d8WHtfj69YBn8R5yDkLvAC821r7zMKv\nFZFg6YpOAmWMWQ98CnidtfblwKuAS4wx5zW2soqdhPMwc1HGmBXAj4GfAC9zP/c/A/e6wcAiUke6\nopOgrcZJl1gODFtrU8aYtwLTxpjfxXlAdzmwD2cZlmfddJpfAafgLN/yl9baHxhjXgb8AxAD1gCf\nstb+/aIjejDGnAN8xK3lWZzlV4aNMb8BvoaT37kCNy3fPdZXcP4/8p/AH+Asu3O5u7/n3F2/0hjz\nU2A98GX36u4S4MXCkGJr7deNMWkgaox5M3Cuu80G4DPAETjpH8PAH1hrp8v5XCJSmq7oJFDW2seA\nu4BnjDE/N8bcgBOp9DxwC3CptXYLzlVf4dpeUff9S3HyBrtw1ib7mLX2d4HX4iyhUpIxph/4BPB6\na+0ATrLFDQVfMmytfSXO6hXvd9/7KvBha+0rcJI7ItbaX7pf8wVr7Zfdr3uJW8tJwPuMMXGc4cqH\nPc7Fv1hrU+7LVwLnAK92P/u/W2tPdNteX87nEpHyqKOTwFlrr8BJhL8JOBL4L+CvgWOBu40xv8Dp\neI4p2Oxmd9tf4ORpngi8F+h2l2y5HufKrhyn4Fwx/cg91ruA4wra/8P93yeBhJtleFRB5JRfxuK/\nW2vT1tp9OFelCSAPlLpP+JC1dsJaO3dlOJf3+BzQV86HEpHyaOhSAmWMOReIWWvvAL4MfNkY879w\nrtSeca+YMMaEca6O5mQL/jvkvv4mTujud3CWcLmkzDLCwIPW2vPcY3VzcAr93DDhLE4HlaN0R+VV\n59z2j3Bg5ex5xphbcIZqATKFbdba7MKvF5Ha0BWdBG0S+Lgx5iiYX37ld3Cu6hLGmFe7X3cZ8I2C\n7S5xv/5knCucJ4CzcYYT7wJOd9vDlPYw8CpjzCb39YdwZkR6cgOFdxpj/sB961KcTgycjq3UH4h3\nAkcZY/587g1jzJ8BZwA7y6hXRGpIV3QSKGvtj4wx1wHfLZhx+H3gGuBu4O/cK6wJ4K0Fmx5jjNnq\n/vfF1tqcO53/QWPMGGCB3wBHLzymMSZV8PI5a+1mY8xlwDfdjnEX8CclSn8rcKsx5nrgcWDKff8n\nOPcMf+vzmaeMMWcBnzbG/BVOJ/kMzszTtDGmxKFFpJa0eoE0HXfW5bXW2gcaWMOHgZuttXuMMW8C\n3mytvaBR9YhI5XRFJ+LteZzn3mZw7gv+eYmvF5EmpSs6ERFpa5qMIiIibU0dnYiItDV1dCIi0tbU\n0Y
mISFtTRyciIm3t/wNlY9GprCKBfAAAAABJRU5ErkJggg==\n","text/plain":[""]},"metadata":{},"output_type":"display_data"}]},{"cell_type":"code","metadata":{"id":"yadZQxoKF4M8","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1635011940282,"user_tz":-330,"elapsed":443,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"2f79fbfa-369f-48a0-8027-ae3f747a9adf"},"source":["#Plot the distritbution of the features using histgram\n","fig = plt.gcf()\n","fig.set_size_inches(12,6)\n","plt.show()"],"execution_count":48,"outputs":[{"output_type":"display_data","data":{"text/plain":["
"]},"metadata":{}}]},{"cell_type":"markdown","metadata":{"id":"xb-AFaG3PU0D"},"source":["## Importing alll the necessary packages to use the various classification algorithms\n"]},{"cell_type":"code","metadata":{"id":"cJVjbgAjF4M_"},"source":["from sklearn.linear_model import LogisticRegression # for Logistic Regression Algorithm\n","from sklearn import svm # for suport vector machine algorithm\n","from sklearn import metrics # for checking the model accuracy\n","from sklearn.tree import DecisionTreeClassifier # for using DTA"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"8LizCSuWF4NA"},"source":["df.shape"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"WW5Hp1fFF4NC"},"source":["Now, when we train any algorithm, the number of features and their correlation plays an important role. If there are features and many of the features are highly correlated, then training an algorithm with all the featues will reduce the accuracy. Thus features selection should be done carefully. This dataset has less featues but still we will see the correlation.\n"]},{"cell_type":"code","metadata":{"id":"YABeXMklF4ND","colab":{"base_uri":"https://localhost:8080/","height":270},"executionInfo":{"status":"ok","timestamp":1635011174157,"user_tz":-330,"elapsed":819,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"a9537d5f-a224-42a1-b4ae-f10891512872"},"source":["plt.figure(figsize=(8,4))\n","sns.heatmap(df.corr(), annot=True, cmap='cubehelix_r') # draws heatmap with input as correlation matrix calculated by df.corr() \n","plt.show()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"gsd6QaoaF4NE"},"source":["Observation--->\n","The Sepal Width and Length are not correlated The Petal Width and Length are highly correlated\n","We will use all the features for training the algorithm and check the accuracy.\n","\n","Then we will use 1 Petal Feature and 1 Sepal Feature to check the accuracy of the algorithm as we are using only 2 features that are not correlated. Thus we can have a variance in the dataset which may help in better accuracy. We will check it later.\n","\n","Steps To Be followed When Applying an Algorithm\n","\n","Split the dataset into training and testing dataset. The testing dataset is generally smaller than training one as it will help in training the model better.\n","\n","Select any algorithm based on the problem (classification or regression) whatever you feel may be good.\n","Then pass the training dataset to the algorithm to train it. We use the .fit() method\n","Then pass the testing data to the trained algorithm to predict the outcome. We use the .predict() method.\n","We then check the accuracy by passing the predicted outcome and the actual output to the model."]},{"cell_type":"markdown","metadata":{"id":"QAD_cNirF4NF"},"source":["# Splitting The Data into Training And Testing Dataset"]},{"cell_type":"code","metadata":{"id":"ZqSRd9GzF4NF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635011177764,"user_tz":-330,"elapsed":6,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"58101a90-1ffd-45b9-8dbe-3ef9e5f914a2"},"source":["from sklearn.model_selection import train_test_split\n","train, test = train_test_split(df, test_size=0.3) # our main data split into train and test\n","# the attribute test_size=0.3 splits the data into 70% and 30% ratio. 
train=70% and test=30%\n","print(train.shape)\n","print(test.shape)"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["(105, 5)\n","(45, 5)\n"]}]},{"cell_type":"code","metadata":{"id":"yO2J2FpjF4NG"},"source":["train_X = train[['sepal_length','sepal_width','petal_length','petal_width']] # taking the training data features\n","train_y = train.species # output of the training data\n","\n","test_X = test[['sepal_length','sepal_width','petal_length','petal_width']] # taking test data feature\n","test_y = test.species # output value of the test data"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"yR9D2qgQF4NG","colab":{"base_uri":"https://localhost:8080/","height":205},"executionInfo":{"status":"ok","timestamp":1635011258352,"user_tz":-330,"elapsed":749,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"f3bb7968-3684-416f-9a42-ab5fcbfff8f6"},"source":["train_X.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_lengthsepal_widthpetal_lengthpetal_width
244.83.41.90.2
895.52.54.01.3
134.33.01.10.1
645.62.93.61.3
255.03.01.60.2
\n","
"],"text/plain":[" sepal_length sepal_width petal_length petal_width\n","24 4.8 3.4 1.9 0.2\n","89 5.5 2.5 4.0 1.3\n","13 4.3 3.0 1.1 0.1\n","64 5.6 2.9 3.6 1.3\n","25 5.0 3.0 1.6 0.2"]},"metadata":{},"execution_count":36}]},{"cell_type":"code","metadata":{"id":"KcGbNGkcF4NH","colab":{"base_uri":"https://localhost:8080/","height":205},"executionInfo":{"status":"ok","timestamp":1635011262248,"user_tz":-330,"elapsed":623,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"dcc6f9ba-8240-4f3f-8d74-ff21e5ac54e3"},"source":["test_X.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_lengthsepal_widthpetal_lengthpetal_width
34.63.11.50.2
454.83.01.40.3
1406.73.15.62.4
465.13.81.60.2
535.52.34.01.3
\n","
"],"text/plain":[" sepal_length sepal_width petal_length petal_width\n","3 4.6 3.1 1.5 0.2\n","45 4.8 3.0 1.4 0.3\n","140 6.7 3.1 5.6 2.4\n","46 5.1 3.8 1.6 0.2\n","53 5.5 2.3 4.0 1.3"]},"metadata":{},"execution_count":37}]},{"cell_type":"code","metadata":{"id":"5sFmts-IF4NI","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635011269110,"user_tz":-330,"elapsed":486,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"5892e853-6e9a-4f5e-e684-8c34c4822f55"},"source":["train_y.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["24 setosa\n","89 versicolor\n","13 setosa\n","64 versicolor\n","25 setosa\n","Name: species, dtype: object"]},"metadata":{},"execution_count":38}]},{"cell_type":"markdown","metadata":{"id":"S_w4Me2bF4NL"},"source":["## Logistic Regression "]},{"cell_type":"code","metadata":{"id":"gOQ5JrqrF4NL","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635011271869,"user_tz":-330,"elapsed":7,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"73cfaedd-f9da-45c3-9bc3-32cfd91d6915"},"source":["model = LogisticRegression()\n","model.fit(train_X, train_y)\n","prediction = model.predict(test_X)\n","print('The accuracy of Logistic Regression is: ', metrics.accuracy_score(prediction, test_y))"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["The accuracy of Logistic Regression is: 0.9777777777777777\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n","STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n","\n","Increase the number of iterations (max_iter) or scale the data as shown in:\n"," https://scikit-learn.org/stable/modules/preprocessing.html\n","Please also refer to the documentation for alternative solver options:\n"," https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n"," extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n"]}]},{"cell_type":"markdown","metadata":{"id":"e1NNX-EGF4NJ"},"source":["## Support Vector Machine SVM"]},{"cell_type":"code","metadata":{"id":"zSJmVzqnF4NK","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635011282010,"user_tz":-330,"elapsed":517,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"37f4bbd2-9e91-4fd6-f53c-3aea85881b5b"},"source":["clf = svm.SVC(kernel='linear')\n","clf.fit(train_X, train_y)\n","\n","#Predict the response for test dataset\n","prediction = clf.predict(test_X)\n","\n","print('The accuracy of Support Vector Machine is: ', metrics.accuracy_score(prediction, test_y))"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["The accuracy of Support Vector Machine is: 1.0\n"]}]},{"cell_type":"markdown","metadata":{"id":"GWfemKzPF4NN"},"source":["## Decision Tree"]},{"cell_type":"code","metadata":{"id":"iRXy3EZIF4NN","outputId":"f470e075-fc92-4f3f-c343-7a8257e2c5d2"},"source":["#implementing using Decision Tree\n","#code\n","\n","print('The accuracy of Decision Tree is: ', metrics.accuracy_score(prediction, test_y))"],"execution_count":null,"outputs":[{"name":"stdout","output_type":"stream","text":["('The accuracy of Decision Tree is: ', 0.93333333333333335)\n"]}]},{"cell_type":"markdown","metadata":{"id":"uB2Co6f_F4NQ"},"source":["### We used all the features of iris in above models. 
Now we will use Petals and Sepals Seperately"]},{"cell_type":"markdown","metadata":{"id":"1_v6cAZMF4NQ"},"source":["### Creating Petals And Sepals Training Data"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"e1Q-1b9YF4NQ"},"source":["petal = df[['PetalLengthCm','PetalWidthCm','Species']]\n","sepal = df[['SepalLengthCm','SepalWidthCm','Species']]"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Lv-nab5oF4NQ"},"source":["### For Iris Petal"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"DuOqLUWZF4NQ"},"source":["train_p,test_p = train_test_split(petal, test_size=0.3, random_state=0) #petals\n","train_x_p = train_p[['PetalWidthCm','PetalLengthCm']] # taking the training data's Petal features\n","train_y_p = train_p.Species # output of the training data\n","\n","test_x_p = test_p[['PetalWidthCm','PetalLengthCm']] # taking the test data's Petal features\n","test_y_p = test_p.Species # output of the test data"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"bgNB8kaNF4NU"},"source":["### For Iris Sepal"]},{"cell_type":"code","metadata":{"id":"6hVj5MW3F4NU"},"source":["#Similarly define the split for sepals\n","#define the training and test data's Sepal features followed by the output of the training and test data\n","\n","#use naming- train_s,test_s ; train_x_s, train_y_s; test_x_s, test_y_s\n","\n","#code"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"y08e1O6aU9mx"},"source":["Implementing the algorithms just like we did on the complete dataset but separately on sepals and petals and calculating accuracy"]},{"cell_type":"markdown","metadata":{"id":"TeMWnQr6F4NV"},"source":["## SVM Algorithm"]},{"cell_type":"code","metadata":{"id":"jhlutJ78F4NV"},"source":["#code\n","print('The accuracy of the SVM using Petals is:',metrics.accuracy_score(prediction,test_y_p))\n","\n","#code\n","print('The accuracy of the SVM using Sepals 
is:',metrics.accuracy_score(prediction,test_y_s))"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Mli7zcq_F4NV"},"source":["## Logistic Regression"]},{"cell_type":"code","metadata":{"id":"2DqK_dFCF4NV"},"source":["#code\n","print('The accuracy of the Logistic Regression using Petals is:',metrics.accuracy_score(prediction,test_y_p))\n","\n","#code \n","print('The accuracy of the Logistic Regression using Sepals is:',metrics.accuracy_score(prediction,test_y_s))"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"aM-7Zx95F4NW"},"source":["## Decision Tree"]},{"cell_type":"code","metadata":{"id":"S8tXp-gMF4NW"},"source":["#code\n","print('The accuracy of the Decision Tree using Petals is:',metrics.accuracy_score(prediction,test_y_p))\n","\n","#code\n","print('The accuracy of the Decision Tree using Sepals is:',metrics.accuracy_score(prediction,test_y_s))"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"6ec0NUyJF4NW"},"source":["\n","\n","\n","### Question:\n","Does Using Petals over Sepals for training the data give a much better accuracy? 
Why?\n"]}]} \ No newline at end of file diff --git a/KNN_Task4 _203174002.ipynb b/KNN_Task4 _203174002.ipynb new file mode 100644 index 0000000..5dc9eee --- /dev/null +++ b/KNN_Task4 _203174002.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"KNN_Task4 _203174002.ipynb","provenance":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.8"}},"cells":[{"cell_type":"markdown","metadata":{"id":"cPP7BfqFSgyH"},"source":["# K-Nearest Neighbors Algorithm\n"]},{"cell_type":"markdown","metadata":{"id":"Zd0p7ZUpSgyL"},"source":["![example 1.png]()In this Jupyter Notebook we will focus on $KNN-Algorithm$. KNN is a data classification algorithm that attempts to determine what group a data point is in by looking at the data points around it.\n","\n","An algorithm, looking at one point on a grid, trying to determine if a point is in group A or B, looks at the states of the points that are near it. The range is arbitrarily determined, but the point is to take a sample of the data. If the majority of the points are in group A, then it is likely that the data point in question will be A rather than B, and vice versa.\n","
\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"TyGHDf4NSgyM"},"source":["# Imports"]},{"cell_type":"code","metadata":{"id":"iIEvA0xjSgyN","executionInfo":{"status":"ok","timestamp":1635250818719,"user_tz":-330,"elapsed":418,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["import numpy as np\n","from tqdm import tqdm_notebook"],"execution_count":5,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rc8ruF56SgyO"},"source":["# How it works?\n","\n","We have some labeled data set $X_{train}$, and a new set $X$ that we want to classify based on previous classifications.\n","\n"]},{"cell_type":"markdown","metadata":{"id":"rGbvEXbvSgyO"},"source":["## Steps"]},{"cell_type":"markdown","metadata":{"id":"B-nf9G4ZSgyP"},"source":["### 1. Calculate distance to all neighbours\n","### 2. Sort neighbours (based on closest distance)\n","### 3. Count possibilities of each class for k nearest neighbours \n","### 4. The class with highest possibility is your prediction"]},{"cell_type":"markdown","metadata":{"id":"LuWwKdFrSgyP"},"source":["# 1. Calculate distance to all neighbours\n","\n","Depending on the problem, you should use a different distance measure.\n","
\n","For example, we can use Euclidean distance. Euclidean distance is the \"ordinary\" straight-line distance between two points in D-Dimensional space.\n","\n","#### Definition\n","$d(p, q) = d(q, p) = \\sqrt{(q_1 - p_1)^2 + (q_2 - p_2)^2 + \\dots + (q_D - p_D)^2} = \\sqrt{\\sum_{d=1}^{D} (p_d - q_d)^2}$\n","\n","#### Example\n","Distance in $R^2$\n","\n","\n","\n","$p = (4,6)$\n","
\n","$q = (1,2)$\n","
\n","$d(p, q) = \\sqrt{(1-4)^2 + (2-6)^2} =\\sqrt{9 + 16} = \\sqrt{25} = 5 $\n","\n"]},{"cell_type":"markdown","metadata":{"id":"vlvNZqiJSgyQ"},"source":["## Code"]},{"cell_type":"code","metadata":{"id":"EvmQi6nsSgyR","executionInfo":{"status":"ok","timestamp":1635250723579,"user_tz":-330,"elapsed":1651,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["def get_euclidean_distance(A_matrix, B_matrix):\n"," \n"," C = [ [ 0 for i in range(np.size(B_matrix, 0)) ] for j in range(np.size(A_matrix, 0)) ]\n"," \n"," for i in range (0, np.size(A_matrix, 0)):\n"," row1 = A_matrix[i,:]\n"," for j in range (0, np.size(B_matrix, 0)):\n"," row2 = B_matrix[j,:]\n"," \n"," C[i][j] = np.sum(np.square(row1 - row2))\n"," \n"," ## Use the distance formula for the matrices using numpy functions\n"," ## C is the sum of the squares of the distances\n","\n"," return np.sqrt(C)\n"],"execution_count":1,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"GABzTa_0SgyS"},"source":["## Example Usage"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"W6b8yBSoSgyS","executionInfo":{"status":"ok","timestamp":1635250824471,"user_tz":-330,"elapsed":411,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"966f10e6-5429-4f7e-e70f-50df76e1b2ca"},"source":["X = np.array([[1,2,3] , [-4,5,-6]])\n","\n","X_train = np.array([[0,0,0], [1,2,3], [4,5,6], [-4, 4, -6]])\n","\n","print(\"X: {} Exaples in {} Dimensional space\".format(*X.shape))\n","print(\"X_train: {} Exaples in {} Dimensional space\".format(*X_train.shape))\n","\n","\n","print()\n","\n","print(\"X:\")\n","print(X)\n","\n","print()\n","\n","print(\"X_train\")\n","print(X_train)\n"],"execution_count":6,"outputs":[{"output_type":"stream","name":"stdout","text":["X: 2 Exaples in 3 Dimensional 
space\n","X_train: 4 Exaples in 3 Dimensional space\n","\n","X:\n","[[ 1 2 3]\n"," [-4 5 -6]]\n","\n","X_train\n","[[ 0 0 0]\n"," [ 1 2 3]\n"," [ 4 5 6]\n"," [-4 4 -6]]\n"]}]},{"cell_type":"code","metadata":{"id":"kB8IZcDpSgyT","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635250828586,"user_tz":-330,"elapsed":423,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"e48c7a0f-c233-44a7-baa1-66354fc59437"},"source":["## Initialize the distance matrix using the get_euclidean_matrix\n","\n","C = get_euclidean_distance(X, X_train)\n","\n","## Euclidean distance b/w row i of X and row j of X_train is available as C[i][j]\n","\n","\n","## Print Distance between first example from X and first form X_train\n","print(f\"Distance between first example from X and first form X_train {C[0,0]}\")"],"execution_count":7,"outputs":[{"output_type":"stream","name":"stdout","text":["Distance between first example from X and first form X_train 3.7416573867739413\n"]}]},{"cell_type":"markdown","metadata":{"id":"vbaJfBihSgyT"},"source":["# 2. 
Sort neightbours\n","\n","In order to find best fitting class for our observations we need to find to which classes belong observation neightbours and then to sort classes based on the closest distance\n"]},{"cell_type":"markdown","metadata":{"id":"b1VLHUj2SgyU"},"source":["## Code"]},{"cell_type":"code","metadata":{"id":"na0G1o_ASgyU"},"source":["def get_sorted_train_labels(distance_matrix, y):\n"," \"\"\"\n"," Function sorts y labels, based on probabilities from distances matrix\n"," Args:\n"," distance_matrix (numpy.ndarray): Distance Matrix, between points from X and X_train, size: N1:N2\n"," y (numpy.ndarray): vector of classes of X points, size: N1\n","\n"," Returns:\n"," numpy.ndarray: labels matrix sorted according to distances to nearest neightours, size N1:N2 \n","\n"," \"\"\"\n"," \n"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"U0I8eltDSgyV"},"source":["# 3. Count possibilities of each class for k nearest neighbours \n","\n","In order to find best class for our observation $x$ we need to calculate the probability of belonging to each class. In our case it is quite easy. We need just to count how many from k-nearest-neighbours of observation $x$ belong to each class and then devide it by k \n","

\n","$p(y=class \\space| x) = \\frac{\\sum_{1}^{k}(1 \\space if \\space N_i = class, \\space else \\space 0) }{k}$ Where $N_i$ is $i$ nearest neightbour\n","\n"]},{"cell_type":"markdown","metadata":{"id":"j0ZtOC38SgyV"},"source":["## Code"]},{"cell_type":"code","metadata":{"id":"y2aaG2GdSgyV"},"source":["def get_p_y_x_using_knn(y, k):\n"," \"\"\"\n"," The function determines the probability distribution p (y | x)\n"," for each of the labels for objects from the X\n"," using the KNN classification learned on the X_train\n","\n"," Args:\n"," y (numpy.ndarray): Sorted matrix of N2 nearest neighbours labels, size N1:N2\n"," k (int): number of nearest neighbours for KNN algorithm\n","\n"," Returns: numpy.ndarray: Matrix of probabilities for N1 points (from set X) of belonging to each class,\n"," size N1:C (where C is number of classes)\n"," \"\"\"\n","\n"," ## Write your code here\n","\n"," return probabilities_matrix\n"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"ThEbAnXISgyW"},"source":["# 4. 
The class with highest possibilty is Your prediction"]},{"cell_type":"markdown","metadata":{"id":"_i7NTtN4SgyW"},"source":["At the end we combine all previous steps to get prediction"]},{"cell_type":"markdown","metadata":{"id":"OzK6rY8mSgyW"},"source":["## Code"]},{"cell_type":"code","metadata":{"id":"DaYqr_i6SgyW","executionInfo":{"status":"ok","timestamp":1635250858682,"user_tz":-330,"elapsed":471,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["def predict(X, X_train, y_train, k, distance_function):\n"," \"\"\"\n"," Function returns predictions for new set X based on labels of points from X_train\n"," Args:\n"," X (numpy.ndarray): set of observations (points) that we want to label\n"," X_train (numpy.ndarray): set of lalabeld bservations (points)\n"," y_train (numpy.ndarray): labels for X_train\n"," k (int): number of nearest neighbours for KNN algorithm\n","\n"," Returns:\n"," (numpy.ndarray): label predictions for points from set X\n"," \"\"\"\n"," ## Write your code here\n","\n"," return prediction"],"execution_count":9,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"i9kzyASWSgyX"},"source":["# Accuracy"]},{"cell_type":"markdown","metadata":{"id":"v8bNPTPZSgyX"},"source":["To find how good our knn model works we should count accuracy"]},{"cell_type":"markdown","metadata":{"id":"dgFCnJ14SgyX"},"source":["## Code"]},{"cell_type":"code","metadata":{"id":"2ySpyThlSgyX"},"source":["def count_accuracy(prediction, y_true):\n"," \"\"\"\n"," Returns:\n"," float: Predictions accuracy\n","\n"," \"\"\"\n"," N1 = prediction.shape[0]\n"," \n"," ## Use np.sum to count the number of elements where predicted value == actual value and assign the count to the variable accuracy\n","\n"," return accuracy"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"b5g7YFY2SgyX"},"source":["## Example 
usage"]},{"cell_type":"code","metadata":{"id":"uLqCqmJNSgyY","colab":{"base_uri":"https://localhost:8080/","height":235},"executionInfo":{"status":"error","timestamp":1635250842268,"user_tz":-330,"elapsed":449,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"dcf624ee-b959-4577-b370-464112163929"},"source":["y_true = np.array([[0, 2]])\n","\n","predicton = predict(X, X_train, y_train, 3, get_euclidean_distance)\n","\n","\n","print(\"True classes:{}, accuracy {}%\".format(y_true, count_accuracy(predicton, y_true) * 100))"],"execution_count":8,"outputs":[{"output_type":"error","ename":"NameError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0my_true\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mpredicton\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mget_euclidean_distance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mNameError\u001b[0m: name 'predict' is not defined"]}]},{"cell_type":"markdown","metadata":{"id":"--WUpIcxSgyY"},"source":["# Find best k"]},{"cell_type":"markdown","metadata":{"id":"itkcD0DlSgyY"},"source":["Best k parameter is that one for which we have highest accuracy"]},{"cell_type":"markdown","metadata":{"id":"7GYEUBnnSgyY"},"source":["## Code"]},{"cell_type":"code","metadata":{"id":"Q6OhNBOoSgyY","executionInfo":{"status":"ok","timestamp":1635250862606,"user_tz":-330,"elapsed":413,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["def select_knn_model(X_validation, y_validation, X_train, y_train, k_values, distance_function):\n"," \"\"\"\n"," Function returns k parameter that best fit Xval points\n"," Args:\n"," Xval (numpy.ndarray): set of Validation Data, size N1:D\n"," Xtrain (numpy.ndarray): set of Training Data, size N2:D\n"," yval (numpy.ndarray): set of labels for Validation data, size N1:1\n"," ytrain (numpy.ndarray): set of labels for Training Data, size N2:1\n"," k_values (list): list of int values of k parameter that should be checked\n","\n"," Returns:\n"," int: k paprameter that best fit validation set\n"," \"\"\"\n","\n"," accuracies = []\n","\n"," for k in tqdm_notebook(k_values):\n"," prediction = predict(X_validation, X_train, y_train, k, distance_function)\n","\n"," accuracy = count_accuracy(prediction, y_validation)\n"," accuracies.append(accuracy)\n","\n"," best_k = k_values[accuracies.index(max(accuracies))]\n","\n"," return best_k, accuracies\n"],"execution_count":10,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"nGtIjD0WSgyY"},"source":["# Real World Example - Iris Dataset"]},{"cell_type":"markdown","metadata":{"id":"-o6MHMtKSgyZ"},"source":["\n","\n","\n","\n","This is perhaps the best known database to be found in the pattern recognition literature. 
The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other. \n","\n","Each example contains 4 attributes\n","1. sepal length in cm \n","2. sepal width in cm \n","3. petal length in cm \n","4. petal width in cm \n","\n","Predicted attribute: class of iris plant. \n","\n","\n","\n","\n","\n","\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"SY8oOngQSgyZ","executionInfo":{"status":"ok","timestamp":1635250867474,"user_tz":-330,"elapsed":414,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"270c2090-4cc4-43c8-dd20-5ed2670e0067"},"source":["from sklearn import datasets\n","import matplotlib.pyplot as plt\n","\n","iris = datasets.load_iris()\n","\n","iris_X = iris.data\n","iris_y = iris.target\n","\n","print(\"Iris: {} examples in {} dimensional space\".format(*iris_X.shape))\n","print(\"First example in dataset :\\n Speal lenght: {}cm \\n Speal width: {}cm \\n Petal length: {}cm \\n Petal width: {}cm\".format(*iris_X[0]))\n","\n","print(\"Avalible classes\", np.unique(iris_y))"],"execution_count":11,"outputs":[{"output_type":"stream","name":"stdout","text":["Iris: 150 examples in 4 dimensional space\n","First example in dataset :\n"," Speal lenght: 5.1cm \n"," Speal width: 3.5cm \n"," Petal length: 1.4cm \n"," Petal width: 0.2cm\n","Avalible classes [0 1 2]\n"]}]},{"cell_type":"markdown","metadata":{"id":"-IlKSX7hSgyZ"},"source":["## Prepare Data\n","\n","In our data set we have 150 examples (50 examples of each class), we have to divide it into 3 datasets.\n","1. Training data set, 90 examples. It will be used to find k - nearest neightbours\n","2. Validation data set, 30 examples. It will be used to find best k parameter, the one for which accuracy is highest\n","3. 
Test data set, 30 examples. It will be used to check how good our model performs\n","\n","Data has to be shuffled (mixed in random order), because originally it is stored 50 examples of class 0, 50 of 1 and 50 of 2.\n"]},{"cell_type":"code","metadata":{"id":"RA1Q7kCPSgyZ","executionInfo":{"status":"ok","timestamp":1635250871691,"user_tz":-330,"elapsed":418,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["from sklearn.utils import shuffle\n","\n","iris_X, iris_y = shuffle(iris_X, iris_y, random_state=134)\n","\n","\n","test_size = 30\n","validation_size = 30\n","training_size = 90\n","\n","## Initialize X_test\n","## Initialize X_validation \n","## Initialize X_train \n","\n","## Initialize y_test\n","## Initialize y_validation\n","## Initialize y_train"],"execution_count":12,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"r9xJVLzrSgyZ"},"source":["## Find best k parameter"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":235},"id":"hbvZBVNBSgya","executionInfo":{"status":"error","timestamp":1635250875803,"user_tz":-330,"elapsed":430,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"f62b15f9-7fca-4789-bac2-2db5cbbcd8c0"},"source":["k_values = [i for i in range(3,50)]\n","\n","best_k, accuracies = select_knn_model(X_validation, y_validation, X_train, y_train, k_values, distance_function=get_euclidean_distance)\n","\n","## Plot accuracy vs k values graph"],"execution_count":13,"outputs":[{"output_type":"error","ename":"NameError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 
1\u001b[0m \u001b[0mk_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mbest_k\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccuracies\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mselect_knn_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_validation\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_validation\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk_values\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdistance_function\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mget_euclidean_distance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m## Plot accuracy vs k values graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mNameError\u001b[0m: name 'X_validation' is not defined"]}]},{"cell_type":"markdown","metadata":{"id":"BjQBDWJMSgya"},"source":["## Count accuracy for training set"]},{"cell_type":"code","metadata":{"id":"_f-J5sSESgya","colab":{"base_uri":"https://localhost:8080/","height":201},"executionInfo":{"status":"error","timestamp":1635250882340,"user_tz":-330,"elapsed":434,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"d0dfb811-a65e-472c-bdc7-fad839ccc488"},"source":["prediction = predict(X_test, X_train, y_train, best_k, get_euclidean_distance)\n","\n","## Calculate Best accuracy using the best k 
value\n"],"execution_count":14,"outputs":[{"output_type":"error","ename":"NameError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprediction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbest_k\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mget_euclidean_distance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m## Calculate Best accuracy using the best k value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mNameError\u001b[0m: name 'X_test' is not defined"]}]},{"cell_type":"markdown","metadata":{"id":"72O5eXbCSgyc"},"source":["# Sources\n","\n","https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm - first visualisation image\n","\n","https://en.wikipedia.org/wiki/Euclidean_distance - euclidean distance visualisation\n","\n","https://rajritvikblog.wordpress.com/2017/06/29/iris-dataset-analysis-python/ - first iris image\n","\n","https://rpubs.com/wjholst/322258 - second iris image\n","\n"]}]} \ No newline at end of file diff --git a/Linear_Regression_Task2_203174002.ipynb b/Linear_Regression_Task2_203174002.ipynb new file mode 100644 index 0000000..c7b98ce --- /dev/null +++ b/Linear_Regression_Task2_203174002.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":5,"metadata":{"kernelspec":{"display_name":"Python 
3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.8"},"colab":{"name":"Linear_Regression_Task2_203174002.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"89223f98"},"source":["\n","\n","```\n","Import libraries\n","```\n","\n","### Importing useful libraries \n"],"id":"89223f98"},{"cell_type":"code","metadata":{"id":"26f77ebe","executionInfo":{"status":"ok","timestamp":1635012283386,"user_tz":-330,"elapsed":1123,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["# This Python 3 environment comes with many helpful analytics libraries installed\n","# For example, here's several helpful packages to load in\n","import numpy as np # linear algebra\n","import matplotlib.pyplot as plt # data visualization\n","import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n","import seaborn as sns"],"id":"26f77ebe","execution_count":1,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"31c8220d"},"source":["### Loading the dataset \n","#### For implementation we will be using house prediction dataset . 
The dataset can be found [here](https://github.com/vrinda01go/Hellofoss/blob/main/Room_price_data.csv)"],"id":"31c8220d"},{"cell_type":"code","metadata":{"id":"1c5d873a","executionInfo":{"status":"ok","timestamp":1635012317184,"user_tz":-330,"elapsed":552,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["df = pd.read_csv(\"Hostel_Linear-Dataset.csv\") #import text file \n"],"id":"1c5d873a","execution_count":3,"outputs":[]},{"cell_type":"code","metadata":{"id":"1ca9aba0","colab":{"base_uri":"https://localhost:8080/","height":205},"executionInfo":{"status":"ok","timestamp":1635012321185,"user_tz":-330,"elapsed":524,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"50a85b1a-cfee-4f7b-9ac1-5134a16822e9"},"source":["df.head()"],"id":"1ca9aba0","execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
PriceHostel No.OccupancyRoom SizeFloor
02540.0316868
12900.0329665
2NaN317888
32362.0329242
4NaN3210985
\n","
"],"text/plain":[" Price Hostel No. Occupancy Room Size Floor\n","0 2540.0 3 1 686 8\n","1 2900.0 3 2 966 5\n","2 NaN 3 1 788 8\n","3 2362.0 3 2 924 2\n","4 NaN 3 2 1098 5"]},"metadata":{},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"af08f245"},"source":["# Visualizing and Cleaning the data\n","\n","We will now be removing the nan values and identical values from the dataset\n","\n","For seeing if there are nan values in the dataset we will use the isna() function and then to remove them we will use the dropna() function. We will need to set additional parameters like rows and columns in the dropna function depending on the number of nan values present for each column\n","\n","Using the sum() function with isna() function we can get to know the number of missing values in each column"],"id":"af08f245"},{"cell_type":"code","metadata":{"id":"2fd4babb","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635012323753,"user_tz":-330,"elapsed":428,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"dd94b5ef-188f-4c3a-aec4-fe91cdc6a86d"},"source":["df.isna().sum()"],"id":"2fd4babb","execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Price 1531\n","Hostel No. 0\n","Occupancy 0\n","Room Size 0\n","Floor 0\n","dtype: int64"]},"metadata":{},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"83ef03c3"},"source":["After this we will proceed to remove the nan values \n","\n","Since there are not many nan values in the column 'Price' as compared to the number of rows we will remove the rows which have nan values. 
\n","\n","Resetting the index after removing the nan values and dropping the old index will also be important"],"id":"83ef03c3"},{"cell_type":"code","metadata":{"id":"b65e4503","executionInfo":{"status":"ok","timestamp":1635012326744,"user_tz":-330,"elapsed":459,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["df = df.dropna(subset = ['Price'],how= 'any')\n","df = df.reset_index(drop = True)\n","## df.isna().sum()"],"id":"b65e4503","execution_count":6,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"40784889"},"source":["Now we can use the drop_duplicates function to remove the duplicate values\n","\n","This function has a parameter called 'keep' where we specify which of the duplicated rows to keep; the others are dropped\n","\n","For this exercise we will keep the first occurrence and drop the rest of the duplicates"],"id":"40784889"},{"cell_type":"code","metadata":{"id":"75fa3dc8","executionInfo":{"status":"ok","timestamp":1635012329999,"user_tz":-330,"elapsed":425,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["df = df.drop_duplicates(keep = 'first')\n","df = df.reset_index(drop = True)\n","## df.duplicated().sum()"],"id":"75fa3dc8","execution_count":7,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"a007a33f"},"source":["For visualizing the data we will first start with looking at the distribution of different columns to see if there are enough data points for each category in every column, and dropping categories if the data is biased towards one category more than the other"],"id":"a007a33f"},{"cell_type":"code","metadata":{"id":"b325df62","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1635012366549,"user_tz":-330,"elapsed":1239,"user":{"displayName":"Desu Venkata
Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"cf878a76-66d3-4e68-f009-819d724a4eae"},"source":["columns = df.columns\n","for column in columns:\n"," if(column== 'Price' or column=='Room Size'): \n"," continue\n"," fig = plt.figure(figsize=(5,5))\n"," ax = fig.gca()\n"," counts = df[column].value_counts()\n"," counts.plot.bar(ax = ax, color='blue')\n"," ax.set_title('No of rooms '+ column)\n"," ax.set_xlabel(column)\n"," ax.set_ylabel(\"No of rooms\")\n"," plt.show()"],"id":"b325df62","execution_count":9,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"9811a731"},"source":["We can clearly notice that for the Occupancy column the (occupancy) = 4 has a really low set of data points as compared to others. Hence we can proceed in dropping those rows where the occupancy is 4"],"id":"9811a731"},{"cell_type":"code","metadata":{"id":"825783c0","executionInfo":{"status":"ok","timestamp":1635012377084,"user_tz":-330,"elapsed":615,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["df = df[df['Occupancy'] != 4]\n","df = df.reset_index(drop= True)"],"id":"825783c0","execution_count":10,"outputs":[]},{"cell_type":"code","metadata":{"id":"30c64310","colab":{"base_uri":"https://localhost:8080/","height":205},"executionInfo":{"status":"ok","timestamp":1635012378828,"user_tz":-330,"elapsed":9,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"bbb5d4dc-f7e5-4b8e-a248-6ecbd09568da"},"source":["df.head()"],"id":"30c64310","execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
PriceHostel No.OccupancyRoom SizeFloor
02540.0316868
12900.0329665
22362.0329242
31432.0217063
41702.02210383
\n","
"],"text/plain":[" Price Hostel No. Occupancy Room Size Floor\n","0 2540.0 3 1 686 8\n","1 2900.0 3 2 966 5\n","2 2362.0 3 2 924 2\n","3 1432.0 2 1 706 3\n","4 1702.0 2 2 1038 3"]},"metadata":{},"execution_count":11}]},{"cell_type":"markdown","metadata":{"id":"f333875b"},"source":["We will now write the columns between categorical and numerical\n","\n","categorical = Hostel No, occupancy, floor\n","\n","Numerical = price, occupancy, roomsize, floor, hostel No.\n","\n","Remember that we can treat Hostel Number and occupancy as numerical or categorical. For this notebook we will treat them as categorical for data visualization and numerical for the regression"],"id":"f333875b"},{"cell_type":"markdown","metadata":{"id":"0f34ca6a"},"source":["We will also plot the scatter plots and the correlation map to analyse the relation ships between different numerical columns"],"id":"0f34ca6a"},{"cell_type":"code","metadata":{"scrolled":false,"id":"f4a3ab6e","executionInfo":{"status":"ok","timestamp":1635012383025,"user_tz":-330,"elapsed":517,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["categorical = ['Hostel No.', 'Occupancy', 'Floor']\n","numerical = [ 'Price', 'Room Size']"],"id":"f4a3ab6e","execution_count":12,"outputs":[]},{"cell_type":"code","metadata":{"id":"df2b588a","colab":{"base_uri":"https://localhost:8080/","height":791},"executionInfo":{"status":"ok","timestamp":1635012386787,"user_tz":-330,"elapsed":1202,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"c61015b6-8eb0-4d49-cf99-2e50e4e7bd51"},"source":["for column1 in numerical:\n"," for column2 in numerical:\n"," if(column1 != column2):\n"," fig = plt.figure(figsize=(6,6))\n"," ax = fig.gca()\n"," df.plot.scatter(x=column1,y=column2,ax = ax)\n"," ax.set_title('Scatter plot of '+ column1 + ' vs ' 
+ column2)\n"," ax.set_xlabel(column1)\n"," ax.set_ylabel(column2)\n"," plt.show()"],"id":"df2b588a","execution_count":13,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"625b89a8"},"source":["We can notice that there are no linear relation present between the numerical columns. Hence no need to drop anything"],"id":"625b89a8"},{"cell_type":"markdown","metadata":{"id":"8a7f95fa"},"source":["Now we will plot box plots of categorical and numerical columns to get more information about the number of outliers and the distrubtion."],"id":"8a7f95fa"},{"cell_type":"code","metadata":{"id":"e73f0769","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1635012395663,"user_tz":-330,"elapsed":1890,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"ed580d37-65dc-4b57-867d-df02ac8fdeea"},"source":["for c in categorical:\n"," for n in numerical:\n"," sns.set_style(\"whitegrid\")\n"," sns.boxplot(x= c, y= n, data=df)\n"," plt.xlabel(c)\n"," plt.ylabel(n)\n"," plt.show()\n"," "],"id":"e73f0769","execution_count":14,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{}},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAYsAAAEGCAYAAACUzrmNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAbnElEQVR4nO3de1TUdcLH8c8IKxdBDFdncrPM1OSxAjtZIZQtHRAVBBR2z9aW6bZlmuTaWpJ5WWO13W29rWWybsdqT3/kBd3EzMK8FYWW5pa4yVOkhAytKKijQ+A8f/g4m6v4RZ0LI+/XOZ3TfGf4/j7DT/jwu47F5XK5BADABbTzdwAAQOtHWQAAjCgLAIARZQEAMKIsAABGwf4O4A27du1SSEiIv2MAQEBxOp2Ki4s773NXZFmEhIQoJibG3zEAIKCUlZU1+xy7oQAARpQFAMCIsgAAGFEWAAAjyiLAfPnllxoyZIjKy8v9HQVAG+K1ssjLy1N8fLzS0tLOee6VV17RjTfeqNraWkmSy+VSfn6+kpOTlZ6eri+++ML92sLCQqWkpCglJUWFhYXeihsw8vPzdfz4cc2aNcvfUQC0IV4rixEjRmjp0qXnjB88eFAffPCBunXr5h7bsmWLKioqtGHDBj333HOaOXOmJOnIkSNatGiR3nzzTS1fvlyLFi1SXV2dtyK3el9++aUqKiokSRUVFWxdAPAZr11nMWDAAFVWVp4zPmfOHE2ePFnjxo1zjxUXFyszM1MWi0VxcXGqr69XTU2NSktLlZCQoE6dOkmSEhIStHXr1vNurbQ269ev17p16zw653+fA/3YY495/HqSoUOHKjU11aNzAr7kjZ89Se49IdHR0R6fW2r9P3s+vSjvvffeU9euXdW3b9+zxu12u2w2m/uxzWaT3W4/Z9xqtcputxuX43Q6L3hxiS9UVVXJ4XB4dE6n03nOY08vo6qqyu/fu9agpKREH374ocfnPbNlHBUV5fG5Bw4cqPj4eI/PG2i88bMnSd99950kKTQ01ONzS63/Z89nZXHixAktWbJEr7zyiteX1Rqu4I6JidGYMWM8OueDDz7o3g0lST169Djvrj5cvm+++Ubh4eEen/fbb7+VJF199dUen7tbt25+/3ffGnjjZ0+ScnNzJUkLFy70+NytxYXKymdlsX//flVWViojI0OSVF1drREjRmj58uWyWq2qrq52v7a6ulpWq1VWq1WlpaXucbvdrttvv91XkVudZ599Vg8//LD78fTp0/2Y5sqWmprqlV0CbeEXDq5MPjt19sYbb1RJSYk2btyojRs3ymazadWqVerSpYuSkpK0evVquVwu7dq1S5GRkeratasSExO1bds21dXVqa6uTtu2bVNiYqKvIrc6ffr0cd8gsUePHurVq5efEwFoK7y2ZTFp0iSVlpbq8OHDuvvuuzVhwgTl5OSc97WDBg3S5s2blZycrLCwMM2ePVuS1KlTJ40bN07Z2dmSpPHjx7sPdrdV1113ncrLy9mqAOBTXiuLuXPnXvD5jRs3uv/fYrFoxowZ531ddna2uywghYeH65ZbbmGrAoBPcQU3AMCIsgAAGFEWAAAjygIAYERZAACMKAsAgBFlAQAwoiwAAEaUBQDAiLIAABhRFgAAI8oCAGBEWQAAjCgLAIARZQEAMKIsAABGlAUAwIiyAAAYURYAACPKAgBgRFkAAIwoCwCAEWUBADCiLAAARpQFAMCIsgAAGFEWAAAjygIAYERZAACMKAsAgBFlAQAwoiwAAEaUBQDAiLIAABhRFgAAI6+VRV5enuLj45WWluYe+8Mf/qDU1FSlp6dr/Pjxqq+vdz+3ZMkSJScna/Dgwdq6dat7fMuWLRo8eLCSk5NVUFDgrbgAgAvwWlmMGDFCS5cuPWssISFBa9eu1V
tvvaUePXpoyZIlkqTy8nIVFRWpqKhIS5cu1e9+9zs1NTWpqalJs2bN0tKlS1VUVKS1a9eqvLzcW5EBAM3wWlkMGDBAUVFRZ40lJiYqODhYkhQXF6fq6mpJUnFxsYYNG6b27dure/fuuu6667R7927t3r1b1113nbp376727dtr2LBhKi4u9lZkAEAzgv214JUrV2rIkCGSJLvdrtjYWPdzVqtVdrtdkmSz2c4a3717t3Fup9OpsrIyDyduHRwOhyRdse/vSsf6C1xtfd35pSwWL16soKAgDR8+3Cvzh4SEKCYmxitz+1t4eLgkXbHv70rH+gtcbWHdXagIfV4Wq1at0qZNm7Rs2TJZLBZJp7cYzuySkk5vaVitVklqdhwA4Ds+PXV2y5YtWrp0qRYvXqywsDD3eFJSkoqKitTQ0KADBw6ooqJCt9xyi26++WZVVFTowIEDamhoUFFRkZKSknwZGQAgL25ZTJo0SaWlpTp8+LDuvvtuTZgwQQUFBWpoaNDo0aMlSbGxsZo1a5Z69+6tIUOGaOjQoQoKCtL06dMVFBQkSZo+fboefvhhNTU1aeTIkerdu7e3IgMAmuG1spg7d+45Yzk5Oc2+/rHHHtNjjz12zvigQYM0aNAgj2YDAFwcruAGABhRFgAAI8oCAGBEWQAAjCgLAIARZQEAMPLbvaGAy7Vw4cKAuwvxvn37JEm5ubl+TtJyvXr18kreQFt/gbjuJM+tP8oCAau8vFxffv6pro1o8neUFuvoOn2Lm5MV2/2cpGX2Hwvy2tzl5eXa+cVOqZPXFuFZ/78fZue3O/2b42Ic8dxUlAUC2rURTXr2tmP+jnHFyt8R4d0FdJJO3XPKu8tow9pt8tyRBo5ZAACMKAsAgBFlAQAwoiwAAEaUBQDAiLIAABhRFgAAI8oCAGBEWQAAjCgLAIARZQEAMKIsAABGlAUAwIiyAAAYURYAACPKAgBgRFkAAIyMZfH1119r1KhRSktLkyTt3btXL730kteDAQBaD2NZTJs2TU8++aSCg09/Amvfvn21bt06rwcDALQexrI4ceKEbrnllrPGgoK89yHuAIDWx1gWV111lfbv3y+LxSJJWr9+vbp06eL1YACA1iPY9IIZM2Zo2rRp+uqrr3TXXXfpmmuu0Z/+9CdfZANwBautrZWOSO02cZ6N1xyRasNqPTKVsSy6d++uZcuWyeFw6NSpU4qIiPDIgoHLVVtbq++OBil/B/8mveWbo0HqUuuZXzYIbMayiImJ0a9+9Ss9+eST7l1RWVlZKiws9Ho4AFeu6OhofXPiG52655S/o1yx2m1qp+joaI/MZSyLXr166dSpUxozZozmzZunTp06yeVyeWThwOWIjo5WeP3/6tnbjvk7yhUrf0eEQj30ywaBzbizMDg4WE899ZRycnJ0//336/PPP3dvYQAA2gZjWZzZihg6dKjmzZunvLw8HThwwDhxXl6e4uPj3RfzSdKRI0c0evRopaSkaPTo0aqrq3MvIz8/X8nJyUpPT9cXX3zh/prCwkKlpKQoJSWFXV8A4CfG3VD5+fnu/+/Tp4/eeOMNFRcXGyceMWKEfvnLX+rpp592jxUUFCg+Pl6PPPKICgoKVFBQoMmTJ2vLli2qqKjQhg0b9Nlnn2nmzJlavny5jhw5okWLFmnlypWyWCwaMWKEkpKSFBUVdYlv91wLFy5UeXm5x+bztn379kmScnNz/Zzk4vTq1SvgMgP4j2bLoqSkRPHx8aqqqlJVVdVZz4WHhxsnHjBggCorK88aKy4u1uuvvy5JyszM1AMPPKDJkyeruLhYmZmZslgsiouLU319vWpqalRaWqqEhAR16tRJkpSQkKCtW7eetbVyucrLy7Xzn3t0Kjww9stamk6vsk/+t9rPSVqunYOzaYBA12xZbN++XfHx8Xr//ffP+3xKSspFL+zQoUPq2rWrJKlLly46dOiQJMlut8tms7lfZ7PZZLfbzxm3Wq2y2+3G5TidTpWVlb
Uok8Ph0KnwaJ38H88VEM4WumetHA5Hi9dJSzkcDu6E6QPeWHdn5oX3eWr9NVsWZ3YZzJkz57IXcj4Wi8VrB8pDQkIUExPTotee3kqq90oO/Ed4eHiL18nFzHnSozPifLyx7s7Mq8Menxb/5WLW34VKpdk/zDZu3Khvv/3W/XjRokUaPny4xo4d26ID3OfTuXNn1dTUSJJqamrc5/9arVZVV/9nt0p1dbWsVus543a7XVar9ZKWDQC4dM2Wxbx589y/zN9//3299dZbmj17tu69917NnDnzkhaWlJSk1atXS5JWr16te++996xxl8ulXbt2KTIyUl27dlViYqK2bdumuro61dXVadu2bUpMTLykZQMALl2zu6EsFovCwsIkSRs2bNDIkSN100036aabbtIbb7xhnHjSpEkqLS3V4cOHdffdd2vChAl65JFHNHHiRK1YsULdunXT/PnzJUmDBg3S5s2blZycrLCwMM2ePVuS1KlTJ40bN07Z2dmSpPHjx7sPdgMAfKfZsnC5XDp+/LjCwsL00Ucf6b777nM/53Q6jRPPnTv3vOOvvvrqOWMWi0UzZsw47+uzs7PdZQEA8I9my2LUqFHKzMxURESEevbsqZtvvlmStGfPHm5RDgBtTLNlkZ2drbvuukuHDh1S37593eM//vGPvXaGFACgdbrgFdxnzkj6oTPXSQAA2g6uaQIAGFEWAAAj440EJamurk4HDx5UU1OTe6xfv35eCwUAaF2MZTF//nwVFhbq2muvdY9ZLBa99tprXg0GAGg9jGXx9ttv691331X79u19kQcA0AoZj1n06dNHR48e9UUWAEArZdyyeOSRR5SZmak+ffroRz/6kXv85Zdf9mowAEDrYSyLKVOm6Ne//rX69Omjdu04eQoA2iJjWYSGhurBBx/0RRYAQCtlLIvbbrtNf/7zn5WUlHTWQW5OnQWAtsNYFnv27JEk7dq1yz3GqbMA0LYYy+L111/3RQ6/qa2tVTvHIYXuWevvKFesdo5Dqq3l1GsgkBnL4ujRo1q0aJG2b98uSbr99ts1fvx4RUZGej0cAKB1MJbFM888o969e2vBggWSpDVr1igvL0+LFi3yejhfiI6O1teHG3Tyf9L8HeWKFbpnrfsjegEEJmNZ7N+/X3/5y1/cjx9//HFlZGR4NRQAoHUxXjgRGhqqHTt2uB9/8sknCg0N9WooAEDrYtyymDlzpp5++mkdO3ZMLpdLUVFRev75532RDQDQShjLIiYmRv/4xz907NgxSVJERITXQwEAWhfOhgIAGBmPWTzzzDPq0KGDFixYoAULFigiIkJ5eXm+yAYAaCU4GwoAYMTZUAAAI86GAgAYXfTZUGFhYSoqKlLfvn29Hg4A0Do0uxvq2LFjWrJkiWbNmqUPPvhAHTp00OrVq5WcnKy3337blxkBAH7W7JbF5MmTFRUVpbi4OL355pt6+eWX5XK59OKLLyomJsaXGQEAftZsWVRWVmrx4sWSpJycHCUmJmrTpk0KCQnxWTgAQOvQ7G6o4OD/9EhQUJBsNhtFAQBtVLNbFnv37tWtt94qSXK5XHI6nbr11lvlcrlksVj06aef+iwkAMC/mi2LsrIyX+YAALRixovyAACgLAAARn4pi2XLlmnYsGFKS0vTpEmT5HQ6deDAAeXk5Cg5OVkTJ05UQ0ODJKmhoUETJ05UcnKycnJyVFlZ6Y/IANCm+bws7Ha7XnvtNa1cuVJr165VU1OTioqK9MILL+ihhx7Su+++q44dO2rFihWSpOXLl6tjx45699139dBDD+mFF17wdWQAaPP8smXR1NSkkydPqrGxUSdPnlSXLl300UcfafDgwZKkrKwsFRcXS5I2btyorKwsSdLgwYNVUlIil8vlj9gA0GYZ7w3laVarVWPGjNFPf/pThYSEKCEhQf369VPHjh3d13bYbDbZ7XZJp7dErr766tNhg4MVGRmpw4cPKzo6utllOJ3OFp/N5XA4LvMdoSUcDo
fHz7BzOBwcdPMBb6y7M/PC+zy1/nxeFnV1dSouLlZxcbEiIyP1xBNPaOvWrR5dRkhISItvSRIeHi6p3qPLx7nCw8M9fpuY8PBwnfTojDgfb6y7M/Pqa6ndpgCp/DP/2ALpExqOSOE/afn6u1Cp+LwsPvzwQ11zzTXuLYOUlBR9+umnqq+vV2Njo4KDg1VdXS2r1Srp9JbIwYMHZbPZ1NjYqKNHj+qqq67ydWwAHtarVy9/R7go+/btkyT1/klvPye5CD/x3PfZ52XRrVs3ffbZZzpx4oRCQ0NVUlKim266SXfccYfeeecdDRs2TIWFhUpKSpIkJSUlqbCwUP3799c777yjO++8UxaLxdexAXhYbm6uvyNclDN5Fy5c6Ock/uHzsoiNjdXgwYOVlZWl4OBgxcTE6Oc//7nuuece/eY3v9H8+fMVExOjnJwcSVJ2drYmT56s5ORkRUVFad68eb6OjFZs/7Eg5e+I8HeMFqtrOP2HTlT7wDhJY/+xIPXxdwi0Cj4vC+l0Q//3XxXdu3d3ny77QyEhIW22yXFhgbYbQ5IO/P+uDGuPwNiV0UeB+X2G5/mlLABPCLTdGBK7MhC4AuQ0BACAP1EWAAAjygIAYERZAACMKAsAgBFlAQAw4tRZSe0ctQrds9bfMVrE8v0JSZLrR2F+TtJy7Ry1kmz+jgHgMrT5sgi0C47c96e5IZB++doC7vsM4GxtviwC7cIuLuoC4A8cswAAGFEWAAAjygIAYERZAACMKAsAgBFlAQAwoiwAAEaUBQDAiLIAABhRFgAAI8oCAGBEWQAAjCgLAIARZQEAMKIsAABGlAUAwIiyAAAYURYAACPKAgBgRFkAAIwoCwCAEWUBADCiLAAARpQFAMCIsgAAGPmlLOrr65Wbm6vU1FQNGTJEO3fu1JEjRzR69GilpKRo9OjRqqurkyS5XC7l5+crOTlZ6enp+uKLL/wRGQDaNL+Uxe9//3vdddddWr9+vdasWaMbbrhBBQUFio+P14YNGxQfH6+CggJJ0pYtW1RRUaENGzboueee08yZM/0RGQDatGBfL/Do0aPavn27nn/+eUlS+/bt1b59exUXF+v111+XJGVmZuqBBx7Q5MmTVVxcrMzMTFksFsXFxam+vl41NTXq2rWrr6OjDVm/fr3WrVvn8Xn37dsnScrNzfX43EOHDlVqaqrH5w00gbjupNa//nxeFpWVlYqOjlZeXp727t2rfv36aerUqTp06JC7ALp06aJDhw5Jkux2u2w2m/vrbTab7Hb7BcvC6XSqrKzMu2/ETxwOhyRdse+vtaiqqnJ/rz0pIiJCkrwyd1VVFf8uFJjrTmr968/nZdHY2Kg9e/Zo2rRpio2NVX5+vnuX0xkWi0UWi+WSlxESEqKYmJjLjdoqhYeHS9IV+/5ai5iYGI0ZM8bfMXAJWHeX7kJl5fNjFjabTTabTbGxsZKk1NRU7dmzR507d1ZNTY0kqaamRtHR0ZIkq9Wq6upq99dXV1fLarX6OjYAtGk+L4suXbrIZrPpq6++kiSVlJTohhtuUFJSklavXi1JWr16te69915Jco+7XC7t2rVLkZGRHK8AAB/z+W4oSZo2bZp++9vf6vvvv1f37t01Z84cnTp1ShMnTtSKFSvUrVs3zZ8/X5I0aNAgbd68WcnJyQoLC9Ps2bP9ERkA2jS/lEVMTIxWrVp1zvirr756zpjFYtGMGTN8EQsA0Ayu4AYAGFEWAAAjygIAYERZAACMKAsAgBFlAQAwoiwAAEaUBQDAiLIAABhRFgAAI8oCAGBEWQAAjCgLAIARZQEAMKIsAABGfvk8i7Zg/fr1Wrduncfn3bdvnyQpNzfX43NL0tChQ5WamuqVuQEELsoiwHTu3NnfEQC0QZSFl6SmpvIXOoArBscsAABGlAUAwIiyAAAYURYAACPKAgBgRFkAAIwoCwCAEWUBADC6Ii/KczqdKisr83cMAAgoTqez2ecsLpfL5cMsAIAAxG4oAI
ARZQEAMKIsAABGlAUAwIiyAAAYURYAAKMr8jqLK1VeXp42bdqkzp07a+3atf6Og4tw8OBBPfXUUzp06JAsFot+9rOfadSoUf6OhRZyOp26//771dDQoKamJg0ePNhrH23cWnGdRQDZvn27wsPD9fTTT1MWAaampkbfffed+vXrp2PHjmnkyJF68cUX1atXL39HQwu4XC45HA516NBB33//ve677z5NnTpVcXFx/o7mM+yGCiADBgxQVFSUv2PgEnTt2lX9+vWTJEVERKhnz56y2+1+ToWWslgs6tChgySpsbFRjY2Nslgsfk7lW5QF4GOVlZUqKytTbGysv6PgIjQ1NSkjI0MDBw7UwIED29z6oywAHzp+/Lhyc3P1zDPPKCIiwt9xcBGCgoK0Zs0abd68Wbt379aXX37p70g+RVkAPvL9998rNzdX6enpSklJ8XccXKKOHTvqjjvu0NatW/0dxacoC8AHXC6Xpk6dqp49e2r06NH+joOLVFtbq/r6eknSyZMn9eGHH6pnz55+TuVbnA0VQCZNmqTS0lIdPnxYnTt31oQJE5STk+PvWGiBHTt26P7771efPn3Urt3pv9EmTZqkQYMG+TkZWmLv3r2aMmWKmpqa5HK5lJqaqscff9zfsXyKsgAAGLEbCgBgRFkAAIwoCwCAEWUBADCiLAAARpQF8AP9+/c/6/GqVas0a9asi56nrKxMmzdvNr7u448/1qOPPnre8RtvvFEbN250jz366KP6+OOPLzoL4AmUBeAFLS2LC7HZbHr55Zc9lAi4PJQF0EKVlZV68MEHlZ6erlGjRqmqqkqS9PbbbystLU3Dhw93f+bBwoULtW7dOmVkZGjdunVyOBzKy8tTdna2MjMz9d577xmX17dvX0VGRuqDDz4457mSkhJlZmYqPT1deXl5amho8Pj7BX6IDz8CfuDkyZPKyMhwP66rq1NSUpIkKT8/X1lZWcrKytKKFSuUn5+vl156SS+99JL+9re/yWq1qr6+Xu3bt1dubq4+//xzTZ8+XZI0d+5c3XnnnZozZ47q6+uVk5OjgQMHGvOMHTtWCxYsUEJCgnvM6XRqypQpWrZsma6//no99dRTeuONN/TQQw959psB/ABbFsAPhIaGas2aNe7/fvhpaDt37lRaWpokKSMjQ5988omk08c5pkyZojfffFNNTU3nnXfbtm3661//qoyMDD3wwANyOp06ePCgMc+AAQMknb5dyBlff/21rrnmGl1//fWSpKysrLOeB7yBLQvgMs2aNUufffaZNm3apJEjR2rlypXnfd3ChQvPufncv//9b+P8Y8eO1eLFixUczI8r/IctC6CF+vfvr6KiIknSW2+9pdtuu02StH//fsXGxuqJJ57QVVddperqanXo0EHHjx93f21iYqL+/ve/68yt2Pbs2dPi5SYmJqq+vl7/+te/JEnXX3+9vv32W33zzTeSpDVr1ri3QABvoSyAFpo2bZpWrVql9PR0rVmzRlOnTpUk/fGPf1R6errS0tLUv39/9e3bV3fccYfKy8vdB7jHjRunxsZGDR8+XMOGDdOCBQsuatljx45177YKCQnRnDlz9MQTTyg9PV0Wi0W/+MUvJElTp07VP//5T8++cUDcdRYA0AJsWQAAjCgLAIARZQEAMKIsAABGlAUAwIiyAAAYURYAAKP/A2poTjxmCStdAAAAAElFTkSuQmCC\n","text/plain":["
"]},"metadata":{}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{}}]},{"cell_type":"markdown","metadata":{"id":"a74d69fd"},"source":["Now that we have analysed our data we can proceed to normalixing our data and regression"],"id":"a74d69fd"},{"cell_type":"markdown","metadata":{"id":"77d8c6ba"},"source":["### Importing useful libraries \n"],"id":"77d8c6ba"},{"cell_type":"code","metadata":{"id":"fffac537","executionInfo":{"status":"ok","timestamp":1635012398317,"user_tz":-330,"elapsed":2,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["# This Python 3 environment comes with many helpful analytics libraries installed\n","# For example, here's several helpful packages to load in\n","import numpy as np # linear algebra\n","import matplotlib.pyplot as plt # data visualization\n","import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)"],"id":"fffac537","execution_count":15,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"04cd7e4e"},"source":["### Loading the dataset \n","#### For implementation we will be using house prediction dataset . 
The dataset can be found [here](https://github.com/vrinda01go/Hellofoss/blob/main/Insti_data.csv)"],"id":"04cd7e4e"},{"cell_type":"code","metadata":{"id":"4ac8e74b","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635012403815,"user_tz":-330,"elapsed":517,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"19c614d4-a1b3-46ba-80e7-9500e5fde8c1"},"source":["data = np.array(df, dtype=float)\n","data"],"id":"4ac8e74b","execution_count":16,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[2.540e+03, 3.000e+00, 1.000e+00, 6.860e+02, 8.000e+00],\n"," [2.900e+03, 3.000e+00, 2.000e+00, 9.660e+02, 5.000e+00],\n"," [2.362e+03, 3.000e+00, 2.000e+00, 9.240e+02, 2.000e+00],\n"," ...,\n"," [1.020e+03, 3.000e+00, 2.000e+00, 1.006e+03, 3.000e+00],\n"," [2.400e+03, 2.000e+00, 2.000e+00, 9.380e+02, 1.000e+00],\n"," [9.500e+02, 3.000e+00, 2.000e+00, 1.053e+03, 2.000e+00]])"]},"metadata":{},"execution_count":16}]},{"cell_type":"markdown","metadata":{"id":"627a5a3a"},"source":["#### Since our dataset has four features, i.e. Hostel No., Occupancy, Room Size and Floor, our hypothesis function becomes\n","### hθ(x) = θ0 + θ1x1 + θ2x2 + θ3x3 + θ4x4\n","#### where x1, x2, x3 and x4 are the four features (i.e.
Hostel No., Occupancy, Room Size and Floor)"],"id":"627a5a3a"},{"cell_type":"markdown","metadata":{"id":"58fdf0fe"},"source":["### So your task is to define hypothesis function having 4 features and a corresponding cost function "],"id":"58fdf0fe"},{"cell_type":"code","metadata":{"id":"34dc5643"},"source":[" # define and complete hypothesis function "],"id":"34dc5643","execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"6fae141b"},"source":["# define and complete cost function"],"id":"6fae141b","execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"178eb11f"},"source":["### Gradient Descent \n","#### So we have our hypothesis function and we have a way of measuring how well it fits into the data. Now we need to estimate the parameters in the hypothesis function. That's where gradient descent comes in.\n","### Your next task is to define gradient descent function having some specific value of learning rate and number of epochs.\n","#### Note that learning rate should be neither very high nor very low. Why?\n","#### Check out exact reason [here](https://towardsdatascience.com/understanding-learning-rates-and-how-it-improves-performance-in-deep-learning-d0d4059c1c10)\n","\n"],"id":"178eb11f"},{"cell_type":"code","metadata":{"id":"167b5bdc"},"source":["#define and complete Gradient Descent function "],"id":"167b5bdc","execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"724e407a"},"source":["### Now we want to visualize how our cost function varies with number of epochs. So your next task is to plot graph of updated costs vs number of epochs "],"id":"724e407a"},{"cell_type":"markdown","metadata":{"id":"e82f6ebf"},"source":["#### After plotting above graph you will notice that your cost function decreases with epochs.\n","#### Perfect! This is exactly what we wanted to achieve by doing linear regression. \n","\n","#### Now it's time to test our model on some test data.
\n","\n","#### For this you will define a test function that will take as input Hostel No., Occupancy, Room Size, Floor and the final theta vector that was returned by our linear regression model and will give us the price of the room. Compute it for any set of features given and final value of theta as given by gradient descent function"],"id":"e82f6ebf"},{"cell_type":"code","metadata":{"id":"c522bca3"},"source":["# define and complete test function that will take required inputs .This function should return price of Room "],"id":"c522bca3","execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"947f35b1"},"source":["#### Now that we have defined all required functions, we can call functions one by one and get our final results.\n","#### Your final task is to use all functions defined above and predict the price of room for some input combinations to check how well your model works."],"id":"947f35b1"},{"cell_type":"markdown","metadata":{"id":"06e54ef5"},"source":["#### You can try playing with different values of alpha and epochs and see which combination gives most accurate results but do look out for overfitting \n"],"id":"06e54ef5"},{"cell_type":"code","metadata":{"id":"3fc631fd"},"source":[""],"id":"3fc631fd","execution_count":null,"outputs":[]}]} \ No newline at end of file From ba19cd4344225e58169b9a23d9b627a6b66539fb Mon Sep 17 00:00:00 2001 From: Mani9550 <93199718+Mani9550@users.noreply.github.com> Date: Wed, 27 Oct 2021 15:44:45 +0530 Subject: [PATCH 02/11] Added quiz file and renamed it --- MCQ's.md => MCQ's_203174002.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) rename MCQ's.md => MCQ's_203174002.md (85%) diff --git a/MCQ's.md b/MCQ's_203174002.md similarity index 85% rename from MCQ's.md rename to MCQ's_203174002.md index b84bcd4..f450220 100644 --- a/MCQ's.md +++ b/MCQ's_203174002.md @@ -8,37 +8,37 @@ Change the markdown file for submission of the quiz - Suppose you are working on weather
prediction and use a learning algorithm to predict tomorrow's temperature . What kind of problem would that be? - [ ] Classification - - [ ] Regression + - [X] Regression - Suppose that you have trained a logistic regression classifier, and it outputs on a new example x a prediction hθ​ (x) = 0.4. This means (check all that apply): - - [ ] Our estimate for P(y = 0| x,θ) = 0.6 - - [ ] Our estimate for P(y = 1| x,θ) = 0.4 + - [X] Our estimate for P(y = 0| x,θ) = 0.6 + - [X] Our estimate for P(y = 1| x,θ) = 0.4 - [ ] Our estimate for P(y = 1| x,θ) = 0.6 - [ ] Our estimate for P(y = 0| x,θ) = 0.4 - Which of the following are reasons for using feature scaling? - - [ ] It speeds up gradient descent by making it require fewer iterations to get to a good solution. + - [X] It speeds up gradient descent by making it require fewer iterations to get to a good solution. - [ ] It speeds up solving for θ using the normal equation. - [ ] It prevents the matrix XTX (used in the normal equation) from being non-invertable (singular/degenerate). - [ ] It is necessary to prevent gradient descent from getting stuck in local optima. - Which of the following statements are true? Check all that apply. - - [ ] The cost function J(θ) for logistic regression trained with m≥1 examples is always greater than or equal to zero. - - [ ] The sigmoid function g(z)=1/1+e^−z is never greater than one (>1). + - [X] The cost function J(θ) for logistic regression trained with m≥1 examples is always greater than or equal to zero. + - [X] The sigmoid function g(z)=1/1+e^−z is never greater than one (>1). - [ ] For logistic regression, sometimes gradient descent will converge to a local minimum (and fail to find the global minimum). - [ ] Linear regression always works well for classification if you classify by using a threshold on the prediction made by linear regression. - KNN algorithm does more computation on test time rather than train time. 
- - [ ] True + - [X] True - [ ] False - Which of the following distance metric can not be used in KNN? - [ ] Manhattan - [ ] Minkowski - [ ] Euclidean - - [ ] All of them can be used + - [X] All of them can be used - Which of the following machine learning algorithm can be used for imputing missing values of both categorical and continuous variables? - - [ ] KNN + - [X] KNN - [ ] Logistic Regression - [ ] Linear Regression - Suppose, you have given the following data where x and y are the 2 input variables and Class is the dependent variable. You want to predict the class of new data point x=1 and y=1 using eucledian distance in 3-NN. In which class this data point belong to? ![image](https://user-images.githubusercontent.com/73156496/138315461-b7127e2a-aa76-48bc-9934-53ed4e6d4166.png) - - [ ] + class + - [X] + class - [ ] - class - [ ] Can't Say From 9f89066fe2a152788653888836f692afdc4f6188 Mon Sep 17 00:00:00 2001 From: Mani9550 <93199718+Mani9550@users.noreply.github.com> Date: Fri, 29 Oct 2021 00:11:04 +0530 Subject: [PATCH 03/11] Delete Linear_Regression_Task2_203174002.ipynb --- Linear_Regression_Task2_203174002.ipynb | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Linear_Regression_Task2_203174002.ipynb diff --git a/Linear_Regression_Task2_203174002.ipynb b/Linear_Regression_Task2_203174002.ipynb deleted file mode 100644 index c7b98ce..0000000 --- a/Linear_Regression_Task2_203174002.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":5,"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.8"},"colab":{"name":"Linear_Regression_Task2_203174002.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"89223f98"},"source":["\n","\n","```\n","Import 
libraries\n","```\n","\n","### Importing useful libraries \n"],"id":"89223f98"},{"cell_type":"code","metadata":{"id":"26f77ebe","executionInfo":{"status":"ok","timestamp":1635012283386,"user_tz":-330,"elapsed":1123,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["# This Python 3 environment comes with many helpful analytics libraries installed\n","# For example, here's several helpful packages to load in\n","import numpy as np # linear algebra\n","import matplotlib.pyplot as plt # data visualization\n","import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n","import seaborn as sns"],"id":"26f77ebe","execution_count":1,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"31c8220d"},"source":["### Loading the dataset \n","#### For implementation we will be using house prediction dataset . The dataset can be found [here](https://github.com/vrinda01go/Hellofoss/blob/main/Room_price_data.csv)"],"id":"31c8220d"},{"cell_type":"code","metadata":{"id":"1c5d873a","executionInfo":{"status":"ok","timestamp":1635012317184,"user_tz":-330,"elapsed":552,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["df = pd.read_csv(\"Hostel_Linear-Dataset.csv\") #import text file \n"],"id":"1c5d873a","execution_count":3,"outputs":[]},{"cell_type":"code","metadata":{"id":"1ca9aba0","colab":{"base_uri":"https://localhost:8080/","height":205},"executionInfo":{"status":"ok","timestamp":1635012321185,"user_tz":-330,"elapsed":524,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"50a85b1a-cfee-4f7b-9ac1-5134a16822e9"},"source":["df.head()"],"id":"1ca9aba0","execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
PriceHostel No.OccupancyRoom SizeFloor
02540.0316868
12900.0329665
2NaN317888
32362.0329242
4NaN3210985
\n","
"],"text/plain":[" Price Hostel No. Occupancy Room Size Floor\n","0 2540.0 3 1 686 8\n","1 2900.0 3 2 966 5\n","2 NaN 3 1 788 8\n","3 2362.0 3 2 924 2\n","4 NaN 3 2 1098 5"]},"metadata":{},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"af08f245"},"source":["# Visualizing and Cleaning the data\n","\n","We will now be removing the nan values and identical values from the dataset\n","\n","For seeing if there are nan values in the dataset we will use the isna() function and then to remove them we will use the dropna() function. We will need to set additional parameters like rows and columns in the dropna function depending on the number of nan values present for each column\n","\n","Using the sum() function with isna() function we can get to know the number of missing values in each column"],"id":"af08f245"},{"cell_type":"code","metadata":{"id":"2fd4babb","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635012323753,"user_tz":-330,"elapsed":428,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"dd94b5ef-188f-4c3a-aec4-fe91cdc6a86d"},"source":["df.isna().sum()"],"id":"2fd4babb","execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Price 1531\n","Hostel No. 0\n","Occupancy 0\n","Room Size 0\n","Floor 0\n","dtype: int64"]},"metadata":{},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"83ef03c3"},"source":["After this we will proceed to remove the nan values \n","\n","Since there are not many nan values in the column 'Price' as compared to the number of rows we will remove the rows which have nan values. 
\n","\n","Reseting the index after removing the nan values and dropping the old index will also be important"],"id":"83ef03c3"},{"cell_type":"code","metadata":{"id":"b65e4503","executionInfo":{"status":"ok","timestamp":1635012326744,"user_tz":-330,"elapsed":459,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["df = df.dropna(subset = ['Price'],how= 'any')\n","df = df.reset_index(drop = True)\n","## df.isna().sum()"],"id":"b65e4503","execution_count":6,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"40784889"},"source":["Now we can use the drop_duplicate function to remove the duplicate values\n","\n","This function has a parameter calle 'keep' where we specifiy to drop and which value to keep\n","\n","For this excercise we will keep the first values and drop the rest of the duplicates"],"id":"40784889"},{"cell_type":"code","metadata":{"id":"75fa3dc8","executionInfo":{"status":"ok","timestamp":1635012329999,"user_tz":-330,"elapsed":425,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["df = df.drop_duplicates(keep = 'first')\n","df = df.reset_index(drop = True)\n","## df.duplicated().sum()"],"id":"75fa3dc8","execution_count":7,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"a007a33f"},"source":["For visualizing the data we will first start with looking at the distribution of different columns to see if there are enough number for each category in every column and dropping them if the data is biased for one category more than the other"],"id":"a007a33f"},{"cell_type":"code","metadata":{"id":"b325df62","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1635012366549,"user_tz":-330,"elapsed":1239,"user":{"displayName":"Desu Venkata 
Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"cf878a76-66d3-4e68-f009-819d724a4eae"},"source":["columns = df.columns\n","for column in columns:\n"," if(column== 'Price' or column=='Room Size'): \n"," continue\n"," fig = plt.figure(figsize=(5,5))\n"," ax = fig.gca()\n"," counts = df[column].value_counts()\n"," counts.plot.bar(ax = ax, color='blue')\n"," ax.set_title('No of rooms '+ column)\n"," ax.set_xlabel(column)\n"," ax.set_ylabel(\"No of rooms\")\n"," plt.show()"],"id":"b325df62","execution_count":9,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"9811a731"},"source":["We can clearly notice that for the Occupancy column the (occupancy) = 4 has a really low set of data points as compared to others. Hence we can proceed in dropping those rows where the occupancy is 4"],"id":"9811a731"},{"cell_type":"code","metadata":{"id":"825783c0","executionInfo":{"status":"ok","timestamp":1635012377084,"user_tz":-330,"elapsed":615,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["df = df[df['Occupancy'] != 4]\n","df = df.reset_index(drop= True)"],"id":"825783c0","execution_count":10,"outputs":[]},{"cell_type":"code","metadata":{"id":"30c64310","colab":{"base_uri":"https://localhost:8080/","height":205},"executionInfo":{"status":"ok","timestamp":1635012378828,"user_tz":-330,"elapsed":9,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"bbb5d4dc-f7e5-4b8e-a248-6ecbd09568da"},"source":["df.head()"],"id":"30c64310","execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
PriceHostel No.OccupancyRoom SizeFloor
02540.0316868
12900.0329665
22362.0329242
31432.0217063
41702.02210383
\n","
"],"text/plain":[" Price Hostel No. Occupancy Room Size Floor\n","0 2540.0 3 1 686 8\n","1 2900.0 3 2 966 5\n","2 2362.0 3 2 924 2\n","3 1432.0 2 1 706 3\n","4 1702.0 2 2 1038 3"]},"metadata":{},"execution_count":11}]},{"cell_type":"markdown","metadata":{"id":"f333875b"},"source":["We will now write the columns between categorical and numerical\n","\n","categorical = Hostel No, occupancy, floor\n","\n","Numerical = price, occupancy, roomsize, floor, hostel No.\n","\n","Remember that we can treat Hostel Number and occupancy as numerical or categorical. For this notebook we will treat them as categorical for data visualization and numerical for the regression"],"id":"f333875b"},{"cell_type":"markdown","metadata":{"id":"0f34ca6a"},"source":["We will also plot the scatter plots and the correlation map to analyse the relation ships between different numerical columns"],"id":"0f34ca6a"},{"cell_type":"code","metadata":{"scrolled":false,"id":"f4a3ab6e","executionInfo":{"status":"ok","timestamp":1635012383025,"user_tz":-330,"elapsed":517,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["categorical = ['Hostel No.', 'Occupancy', 'Floor']\n","numerical = [ 'Price', 'Room Size']"],"id":"f4a3ab6e","execution_count":12,"outputs":[]},{"cell_type":"code","metadata":{"id":"df2b588a","colab":{"base_uri":"https://localhost:8080/","height":791},"executionInfo":{"status":"ok","timestamp":1635012386787,"user_tz":-330,"elapsed":1202,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"c61015b6-8eb0-4d49-cf99-2e50e4e7bd51"},"source":["for column1 in numerical:\n"," for column2 in numerical:\n"," if(column1 != column2):\n"," fig = plt.figure(figsize=(6,6))\n"," ax = fig.gca()\n"," df.plot.scatter(x=column1,y=column2,ax = ax)\n"," ax.set_title('Scatter plot of '+ column1 + ' vs ' 
+ column2)\n"," ax.set_xlabel(column1)\n"," ax.set_ylabel(column2)\n"," plt.show()"],"id":"df2b588a","execution_count":13,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"625b89a8"},"source":["We can notice that there are no linear relation present between the numerical columns. Hence no need to drop anything"],"id":"625b89a8"},{"cell_type":"markdown","metadata":{"id":"8a7f95fa"},"source":["Now we will plot box plots of categorical and numerical columns to get more information about the number of outliers and the distrubtion."],"id":"8a7f95fa"},{"cell_type":"code","metadata":{"id":"e73f0769","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1635012395663,"user_tz":-330,"elapsed":1890,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"ed580d37-65dc-4b57-867d-df02ac8fdeea"},"source":["for c in categorical:\n"," for n in numerical:\n"," sns.set_style(\"whitegrid\")\n"," sns.boxplot(x= c, y= n, data=df)\n"," plt.xlabel(c)\n"," plt.ylabel(n)\n"," plt.show()\n"," "],"id":"e73f0769","execution_count":14,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{}},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{}}]},{"cell_type":"markdown","metadata":{"id":"a74d69fd"},"source":["Now that we have analysed our data we can proceed to normalixing our data and regression"],"id":"a74d69fd"},{"cell_type":"markdown","metadata":{"id":"77d8c6ba"},"source":["### Importing useful libraries \n"],"id":"77d8c6ba"},{"cell_type":"code","metadata":{"id":"fffac537","executionInfo":{"status":"ok","timestamp":1635012398317,"user_tz":-330,"elapsed":2,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["# This Python 3 environment comes with many helpful analytics libraries installed\n","# For example, here's several helpful packages to load in\n","import numpy as np # linear algebra\n","import matplotlib.pyplot as plt # data visualization\n","import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)"],"id":"fffac537","execution_count":15,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"04cd7e4e"},"source":["### Loading the dataset \n","#### For implementation we will be using house prediction dataset . 
The dataset can be found [here](https://github.com/vrinda01go/Hellofoss/blob/main/Insti_data.csv)"],"id":"04cd7e4e"},{"cell_type":"code","metadata":{"id":"4ac8e74b","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635012403815,"user_tz":-330,"elapsed":517,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"19c614d4-a1b3-46ba-80e7-9500e5fde8c1"},"source":["data = np.array(df, dtype=float)\n","data"],"id":"4ac8e74b","execution_count":16,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[2.540e+03, 3.000e+00, 1.000e+00, 6.860e+02, 8.000e+00],\n"," [2.900e+03, 3.000e+00, 2.000e+00, 9.660e+02, 5.000e+00],\n"," [2.362e+03, 3.000e+00, 2.000e+00, 9.240e+02, 2.000e+00],\n"," ...,\n"," [1.020e+03, 3.000e+00, 2.000e+00, 1.006e+03, 3.000e+00],\n"," [2.400e+03, 2.000e+00, 2.000e+00, 9.380e+02, 1.000e+00],\n"," [9.500e+02, 3.000e+00, 2.000e+00, 1.053e+03, 2.000e+00]])"]},"metadata":{},"execution_count":16}]},{"cell_type":"markdown","metadata":{"id":"627a5a3a"},"source":["#### Since our dataset has four features i.e Hostel No. , Occupancy, Room Size and Floor ,our hypothesis function becomes\n","### hθ(x) = θ0 + θ1x1 + θ2x2 +θ3x3 + θ4x4\n","#### where x1 ,x2,x3 and x4 are the two features (i.e. 
size of house and number of rooms)"],"id":"627a5a3a"},{"cell_type":"markdown","metadata":{"id":"58fdf0fe"},"source":["### So Your task is to define hypothesis function having 4 features and a corresponding cost function "],"id":"58fdf0fe"},{"cell_type":"code","metadata":{"id":"34dc5643"},"source":[" # define and complete hypothesis function "],"id":"34dc5643","execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"6fae141b"},"source":["# define and complete cost function"],"id":"6fae141b","execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"178eb11f"},"source":["### Gradient Descent \n","#### So we have our hypothesis function and we have a way of measuring how well it fits into the data. Now we need to estimate the parameters in the hypothesis function. That's where gradient descent comes in.\n","### Your next task is to define gradient descent function having some specific value of learning rate and number of epochs.\n","#### Note that learning rate should be neither very high nor very low .Why?\n","#### Check out exact reason [here](https://towardsdatascience.com/understanding-learning-rates-and-how-it-improves-performance-in-deep-learning-d0d4059c1c10)\n","\n"],"id":"178eb11f"},{"cell_type":"code","metadata":{"id":"167b5bdc"},"source":["#define and complete Gradient Descent function "],"id":"167b5bdc","execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"724e407a"},"source":["### Now we want to visualize how our cost function varies with number of epochs .So your next task is to plot graph of updated costs vs number of epochs "],"id":"724e407a"},{"cell_type":"markdown","metadata":{"id":"e82f6ebf"},"source":["#### After plotting above graph you will notice that your cost function decreases with epochs.\n","#### Perfect! This is all what we wanted to seek by doing linear regression. \n","\n","#### Now it's time to test our model on some test data. 
\n","\n","#### For this you will define a test function that will take as input Hostel No. , Occupancy, Room Size , Floor and the final theta vector that was returned by our linear regression model and will give us the price of the house. Compute it for any set of features given and final value of theta as given by gradient descent function"],"id":"e82f6ebf"},{"cell_type":"code","metadata":{"id":"c522bca3"},"source":["# define and complete test function that will take required inputs .This function should return price of Room "],"id":"c522bca3","execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"947f35b1"},"source":["#### Now since we have defined all required functions , we can call functions one by one and get our final results .\n","#### Your final task is to use all functions defined above and predict the price of room for some input combinations to check how well your model works."],"id":"947f35b1"},{"cell_type":"markdown","metadata":{"id":"06e54ef5"},"source":["#### You can try playing with different values of alpha and epochs and see which combination gives most accurate results but do lookout for overfitting \n"],"id":"06e54ef5"},{"cell_type":"code","metadata":{"id":"3fc631fd"},"source":[""],"id":"3fc631fd","execution_count":null,"outputs":[]}]} \ No newline at end of file From 8e8e0350effd10948ca7deef9795019ad65abf3c Mon Sep 17 00:00:00 2001 From: Mani9550 <93199718+Mani9550@users.noreply.github.com> Date: Fri, 29 Oct 2021 00:11:21 +0530 Subject: [PATCH 04/11] Delete KNN_Task4 _203174002.ipynb --- KNN_Task4 _203174002.ipynb | 1 - 1 file changed, 1 deletion(-) delete mode 100644 KNN_Task4 _203174002.ipynb diff --git a/KNN_Task4 _203174002.ipynb b/KNN_Task4 _203174002.ipynb deleted file mode 100644 index 5dc9eee..0000000 --- a/KNN_Task4 _203174002.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"KNN_Task4 _203174002.ipynb","provenance":[]},"kernelspec":{"display_name":"Python 
3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.8"}},"cells":[{"cell_type":"markdown","metadata":{"id":"cPP7BfqFSgyH"},"source":["# K-Nearest Neighbors Algorithm\n"]},{"cell_type":"markdown","metadata":{"id":"Zd0p7ZUpSgyL"},"source":["![example 1.png]()In this Jupyter Notebook we will focus on $KNN-Algorithm$. KNN is a data classification algorithm that attempts to determine what group a data point is in by looking at the data points around it.\n","\n","An algorithm, looking at one point on a grid, trying to determine if a point is in group A or B, looks at the states of the points that are near it. The range is arbitrarily determined, but the point is to take a sample of the data. If the majority of the points are in group A, then it is likely that the data point in question will be A rather than B, and vice versa.\n","
\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"TyGHDf4NSgyM"},"source":["# Imports"]},{"cell_type":"code","metadata":{"id":"iIEvA0xjSgyN","executionInfo":{"status":"ok","timestamp":1635250818719,"user_tz":-330,"elapsed":418,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["import numpy as np\n","from tqdm import tqdm_notebook"],"execution_count":5,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rc8ruF56SgyO"},"source":["# How it works?\n","\n","We have some labeled data set $X_{train}$, and a new set $X$ that we want to classify based on previous classifications\n","\n"]},{"cell_type":"markdown","metadata":{"id":"rGbvEXbvSgyO"},"source":["## Steps"]},{"cell_type":"markdown","metadata":{"id":"B-nf9G4ZSgyP"},"source":["### 1. Calculate distance to all neighbours\n","### 2. Sort neighbours (based on closest distance)\n","### 3. Count possibilities of each class for k nearest neighbours \n","### 4. The class with highest possibility is your prediction"]},{"cell_type":"markdown","metadata":{"id":"LuWwKdFrSgyP"},"source":["# 1. Calculate distance to all neighbours\n","\n","Depending on the problem, you should use a different method of computing distance.\n","
\n","For example we can use Euclidean distance. Euclidean distance is the \"ordinary\" straight-line distance between two points in D-dimensional space\n","\n","#### Definition\n","$d(p, q) = d(q, p) = \\sqrt{(q_1 - p_1)^2 + (q_2 - p_2)^2 + \\dots + (q_D - p_D)^2} = \\sqrt{\\sum_{d=1}^{D} (p_d - q_d)^2}$\n","\n","#### Example\n","Distance in $R^2$\n","\n","\n","\n","$p = (4,6)$\n","
\n","$q = (1,2)$\n","
\n","$d(p, q) = \\sqrt{(1-4)^2 + (2-6)^2} =\\sqrt{9 + 16} = \\sqrt{25} = 5 $\n","\n"]},{"cell_type":"markdown","metadata":{"id":"vlvNZqiJSgyQ"},"source":["## Code"]},{"cell_type":"code","metadata":{"id":"EvmQi6nsSgyR","executionInfo":{"status":"ok","timestamp":1635250723579,"user_tz":-330,"elapsed":1651,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["def get_euclidean_distance(A_matrix, B_matrix):\n"," \n"," C = [ [ 0 for i in range(np.size(B_matrix, 0)) ] for j in range(np.size(A_matrix, 0)) ]\n"," \n"," for i in range (0, np.size(A_matrix, 0)):\n"," row1 = A_matrix[i,:]\n"," for j in range (0, np.size(B_matrix, 0)):\n"," row2 = B_matrix[j,:]\n"," \n"," C[i][j] = np.sum(np.square(row1 - row2))\n"," \n"," ## Use the distance formula for the matrices using numpy functions\n"," ## C is the sum of the squares of the distances\n","\n"," return np.sqrt(C)\n"],"execution_count":1,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"GABzTa_0SgyS"},"source":["## Example Usage"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"W6b8yBSoSgyS","executionInfo":{"status":"ok","timestamp":1635250824471,"user_tz":-330,"elapsed":411,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"966f10e6-5429-4f7e-e70f-50df76e1b2ca"},"source":["X = np.array([[1,2,3] , [-4,5,-6]])\n","\n","X_train = np.array([[0,0,0], [1,2,3], [4,5,6], [-4, 4, -6]])\n","\n","print(\"X: {} Exaples in {} Dimensional space\".format(*X.shape))\n","print(\"X_train: {} Exaples in {} Dimensional space\".format(*X_train.shape))\n","\n","\n","print()\n","\n","print(\"X:\")\n","print(X)\n","\n","print()\n","\n","print(\"X_train\")\n","print(X_train)\n"],"execution_count":6,"outputs":[{"output_type":"stream","name":"stdout","text":["X: 2 Exaples in 3 Dimensional 
space\n","X_train: 4 Exaples in 3 Dimensional space\n","\n","X:\n","[[ 1 2 3]\n"," [-4 5 -6]]\n","\n","X_train\n","[[ 0 0 0]\n"," [ 1 2 3]\n"," [ 4 5 6]\n"," [-4 4 -6]]\n"]}]},{"cell_type":"code","metadata":{"id":"kB8IZcDpSgyT","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635250828586,"user_tz":-330,"elapsed":423,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"e48c7a0f-c233-44a7-baa1-66354fc59437"},"source":["## Initialize the distance matrix using the get_euclidean_matrix\n","\n","C = get_euclidean_distance(X, X_train)\n","\n","## Euclidean distance b/w row i of X and row j of X_train is available as C[i][j]\n","\n","\n","## Print Distance between first example from X and first form X_train\n","print(f\"Distance between first example from X and first form X_train {C[0,0]}\")"],"execution_count":7,"outputs":[{"output_type":"stream","name":"stdout","text":["Distance between first example from X and first form X_train 3.7416573867739413\n"]}]},{"cell_type":"markdown","metadata":{"id":"vbaJfBihSgyT"},"source":["# 2. 
Sort neightbours\n","\n","In order to find best fitting class for our observations we need to find to which classes belong observation neightbours and then to sort classes based on the closest distance\n"]},{"cell_type":"markdown","metadata":{"id":"b1VLHUj2SgyU"},"source":["## Code"]},{"cell_type":"code","metadata":{"id":"na0G1o_ASgyU"},"source":["def get_sorted_train_labels(distance_matrix, y):\n"," \"\"\"\n"," Function sorts y labels, based on probabilities from distances matrix\n"," Args:\n"," distance_matrix (numpy.ndarray): Distance Matrix, between points from X and X_train, size: N1:N2\n"," y (numpy.ndarray): vector of classes of X points, size: N1\n","\n"," Returns:\n"," numpy.ndarray: labels matrix sorted according to distances to nearest neightours, size N1:N2 \n","\n"," \"\"\"\n"," \n"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"U0I8eltDSgyV"},"source":["# 3. Count possibilities of each class for k nearest neighbours \n","\n","In order to find best class for our observation $x$ we need to calculate the probability of belonging to each class. In our case it is quite easy. We need just to count how many from k-nearest-neighbours of observation $x$ belong to each class and then devide it by k \n","

\n","$p(y=class \\space| x) = \\frac{\\sum_{1}^{k}(1 \\space if \\space N_i = class, \\space else \\space 0) }{k}$ Where $N_i$ is $i$ nearest neightbour\n","\n"]},{"cell_type":"markdown","metadata":{"id":"j0ZtOC38SgyV"},"source":["## Code"]},{"cell_type":"code","metadata":{"id":"y2aaG2GdSgyV"},"source":["def get_p_y_x_using_knn(y, k):\n"," \"\"\"\n"," The function determines the probability distribution p (y | x)\n"," for each of the labels for objects from the X\n"," using the KNN classification learned on the X_train\n","\n"," Args:\n"," y (numpy.ndarray): Sorted matrix of N2 nearest neighbours labels, size N1:N2\n"," k (int): number of nearest neighbours for KNN algorithm\n","\n"," Returns: numpy.ndarray: Matrix of probabilities for N1 points (from set X) of belonging to each class,\n"," size N1:C (where C is number of classes)\n"," \"\"\"\n","\n"," ## Write your code here\n","\n"," return probabilities_matrix\n"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"ThEbAnXISgyW"},"source":["# 4. 
The class with highest possibilty is Your prediction"]},{"cell_type":"markdown","metadata":{"id":"_i7NTtN4SgyW"},"source":["At the end we combine all previous steps to get prediction"]},{"cell_type":"markdown","metadata":{"id":"OzK6rY8mSgyW"},"source":["## Code"]},{"cell_type":"code","metadata":{"id":"DaYqr_i6SgyW","executionInfo":{"status":"ok","timestamp":1635250858682,"user_tz":-330,"elapsed":471,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["def predict(X, X_train, y_train, k, distance_function):\n"," \"\"\"\n"," Function returns predictions for new set X based on labels of points from X_train\n"," Args:\n"," X (numpy.ndarray): set of observations (points) that we want to label\n"," X_train (numpy.ndarray): set of lalabeld bservations (points)\n"," y_train (numpy.ndarray): labels for X_train\n"," k (int): number of nearest neighbours for KNN algorithm\n","\n"," Returns:\n"," (numpy.ndarray): label predictions for points from set X\n"," \"\"\"\n"," ## Write your code here\n","\n"," return prediction"],"execution_count":9,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"i9kzyASWSgyX"},"source":["# Accuracy"]},{"cell_type":"markdown","metadata":{"id":"v8bNPTPZSgyX"},"source":["To find how good our knn model works we should count accuracy"]},{"cell_type":"markdown","metadata":{"id":"dgFCnJ14SgyX"},"source":["## Code"]},{"cell_type":"code","metadata":{"id":"2ySpyThlSgyX"},"source":["def count_accuracy(prediction, y_true):\n"," \"\"\"\n"," Returns:\n"," float: Predictions accuracy\n","\n"," \"\"\"\n"," N1 = prediction.shape[0]\n"," \n"," ## Use np.sum to count the number of elements where predicted value == actual value and assign the count to the variable accuracy\n","\n"," return accuracy"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"b5g7YFY2SgyX"},"source":["## Example 
usage"]},{"cell_type":"code","metadata":{"id":"uLqCqmJNSgyY","colab":{"base_uri":"https://localhost:8080/","height":235},"executionInfo":{"status":"error","timestamp":1635250842268,"user_tz":-330,"elapsed":449,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"dcf624ee-b959-4577-b370-464112163929"},"source":["y_true = np.array([[0, 2]])\n","\n","predicton = predict(X, X_train, y_train, 3, get_euclidean_distance)\n","\n","\n","print(\"True classes:{}, accuracy {}%\".format(y_true, count_accuracy(predicton, y_true) * 100))"],"execution_count":8,"outputs":[{"output_type":"error","ename":"NameError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0my_true\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mpredicton\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mget_euclidean_distance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mNameError\u001b[0m: name 'predict' is not defined"]}]},{"cell_type":"markdown","metadata":{"id":"--WUpIcxSgyY"},"source":["# Find best k"]},{"cell_type":"markdown","metadata":{"id":"itkcD0DlSgyY"},"source":["Best k parameter is that one for which we have highest accuracy"]},{"cell_type":"markdown","metadata":{"id":"7GYEUBnnSgyY"},"source":["## Code"]},{"cell_type":"code","metadata":{"id":"Q6OhNBOoSgyY","executionInfo":{"status":"ok","timestamp":1635250862606,"user_tz":-330,"elapsed":413,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["def select_knn_model(X_validation, y_validation, X_train, y_train, k_values, distance_function):\n"," \"\"\"\n"," Function returns k parameter that best fit Xval points\n"," Args:\n"," Xval (numpy.ndarray): set of Validation Data, size N1:D\n"," Xtrain (numpy.ndarray): set of Training Data, size N2:D\n"," yval (numpy.ndarray): set of labels for Validation data, size N1:1\n"," ytrain (numpy.ndarray): set of labels for Training Data, size N2:1\n"," k_values (list): list of int values of k parameter that should be checked\n","\n"," Returns:\n"," int: k paprameter that best fit validation set\n"," \"\"\"\n","\n"," accuracies = []\n","\n"," for k in tqdm_notebook(k_values):\n"," prediction = predict(X_validation, X_train, y_train, k, distance_function)\n","\n"," accuracy = count_accuracy(prediction, y_validation)\n"," accuracies.append(accuracy)\n","\n"," best_k = k_values[accuracies.index(max(accuracies))]\n","\n"," return best_k, accuracies\n"],"execution_count":10,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"nGtIjD0WSgyY"},"source":["# Real World Example - Iris Dataset"]},{"cell_type":"markdown","metadata":{"id":"-o6MHMtKSgyZ"},"source":["\n","\n","\n","\n","This is perhaps the best known database to be found in the pattern recognition literature. 
The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other. \n","\n","Each example contains 4 attributes\n","1. sepal length in cm \n","2. sepal width in cm \n","3. petal length in cm \n","4. petal width in cm \n","\n","Predicted attribute: class of iris plant. \n","\n","\n","\n","\n","\n","\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"SY8oOngQSgyZ","executionInfo":{"status":"ok","timestamp":1635250867474,"user_tz":-330,"elapsed":414,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"270c2090-4cc4-43c8-dd20-5ed2670e0067"},"source":["from sklearn import datasets\n","import matplotlib.pyplot as plt\n","\n","iris = datasets.load_iris()\n","\n","iris_X = iris.data\n","iris_y = iris.target\n","\n","print(\"Iris: {} examples in {} dimensional space\".format(*iris_X.shape))\n","print(\"First example in dataset :\\n Speal lenght: {}cm \\n Speal width: {}cm \\n Petal length: {}cm \\n Petal width: {}cm\".format(*iris_X[0]))\n","\n","print(\"Avalible classes\", np.unique(iris_y))"],"execution_count":11,"outputs":[{"output_type":"stream","name":"stdout","text":["Iris: 150 examples in 4 dimensional space\n","First example in dataset :\n"," Speal lenght: 5.1cm \n"," Speal width: 3.5cm \n"," Petal length: 1.4cm \n"," Petal width: 0.2cm\n","Avalible classes [0 1 2]\n"]}]},{"cell_type":"markdown","metadata":{"id":"-IlKSX7hSgyZ"},"source":["## Prepare Data\n","\n","Our data set has 150 examples (50 of each class), which we have to divide into 3 datasets.\n","1. Training data set, 90 examples. It will be used to find the k nearest neighbours\n","2. Validation data set, 30 examples. It will be used to find the best k parameter, the one for which accuracy is highest\n","3. 
Test data set, 30 examples. It will be used to check how good our model performs\n","\n","Data has to be shuffled (mixed in random order), because originally it is stored 50 examples of class 0, 50 of 1 and 50 of 2.\n"]},{"cell_type":"code","metadata":{"id":"RA1Q7kCPSgyZ","executionInfo":{"status":"ok","timestamp":1635250871691,"user_tz":-330,"elapsed":418,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}}},"source":["from sklearn.utils import shuffle\n","\n","iris_X, iris_y = shuffle(iris_X, iris_y, random_state=134)\n","\n","\n","test_size = 30\n","validation_size = 30\n","training_size = 90\n","\n","## Initialize X_test\n","## Initialize X_validation \n","## Initialize X_train \n","\n","## Initialize y_test\n","## Initialize y_validation\n","## Initialize y_train"],"execution_count":12,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"r9xJVLzrSgyZ"},"source":["## Find best k parameter"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":235},"id":"hbvZBVNBSgya","executionInfo":{"status":"error","timestamp":1635250875803,"user_tz":-330,"elapsed":430,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"f62b15f9-7fca-4789-bac2-2db5cbbcd8c0"},"source":["k_values = [i for i in range(3,50)]\n","\n","best_k, accuracies = select_knn_model(X_validation, y_validation, X_train, y_train, k_values, distance_function=get_euclidean_distance)\n","\n","## Plot accuracy vs k values graph"],"execution_count":13,"outputs":[{"output_type":"error","ename":"NameError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 
1\u001b[0m \u001b[0mk_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mbest_k\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccuracies\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mselect_knn_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_validation\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_validation\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk_values\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdistance_function\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mget_euclidean_distance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m## Plot accuracy vs k values graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mNameError\u001b[0m: name 'X_validation' is not defined"]}]},{"cell_type":"markdown","metadata":{"id":"BjQBDWJMSgya"},"source":["## Count accuracy for test set"]},{"cell_type":"code","metadata":{"id":"_f-J5sSESgya","colab":{"base_uri":"https://localhost:8080/","height":201},"executionInfo":{"status":"error","timestamp":1635250882340,"user_tz":-330,"elapsed":434,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"d0dfb811-a65e-472c-bdc7-fad839ccc488"},"source":["prediction = predict(X_test, X_train, y_train, best_k, get_euclidean_distance)\n","\n","## Calculate Best accuracy using the best k 
value\n"],"execution_count":14,"outputs":[{"output_type":"error","ename":"NameError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprediction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbest_k\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mget_euclidean_distance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m## Calculate Best accuracy using the best k value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mNameError\u001b[0m: name 'X_test' is not defined"]}]},{"cell_type":"markdown","metadata":{"id":"72O5eXbCSgyc"},"source":["# Sources\n","\n","https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm - first visualisation image\n","\n","https://en.wikipedia.org/wiki/Euclidean_distance - euclidean distance visualisation\n","\n","https://rajritvikblog.wordpress.com/2017/06/29/iris-dataset-analysis-python/ - first iris image\n","\n","https://rpubs.com/wjholst/322258 - second iris image\n","\n"]}]} \ No newline at end of file From c2045dbcb1f52241082186a4de77d69394026e77 Mon Sep 17 00:00:00 2001 From: Mani9550 <93199718+Mani9550@users.noreply.github.com> Date: Fri, 29 Oct 2021 00:11:34 +0530 Subject: [PATCH 05/11] Delete Classification_Task3_203174002.ipynb --- Classification_Task3_203174002.ipynb | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Classification_Task3_203174002.ipynb diff --git a/Classification_Task3_203174002.ipynb 
b/Classification_Task3_203174002.ipynb deleted file mode 100644 index 1d8640a..0000000 --- a/Classification_Task3_203174002.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Classification_Task3_203174002.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.8"}},"cells":[{"cell_type":"markdown","metadata":{"id":"7C5rAxwPGDQf"},"source":["# Importing useful libraries"]},{"cell_type":"code","metadata":{"id":"8qvrslgsF4Mn"},"source":["import numpy as np\n","import pandas as pd \n","from pandas import Series, DataFrame\n","\n","import seaborn as sns\n","import matplotlib.pyplot as plt\n","%matplotlib inline"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"6aYOLI2BHF6m"},"source":["## Loading the dataset.\n","The dataset can be found [here](https://github.com/shreedharmalpani/Intro-To-ML-Hello-FOSS/blob/main/iris.csv)"]},{"cell_type":"code","metadata":{"id":"_hccks2pF4Mq"},"source":["df = pd.read_csv(\"iris.csv\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"um0L09IOF4Ms","colab":{"base_uri":"https://localhost:8080/","height":205},"executionInfo":{"status":"ok","timestamp":1635010648975,"user_tz":-330,"elapsed":513,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"5adfbd69-3916-4a7d-9b21-496cfc4ab498"},"source":["df.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n","
"],"text/plain":[" sepal_length sepal_width petal_length petal_width species\n","0 5.1 3.5 1.4 0.2 setosa\n","1 4.9 3.0 1.4 0.2 setosa\n","2 4.7 3.2 1.3 0.2 setosa\n","3 4.6 3.1 1.5 0.2 setosa\n","4 5.0 3.6 1.4 0.2 setosa"]},"metadata":{},"execution_count":5}]},{"cell_type":"code","metadata":{"id":"cbx8gP4zF4Mt","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635010651537,"user_tz":-330,"elapsed":11,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"6d8d789e-43c0-499a-9f8d-7f501f9b58bc"},"source":["df.info() "],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","RangeIndex: 150 entries, 0 to 149\n","Data columns (total 5 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 sepal_length 150 non-null float64\n"," 1 sepal_width 150 non-null float64\n"," 2 petal_length 150 non-null float64\n"," 3 petal_width 150 non-null float64\n"," 4 species 150 non-null object \n","dtypes: float64(4), object(1)\n","memory usage: 6.0+ KB\n"]}]},{"cell_type":"markdown","metadata":{"id":"vjH1pAqoJna2"},"source":["# Data Cleaning & Data Visualization"]},{"cell_type":"markdown","metadata":{"id":"PNfSyZF1F4Mu"},"source":["### 1) Remove unneeded columns\n","### 2) Check for duplicate rows \n","### 2) Check for rows with missing values\n"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"HKFsVhubF4Mx"},"source":["df.isna().sum()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Rm7rlDcPR0aI"},"source":["df.isnull()"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"L1cnZS5XF4M2"},"source":["### EDA (Exploratory Data Analysis) with 
Iris"]},{"cell_type":"code","metadata":{"id":"DjsnhAraF4M7","colab":{"base_uri":"https://localhost:8080/","height":458},"executionInfo":{"status":"ok","timestamp":1635011702579,"user_tz":-330,"elapsed":523,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"a7bf481b-9840-450b-b8f2-49a114bba973"},"source":["##Displaying a scatter plot to show the distribution of Sepal Length vs width the dataset\n","\n","fig = df[df.species == 'Iris-setosa'].plot(kind='scatter', x='petal_length', y='petal_width', color='orange', label='Setosa')\n","df[df.species == 'Iris-versicolor'].plot(kind='scatter', x='petal_length', y='petal_width', color='blue', label='Versicolor', ax=fig)\n","df[df.species == 'Iris-virginica'].plot(kind='scatter', x='petal_length', y='petal_width', color='green', label='Virginica', ax=fig)\n","\n","fig.set_xlabel('Petal Length')\n","fig.set_ylabel('Petal Width')\n","fig.set_title('Petal Length Vs Width')\n","\n","fig=plt.gcf()\n","fig.set_size_inches(10, 7)\n","plt.show()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"O_eYByFvF4M5","colab":{"base_uri":"https://localhost:8080/","height":235},"executionInfo":{"status":"error","timestamp":1635011142444,"user_tz":-330,"elapsed":515,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"fa78777e-0d87-449b-d9b7-a3995609ba6a"},"source":["#Display a scatter plot to show the distribution of Sepal Length vs width the dataset (Like previous Petal lenght vs width scatter plot)\n","\n","\n","#code\n","\n","fig.set_xlabel('Sepal Length')\n","fig.set_ylabel('Sepal Width')\n","fig.set_title('Sepal Length Vs Width')\n","\n","\n","fig=plt.gcf()\n","fig.set_size_inches(10, 7)\n","plt.show()\n","#example plot"],"execution_count":null,"outputs":[{"output_type":"error","ename":"AttributeError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m#code\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_xlabel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Sepal Length'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_ylabel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Sepal Width'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_title\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Sepal Length Vs 
Width'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mAttributeError\u001b[0m: 'Figure' object has no attribute 'set_xlabel'"]}]},{"cell_type":"code","metadata":{"id":"EIh_yKQAF4M6","outputId":"91706df0-4dd1-4a8b-92e7-41e04e6e602d"},"source":["#plot the FacetGrid plot using the seaborn library\n","\n","#sns.FacetGrid(...)\\\n","# .map(...)\\\n","# .add_legend()\n"],"execution_count":null,"outputs":[{"data":{"text/plain":[""]},"execution_count":12,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAboAAAFgCAYAAADNUrzMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X+cXHV97/HXzszubJKZ7O4kGxMSfod8pRF0A5VaUJAH\nKL1UsFJ+XGzVUq8Fr9dbKz4o/gJUqlyx2lZFhaJitSCtFhRbBREtaLGQyI8o3xBBICFxN/tzJrs7\nszOz949zdjPZPXNmdmbO/Mr7+XjkITPfPed85uy63z3f8z3vb8fs7CwiIiLtKtToAkRERIKkjk5E\nRNqaOjoREWlr6uhERKStqaMTEZG2Fml0AeUaGkrWfXpoX99yRkcn633YkpqxLtVUHtVUnlatqb8/\n3lGncmQJAu3ojDFrgEeBs621TxW8/x7g7cCQ+9ZfWGttkLVUIhIJN7oET81Yl2oqj2oqj2qSWgqs\nozPGdAJfBKY8mk8C3mKtfTSo44uIiECw9+huBL4AvOjRdhJwtTHmQWPM1QHWICIih7iOIJJRjDFv\nAzZYaz9mjHkAuHzB0OU1wOeACeDbwE3W2u/67TObzc1q6EBEmpzu0TWhoDq6nwCz7r9XADuA86y1\ne40xHcBKa+24+7XvBFZZaz/qt89GTEbp748zNJSs92FLasa6VFN5VFN5WrUmTUZpToHco7PWvmbu\nvwuu6Pa6b60EnjTGHA/sB84Ebg2iDhERkbo9XmCMuRSIWWu/ZIx5P/AjIA380Fr7vXrVISIih5bA\nOzpr7Rnufz5V8N7XgK8FfWwRERElo4iISFtTRyciIm1NHZ2IiLQ1dXTSMOmZHIOjk6Rnco0uRUTa\nWMuEOkv7yOXz3HH/TrbtGGJkIk1iZZSBTf1cfOZGwiH97SUitaWOTurujvt3ct8ju+ZfD0+k519f\netamRpUlIm1Kfz5LXaVncmzbMeTZtm3HPg1jikjNqaOTuhpPpRmZSHu2jSanGU95t4mIVEodndRV\nTyxKYmXUs60v3k1PzLtNRKRS6uikrqKdYQY29Xu2DWxaTbRTK1SISG1pMorU3cVnbgSce3KjyWn6\n4t0MbFo9/76ISC2po5O6C4dCXHrWJi44/VjGU2l6YlFdyYlIYNTRScNEO8Os6Vve6DJEpM3pHp2I\niLQ1dXQiItLW1NGJiEhbU0cnIiJtTR2diIi0NXV0IiLS1tTRiYhIW1NHJyIibU0dnYiItDV1dCIi\n0tbU0YmISFtTRyciIm1NHZ2IiLQ1dXRStfRMjj379pOeyTW6FBGRRbRMj1Qsl89zx/072bZjiJFk\nmkQ8ysCmfi4+cyPhkP6GEpHmoI5OKnbH/Tu575Fd86+HJ9Lzry89a1OjyhIROYj+7JaKpGdybNsx\n5Nm2bcc+DWOKSNNQRycVGU+lGZ
lIe7aNJqcZT3m3iYjUmzo6qUhPLEpiZdSzrS/eTU/Mu01EpN7U\n0UlFop1hBjb1e7YNbFpNtDNc54pERLxpMopU7OIzNwLOPbnR5DR98W4GNq2ef19EpBmoo5OKhUMh\nLj1rExecfizhrk5ymRldyYlI09HQpVQt2hlm3eoV6uREpCmpoxMRkbamju4QkZ7JMTg6qefbROSQ\no3t0be6gmK6JNImViukSkUOLOro2p5guETnU6U/6NqaYLhERdXRtTTFdIiLq6NqaYrpERNTRtTXF\ndImIaDJK21NMl4gc6tTRtbnCmK7xVJqeWFRXciJySFFHd4iIdoZZ07e80WWIiNSd7tGJiEhbU0cn\nIiJtTR2dNC3lc4pILQR6j84YswZ4FDjbWvtUwftvAD4MZIFbrbU3B1mHtBblc4pILQX2W8MY0wl8\nEZjyeP/TwOuA04F3GGNeElQd0nrm8jmHJ9LMciCf8477dza6NBFpQUH+eXwj8AXgxQXvHw/stNaO\nWmszwIPAawKsQ1qI8jlFpNYCGbo0xrwNGLLWft8Yc/WC5pXAeMHrJNBTap99fcuJROr//Fd/f7zu\nxyxHM9ZVi5r27NvPSLJ4Pme4q5P+1SvqWlOtqabyqCaplaDu0V0GzBpjzgJeAdxmjDnPWrsXmAAK\nf1riwFipHY6OTgZSqJ/+/jhDQ8m6H7eUZqyrVjXlZnIk4lGGPcKo++Ld5DIzZR+nnc9TLamm8pRT\nkzrC5hRIR2etnR+KNMY8AFzudnIAvwKOM8YkgBTOsOWNQdQhrWcun7NwDb05yucUkUrULRnFGHMp\nELPWfskY81fA93HuEd5qrd1drzqk+SmfU0RqKfCOzlp7hvufTxW89x3gO0EfW1qT8jlFpJaUdSlN\nS/mcIlILevpWRETamjo6KSk5meFXvxkhOZlpdCkiIkumoUspKpPNcv1tW9k9lCI/C6EOWN8f4wNv\n2UJXRD86ItIadEUnRV1/21ZeGHQ6OYD8LLwwmOL627Y2tjARkSVQRyeekpMZdg+lPNt2D6U0jCki\nLUMdnXjaVXAlt1B+1mkXEWkF6ujE04Y1MUId3m2hDqddRKQVqKMTT/HlXazv9+7M1vfHiC/vqnNF\nIiKVUUcnRX3gLVs4vODKLtQBh69xZl2KiLQKzRGXoroiEa677JUkJzPsGkyxYY2u5ESk9aijk5Li\ny7s4/qhEo8sQEamIhi5FRKStqaMTEZG2po6ujQyPT/HTJ/YwPD7V6FLKkp7JMTg6SXom1+hSpE3k\n02kyg4Pk04tXqA9yW2luukfXBqYyM1x1089ITWXn34sti3DDFa9iWVdnAyvzlsvnueP+nWzbMcTI\nRJrEyigDm/q5+MyNhEP620uWbjaXY+jO20lt20p2ZIRIIkFsYAv9F15CR9h/LcNqtpXWoN8qbWBh\nJweQmspy1U0/a1BF/u64fyf3PbKL4Yk0s8DwRJr7HtnFHffvbHRp0qKG7rydsfvuJTs8DLOzZIeH\nGbvvXobuvD3QbaU1qKNrccPjU4s6uTmpqWzTDWOmZ3Js2zHk2bZtxz4NY8qS5dNpUtu8g8ZT27b5\nDkVWs620DnV0Lc4+P1ZVe72Np9KMTHj/8hhNTjOe0i8WWZrs+DjZkRHvttERsuPjgWwrrUMdXYsz\nR/RW1V5vPbEoiZVRz7a+eDc9Me82kWIiPT1EEt7PeUb6EkR6egLZVlqHOroWt6pnGbFl3nOKYssi\nrOpZVueK/EU7wwxs6vdsG9i0mminbv7L0oSiUWID3rF0sYEBQtHifzxVs620Ds26bAM3XPGqorMu\nm9HFZ24EnHtyo8lp+uLdDGxaPf++yFL1X3gJ4NxXy46OEOlLEBsYmH8/qG2lNXTMzhZZdKzJDA0l\n615of3+coaFkvQ9bUrG6hsensM+PYY7orfuVXCXnKj2TYzyVpicWDeRKrhm/f6qpPJXWlE+nyY6P
\nE+npWfLVWKlty6mpvz9eZHEraSRd0bWRVT3L+P0Tmmuo0k+0M8yavuWNLkPaSCgapWvNmrpvK81N\n9+hERKStqaNrI9VEavltq6guEWllGrpsA9VEavltCyiqS0Ranjq6NjAXqTVnLlIL4NKzNlW8LVDx\nfkVEmoX+LG9x1URq+W87xFY7WNF+RUSaiTq6FldNpJbftiPJNCPJTEX7FRFpJuroWlw1kVp+2ybi\nURLxror2KyLSTNTRtbhqIrX8t+1ni/F+pkhRXSLSSjQZpQ1UE6lVzraK6hKRVqYIMB/NGI0Exeuq\nJlLLb9ty9tuM50o1lUc1lUcRYK1LV3RtpJpILb9tFdUlIq1M9+hERKStqaMTEZG2po6uQo3Kf0zP\n5Nizb78e2JaGy6fTZAYHyaf1TKU0N92jW6JqciVrdtxkmkRcuZPSGLO5HEN33k5q21ayIyNEEgli\nA1vov/ASOsJ67ESajzq6JaomV7IVjyuy0NCdtzN2373zr7PDw/Ov11zy5kaVJVKULgWWoJpcyVY8\nrshC+XSa1Latnm2pbds0jClNSR3dElSTK9mKxxVZKDs+TnZkxLttdITs+HidKxIpTR3dElSTK9mK\nxxVZKNLTQySR8G7rSxDp6alzRSKlqaNbgmpyJVvxuCILhaJRYgNbPNtiAwOEovqjS5qPJqMsUTW5\nkq14XJGF+i+8BHDuyWVHR4j0JYgNDMy/L9JslHXpwy/brppcyWqkZ3KEuzrJZWaa6kquVbMJ662d\nasqn02THx4n09NT8Sq5Vz5OyLpuThi4rNJf/WO/OJtoZZt3qFU3VycmhKRSN0rVmjYYrpempoxMR\nkbamjq7FJCczPPb0EMnJjGfbr34z4tkWZGSZYslEpJlpMkqLyGSzXH/bVnYPpcjPQqgD1vfH+MBb\nnBlwxdrCoVBgkWWKJRORVhBYR2eMCQM3AwaYBS631j5Z0P4e4O3AXOTHX1hrbVD1tLrrb9vKC4Op\n+df5WXhhMMX1tzkpFcXazBG9gUWHKZZMRFpBkH92vwHAWnsq8EHg+gXtJwFvsdae4f5TJ1dEcjLD\n7qGUZ9vuodRBnVyhXYMpHrXBRIcplkxEWkVgV3TW2n8zxnzXfXkkMLbgS04CrjbGrAXusdZ+3G9/\nfX3LiUTqP9Owvz9e92Mu9OLTQ+SLPFxR7H1wLqPHksWjw8JdnfSvXlFRTXv27WckoH3XUjN8/xZS\nTeVRTVIrgd6js9ZmjTFfBf4I+OMFzbcDnwMmgG8bY/7QWvvdhfuYMzo6GVyhRTTLszzxrhChDu9O\nrdj7AB1AbzzKqEeH1BfvJpeZqfjz5WZyJOJRhj0yOKvdd600y/evkGoqT6vWpI6wOZU1dGmM+Utj\nzIgxJuf+yxtjyhqbsta+FdgE3GyMWeHurwP4jLV2n7U2A9wDDFT4GdpefHkX6/tjnm3r+2Mcvsa7\nbcOaGCeZYKLDFEsmIq2i3Cu6vwReYa19vtwdG2P+FNjgDklOAnn3H8BK4EljzPHAfuBM4Nayqz4E\nfeAtWyqedQnBRIcplkxEWkFZEWDGmO8Bf2StLXs9GPfq7cvAWqAT+ASwAohZa7/kdoTvBtLAD621\n1/jtr9kiwBolOZkhmckT7woRX961qG3XYIoNa2KL2oKMLFMsWflUU3latSZFgDWncq/o/h54whjz\nX0B27k1r7WXFNrDW7gcu8mn/GvC1Mo8vrvjyLo450vv/cPHlXRx/lPcSKnORZUGIdobpX72i6X4x\niYjA0jq6fwKeC7AWERGRmiu3o5u21n4k0EpEREQCUG5Hd58x5lPAvwPzQYrW2p8EUlULqOael9+9\ntHK2ffHpIc97dH41NareRsnkMuxNDZHLhegKL61mv+VnglyaRkSCUW5HNzf1v3Bp4Vmc2ZKHlIPy\nHZeYHemXV9kV8f9W+G3rl2cJNKTeRsnlc3xr5z08PrSd0fQY
fdFeTuzfzJs2nks45N/Bz+ZyDN15\nO6ltW8mOjBBJJIgNbJlfULRYW0e4eSbgiMhiWnjVh9csq2/ct+OgfMc5Z528oWS+4zW3/twzruvw\nNTGuu+yVFW+7MM+ysCagLvU2yyy5O3fczQO7Hlz0/hkbTuPCTef5bjt4+9cZu+/eRe/3nnU2QNG2\nNZe8uez6muU8FVJN5dGsy9bl+ye9MabDGHOdMebMgve+aoy5LvjSmk81+Y6l8iq9ltYpZ1v/PMsh\nttrButfbKJlchseHtnu2PbFvO5lc8Zrz6TSpbVs921LbtpLc+miRtm3k02U/dSMiDVAqGeU64BXA\nUwXvfQzYYoz5cGBVNanxVJoRj8grcPIdx1PFf+HtGkz55lXuKhLMXGrbWfCM+AIYSaYZSXr/cg+y\n3kYZTycZTS+MVHWMTI8xni7+13h2fJzsyIh328gIuWJtoyNkx8eXXqyI1E2pju6NwEXW2hfn3rDW\nPg38T3yekWtXPbEoiZXeExD64t30xIpPTtiwJkaoyKBGqMNpr2TbDqAv7n3cRDxKIu49ESPIehul\nJxqnL9rr2Zbo7qUnWjyHMNLTQyTh/QxiJJEgXKytL0Gkp2fpxYpI3ZTq6HJeaSjW2hQwE0xJzaua\nfMdSeZV+sxn9tvXPs+xni1lT93obpSvcxYn9mz3bTli92Xf2ZSgaJTawxbMtNrCF+JaTirQNaPal\nSJMrNXVuvzHmWGvtrwvfNMZs5EBu5SGlmnzHUnmVlW5bTp5lvettlDdtPBdw7smNTo/R193LCas3\nz7/vZ252ZWrbNrKjI0T6EsQGBubfL9UmIs3Jd9alMeZcnFSUa4Gf44yUnQxcA3zAWvvNOtQINM+s\nyzmNfI6uWNZlI5+ja7ZZcplchnAsTy7VXM/RNdt5AtVULs26bF0lHy8wxpwDvB/nWbo88N/AJ621\n3w++vAOaraNrpGasSzWVRzWVp1VrUkfXnEo+9Wut/Q/gP+pQi4iISM2VFW9hjPld4EpgNc7wJQDW\n2kMuGaXR0jM59uzbT24mV9PhSSnP9FSSscEX6V1zGN3L6readDaZJL1rF9ENG4jEtYq1yFKUm+N0\nG/BZYDvOo1tSZwdFjyXTJOK1ifmS8mRnMjx8yw102+dYkcqyOxZh2hzJKW+/ikhncDNQ85kMz3/8\nY2R274J8HkIhutZv4IirP0ioq/lmvoo0o3I7uilr7ecCrUR83XH/zoOivIYn0ge9LtZWKuZLyvPw\nLTfQ/+iBycfxVJb4o7/mYW7g1Cs+FNhxn//4x8i88PyBN/J5Mi88z/Mf/xhHXaMFRUTK4dvRGWOO\ncP9zmzHmPcBdHLzw6vOeG0pN+UePDVFsQtG2Hfu44PRjNYxZpempJN3WeynGbvuc0x7AMGY2mXSu\n5Dxkdu8im0xqGFOkDKWu6H6MM1TZgbNSwbsL2maBYwKqSwr4RY+NJNMUmzg7F/MV1Mrih4qxwRdZ\nkcp6ti1PZRkbfJG1R5qaHze9yx2u9JLPk961i8jxx9f8uCLtxrejs9YeDWCMSVhrDwr7M8YcFWBd\nUmAuemzYo7NLxKPMzs56ZlqWivmS8vSuOYzdsQhxj85uMhbh6DWHBXLc6IYNEAp5d3ahkNMuIiWV\nGro8HOdq7nvGmD/gwIzLCPA94KXBlidwIHrMa7mduUgy7zb/mC8pT/eyONPmSOKP/npR27Q5MrDZ\nl5F4nK71Gw6+R+fqWq/ZlyLlKjV0eR3wWuAwoHA18Szw3aCKksXKiR6rJOZLynPK26/iYZxZl8tT\nWSYLZl0G6YirP1h01qWIF2NMGCfRahOwDNgBXOGVW7yEfd5mrX1LjUqsu7IWXjXGXGWtvaEO9RSl\nZBRHeiZHuKuTXGamqZ6ja8ZzFURN1T5HV2lNQT5Hd6h876rVKskobnTjudbad7qv/x/wjLX2C42t\nrHFKDV1+2Ou/51hrNb+5
zqKdYfpXr/D8P1y0M6yJJwHrXhYPZOJJKZF4XBNPpFy7gdcYY84D7seJ\ncDzCGPNTIAWsAa631t5pjHk9TnbxLHC3tfYGY8xm4EtAJ86z038O/NJa+1JjzEnAp93j/MJa+25j\nzMXAX+KshvM1a+1n6/dRy1PqaeIO998pwAU4WZcZ4FzAez0UERFpGGvtL3CSrC4DngP+DViLcwvq\njcDpwEeNMSHgU8A5wGnAacaY44FPAu+x1r4S2AasK9j9Z4E3W2tfA4TdLOSLgPcBpwL7g/+ES1dq\n1uV1AMaYh4BXWWsn3defAX4UfHkiIrIUxpgTgG3W2jcaYyLAXwPXA48U/A4fA/qB9cDd7qa9wLHA\nUTjh/Vhr/979+vndA19zX8eAJ3A61fcDH6VJ526Umw/Vz8HRX52A95LLLSQ9k2NwdJL0TK6m2yYn\nM/zqNyMkJxdP+a/WXNZlJTW3k0wuw9DkMJlc7c9xpaankux9zjI95X0fJ59OM7VnL/l0xXMCiu43\nMzjouV+/NnDO497UUEXnsdS+pWHOxhmOxFqbBR4H0sDLjDERY0wvsBLYBzwDvN5aewbwRZyhyqeB\nVwAYYz5pjClcgPKXwAXu138SZ/m2y4D3W2tfC7zFGOO9YnMDlRsBdjPwiDHme0AYZ+jy7wKrKmAH\n5UYuMRvSb9tcPl90odKuSLmnuozjLsi6PJTyLHP5HN/aeQ+PD21nND1GX7SXE/udhVXDocY8SlEq\nB3M2l2PozttJbdtKdmSESCJBbGAL/RdeQke48pr99gv4HrOa8xjU55Ga+Szwd8aYX+AMJQ4BH8bJ\nLL4HWAW811qbM8ZcA/zQGNMFPInzu/4q4Cb3avCXOMOXc64EvuV+/W+BPwF+gfMIWgq411qbqseH\nXIqyZl0CuDchz8C5svuhtfaxAOtapJazLr9x3w7P587OOnnDQdmQXrOs/La1z4/xwuDi7/Hha2Jc\nd9kr61Jzo9RrltydO+7mgV0PLnr/jA2nceGm8xpS00M3ffSgHMw5Qycdy6lXfIjB27/O2H33Lmrv\nPets1lzy5oqP67dfwPeYSzmPSzluNZ+nkGZd1pYb8PEFa+05ja6lEXwvBYwxf+j+71twJp8M4Vzu\nvtx9r+X450bu8x0S9Nt2644hdnl0cgC7h1JVDWNWU3M7yeQyPD603bPtiX3bGzKMWSoHc3J8mNS2\nrZ7tqW3bKh72y6fTPvvdSnLro0WPOT2VrPg8+h+38s8jEqRSY14nu//7Wo9/ZwRXVnD8ciPnsiEr\n2nYiXXT9ovwsRTvBclRTczsZTycZTY95to1MjzGerv8VQMkczGeeJjsy4tmeHR0hOz5e0XGz4+PF\n9zsyQs7nmGODL1Z8Hn2PW8XnkWBZa39zqF7NQel7dM8aY9ZZa/+sLtXUgV9uZKlsSN9tV0aLdnah\nDtiwpvL7s9XU3E56onH6or2MpEcXtSW6e+mJ1j8Sq1QO5pHHHMd0IkF2eHhRe6QvQaSnp6LjRnp6\niBTbbyLB7OysZ2cX6UvQu+Yw+nZVdh59j1vF5xEJUqkrutcDPzfGPGmM+VtjzB8YY5bVo7CgzOVG\neimVDem37ZZN/UU7s/X9MeLLK18ks5qa20lXuIsT+70f3zxh9Wa6wvVfiHQuB9PLtDmS5T2riA1s\n8WyPDQwQilb2R0ooGvXZ7xbiW04qeszuZfGKz6P/cSv/PCJBKvUc3aUwfyPzNcCbgE8YY/YBP2h0\nLFilysmNrGRbv1mXjay5nbxp47mAcy9pZHqMRHcvJ6zePP9+I5TKwZybBZnato3s6AiRvgSxgYH5\n9ytVzn6LtRWex9HpMfqWcB6D+jwiQVnKrMtenCfqT8F5vGDaWntKgLUdJIisy1LZkH6zrPy2TU5m\n2DWYYsOa6q7kih23WNZlI9V7llwml2E8naQnGi96BVLvmkrlYObTaVaGs0zkIjW98smn02
THx4n0\n9Czar18bOOcxHMuTS4WWfEVcat/VONRmXb7hvXctx0kg2fOdT50/Wck+pDjfjs4Y8yqc4ctzgNU4\nuWk/AO5fuD5d0BTqfEAz1qWayqOaytOqNS21o3vDe++K4Dx4/UbgcOAFnMiu933nU+d7z3IqgzsK\nd7u19vcK3jsHOMJa+6VK91vkWH8EPGytfbGW+62lUpNRHgK+D/xva633fGUREanUJ3ECkeccVfD6\nPbU8kLX2P2q5vwL/F7gcaNmO7jzgdcBtc/flcO7N/XfglTW5apbE8dt2eHwK+/wY5oheVvW09Lyf\nQ06QQ3kTqRH27NnJunUbWRkrP31vcnyYsWeepveY41jes6qmNUl13OHKNxZpPv8N773rA9UOYxpj\nHgAGcSIb/xk4DrgW+CbQAywHPmCt/cGC7d6Ek5Ayg9OBXQLEgX/ESVYBeDdwBE5c2G3GmNOA/+N+\nbRb4ibX2KmPMqTjh0TPAJPDHOIsF3IKTr3kY8Dlr7U3VfFY/pSajfBc3pNO9FH49cJUx5mU4SzQc\ncnefg4oPy2RzXHXTz0hNHRitiC2LcMMVr2JZV2fQH0uqEGQkVjo9xb03fYiXPDtCfH8euyLEb49O\ncPYVHyUaLf6H0Mz0FNs/chXRoQlCszDeAen+lWz+8A10dusPqCaxDme40svhbvviyJ2l+2dr7beN\nMW9zXx+LcyvqHJwle7yilf4n8Elr7b+44SArgatxUrFuMsYcB3zZWnuaGzV2OU7g80XA7+N0dP/q\nho6cjtOxfgbn4qnPPf7t1tpvGWMOA34MBNbRlRWSaIzpBo50i+vGWaqn7vfMmsEd9+/kvkd2Mew+\nMzc8kea+R3Zxx/07q9p2YScHkJrKctVNPwvmg0jNDN15O2P33es8WzY7S3Z4mLH77mXoztur3ve9\nN32ITU/uo2d/nhDQsz/Ppif3ce9NH/LdbvtHrmL54AThWedP5/AsLB+cYPtHgl0RXZZkD849OS8v\nuO21YA96Ye12nADnfwY+D4SMMacZYx5w/50L/BVwpjHmxzgdVx44AbjMvUq8mcXB/i8F/staO2Ot\nnQX+EydR629wrtp+iHM1N4OTk/lGY8w/AR/EWSggMKUiwD5jjPk5sAvng+8HrrbWnmit/Z9BFtaM\ngooPe/Sp3y7q5OakprIMj08tvVipiyAjsSZSI7zkWe85Xy95doSJlHfb5Pgw0aEJz7bo0AST44sf\n9pb6c4cl/61I8101nH2ZL3zhLuMTt9aeC7wV+Adr7YPW2jPcf/cA7wCutdaejvO30h8BTwGfdlcu\nuAj4p4L9h9z2U9wVEjpwHknbgRP8/BV3dYPt7r7fC/zMWvsnwJ3uMQJT6h7dIPAu4FFr7aERqOij\nnCiuYit8+26bmvE9rn1+jN8/QcNNzaicSKyuNWsq2veePTuJ7897tsX259mzZycrj1scFj72zNOE\nioy3hGad9uUDul/XJN7n/u/5HJh1eVfB+0F4GrjGGHMRTgf1YY+v+TnwXWNMEmdV8rnbWP9ojHkH\nzlDmte7X/hRnZYTX4QxRPuTu90GcjvyVwC3GmP04neI7cCbd/IMx5hJgDMgaY6LW2kDyDEt1dJ04\n47jnFCy8N89a+5EgimpWgcWHxTp9OztzRG9lBUvggozEWrduI3ZFiB6Pzi61IoRZ5x0W0HvMcYx3\nOMOVC+U7nHZpDu4jBO95w3vv+gA1fI7OWvsb4PcWvPeVgpd/XGL77wDf8WhaNHnGWvtBnOFHgL91\n/xV6eGEtwLPAy/xqqKVy7tF1+Pw7pAQVH3bSS19CbJn33xyxZRHNvmxiQUZirYwl+O3R3jMsf3t0\noujsy+UIXWLcAAAgAElEQVQ9q0j3r/RsS/ev1OzLJvSdT50/+Z1Pnf9rPSwejFKzLq/zet8dfz06\nkIqaXFDxYX/0mqOLzrqU5hZkJNbZV3x0ftZlbH+eVM
GsSz+bP3zDQbMu8wWzLkUONWVFgBlj3oUz\nc2ZFwdvPWmvrFrTYbMkojXyOrlVTI+qt3jWV8xxdpTUF+RydvnflaeWFVw91pe7RzXkv8HLgeuD9\nOGvRnR1QTS0h2hkuOvGkmm1X9SzTxJMWFYpGK554UsrKWMJz4kkpy3tWaeKJHPLKeo4OGLTWPgs8\nDpzg3tRcPDtFRESkyZR7RbffGPNanI7ujcaY/8Z5ul1ERKp00R1XzK9e8M2Lb9KElBor94ru/+BE\nt/wHTs6ZBf4hqKJqKT2TY3B00vNhbr+2ICUnM/zqNyMkJzNLrik9k2PPvv11rTmTyzA0OUwmt7je\nUlKZFHZkJ6lMqqb7nZ5Ksvc5y/SU9z2TfDrN1J69ng9sl9o2KJnhYQYf+DEZj0cRSp0Lv5rz6TSZ\nwcGqHk73Umq/QR231WqqxkV3XBG56I4rPo3zILUFtl90xxWfvuiOK8q9CPFkjDnKGPNfC947x30G\nrq6MMX9tjFnSuLub0PLSWtVQ1sm01m43xrwPJ7zzOuBCa633k6wuY0wYJybG4MSFXW6tfbKg/Q04\nDypmgVuttTdX9hG8+eVKAhXnVVYjk80WXZg1HAr51nTQ50mmScSDrzmXz/Gtnffw+NB2RtNj9EV7\nObHfWZwzHPKfgJPJZrhx6+fZk9pLnjwhQqyLreXKLe8kHApXvN/sTIaHb3EWOV2RyrK7YJHTSGeX\nb+5kLp/z3TYouakpnr36feRTBzr7UCzG0R//JES7fM+F3+cNh8KBZGyWyu4MMtuzlWqqkXZYvaDU\ncT/RiOMWKnfW5dnAV3FSrMM4idMX+a1iYIx5I3CetfYyY8wZwHustee7bZ3Ar4DfxYkVewj4Q2vt\nb4vtb6mzLr9x3w7ue2TXovfPOnkDQNG2S886kG9a65lf19z6c14YXHxlc/iaGOaIXt+a/D5PYc21\ndOeOu3lg14OL3j9jw2lcuOm8g95beK7+5uefYXdq8aod62OHcVzvMWXvd6GHbvoo/Y8uzrkdOulY\nTr3iQwze/nXG7rt3UXvvWWfz9OgzvtsGZedfvuugTm5OKBZj2ztf53su/D7vcX3HFP2say55c9n1\nLfze+Z3DNZe8uWR7LbRCTUW+puxZl+5w5Xaczm2hZ4GXVTqMObceHTDNElcvKPj9/HJr7X5jzJVA\nDvgX4EvAMmAKJ+EkjPNg+TDwPZwUlbfiJKD8t7X23caYr7i1/Bj4Mk5uchdO6tYj7nvHuPv6W2vt\nHW6e5uXAXpyosZU4F2YftNbeb4x5EideLFPO4gLlXgp8GvgDa+3J1toB4EJKJE1ba//NPRG4H2ys\noPl4YKe1dtRam8GJinlNmbWU5JcrudUOVZxXWY3kZIbdQ4t/2QHsHkrx6FPeffy2HftITmbqXnMm\nl+Hxoe2ebU/s2+473JjKpNiT2uvZ9mJqD48NPVHRfqenknTb5zzbuu1zTI4PF82dTG59lO6nflN0\n26CGMTPDw56dHEA+leLpZ7d5tj2xbzsTqRHfzzux1XuJyGoyNktld2aTycCyPVupphopZ/WCWvhn\na+1ZOJ0VHFi94A04qxQcNLJnrZ0B/hW4wH3rUpyIrxuBv3ezLm8E5q7U1gKvs9b+P+DPgHdZa18F\n/MoYU7jvy4HfuG2XAKcAfwEMWWt/HzgL+JgxZnXBNh8E7rXWvgan3/lH9znuGPDRclfQKXccOG2t\nfWzuhbX2Efdgvqy1WWPMV3ECQQsjZ1YC4wWvkzh/XRTV17ecSKS8IYg9+/YzkiyWSVn8h340OU24\nq5P+1QceF+zvj5d1zFJefHqIfJFr0vxs8bzL0eQ0yUze5/MsrrkW9qaGGE2PebaNTo8RjuXpjx18\nbubO1d7fvkge75HtWWYZTY97thXb75wXfv0CK1Le4dfLU1lyw7uK5k7mRkdZUWT0YnkqCzNj9B9x\nmGd7NQa3e/8Cnh
N/YR8cs/hxktHpMVKp3UU/74pUljzFMzZXhrMs61/t2e5l7ns3tWe/b3bnstSw\nb/tSj9uqNdXI3OoFR3m0Bbp6gTFmbvWCTuDv3bXkPuZ+ySdx1oq7yRjzlLOJHXbDoN9vjLkKJxlr\n7pfWs+4FCzgd3ZXGmKOBn3FwgpYB/t2t4WngM8aYzwH3ue8ljTG/xOmI5xwPfN1t322MmcBZWmjR\n5/JTbkf3sDHmFpx7blmc3vg3xpjXuAX8pNiG1tq3uifmYWPM71hr9wMTOIv4zYlz8BXfIqOj5V/B\n52ZyJOLFMimjdHRQNK8yl5mZH56o5dBlvCtEqAPPzi7UAT0rvPMu++LdxLtCPp/n4JprJZcL0Rft\nZSQ9uviY3b3kUiGGCq6CCs9VLLuSECHPzq6DDnqjKz07O6/9HqSzl/2xCHGPX/6TsQjhVRuK5k6G\n+/pIpseJ71989TsZi0BnbyAPKM+sPdK3PXn4apzR+4P1dfcSi61nT5HPuz8WoadrJXmPX/CRvgQT\nuQipMj9P4fcun4v4ZndOxVb5ti/luK1ck9/XlOubF980edEdV/wbB9+jm3NXDWdfFl29wBizDvip\ntfZonOejC7+uAydcem707ingRmvtT92JIqd77P9/4czHmDbGfB9niZ85c7er7jLGHIPTsf4UeDXw\nbWNMHGcpoGcXbPNqYJsxZj3ObP+5b7TvPJFC5Q5dHo/Ty34C55L1ZJwx3+s4kGB9EGPMnxpjrnZf\nTrpFzRX2K+A4Y0zCGNOFM2xZs4XX/HIlt5j+ivMqqxFf3sX6/phn2/r+GCe99CVFa4ov76p7zV3h\nLk7s3+zZdsLqzXSFi0/eiHXFWBdb69l2WGwdL+8/oaL9di+LM228O45pcyTLe1YVzZ2MbzmJ6Zce\nVXTb7mU1/Ut9XteqVYRi3t/3UCzGcUcPeLadsHozK2MJ38+7cstJnm3VZGyWyu6MxOOBZXu2Uk01\n9D6cBUmfxbmIeNZ9HfTqBWcYY36Cs0SO1+oF4KwmPgD8yH19Jc6qBz/GGcp83GObJ4D/NMbcj3Nv\n8OGCti8CxxRs/7c49/xWGWMeBB4ArrPWDhZs8zc46+L9BGclhHdYa72HOXyUNRmlEsaYFTg3Gdfi\nXB5/AidCLGat/VLBrMsQzqzLz/ntb6mTUQ7MUvTOpCzWVjiDsdaTUcqbdeldk9/nCXrW5RP7tjMy\nPUaiu5cTVnvPjlx4rsqZdVnOfhcqnIW4PJVlsuisy8W5k4WzLr22DUo5sy6LnQu/z3tg1uXiz7qU\nmYYLv3d+5/DgGY7VHbfVayryNRVFgOk5umCVO+vySJwx26NwLiO/AVzmLgVRF5VmXfrlSpbKqwwq\nby85mWHXYIoNa2LElx/8C7ZUTemZHOGuTnKZmcCuPhfK5DKMp5P0RONFr7iKnatUJsXu1F7Wx9YS\n6zr4yqac/RYzPZVkbPBFetcc5nk1lk+nWRnOMpGLLPqLvtS2QckMD9O59zlm1h5J16qDY7lKnQu/\nmsvJ2PRT7HtXar/VHrddalrwNcq6bELldnT/gXOZeQOwBXg78KfuTJi6aLZQ50ZqxrpUU3lUU3la\ntSZ1dM2p3DGv1XPPWVhrZ92Hu70XvBIREWki5XZ0U8aYDTgJJ7hTUZv24ZRyNSoC7FDiF21VTQRY\nOcfdmxqq6Lh+7X4RU9V8niDPRSO0WhSXtLdyHy94D/Bd4FhjzC9wZlxeGFhVAfOLBwsyAuxQ4hcf\nBlQcARbkcf22Dc1SNGIq31H556kmZq0ZtXAUl7Sxkh2dMeYPgV/iPP/w18BrgXsA71iGFnDH/TsP\nitMankjPvw4qTutQ862d9xwUbTWSHj3odbG2UhFgQR7Xb9vTtyYPipjKDg/Pv/7xlnjFn8fvmNWe\ni0YYuvP2ouepVlFc7eih8y+Yn3V56l3/qlmXNeZ7+eJmnF0DdOM8S/fXODMul+E8
T9dy/OLBgowA\nO5T4xYc9PvQkjw096dlWKgKsuuNu57HB4sdNZVJFt/3l3idIFosW27aV7XsqizSrJmatGZWK6tIw\n5mIPnX9B5KHzLzho9YKHzr/g0w+df0HTrV6wlFUISh3LGPM2Y0zd/pIrdTL/FHiVtXbSGPMJ4G5r\n7S3uE/O/DL682htPpRnxSBgBJ05rPJWueOVwcYynk0Xjw0aKvA8wMj3GeDpJ//LKVsT2O+5oeoxZ\nvCfujkyPsTu1t+i2mdERcsWixUZGmBkD4ouH5Up9Ht/zVOW5aITs+LhvFFd2fDywFdhbWMusXrCU\nVQhKHctdvLtuSnV0s9baucvo1wKfB2fmpTGtucB4TyxKYmXxOK2eWFOnKLSEnmi8aHxYItrLLHj+\ngk9099ITrfy5Nr/j9kV7mZ2dZTTjfdz1sbVFt+3qSxBOQM4rWiyRoLM3AbnFkWalPo/vearyXDRC\npKfHN4or0uMbZ3vIcYcr31ik+fyHzr/gA9UOY7qrANRq9YKX46xCsBa4DGdE8BqczvldwAiQAe5w\nd/VS4AvucV/ASdf6ubX2CmPMtTgrE3wRZ23TV+KsaHANznyQL3Ig2Ppua+0HqzkPpWZeZI0xve6M\nywHgB+6JOBInrqbl+MWDBRkBdijxiw87sf9lvLz/ZZ5tpSLAqjvuZl6+pvhxY12xotv+ztoTiBeL\nFhvYwuZ1lUWaVROz1oxKRXU1eRRXI7Ta6gWFRq21pwGPAVcBpwKvw0m/WmgT8Oc4ndn/MMYU5gO+\nEefxtVfiXEydjPPZ/8ta+3p3m8uX+oEXKnVF9wngF+7X3WKt3WOMuQgnf+y6ag/eKHMxYMXiwaR6\nc7McvaKt5vi11eK4o9Nj9C3huH41h9w8da+IqTd1VP55yjlPraT/QmfVFK/zJIu02uoFXvvcCPxy\nbuTPGPNTj+PvtNYm3fY9OHM+5hjcnGNr7SjwIWPMSuB3jTGvxVkAoOq/kEomoxhjDsPpcR93X/8P\nYNJa+0C1B1+KIJJRGhUBVq1mrKtYTX7RVtVEgJWSyWUIx/LkUqElH9ev3S9iqpqotCDPRSlB/DwF\nFUvWSEEko7gTUbxWL/jMqXf9a8X36BYsvHq5tfYpY8zbcIYTvw6cbq397ILVCxbu44c4w5E3uYud\nfoUDQ5cvtdb+tbt23IM4I35pnCV35q7+5oYub7fW/p67z//CWf3mbThDl3uAC621f2KM6cEZUr0H\nWG+tvcoYsxGnU41YayvuA0rO7LHWvoizsvjc6+9VerBmE+0Ma+JJwLrCXUUnVPi11eS4sbjnkj+l\njuvXHopGi06oqObzBHkuGsHvPMlB5lYpOB9nyO4F4C6CX73gGnd0LoT/6gUf4cDqBYtYa/cZY24A\n/hOnU1yGs05dZ5m13A2c5a5eEMEZKXwe+IYx5lU4nefTwGHA7jL3uUhgqxfUmrIuD2jGulRTeVRT\neVq1pkqzLlv1OTp3BfGrrLXXu7Pxf4IzuaXoGqWNUNWzGiIiUj23c/t1o+tYKmtt1hizwhizFWfG\n5cM4V3dNRR2dBMrvXo3fEj6ltq1GNcf1a8smk6R37SK6YQOR+OJHA+byN3O5xfcNq9HI+3uVCup7\nG+QyPeLNWvt+4P2NrsOPOjoJhF/m4cxsruiirF2RrsDyEv0Wgy11XCiedTmby/H8xz9GZvcuyOch\nFKJr/QaOuPqDhLq6AsuzbMWczKC+t8rYFD/ha6+9ttE1lGVyMnNtvY+5YkWUycnmi2FqxroW1jT0\nzX9m7L57yU9NAZCfmmL6mWfIT0/x2ekfszv14nxSySyzJDNJnhx+ilev/z3fbVe87MSKa7rhkX+o\n+LiTv3yyaNu+f/s2mReeh7n73bOz5CbGST3+GL1nvJZ/ffq7PLDrQaZy0wBM5ab5zcTzTGXTbF5V\nefBCrfZbz5+ncr+3S62pVj8zfsqpacWKaMs+
dtXOFNUvNeeXeTixbStDY96PB+1J7WUiNRJIXmIq\nk2JPam9Fx01u3Upqa5Gatj5KZtcLnm2Z3buYHB8OJM+yFXMyg8rCVMamlKKOTmrOL/MwNzLCsqkZ\nz7Y8efbs2VkyL7ESu93hykqOmxsdITtarKbRA1dyi3acZ+yZp0vmWVainJzMZlNOFmYz7Vfahzo6\nqbm5zEMv4USCqWXej9iECLFu3cai21aTl7g+tpZQkR/3UscN9yWI9BWrqQ86iswoD4XoPeY4+qK9\nns3V5FnO5WTWer9B8vu5qOZ7G9R+pX2oo5Oa88s8XDmwhf5e7wi/dbG1rIwlAslLjHXFWBdb69lW\n6rjxLVuIbSlS05aT6NrgHVfYtX4Dy3tWBZJn2Yo5mUFlYSpjU0rRZBQfzTjpA5qzroU1LT9+M/np\nKbLjE+TT00QSq1h56qn0X3gJv7fuZJ4cfor9mf3MMkuIEIfF1nHllncSDoV9t+1YwgrwC2s65SVb\nKj7uis0vK9rWc+qrST3+GLlU0hnGDIXo2nA4R1z9QTrCYV7at5GpbJpkJkk6mybR3ccpa092MjQ7\nKv9bs3C/09k0qyrcbz1/nsr93i61plr9zPjRZJTWpWQUH82YzgDNWVexmhr5HF2xmhr5HF2x/M1q\nVPscXSN+nkp9byutKcjn6IJMRpFg6Tk6CZRf5mGsK4ZJFF8xIqi8xGqO69cWiceJHH980f365W9W\noxVzMoP63ipjU7zoHp2IiLQ1dXSHiEwuw9DkcN2fr6rmuOMje7E/v5fxEe/n30odd29qqOafN59O\nkxkc9Hw2y69NRBpHQ5dtrlExUdUcd3oyxWPXXsnKkWlCOGtEPZXo5uXX3kj38sX31Gp1XD+VxoMp\nfkqk8dTRtblv7byHB3Y9OP96JD06//rCTec15XEfu/ZK+kam51+Hgb6RaR679kpO+X9fCOy4fobu\nvJ2x++6df50dHj7odbG2NZe8ueJjikhtaOiyjTUqJqqa446P7GVlQSdXaOXotO8wZlCf1z9iaivJ\nrY8WaVP8lEgzUEfXxhoVE1XNcffufKLoD2Vo1mkP4rh+fCOmRkbIKX5KpKmpo2tjjYqJqua4azee\nUCSREvIdTnsQx/XjGzGVSBBW/JRIU1NH18YaFRNVzXF7EmuZSHR7tk30ddOT8I7xqva4fvwjprYQ\n33JSkTbFT4k0A01GaXNv2ngu4NyjGpkeI9HdywmrN8+/34zHffm1NzqzLkenCc06V3ITfc6sy6Uc\nd3R6jL4afd652ZWpbdvIjo4Q6UsQGxiYf79Um4g0jiLAfDRj1BZUVle1MVGV1lTNccdH9rJ35xOs\n3XiC75Wcl6DitiqNB5vTjD9Tqqk8igBrXbqiO0Q0KiaqmuP2JNbS88qldXAHHTeAuK1K48FEpHF0\nj05ERNqaOjoREWlr6uikapXmSpbKwQwqn1OZlO1L31vxont0UrFKcyVLbdeIvEplUrY2fW/Fjzo6\nqViluZKltmtEXqUyKVubvrfiR0OXUpFKcyVLbZfKpBqQV6lMylam762Uoo5OKlJprmSp7Xan9tY/\nr1KZlC1N31spRR2dVKTSXMlS262Pra1/XqUyKVuavrdSijo6qUiluZKltot1xRqQV6lMylam762U\noskoUrFKcyVL5WAGlc9ZTl6ltCZ9b8WPsi59NGPeHjRfXZXmSpbKwaw2n7PYeSonkzIozfa9g/aq\nKcjvrbIuW5eu6KRqleZKlsrBDCqfU5mU7UvfW/Gie3QiItLW1NE1mWoijIKKzCrnuMUiwPxqasW4\npumpJHufs0zXeFWEUlrxXIk0Cw1dNolqIoyCiswqxe+4QNG20CwtF9eUncnw8C030G2fY0Uqy+5Y\nhGlzJKe8/SoincGs1A6KthKphcA6OmNMJ3ArcBQQBT5mrb27oP09wNuBIfetv7DW2qDqaXbVRBgF\nFZlVit9x
gaJtp29Ntlxc08O33ED/o7+efx1PZYk/+mse5gZOveJDgR1X0VYi1Qty6PJPgGFr7auB\nc4DPLmg/CXiLtfYM998h28lVE2FUaRRXtfyO+/jQdh4bfNKz7Zd7nyDZYnFN01NJuu1znm3d9rnA\nhjEVbSVSG0EOXd4J/Iv73x1AdkH7ScDVxpi1wD3W2o/77ayvbzmRSP2Havr7K0viWIqpPft9I4xW\nhrMs61/tWdfe1FDRyKzR6THCsTz9sdp/Bt/jpseYxftpkMzoCLklftZq1OL798KvX2BFauGPr2N5\nKgszY/QfcVjNa6rk56JS9fg5XyrVJLUSWEdnrU0BGGPiOB3eBxd8ye3A54AJ4NvGmD+01n632P5G\nRyeDKrWoej1flM9FiCQSZIeHF7VF+hJM5CKkCuoorCuXC9EX7WUkPbpo277uXnKp0JKn/ZfD97jR\nXmZnZxnNLO4Iu/oShBOQK/OzVqNm37/OXvbHIsQ9OrvJWAQ6e8s+zlJqWurPRaXa6Tm6IJX5HF2d\nqpGlCHTWpTHmcOBHwNestd8oeL8D+Iy1dp+1NgPcAwwEWUszqybCqNIormr5HffE/s28fM3LPNt+\nZ+0JxFssrql7WZxpc6Rn27Q5ku5lwfxyU7SVSG0EORnlJcAPgHdZa3+4oHkl8KQx5nhgP3AmzsSV\nQ1Y1EUZBRWYt5bjFIsC8agod67S1UlzTKW+/iodxZl0uT2WZLJh1GSRFW4lUL7AIMGPM3wEXA08V\nvH0zsMJa+yVjzJ8C7wbSwA+ttdf47e9QiQArJ8KoWF3VRmZVyi8CzK+moKO4gvj+TU8lGRt8kd41\nh1V0Jdeq0Vb11qo1KQKsOSnr0kcz/p8NmrMu1VQe1VSeVq1JHV1zUjKKiIi0NXV0IiLS1tTRNZlG\n5VVWI5VJ8cRvLalMqtGliIgsoqzLJtGovMpqZLIZbtz6efak9pInT4gQ62JruXLLO+mK1G8yjIiI\nH13RNYm53MiR9CizzM5nQ35r5z2NLq2oG7d+nt2pF8mTByBPnt2pF7lx6+cbXJmIyAHq6JpAo/Iq\nq5HKpNiT2uvZtie1V8OYItI01NE1gfF0smhu5Mj0GOPp5ppmDbDbHa704lzZeXeCIiL1po6uCfRE\n4/RFez3bEt299ESbLz9vfWwtoSI/PiFCrI+trXNFIiLe1NE1gUblVVYj1hVjXZHObF1sLbGuWJ0r\nEhHxpo6uSbxp47mcseE0VnX30UEHq7r7OGPDaYHnVVbjyi3vZH3ssPkrO+dK7jCu3PLOBlcmInKA\nHi9oEuFQmAs3ncf5x57TkLzKSnRFunj/K/+SVCZFKjJBLLtSV3Ii0nTU0TWZrnAX/ctXNbqMJYl1\nxTi6f13TZROKiICGLkVEpM2poysiPZNjz779pGdyjS7lIJlchr2poaZ6tq4ZawpKK0a0iRzqNHS5\nQC6f5477d7JtxxAjyTSJeJSBTf1cfOZGwqHG/V3QjBFhzVhTUA6lzyrSbtTRLXDH/Tu575Fd86+H\nJ9Lzry89a1OjypqPCJszFxEGcOGm81RTwA6lzyrSbjR0WSA9k2PbjiHPtm079jVsGLMZI8Kasaag\nHEqfVaQdqaMrMJ5KMzKR9mwbTU4znvJuC1ozRoQ1Y01BOZQ+q0g7UkdXoCcWJbEy6tnWF++mJ+bd\nFrRmjAhrxpqCcih9VpF2pI6uQLQzzMCmfs+2gU2riXY2ZtJBM0aENWNNQTmUPqtIO9JklAUuPnMj\n4NyTG01O0xfvZmDT6vn3G2UuCuyJfdsZnR6jr7uXE1ZvbmhEWDPWFJTCzzoyPUaijT+rSLvpmJ2d\nbXQNZRkaSta10PRMjnBXJ7nMTMOu5LxkchnCsTy5VKhpriSasSaA/v54zdNaMrlMVRFtQdRULdVU\nnnJq6u+Pd9SpHFkCDV0WEe0Ms271iqbq5MAZRlsb62+qDqUZawrKXETbof
BZRdqFOjoREWlr6uhE\nRKStqaOTtjQ9leSFX/+S6aml3+dRnqVIe9GsS2kr2ZkMD99yA932OVaksuyPRZg2R3LK268i0ul/\nX015liLtSVd00lYevuUG+h/9NfFUlhAQT2Xpf/TXPHzLDSW3ncuzHEmPMsvsfJ7lt3beE3zhIhIY\ndXTSNqanknTb5zzbuu1zvsOYyrMUaV/q6KRtjA2+yIpU1rNteSrL2OCLRbdVnqVI+1JHJ22jd81h\n7I9533aejEXoXXNY0W2VZynSvtTRSdvoXhZn2hzp2TZtjqR7WfHOSnmWIu1LHZ20lVPefhVDJx1L\nMhYhByRjEYZOOpZT3n5VyW3ftPFczthwGqu6++igg1XdfZyx4TTlWYq0OD1eIG0l0tnFqVd8yJl4\nMjMGnb2+V3KFwqEwF246j/OPPaeqPEsRaS7q6KQtdS+L03/EYRUFA8/lWYpIe9DQpYiItDV1dCIi\n0tbU0YmISFtTRyciIm1NHZ2IiLQ1dXQiItLW1NGJiEhbU0cnIiJtTR2diIi0NXV0IiLS1tTRiYhI\nW1NHJyIibU0dnYiItDV1dC0mk8uwNzVEJpdpdCkiIi0hsGV6jDGdwK3AUUAU+Ji19u6C9jcAHway\nwK3W2puDqqUd5PI5vrXzHh4f2s5oeoy+aC8n9m/mTRvPJRwKN7o8EZGmFeQV3Z8Aw9baVwPnAJ+d\na3A7wU8DrwNOB95hjHlJgLW0vG/tvIcHdj3ISHqUWWYZSY/ywK4H+dbOexpdmohIUwuyo7sT+JD7\n3x04V25zjgd2WmtHrbUZ4EHgNQHW0tIyuQyPD233bHti33YNY4qI+Ahs6NJamwIwxsSBfwE+WNC8\nEhgveJ0Eevz219e3nEik/kN0/f3xuh9zob2pIUbTY55to9NjhGN5+mONr7MZztVCqqk8qqk8zViT\nlBZYRwdgjDkc+DbweWvtNwqaJoDCn5g44P2b3DU6Oln7Akvo748zNJSs+3EXyuVC9EV7GUmPLmrr\n6+4llwoxNNXYOpvlXBVSTeVRTeUppyZ1hM0psKFL957bD4CrrLW3Lmj+FXCcMSZhjOnCGbb8WVC1\ntIu0vYkAAAhCSURBVLqucBcn9m/2bDth9Wa6wl11rkhEpHUEeUX3fqAP+JAxZu5e3c3ACmvtl4wx\nfwV8H6ezvdVauzvAWlremzaeCzj35Eanx+jr7uWE1Zvn3xcREW8ds7Ozja6hLENDyboX2ozDJ5lc\nhnAsTy4VaqoruWY8V6qpPKqpPGUOXXbUqRxZAj0w3mK6wl2sjfU3VScnItLM1NGJiEhbU0cnIiJt\nTR2diIi0NXV0IiLS1tTRiYhIW1NHJyIibU0dnYiItDV1dCIi0tbU0YmISFtTRyciIm1NHZ2IiLS1\nlgl1FhERqYSu6EREpK2poxMRkbamjk5ERNqaOjoREWlr6uhERKStqaMTEZG2po5ORETaWqTRBTQT\nY8wa4FHgbGvtUwXvvwd4OzDkvvUX1lpbh3q2AhPuy2ettX9W0PYG4MNAFrjVWntz0PWUUVOjztPV\nwHlAF/B5a+0/FrQ16jz51VT382SMeRvwNvdlN/AKYK21dsxtr/t5KqOmRpynTuCrwFFADvhfC34X\nNOTnSaqjjs7l/oB/EZjyaD4JeIu19tE61tMNdFhrz/Bo6wQ+DfwusB94yBhzt7X2t42qydWI83QG\n8PvAqcBy4MqCtkadp6I1uep+nqy1XwG+4tb3OZxf0nMdSkPOk19NrrqfJ+B/ABFr7e8bY84Grgcu\ncGtsyHmS6mno8oAbgS8AL3q0nQRcbYx50P1LvR5eDiw3xvzAGHO/Meb3CtqOB3Zaa0ettRngQeA1\nDa4JGnOeXg88AXwb+A7w3YK2Rp0nv5qgMecJAGPMycBma+2XCt5u1Hnyqwkac552ABFjTAhYCcwU\ntDX0PEnl1NExP4QyZK39fpEvuR24HD
gTOM0Y84d1KGsSp/N9vXvsrxtj5q7AVwLjBV+bBHoaXBM0\n5jytBk4GLiyoqcNta9R58qsJGnOe5rwfuG7Be406T3O8aoLGnKcUzrDlU8DNwN8XtDX6PEmF1NE5\nLgPONsY8gHOf4DZjzFoA9xfUZ6y1+9y/4u4BBupQ0w7gn6y1s9baHcAwsM5tmwDiBV8bB8YIXtGa\nGniehoHvW2sz7v2baaDfbWvUeSpaUwPPE8aYXsBYa3+0oKlR56loTQ08T+/B+d5twhnB+Ko7ZA8N\nPE9SHd2jA6y188MPbmd3ubV2r/vWSuBJY8zxOOPyZwK31qGsy4ATgHcaYw5z69jjtv0KOM4Yk8D5\nC/Q1OFdajaypUefpQeD/GmP+FqfTXYHT0UDjzpNfTY06T+B8/h96vN+o8+RXU6PO0ygHhitHgE4g\n7L5u5HmSKuiKrghjzKXGmHdYa8dxhlZ+BPwnsN1a+706lPCPQK8x5kHgDpxO5iK3phngr4DvAz/D\nuYm/u8E1NeQ8WWu/C2wDfo5zP+x/Axc38jyVqKlRP08ABnhm/sWBn/FG/Tz51dSo8/RpYIsx5j+B\n+90azm+C8yRV0DI9IiLS1nRFJyIibU0dnYiItDV1dCIi0tbU0YmISFtTRyciIm1Nz9FJ4Iwxfwxc\njfPzFgJus9Z+sob7vxbAWnutMWbWWttRYpNqjvUG4Dhr7d8WHtfj69YBn8R5yDkLvAC821r7zMKv\nFZFg6YpOAmWMWQ98CnidtfblwKuAS4wx5zW2soqdhPMwc1HGmBXAj4GfAC9zP/c/A/e6wcAiUke6\nopOgrcZJl1gODFtrU8aYtwLTxpjfxXlAdzmwD2cZlmfddJpfAafgLN/yl9baHxhjXgb8AxAD1gCf\nstb+/aIjejDGnAN8xK3lWZzlV4aNMb8BvoaT37kCNy3fPdZXcP4/8p/AH+Asu3O5u7/n3F2/0hjz\nU2A98GX36u4S4MXCkGJr7deNMWkgaox5M3Cuu80G4DPAETjpH8PAH1hrp8v5XCJSmq7oJFDW2seA\nu4BnjDE/N8bcgBOp9DxwC3CptXYLzlVf4dpeUff9S3HyBrtw1ib7mLX2d4HX4iyhUpIxph/4BPB6\na+0ATrLFDQVfMmytfSXO6hXvd9/7KvBha+0rcJI7ItbaX7pf8wVr7Zfdr3uJW8tJwPuMMXGc4cqH\nPc7Fv1hrU+7LVwLnAK92P/u/W2tPdNteX87nEpHyqKOTwFlrr8BJhL8JOBL4L+CvgWOBu40xv8Dp\neI4p2Oxmd9tf4ORpngi8F+h2l2y5HufKrhyn4Fwx/cg91ruA4wra/8P93yeBhJtleFRB5JRfxuK/\nW2vT1tp9OFelCSAPlLpP+JC1dsJaO3dlOJf3+BzQV86HEpHyaOhSAmWMOReIWWvvAL4MfNkY879w\nrtSeca+YMMaEca6O5mQL/jvkvv4mTujud3CWcLmkzDLCwIPW2vPcY3VzcAr93DDhLE4HlaN0R+VV\n59z2j3Bg5ex5xphbcIZqATKFbdba7MKvF5Ha0BWdBG0S+Lgx5iiYX37ld3Cu6hLGmFe7X3cZ8I2C\n7S5xv/5knCucJ4CzcYYT7wJOd9vDlPYw8CpjzCb39YdwZkR6cgOFdxpj/sB961KcTgycjq3UH4h3\nAkcZY/587g1jzJ8BZwA7y6hXRGpIV3QSKGvtj4wx1wHfLZhx+H3gGuBu4O/cK6wJ4K0Fmx5jjNnq\n/vfF1tqcO53/QWPMGGCB3wBHLzymMSZV8PI5a+1mY8xlwDfdjnEX8CclSn8rcKsx5nrgcWDKff8n\nOPcMf+vzmaeMMWcBnzbG/BVOJ/kMzszTtDGmxKFFpJa0eoE0HXfW5bXW2gcaWMOHgZuttXuMMW8C\n3mytvaBR9YhI5XRFJ+LteZzn3mZw7gv+eYmvF5EmpSs6ERFpa5qMIiIibU0dnYiItDV1dCIi0tbU\n0Y
mISFtTRyciIm3t/wNlY9GprCKBfAAAAABJRU5ErkJggg==\n","text/plain":[""]},"metadata":{},"output_type":"display_data"}]},{"cell_type":"code","metadata":{"id":"yadZQxoKF4M8","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1635011940282,"user_tz":-330,"elapsed":443,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"2f79fbfa-369f-48a0-8027-ae3f747a9adf"},"source":["# Plot the distribution of the features using histograms\n","df.hist(edgecolor='black', linewidth=1.2) # one histogram per numeric feature, drawn on a new figure\n","fig = plt.gcf() # grab the figure df.hist() just created so it can be resized\n","fig.set_size_inches(12,6)\n","plt.show()"],"execution_count":48,"outputs":[{"output_type":"display_data","data":{"text/plain":["
"]},"metadata":{}}]},{"cell_type":"markdown","metadata":{"id":"xb-AFaG3PU0D"},"source":["## Importing all the necessary packages to use the various classification algorithms\n"]},{"cell_type":"code","metadata":{"id":"cJVjbgAjF4M_"},"source":["from sklearn.linear_model import LogisticRegression # for Logistic Regression Algorithm\n","from sklearn import svm # for support vector machine algorithm\n","from sklearn import metrics # for checking the model accuracy\n","from sklearn.tree import DecisionTreeClassifier # for the Decision Tree algorithm"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"8LizCSuWF4NA"},"source":["df.shape"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"WW5Hp1fFF4NC"},"source":["Now, when we train any algorithm, the number of features and their correlation play an important role. If there are many features and many of them are highly correlated, then training an algorithm with all the features may reduce the accuracy. Thus feature selection should be done carefully. This dataset has few features, but we will still look at the correlation.\n"]},{"cell_type":"code","metadata":{"id":"YABeXMklF4ND","colab":{"base_uri":"https://localhost:8080/","height":270},"executionInfo":{"status":"ok","timestamp":1635011174157,"user_tz":-330,"elapsed":819,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"a9537d5f-a224-42a1-b4ae-f10891512872"},"source":["plt.figure(figsize=(8,4))\n","sns.heatmap(df.select_dtypes(include='number').corr(), annot=True, cmap='cubehelix_r') # heatmap of the correlation matrix of the numeric feature columns only ('species' is text)\n","plt.show()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"gsd6QaoaF4NE"},"source":["Observation--->\n","The Sepal Width and Length are not correlated. The Petal Width and Length are highly correlated.\n","We will use all the features for training the algorithm and check the accuracy.\n","\n","Then we will use 1 Petal Feature and 1 Sepal Feature to check the accuracy of the algorithm as we are using only 2 features that are not correlated. Thus we can have a variance in the dataset which may help in better accuracy. We will check it later.\n","\n","Steps To Be followed When Applying an Algorithm\n","\n","Split the dataset into training and testing dataset. The testing dataset is generally smaller than training one as it will help in training the model better.\n","\n","Select any algorithm based on the problem (classification or regression) whatever you feel may be good.\n","Then pass the training dataset to the algorithm to train it. We use the .fit() method.\n","Then pass the testing data to the trained algorithm to predict the outcome. We use the .predict() method.\n","We then check the accuracy by passing the actual output and the predicted outcome to an accuracy metric (here metrics.accuracy_score)."]},{"cell_type":"markdown","metadata":{"id":"QAD_cNirF4NF"},"source":["# Splitting The Data into Training And Testing Dataset"]},{"cell_type":"code","metadata":{"id":"ZqSRd9GzF4NF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635011177764,"user_tz":-330,"elapsed":6,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"58101a90-1ffd-45b9-8dbe-3ef9e5f914a2"},"source":["from sklearn.model_selection import train_test_split\n","train, test = train_test_split(df, test_size=0.3, random_state=0) # our main data split into train and test; random_state makes the split reproducible\n","# the attribute test_size=0.3 splits the data into 70% and 30% ratio. 
train=70% and test=30%\n","print(train.shape)\n","print(test.shape)"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["(105, 5)\n","(45, 5)\n"]}]},{"cell_type":"code","metadata":{"id":"yO2J2FpjF4NG"},"source":["train_X = train[['sepal_length','sepal_width','petal_length','petal_width']] # taking the training data features\n","train_y = train.species # output of the training data\n","\n","test_X = test[['sepal_length','sepal_width','petal_length','petal_width']] # taking test data feature\n","test_y = test.species # output value of the test data"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"yR9D2qgQF4NG","colab":{"base_uri":"https://localhost:8080/","height":205},"executionInfo":{"status":"ok","timestamp":1635011258352,"user_tz":-330,"elapsed":749,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"f3bb7968-3684-416f-9a42-ab5fcbfff8f6"},"source":["train_X.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_lengthsepal_widthpetal_lengthpetal_width
244.83.41.90.2
895.52.54.01.3
134.33.01.10.1
645.62.93.61.3
255.03.01.60.2
\n","
"],"text/plain":[" sepal_length sepal_width petal_length petal_width\n","24 4.8 3.4 1.9 0.2\n","89 5.5 2.5 4.0 1.3\n","13 4.3 3.0 1.1 0.1\n","64 5.6 2.9 3.6 1.3\n","25 5.0 3.0 1.6 0.2"]},"metadata":{},"execution_count":36}]},{"cell_type":"code","metadata":{"id":"KcGbNGkcF4NH","colab":{"base_uri":"https://localhost:8080/","height":205},"executionInfo":{"status":"ok","timestamp":1635011262248,"user_tz":-330,"elapsed":623,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"dcc6f9ba-8240-4f3f-8d74-ff21e5ac54e3"},"source":["test_X.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sepal_lengthsepal_widthpetal_lengthpetal_width
34.63.11.50.2
454.83.01.40.3
1406.73.15.62.4
465.13.81.60.2
535.52.34.01.3
\n","
"],"text/plain":[" sepal_length sepal_width petal_length petal_width\n","3 4.6 3.1 1.5 0.2\n","45 4.8 3.0 1.4 0.3\n","140 6.7 3.1 5.6 2.4\n","46 5.1 3.8 1.6 0.2\n","53 5.5 2.3 4.0 1.3"]},"metadata":{},"execution_count":37}]},{"cell_type":"code","metadata":{"id":"5sFmts-IF4NI","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635011269110,"user_tz":-330,"elapsed":486,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"5892e853-6e9a-4f5e-e684-8c34c4822f55"},"source":["train_y.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["24 setosa\n","89 versicolor\n","13 setosa\n","64 versicolor\n","25 setosa\n","Name: species, dtype: object"]},"metadata":{},"execution_count":38}]},{"cell_type":"markdown","metadata":{"id":"S_w4Me2bF4NL"},"source":["## Logistic Regression "]},{"cell_type":"code","metadata":{"id":"gOQ5JrqrF4NL","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635011271869,"user_tz":-330,"elapsed":7,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"73cfaedd-f9da-45c3-9bc3-32cfd91d6915"},"source":["# the default max_iter=100 is not enough for lbfgs to converge on the unscaled features (ConvergenceWarning)\n","model = LogisticRegression(max_iter=1000)\n","model.fit(train_X, train_y)\n","prediction = model.predict(test_X)\n","print('The accuracy of Logistic Regression is: ', metrics.accuracy_score(test_y, prediction))"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["The accuracy of Logistic Regression is: 0.9777777777777777\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n","STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n","\n","Increase the number of iterations (max_iter) or scale the data as shown in:\n"," https://scikit-learn.org/stable/modules/preprocessing.html\n","Please also refer to the documentation for alternative solver options:\n"," https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n"," extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n"]}]},{"cell_type":"markdown","metadata":{"id":"e1NNX-EGF4NJ"},"source":["## Support Vector Machine SVM"]},{"cell_type":"code","metadata":{"id":"zSJmVzqnF4NK","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1635011282010,"user_tz":-330,"elapsed":517,"user":{"displayName":"Desu Venkata Manikanta","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"10818416874497828882"}},"outputId":"37f4bbd2-9e91-4fd6-f53c-3aea85881b5b"},"source":["clf = svm.SVC(kernel='linear')\n","clf.fit(train_X, train_y)\n","\n","#Predict the response for test dataset\n","prediction = clf.predict(test_X)\n","\n","print('The accuracy of Support Vector Machine is: ', metrics.accuracy_score(prediction, test_y))"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["The accuracy of Support Vector Machine is: 1.0\n"]}]},{"cell_type":"markdown","metadata":{"id":"GWfemKzPF4NN"},"source":["## Decision Tree"]},{"cell_type":"code","metadata":{"id":"iRXy3EZIF4NN","outputId":"f470e075-fc92-4f3f-c343-7a8257e2c5d2"},"source":["#implementing using Decision Tree\n","model = DecisionTreeClassifier(random_state=0) # fixed seed so the fitted tree is reproducible\n","model.fit(train_X, train_y)\n","prediction = model.predict(test_X) # without this the cell would score the SVM's stale prediction\n","\n","print('The accuracy of Decision Tree is: ', metrics.accuracy_score(test_y, prediction))"],"execution_count":null,"outputs":[{"name":"stdout","output_type":"stream","text":["('The accuracy of Decision Tree is: ', 0.93333333333333335)\n"]}]},{"cell_type":"markdown","metadata":{"id":"uB2Co6f_F4NQ"},"source":["### We used all the features of iris in above models. 
Now we will use Petals and Sepals Separately"]},{"cell_type":"markdown","metadata":{"id":"1_v6cAZMF4NQ"},"source":["### Creating Petals And Sepals Training Data"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"e1Q-1b9YF4NQ"},"source":["# column names follow this iris.csv (snake_case), the same names used for train_X/test_X above\n","petal = df[['petal_length','petal_width','species']]\n","sepal = df[['sepal_length','sepal_width','species']]"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Lv-nab5oF4NQ"},"source":["### For Iris Petal"]},{"cell_type":"code","metadata":{"collapsed":true,"id":"DuOqLUWZF4NQ"},"source":["train_p,test_p = train_test_split(petal, test_size=0.3, random_state=0) #petals\n","train_x_p = train_p[['petal_width','petal_length']] # taking the training data's Petal features\n","train_y_p = train_p.species # output of the training data\n","\n","test_x_p = test_p[['petal_width','petal_length']] # taking the test data's Petal features\n","test_y_p = test_p.species # output of the test data"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"bgNB8kaNF4NU"},"source":["### For Iris Sepal"]},{"cell_type":"code","metadata":{"id":"6hVj5MW3F4NU"},"source":["#Similarly define the split for sepals\n","#define the training and test data's Sepal features followed by the output of the training and test data\n","\n","#use naming- train_s,test_s ; train_x_s, train_y_s; test_x_s, test_y_s\n","\n","train_s,test_s = train_test_split(sepal, test_size=0.3, random_state=0) #sepals, same seed as the petal split\n","train_x_s = train_s[['sepal_width','sepal_length']] # taking the training data's Sepal features\n","train_y_s = train_s.species # output of the training data\n","\n","test_x_s = test_s[['sepal_width','sepal_length']] # taking the test data's Sepal features\n","test_y_s = test_s.species # output of the test data"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"y08e1O6aU9mx"},"source":["Implementing the algorithms just like we did on the complete dataset but separately on sepals and petals and calculating accuracy"]},{"cell_type":"markdown","metadata":{"id":"TeMWnQr6F4NV"},"source":["## SVM Algorithm"]},{"cell_type":"code","metadata":{"id":"jhlutJ78F4NV"},"source":["# each score below comes from a model fitted and evaluated on that feature subset only\n","model = svm.SVC(kernel='linear')\n","model.fit(train_x_p, train_y_p)\n","prediction = model.predict(test_x_p)\n","print('The accuracy of the SVM using Petals is:',metrics.accuracy_score(test_y_p,prediction))\n","\n","model = svm.SVC(kernel='linear')\n","model.fit(train_x_s, train_y_s)\n","prediction = model.predict(test_x_s)\n","print('The accuracy of the SVM using Sepals 
is:',metrics.accuracy_score(test_y_s,prediction))"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Mli7zcq_F4NV"},"source":["## Logistic Regression"]},{"cell_type":"code","metadata":{"id":"2DqK_dFCF4NV"},"source":["# max_iter is raised above the default so lbfgs can converge on the unscaled features\n","model = LogisticRegression(max_iter=1000)\n","model.fit(train_x_p, train_y_p)\n","prediction = model.predict(test_x_p)\n","print('The accuracy of the Logistic Regression using Petals is:',metrics.accuracy_score(test_y_p,prediction))\n","\n","model = LogisticRegression(max_iter=1000)\n","model.fit(train_x_s, train_y_s)\n","prediction = model.predict(test_x_s)\n","print('The accuracy of the Logistic Regression using Sepals is:',metrics.accuracy_score(test_y_s,prediction))"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"aM-7Zx95F4NW"},"source":["## Decision Tree"]},{"cell_type":"code","metadata":{"id":"S8tXp-gMF4NW"},"source":["# fixed random_state so the fitted trees (and their scores) are reproducible\n","model = DecisionTreeClassifier(random_state=0)\n","model.fit(train_x_p, train_y_p)\n","prediction = model.predict(test_x_p)\n","print('The accuracy of the Decision Tree using Petals is:',metrics.accuracy_score(test_y_p,prediction))\n","\n","model = DecisionTreeClassifier(random_state=0)\n","model.fit(train_x_s, train_y_s)\n","prediction = model.predict(test_x_s)\n","print('The accuracy of the Decision Tree using Sepals is:',metrics.accuracy_score(test_y_s,prediction))"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"6ec0NUyJF4NW"},"source":["\n","\n","\n","### Question:\n","Does Using Petals over Sepals for training the data give a much better accuracy? Why?\n"]}]} \ No newline at end of file From b24c88681db685fefcfdf6c616da1badad8c0494 Mon Sep 17 00:00:00 2001 From: Mani9550 <93199718+Mani9550@users.noreply.github.com> Date: Fri, 29 Oct 2021 00:19:55 +0530 Subject: [PATCH 06/11] Updated Task-4 code as per your comments Added One pull request for each task. 
Please merge the PR --- KNN_Task4__203174002.ipynb | 1176 ++++++++++++++++++++++++++++++++++++ 1 file changed, 1176 insertions(+) create mode 100644 KNN_Task4__203174002.ipynb diff --git a/KNN_Task4__203174002.ipynb b/KNN_Task4__203174002.ipynb new file mode 100644 index 0000000..dbf1fd2 --- /dev/null +++ b/KNN_Task4__203174002.ipynb @@ -0,0 +1,1176 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "KNN_Task4 _203174002.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "97fb6e979f4f4455ab53206969f946e9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_9f89ed02ee3f40ceb2a5fd159f03c2ba", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_a975fbfaee414fd29b6319b69b5e0f8a", + "IPY_MODEL_19a7e07e3413408a96e5ce3844185182", + "IPY_MODEL_0ed469c169c14685a41571442dc7d6b5" + ] + } + }, + "9f89ed02ee3f40ceb2a5fd159f03c2ba": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + 
"width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "a975fbfaee414fd29b6319b69b5e0f8a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_2663fca766e340189787c5d257e1b5bc", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_3199e8deb1994dfda6cd9c03dc611655" + } + }, + "19a7e07e3413408a96e5ce3844185182": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_06d256c3ed17466e9c67be388f9fa4d0", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 47, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 47, + "_view_count": null, + 
"_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_c362485c1cb14f859a5e34706ab94503" + } + }, + "0ed469c169c14685a41571442dc7d6b5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_fc7918a519444d53ac61414b6d6e21c3", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 47/47 [00:01<00:00, 35.74it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_ec74538e65104e31bc1c22b6ec32dc2a" + } + }, + "2663fca766e340189787c5d257e1b5bc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "3199e8deb1994dfda6cd9c03dc611655": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + 
"grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "06d256c3ed17466e9c67be388f9fa4d0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "c362485c1cb14f859a5e34706ab94503": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": 
null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "fc7918a519444d53ac61414b6d6e21c3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "ec74538e65104e31bc1c22b6ec32dc2a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": 
null, + "display": null, + "left": null + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "cPP7BfqFSgyH" + }, + "source": [ + "# K-Nearest Neighbors Algorithm\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zd0p7ZUpSgyL" + }, + "source": [ + "![example 1.png]()In this Jupyter Notebook we will focus on $KNN-Algorithm$. KNN is a data classification algorithm that attempts to determine what group a data point is in by looking at the data points around it.\n", + "\n", + "An algorithm, looking at one point on a grid, trying to determine if a point is in group A or B, looks at the states of the points that are near it. The range is arbitrarily determined, but the point is to take a sample of the data. If the majority of the points are in group A, then it is likely that the data point in question will be A rather than B, and vice versa.\n", + "
\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TyGHDf4NSgyM" + }, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "iIEvA0xjSgyN" + }, + "source": [ + "import numpy as np\n", + "from tqdm import tqdm_notebook\n", + "import matplotlib.pyplot as plt" + ], + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rc8ruF56SgyO" + }, + "source": [ + "# How does it work?\n", + "\n", + "We have a labeled data set $X_{train}$, and a new set $X$ that we want to classify based on the existing labels.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rGbvEXbvSgyO" + }, + "source": [ + "## Steps" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B-nf9G4ZSgyP" + }, + "source": [ + "### 1. Calculate distance to all neighbours\n", + "### 2. Sort neighbours (based on closest distance)\n", + "### 3. Count probabilities of each class for k nearest neighbours \n", + "### 4. The class with the highest probability is your prediction" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LuWwKdFrSgyP" + }, + "source": [ + "# 1. Calculate distance to all neighbours\n", + "\n", + "Depending on the problem, you should use a different distance metric.\n", + "
\n", + "For example, we can use Euclidean distance. Euclidean distance is the \"ordinary\" straight-line distance between two points in D-dimensional space.\n", + "\n", + "#### Definition\n", + "$d(p, q) = d(q, p) = \\sqrt{(q_1 - p_1)^2 + (q_2 - p_2)^2 + \\dots + (q_D - p_D)^2} = \\sqrt{\\sum_{d=1}^{D} (p_d - q_d)^2}$\n", + "\n", + "#### Example\n", + "Distance in $R^2$\n", + "\n", + "\n", + "\n", + "$p = (4,6)$\n", + "
\n", + "$q = (1,2)$\n", + "
\n", + "$d(p, q) = \\sqrt{(1-4)^2 + (2-6)^2} =\\sqrt{9 + 16} = \\sqrt{25} = 5 $\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vlvNZqiJSgyQ" + }, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EvmQi6nsSgyR" + }, + "source": [ + "def get_euclidean_distance(A_matrix, B_matrix):\n", + " \n", + " C=[]\n", + " for i in A_matrix:\n", + " temp=[]\n", + " for j in B_matrix:\n", + " temp.append(np.sum((i-j)**2))\n", + " C.append(temp)\n", + " \n", + " ## Use the distance formula for the matrices using numpy functions\n", + " ## C is the sum of the squares of the distances\n", + "\n", + " return np.sqrt(C)\n" + ], + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GABzTa_0SgyS" + }, + "source": [ + "## Example Usage" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "W6b8yBSoSgyS", + "outputId": "cdbad526-4c41-43ad-df70-ba8dee28c8be" + }, + "source": [ + "X = np.array([[1,2,3] , [-4,5,-6]])\n", + "\n", + "X_train = np.array([[0,0,0], [1,2,3], [4,5,6], [-4, 4, -6]])\n", + "\n", + "print(\"X: {} Exaples in {} Dimensional space\".format(*X.shape))\n", + "print(\"X_train: {} Exaples in {} Dimensional space\".format(*X_train.shape))\n", + "\n", + "\n", + "print()\n", + "\n", + "print(\"X:\")\n", + "print(X)\n", + "\n", + "print()\n", + "\n", + "print(\"X_train\")\n", + "print(X_train)\n" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "X: 2 Exaples in 3 Dimensional space\n", + "X_train: 4 Exaples in 3 Dimensional space\n", + "\n", + "X:\n", + "[[ 1 2 3]\n", + " [-4 5 -6]]\n", + "\n", + "X_train\n", + "[[ 0 0 0]\n", + " [ 1 2 3]\n", + " [ 4 5 6]\n", + " [-4 4 -6]]\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kB8IZcDpSgyT", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": 
"3346c864-75da-451f-e6b6-7ced1b54988c" + }, + "source": [ + "## Initialize the distance matrix using the get_euclidean_matrix\n", + "\n", + "C = get_euclidean_distance(X, X_train)\n", + "\n", + "## Euclidean distance b/w row i of X and row j of X_train is available as C[i][j]\n", + "\n", + "\n", + "## Print Distance between first example from X and first form X_train\n", + "print(f\"Distance between first example from X and first form X_train {C[0,0]}\")" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Distance between first example from X and first form X_train 3.7416573867739413\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vbaJfBihSgyT" + }, + "source": [ + "# 2. Sort neightbours\n", + "\n", + "In order to find best fitting class for our observations we need to find to which classes belong observation neightbours and then to sort classes based on the closest distance\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b1VLHUj2SgyU" + }, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "na0G1o_ASgyU" + }, + "source": [ + "def get_sorted_train_labels(distance_matrix, y):\n", + " \"\"\"\n", + " Function sorts y labels, based on probabilities from distances matrix\n", + " Args:\n", + " distance_matrix (numpy.ndarray): Distance Matrix, between points from X and X_train, size: N1:N2\n", + " y (numpy.ndarray): vector of classes of X points, size: N1\n", + "\n", + " Returns:\n", + " numpy.ndarray: labels matrix sorted according to distances to nearest neightours, size N1:N2 \n", + "\n", + " \"\"\"\n", + "\n", + " labels=[[0 for i in range(distance_matrix.shape[1])] for j in range(distance_matrix.shape[0])]\n", + " for i in range(distance_matrix.shape[0]):\n", + " temp=[]\n", + " for j in range(distance_matrix.shape[1]):\n", + " temp.append([distance_matrix[i][j],y[j]])\n", + " temp.sort()\n", + " for j in 
range(distance_matrix.shape[1]):\n", + " labels[i][j]=temp[j][1]\n", + " return np.array(labels)\n", + "\n", + " \n" + ], + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U0I8eltDSgyV" + }, + "source": [ + "# 3. Count possibilities of each class for k nearest neighbours \n", + "\n", + "In order to find best class for our observation $x$ we need to calculate the probability of belonging to each class. In our case it is quite easy. We need just to count how many from k-nearest-neighbours of observation $x$ belong to each class and then devide it by k \n", + "

\n", + "$p(y=class \\space| x) = \\frac{\\sum_{1}^{k}(1 \\space if \\space N_i = class, \\space else \\space 0) }{k}$ Where $N_i$ is $i$ nearest neightbour\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j0ZtOC38SgyV" + }, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "y2aaG2GdSgyV" + }, + "source": [ + "def get_p_y_x_using_knn(y, k):\n", + " \"\"\"\n", + " The function determines the probability distribution p (y | x)\n", + " for each of the labels for objects from the X\n", + " using the KNN classification learned on the X_train\n", + "\n", + " Args:\n", + " y (numpy.ndarray): Sorted matrix of N2 nearest neighbours labels, size N1:N2\n", + " k (int): number of nearest neighbours for KNN algorithm\n", + "\n", + " Returns: numpy.ndarray: Matrix of probabilities for N1 points (from set X) of belonging to each class,\n", + " size N1:C (where C is number of classes)\n", + " \"\"\"\n", + "\n", + " probabilities_matrix=[]\n", + " for i in y:\n", + " temp={}\n", + " for j in range(k):\n", + " if i[j] in temp.keys():\n", + " temp[i[j]]+=1/k\n", + " else:\n", + " temp[i[j]]=1/k\n", + " probabilities_matrix.append(temp)\n", + " return probabilities_matrix\n" + ], + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ThEbAnXISgyW" + }, + "source": [ + "# 4. 
The class with highest possibilty is Your prediction" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_i7NTtN4SgyW" + }, + "source": [ + "At the end we combine all previous steps to get prediction" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OzK6rY8mSgyW" + }, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "DaYqr_i6SgyW" + }, + "source": [ + "def predict(X, X_train, y_train, k, distance_function):\n", + " \"\"\"\n", + " Function returns predictions for new set X based on labels of points from X_train\n", + " Args:\n", + " X (numpy.ndarray): set of observations (points) that we want to label\n", + " X_train (numpy.ndarray): set of lalabeld bservations (points)\n", + " y_train (numpy.ndarray): labels for X_train\n", + " k (int): number of nearest neighbours for KNN algorithm\n", + "\n", + " Returns:\n", + " (numpy.ndarray): label predictions for points from set X\n", + " \"\"\"\n", + " distance_matrix=distance_function(X,X_train)\n", + " prob=get_p_y_x_using_knn(get_sorted_train_labels(distance_matrix, y_train), k)\n", + " \n", + " prediction=[]\n", + " for i in prob:\n", + " prediction.append(max(zip(i.values(), i.keys()))[1])\n", + " \n", + " return np.array(prediction)\n", + " return prediction" + ], + "execution_count": 14, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i9kzyASWSgyX" + }, + "source": [ + "# Accuracy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v8bNPTPZSgyX" + }, + "source": [ + "To find how good our knn model works we should count accuracy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dgFCnJ14SgyX" + }, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2ySpyThlSgyX" + }, + "source": [ + "def count_accuracy(prediction, y_true):\n", + " \"\"\"\n", + " Returns:\n", + " float: Predictions accuracy\n", + "\n", + " \"\"\"\n", + " N1 = prediction.shape[0]\n", + " \n", + " 
accuracy=np.sum(prediction==y_true)/len(prediction)\n", + "\n", + " return accuracy" + ], + "execution_count": 18, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b5g7YFY2SgyX" + }, + "source": [ + "## Example usage" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uLqCqmJNSgyY", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ba253ddc-ec5c-4461-bd70-d191fa1e006c" + }, + "source": [ + "y_true = np.array([[0, 2]])\n", + "\n", + "predicton = predict(X, X_train, [2,2,0,2], 3, get_euclidean_distance)\n", + "\n", + "\n", + "print(\"True classes:{}, accuracy {}%\".format(y_true, count_accuracy(predicton, y_true) * 100))" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "True classes:[[0 2]], accuracy 50.0%\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "--WUpIcxSgyY" + }, + "source": [ + "# Find best k" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "itkcD0DlSgyY" + }, + "source": [ + "Best k parameter is that one for which we have highest accuracy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7GYEUBnnSgyY" + }, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Q6OhNBOoSgyY" + }, + "source": [ + "def select_knn_model(X_validation, y_validation, X_train, y_train, k_values, distance_function):\n", + " \"\"\"\n", + " Function returns k parameter that best fit Xval points\n", + " Args:\n", + " Xval (numpy.ndarray): set of Validation Data, size N1:D\n", + " Xtrain (numpy.ndarray): set of Training Data, size N2:D\n", + " yval (numpy.ndarray): set of labels for Validation data, size N1:1\n", + " ytrain (numpy.ndarray): set of labels for Training Data, size N2:1\n", + " k_values (list): list of int values of k parameter that should be checked\n", + "\n", + " Returns:\n", + " int: k paprameter that best fit validation set\n", + " \"\"\"\n", + "\n", + " 
accuracies = []\n", + "\n", + " for k in tqdm_notebook(k_values):\n", + " prediction = predict(X_validation, X_train, y_train, k, distance_function)\n", + "\n", + " accuracy = count_accuracy(prediction, y_validation)\n", + " accuracies.append(accuracy)\n", + "\n", + " best_k = k_values[accuracies.index(max(accuracies))]\n", + "\n", + " return best_k, accuracies\n" + ], + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nGtIjD0WSgyY" + }, + "source": [ + "# Real World Example - Iris Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-o6MHMtKSgyZ" + }, + "source": [ + "\n", + "\n", + "\n", + "\n", + "This is perhaps the best known database to be found in the pattern recognition literature. The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other. \n", + "\n", + "Each example contains 4 attributes\n", + "1. sepal length in cm \n", + "2. sepal width in cm \n", + "3. petal length in cm \n", + "4. petal width in cm \n", + "\n", + "Predicted attribute: class of iris plant. 
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SY8oOngQSgyZ", + "outputId": "0fd9baa5-e665-4de1-ab17-d986fe0ece4b" + }, + "source": [ + "from sklearn import datasets\n", + "import matplotlib.pyplot as plt\n", + "\n", + "iris = datasets.load_iris()\n", + "\n", + "iris_X = iris.data\n", + "iris_y = iris.target\n", + "\n", + "print(\"Iris: {} examples in {} dimensional space\".format(*iris_X.shape))\n", + "print(\"First example in dataset :\\n Speal lenght: {}cm \\n Speal width: {}cm \\n Petal length: {}cm \\n Petal width: {}cm\".format(*iris_X[0]))\n", + "\n", + "print(\"Avalible classes\", np.unique(iris_y))" + ], + "execution_count": 21, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Iris: 150 examples in 4 dimensional space\n", + "First example in dataset :\n", + " Speal lenght: 5.1cm \n", + " Speal width: 3.5cm \n", + " Petal length: 1.4cm \n", + " Petal width: 0.2cm\n", + "Avalible classes [0 1 2]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-IlKSX7hSgyZ" + }, + "source": [ + "## Prepare Data\n", + "\n", + "In our data set we have 150 examples (50 examples of each class), we have to divide it into 3 datasets.\n", + "1. Training data set, 90 examples. It will be used to find k - nearest neightbours\n", + "2. Validation data set, 30 examples. It will be used to find best k parameter, the one for which accuracy is highest\n", + "3. Test data set, 30 examples. 
It will be used to check how good our model performs\n", + "\n", + "Data has to be shuffled (mixed in random order), because originally it is stored 50 examples of class 0, 50 of 1 and 50 of 2.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RA1Q7kCPSgyZ" + }, + "source": [ + "from sklearn.utils import shuffle\n", + "\n", + "iris_X, iris_y = shuffle(iris_X, iris_y, random_state=134)\n", + "\n", + "\n", + "test_size = 30\n", + "validation_size = 30\n", + "training_size = 90\n", + "\n", + "## Initialize X_test\n", + "## Initialize X_validation \n", + "## Initialize X_train \n", + "X_test=iris_X[:test_size]\n", + "X_validation=iris_X[test_size:test_size+validation_size]\n", + "X_train=iris_X[test_size+validation_size:]\n", + "\n", + "y_test=iris_y[:test_size]\n", + "y_validation=iris_y[test_size:test_size+validation_size]\n", + "y_train=iris_y[test_size+validation_size:]\n", + "## Initialize y_test\n", + "## Initialize y_validation\n", + "## Initialize y_train" + ], + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r9xJVLzrSgyZ" + }, + "source": [ + "## Find best k parameter" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 345, + "referenced_widgets": [ + "97fb6e979f4f4455ab53206969f946e9", + "9f89ed02ee3f40ceb2a5fd159f03c2ba", + "a975fbfaee414fd29b6319b69b5e0f8a", + "19a7e07e3413408a96e5ce3844185182", + "0ed469c169c14685a41571442dc7d6b5", + "2663fca766e340189787c5d257e1b5bc", + "3199e8deb1994dfda6cd9c03dc611655", + "06d256c3ed17466e9c67be388f9fa4d0", + "c362485c1cb14f859a5e34706ab94503", + "fc7918a519444d53ac61414b6d6e21c3", + "ec74538e65104e31bc1c22b6ec32dc2a" + ] + }, + "id": "hbvZBVNBSgya", + "outputId": "6318331d-bb76-42af-c5d8-ca9f25ae115f" + }, + "source": [ + "k_values = [i for i in range(3,50)]\n", + "\n", + "best_k, accuracies = select_knn_model(X_validation, y_validation, X_train, y_train, k_values, 
distance_function=get_euclidean_distance)\n", + "plt.plot(k_values,accuracies)\n", + "plt.xlabel(\"K\")\n", + "plt.ylabel(\"Accuracy\")\n", + "plt.grid(True)\n", + "plt.show()\n", + "## Plot accuracy vs k values graph" + ], + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:17: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", + "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "97fb6e979f4f4455ab53206969f946e9", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + " 0%| | 0/47 [00:00" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BjQBDWJMSgya" + }, + "source": [ + "## Count accuracy for test set" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_f-J5sSESgya", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "f22ab924-75bb-42a8-abe6-6802b16dd0bf" + }, + "source": [ + "prediction = predict(X_test, X_train, y_train, best_k, get_euclidean_distance)\n", + "\n", + "## Calculate Best accuracy using the best k value\n", + "print(\"Accuracy for best k=\",best_k,\":\", 100*count_accuracy(prediction,y_test),\"%\")\n" + ], + "execution_count": 24, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy for best k= 14 : 93.33333333333333 %\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "72O5eXbCSgyc" + }, + "source": [ + "# Sources\n", + "\n", + "https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm - first visualisation image\n", + "\n", + "https://en.wikipedia.org/wiki/Euclidean_distance - euclidean distance visualisation\n", + "\n", + 
"https://rajritvikblog.wordpress.com/2017/06/29/iris-dataset-analysis-python/ - first iris image\n", + "\n", + "https://rpubs.com/wjholst/322258 - second iris image\n", + "\n" + ] + } + ] +} \ No newline at end of file From 80d83a1fa1f3ad8696b2f2f21d76557c8dc4f115 Mon Sep 17 00:00:00 2001 From: Mani9550 <93199718+Mani9550@users.noreply.github.com> Date: Fri, 29 Oct 2021 00:21:36 +0530 Subject: [PATCH 07/11] Updated the code as per your comments Added one pull request for one task. please merge the PR --- Classification_Task3_203174002 (1).ipynb | 1329 ++++++++++++++++++++++ 1 file changed, 1329 insertions(+) create mode 100644 Classification_Task3_203174002 (1).ipynb diff --git a/Classification_Task3_203174002 (1).ipynb b/Classification_Task3_203174002 (1).ipynb new file mode 100644 index 0000000..926d289 --- /dev/null +++ b/Classification_Task3_203174002 (1).ipynb @@ -0,0 +1,1329 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Classification_Task3_203174002.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "7C5rAxwPGDQf" + }, + "source": [ + "# Importing useful libraries" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8qvrslgsF4Mn" + }, + "source": [ + "import numpy as np\n", + "import pandas as pd \n", + "from pandas import Series, DataFrame\n", + "\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6aYOLI2BHF6m" + }, + "source": [ + "## 
Loading the dataset.\n", + "The dataset can be found [here](https://github.com/shreedharmalpani/Intro-To-ML-Hello-FOSS/blob/main/iris.csv)" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_hccks2pF4Mq" + }, + "source": [ + "df = pd.read_csv(\"iris.csv\")" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "um0L09IOF4Ms", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 205 + }, + "outputId": "b1cd9799-9cae-41f4-bcc3-ce0bb8679482" + }, + "source": [ + "df.head()" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cbx8gP4zF4Mt", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b449df47-a2ce-4924-e5d8-19476d34d618" + }, + "source": [ + "df.info() " + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "RangeIndex: 150 entries, 0 to 149\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 sepal_length 150 non-null float64\n", + " 1 sepal_width 150 non-null float64\n", + " 2 petal_length 150 non-null float64\n", + " 3 petal_width 150 non-null float64\n", + " 4 species 150 non-null object \n", + "dtypes: float64(4), object(1)\n", + "memory usage: 6.0+ KB\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vjH1pAqoJna2" + }, + "source": [ + "# Data Cleaning & Data Visualization" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PNfSyZF1F4Mu" + }, + "source": [ + "### 1) Remove unneeded columns\n", + "### 2) Check for duplicate rows \n", + "### 2) Check for rows with missing values\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "HKFsVhubF4Mx" + }, + "source": [ + "df.isna().sum()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Rm7rlDcPR0aI", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 422 + }, + "outputId": "09eb2b3d-e418-4c73-ff44-0cc68524710a" + }, + "source": [ + "df.isnull()" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
0FalseFalseFalseFalseFalse
1FalseFalseFalseFalseFalse
2FalseFalseFalseFalseFalse
3FalseFalseFalseFalseFalse
4FalseFalseFalseFalseFalse
..................
145FalseFalseFalseFalseFalse
146FalseFalseFalseFalseFalse
147FalseFalseFalseFalseFalse
148FalseFalseFalseFalseFalse
149FalseFalseFalseFalseFalse
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 False False False False False\n", + "1 False False False False False\n", + "2 False False False False False\n", + "3 False False False False False\n", + "4 False False False False False\n", + ".. ... ... ... ... ...\n", + "145 False False False False False\n", + "146 False False False False False\n", + "147 False False False False False\n", + "148 False False False False False\n", + "149 False False False False False\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L1cnZS5XF4M2" + }, + "source": [ + "### EDA (Exploratory Data Analysis) with Iris" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "DjsnhAraF4M7", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 458 + }, + "outputId": "8b95cd96-2bff-4a55-919e-4eb5d4806892" + }, + "source": [ + "##Displaying a scatter plot to show the distribution of Sepal Length vs width the dataset\n", + "\n", + "fig = df[df.species == 'setosa'].plot(kind='scatter', x='petal_length', y='petal_width', color='orange', label='Setosa')\n", + "df[df.species == 'versicolor'].plot(kind='scatter', x='petal_length', y='petal_width', color='blue', label='Versicolor', ax=fig)\n", + "df[df.species == 'virginica'].plot(kind='scatter', x='petal_length', y='petal_width', color='green', label='Virginica', ax=fig)\n", + "\n", + "fig.set_xlabel('Petal Length')\n", + "fig.set_ylabel('Petal Width')\n", + "fig.set_title('Petal Length Vs Width')\n", + "\n", + "fig=plt.gcf()\n", + "fig.set_size_inches(10, 7)\n", + "plt.show()" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "O_eYByFvF4M5", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 458 + }, + "outputId": "b623dd49-eb52-4d54-e6b9-abf1d3132e0b" + }, + "source": [ + "#Display a scatter plot to show the distribution of Sepal Length vs width the dataset (Like previous Petal lenght vs width scatter plot)\n", + "fig=sns.scatterplot(data=df,x=\"sepal_length\",y=\"sepal_width\",hue=\"species\")\n", + "\n", + "\n", + "#code\n", + "\n", + "fig.set_xlabel('Sepal Length')\n", + "fig.set_ylabel('Sepal Width')\n", + "fig.set_title('Sepal Length Vs Width')\n", + "\n", + "\n", + "fig=plt.gcf()\n", + "fig.set_size_inches(10, 7)\n", + "plt.show()\n", + "#example plot" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EIh_yKQAF4M6", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 242 + }, + "outputId": "8bdd4cb6-6d9f-4bf7-d2f3-55e2bf5469b2" + }, + "source": [ + "#plot the FacetGrid plot using the seaborn library\n", + "\n", + "fg = sns.FacetGrid(df, col = \"species\")\n", + "fg.map(plt.scatter, \"sepal_length\", \"sepal_width\")\n", + "fg.add_legend()\n", + "\n" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 11 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yadZQxoKF4M8", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 390 + }, + "outputId": "168425dc-b925-409f-cb41-8499850aa047" + }, + "source": [ + "#Plot the distritbution of the features using histgram\n", + "plt.subplot(2, 2, 1)\n", + "sns.histplot(data=df,x=\"petal_length\")\n", + "plt.xlabel(\"\")\n", + "plt.ylabel(\"\")\n", + "plt.grid(True)\n", + "plt.title(\"Petal length\")\n", + "\n", + "plt.subplot(2, 2, 2)\n", + "sns.histplot(data=df,x=\"petal_width\")\n", + "plt.xlabel(\"\")\n", + "plt.ylabel(\"\")\n", + "plt.grid(True)\n", + "plt.title(\"Petal width\")\n", + "\n", + "plt.subplot(2, 2, 3)\n", + "sns.histplot(data=df,x=\"sepal_length\")\n", + "plt.xlabel(\"\")\n", + "plt.ylabel(\"\")\n", + "plt.grid(True)\n", + "plt.title(\"Sepal length\")\n", + "\n", + "plt.subplot(2, 2, 4)\n", + "sns.histplot(data=df,x=\"sepal_width\")\n", + "plt.xlabel(\"\")\n", + "plt.ylabel(\"\")\n", + "plt.grid(True)\n", + "plt.title(\"Sepal width\")\n", + "\n", + "#fig=sns.histplot(data=df,x=\"petal_length\")\n", + "fig=plt.gcf()\n", + "fig.set_size_inches(12,6)\n", + "plt.show()" + ], + "execution_count": 12, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAsUAAAF1CAYAAAAA6ZfwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de5hkZX3o+++PGRQDCDPCaYfpGcdENgnxRNSBICSmR4SgMWASIvgkBhPcM8mRrZ44UeLeJ9HsXHDvHqNu8yijGMdIYAhKJEQT2ISGeDAIjchFSEAEZ2Bg1OHWmiMO/M4fazUp277UfVXV+n6ep56uWrf399aqevtXa71rvZGZSJIkSXW2T9UBSJIkSVUzKZYkSVLtmRRLkiSp9kyKJUmSVHsmxZIkSao9k2JJkiTVnkmxhkJEvDsiPrXAvImI2NnvmMqyF4xLktSddjIifi0irlhk/lREvGmR+Z+IiD/uJAaNPpNidSQi7o2If4+ImYh4qGx4DmhivUUbsEFUZfItSd02TO13Zl6QmSc1s2xEvDEivtDrmDR6TIrVDb+YmQcALwHWA/+t4ngkSc2x/ZZKJsXqmsy8H/g88EKAiDg2Iq6LiEci4isRMVFO/xPgZ4EPlUcoPlRO/0BE7IiIxyJiOiJ+tp04IuKwiPh0RHwzIr4eEW9pmPfuiLg4Ij4ZEY9HxO0Rsb5h/ksi4svlvL+JiO0R8ccRsX9Zt8PKmGci4rBytWcstD1JGgZVtd8RcU1E/Er5/PiIyIj4hfL1CRFxc/n8B47+RsSJEXFnRDxaxhDl9J8APgK8rIzvkYbiVkTE35dt9fUR8WMdvGUaQSbF6pqIWAO8GvhyRKwG/h74Y2AlsBn4dEQcmpn/Ffhn4OzMPCAzzy43cQNwVLn8XwN/ExH7tRjDPsDfAV8BVgMnAG+LiJ9vWOwU4CLgYOAyYLZRfwZwKfCJMoYLgV8CyMzvAK8CHihjPiAzH1hse5I0LCpsv68BJsrnPwfcA7y84fU188R6CPAZiqPahwBfA44HyMw7gN8GvljGd3DDqmcA7wFWAHcDf9JEfKoRk2J1w9+Wv8a/QNGA/Snw68DnMvNzmflUZl4J3EjR6M4rMz+Vmd/OzL2ZuQV4JnBEi7EcDRyamX+UmU9k5j3ARykaw1lfKON6Evgr4EXl9GOB5cAHM/P7mfkZ4EtNlLnQ9iRp0FXdfl9DkfxCkQz/WcPreZPiMo7bM/OSzPw+8H7gwSbKujQzv5SZe4ELKJJ46WnLqw5AI+G1mfm/GydExPOAX42IX2yYvC9w9UIbiYjNwFnAYUACz6Y4CtCK51F0cWg8ZbaM4sjGrMbG87vAfhGxvCz3/szMhvk7mihz3u2VDa8kDbKq2+8vAv8pIsYoktRTgPeUR4OPAa6dZ53DaGibMzMjop22esmLClUvJsXqlR3AX2Xmf15gfmPiSdn/7B0U3R1uz8ynIuJhyn5iLZb79cw8vNWAgV3A6oiIhsR4DcWpuR+KWZJGVN/a78z8bkRMA28FbsvMJyLiOuB3ga9l5rfmWW0XRds8W340vp4bn9Qsu0+oVz4F/GJE/HxELIuI/cpbmo2X8x8CfrRh+QOBvcA3geUR8QcURxpa9SXg8Yh4Z0Q8qyz7hRFxdBPrfhF4Ejg7IpZHxKkURypmPQQ8JyIOaiMuSRoW/W6/rwHO5j+6SkzNeT3X3wM/GRG/XJ7lewvw3Ib5DwHj5XUiUtNMitUTmbkDOBV4F0VDuQP4Pf7jM/cB4LSIeDgiPgj8I/APwL8B9wH/H811XZhb7pPAayhOw30d+BbwMWDJRDYznwB+meIU4CMU/eouB75Xzr+T4uK7e8orsg9baFuSNKwqaL+voUisr13g9dz4vgX8KnAu8G3gcOD/bVjkn4DbgQcjYr4jzdK84ge7T0pqFBHXAx/JzL+sOhZJktQ7HimWGkTEz0XEc8vuE2cCP0VxBESSJI0wL7STftA
RwMXA/hT3yzwtM3dVG5IkSeo1u09IkiSp9uw+IUmSpNozKZYkSVLt9bVP8SGHHJLr1q1reb3vfOc77L///t0PqA+Mvf+GNW4w9qo0E/v09PS3MvPQPoU0EGbb7GHet62wnqOjDnUE67mUVtvtvibF69at48Ybb2x5vampKSYmJrofUB8Ye/8Na9xg7FVpJvaIuK8/0QyO2TZ7mPdtK6zn6KhDHcF6LqXVdtvuE5IkSao9k2JJkiTVnkmxJEmSas+kWJIkSbVnUixJkqTaG4qk+JZbbyUiKnmsXrO26upLUssiYllEfDkiLi9fPz8iro+IuyNie0Q8o1dlr16z1jZb0tDp6y3Z2vX9J57g9POuq6Ts7ZuOq6RcSerQW4E7gGeXr98L/HlmXhQRHwHOAj7ci4If2LnDNlvS0BmKI8WSpOZFxDjwC8DHytcBvAK4pFxkG/DaaqKTpME0FEeKJUkteT/wDuDA8vVzgEcyc2/5eiewer4VI2IjsBFgbGyMqakpZmZmmJqaarrwyclJVqzc02bonTl6crKlWBu1Ws9hVYd61qGOYD27zaRYkkZIRLwG2J2Z0xEx0er6mbkV2Aqwfv36nJiYaHk0qQ0bNlTXfWLzZjKzrXUdHWx01KGOYD27zaRYkkbL8cApEfFqYD+KPsUfAA6OiOXl0eJx4P4KY5SkgWOfYkkaIZn5+5k5npnrgDOAf8rMXwOuBk4rFzsT+GxFIUrSQDIplqR6eCfwuxFxN0Uf4/MrjkeSBordJyRpRGXmFDBVPr8HOKbKeCRpkDV9pLjKG8FLkiRJvdRK94nZG8HPmr0R/AuAhyluBC9JkiQNnaaSYm8EL0mSpFHW7JHi2RvBP1W+bvpG8JIkSdKgW/JCu05vBD/f6EitGh8fZ8Wy4RsdCYZ7tJlhjX1Y4wZjr8owxy5J6o5m7j7R0Y3g5xsdqVVbtmzhhgOPa3m9buhkdCQY7tFmhjX2YY0bjL0qwxy7JKk7luw+4Y3gJUmSNOo6GbzDG8FLkiRpJLQ0eIc3gpckSdIocphnSZIk1Z5JsSRJkmrPpFiSJEm1Z1IsSZKk2jMpliRJUu2ZFEuSJKn2TIolSZJUeybFkiRJqj2TYkmSJNWeSbEkSZJqz6RYkiRJtWdSLEmSpNozKZYkSVLtmRRLkiSp9kyKJUmSVHsmxZIkSao9k2JJGiERsV9EfCkivhIRt0fEe8rpz4+I6yPi7ojYHhHPqDpWSRokJsWSNFq+B7wiM18EHAWcHBHHAu8F/jwzXwA8DJxVYYySNHBMiiVphGRhpny5b/lI4BXAJeX0bcBrKwhPkgaWSbEkjZiIWBYRNwO7gSuBrwGPZObecpGdwOqq4pOkQbS86gAkSd2VmU8CR0XEwcClwI83u25EbAQ2AoyNjTE1NcXMzAxTU1NNlz85OcmKlXtaC7pLjp6cbCnWRq3Wc1jVoZ51qCNYz24zKZakEZWZj0TE1cDLgIMjYnl5tHgcuH+BdbYCWwHWr1+fExMTTE1NMTEx0XS5GzZs4PTzrus4/nZs37yZzGxr3VbrOazqUM861BGsZ7fZfUKSRkhEHFoeISYingWcCNwBXA2cVi52JvDZaiKUpMG0ZFLs7X1UF6vXrCUi+v5YvWZt1VXXaFkFXB0RtwA3AFdm5uXAO4HfjYi7gecA51cYoyQNnGa6T8ze3mcmIvYFvhARnwd+l+L2PhdFxEcobu/z4R7GKvXUAzt3VHLKd/um4/pepkZXZt4CvHie6fcAx/Q/IkkaDkseKfb2PpIkSRp1TfUp9vY+kiRJGmVN3X2i27f3adX4+Dgrlg3f7X1guG+XMqyxtxt3VbeRavyMDet7DsYuSRpuLd2SrVu392nVli1buOHAavpddnJ7Hxju26UMa+ztxl3VbaQaP2PD+p6DsUuShlszd5/w9j6SJEkaac0cKV4FbIuIZRRJ9MWZeXlEfBW4KCL+GPgy3t5HkiRJQ2rJpNjb+0iSJGnUOaKdJEmSas+
kWJIktaWqkUAdDVS90NLdJyRJkmZVNRIoOBqous8jxZIkSao9k2JJkiTVnkmxJEmSas+kWJIkSbVnUixJkqTaMymWJElS7ZkUS5IkqfZMiiVJklR7JsWSJEmqPZNiSZIk1Z5JsSRJkmrPpFiSJEm1Z1IsSZKk2jMpliRJUu2ZFEuSJKn2TIolSZJUeybFkjRCImJNRFwdEV+NiNsj4q3l9JURcWVE3FX+XVF1rJI0SEyKJWm07AXenplHAscCb46II4FzgKsy83DgqvK1JKlkUixJIyQzd2XmTeXzx4E7gNXAqcC2crFtwGuriVCSBtPyqgOQJPVGRKwDXgxcD4xl5q5y1oPA2ALrbAQ2AoyNjTE1NcXMzAxTU1NNlzs5OcmKlXvaD7wDR09OthRro1brOay6Wc9B3dfuy9HSr3oumRRHxBrgkxQNaAJbM/MDEbES2A6sA+4FXpeZD/cuVElSsyLiAODTwNsy87GIeHpeZmZE5HzrZeZWYCvA+vXrc2JigqmpKSYmJpoue8OGDZx+3nWdhN+27Zs3kzlv1ZbUaj2HVTfrOaj72n05WvpVz2a6T9g/TeqlfZYTEUQE09PTTz/vx2P1mrVV1149EBH7UiTEF2TmZ8rJD0XEqnL+KmB3VfFJ0iBa8khxebptV/n88Yho7J82US62DZgC3tmTKKVR9tTep4+0rFi5p69HXbZvOq5vZak/ojgkfD5wR2a+r2HWZcCZwLnl389WEJ4kDayW+hS30z9NktRXxwNvAG6NiJvLae+iSIYvjoizgPuA11UUnyQNpKaT4nb7p8130UarxsfHWbFs8DryN2OYO8EPa+ztxl3VBSNHN5R70LK9nNTHGDr9fDca1s8LDHfsc2XmF4BYYPYJ/YxFkoZJU0nxYv3TMnPXYv3T5rtoo1VbtmzhhgOrOc3byUUbMNyd4Ic19nbjruqCke2bNz9d7kkr93DFnpV9LbuTz3ejYf28wHDHLknqjiUvtGuifxrYP02SJPVTw0XKcx+9vGjZC5RHVzNHiu2fJo2q8p9KN0xOTrJhw4amlz9sfA337/hGV8qWVEMNFynP1cuLlr1AeXQ1c/cJ+6dJo2qRfyqtavWfkP9YJEmDxBHtJEmSmtXFM2yt8gxbb5kUS5IkNauLZ9ha5Rm23mpmRDtJkiRppJkUS5IkqfZMijVQVq9Z2/Htctq9FY8kSaov+xRroDywc0fHfbXavRWPfbUkSaovjxRLkiSp9kyKJUmShsGcUfx6OXJfHUfys/uEJEnSMJhzO7hejtw3Vx26GHqkWJIkSbVnUixJkqTaMymWJEnS4ub0Z+7n45Zbb+1LFe1TLEmSpMVVOLz19x/vT7keKZYkSVLtmRRLkiSp9kyKJUmSVHsmxZIkSao9k2JJkiTVnkmxJEmSas+kWJIkSbVnUixJkqTaMymWpBETER+PiN0RcVvDtJURcWVE3FX+XVFljJI0aEyKJWn0fAI4ec60c4CrMvNw4KrytSSptGRS7BEHSRoumXktsGfO5FOBbeXzbcBr+xqUJA245U0s8wngQ8AnG6bNHnE4NyLOKV+/s/vhSZK6ZCwzd5XPHwTG5lsoIjYCGwHGxsaYmppiZmaGqamppguanJxkxcq5OXl/HD052VKsjVqt56C45dZb+f4TTzS9/Pj4OFu2bOlK2VXv64XKPmjZXk7qUVyLldtrc8vuZT2XKruf9j9ovC/fzSWT4sy8NiLWzZl8KjBRPt8GTGFSLElDITMzInKBeVuBrQDr16/PiYkJpqammJiYaHr7GzZs4PTzrutKrK3avnkzmfNWbUmt1nNQtPp+r1i2hxsOPK4rZW/fdFyl+3qhsk9auYcr9qzse7m9NrfsXtZzqbL76ejH7+T000/veTnt9ilu6oiDJGlgPBQRqwDKv7srjkeSBkoz3ScWtdgRB5j/VFyrxsfHWbFs+E7FwfCejoNqYu/Gqbh2TydVdWqosdx+ngqbW3anWo290+9WNw3z97Q
FlwFnAueWfz9bbTiSNFjaTYofiohVmblrqSMO852Ka9WWLVu6dqqnVZ2cioPhPR0H1cTejdOu7Z5OqurUUGO5/TwVNrfsTrUae6ffrW4a5u/pfCLiQooubodExE7gDymS4Ysj4izgPuB11UXYQ/ssJyLaWnVycpINGza0XfRh42u4f8c32l5fUrXaTYo94iBJAyozX7/ArBP6GkgVntrb9g+9FSv3dPQjcfumag7eSOqOZm7JdiHwReCIiNhZHmU4FzgxIu4CXlm+liRJkoZSM3efqO8RB0mSJNWCI9pJkiSp9kyKJUmSVHsmxZIkSao9k2JJkiTVnkmxJEmSas+kWJIkSbVnUixJkqTaMymWJElS7ZkUL2Wf5URE24/p6em21lu9Zm1lVV69Zm1HsXfyUI10+N1q91Hld0uSNLiWHNGu9p7ay+nnXdf26itW7mlr/e2bjmu7zE49sHMHp593Xduxd6LKeqvPOvxutcvPmCRpPh4pliRJUu2ZFEuSJKn2TIolSZJUeybFkiRJqj2TYkmSJNWeSbEkSZJqz6RYkiRJtWdSLEmSpNozKZYkSVLtmRRLkiSp9kyKJUmSVHsmxZIkSao9k2JJkiTVXkdJcUScHBH/GhF3R8Q53QpKktQbttuSNL+2k+KIWAb8BfAq4Ejg9RFxZLcCkyR1l+22JC2skyPFxwB3Z+Y9mfkEcBFwanfCkiT1gO22JC2gk6R4NbCj4fXOcpokaTDZbkvSAiIz21sx4jTg5Mx8U/n6DcBPZ+bZc5bbCGwsXx4B/GsbxR0CfKutQKtn7P03rHGDsVelmdifl5mH9iOYXmmm3V6gzR7mfdsK6zk66lBHsJ5LaandXt5GAbPuB9Y0vB4vp/2AzNwKbO2gHCLixsxc38k2qmLs/TescYOxV2WYY2/Rku32fG12Xd4f6zk66lBHsJ7d1kn3iRuAwyPi+RHxDOAM4LLuhCVJ6gHbbUlaQNtHijNzb0ScDfwjsAz4eGbe3rXIJEldZbstSQvrpPsEmfk54HNdimUxHXW/qJix99+wxg3GXpVhjr0lbbbbdXl/rOfoqEMdwXp2VdsX2kmSJEmjwmGeJUmSVHsDnRRHxMcjYndE3FZ1LK2IiDURcXVEfDUibo+It1YdU7MiYr+I+FJEfKWM/T1Vx9SqiFgWEV+OiMurjqUVEXFvRNwaETdHxI1Vx9OKiDg4Ii6JiDsj4o6IeFnVMTUjIo4o3+/Zx2MR8baq46rKUkNAR8QzI2J7Of/6iFjX/yg710Q93xgR32z4XLypijg7sdT/zyh8sHwPbomIl/Q7xm5oop4TEfFow778g37H2Klmcoph359N1rH3+zIzB/YBvBx4CXBb1bG0GPcq4CXl8wOBfwOOrDquJmMP4IDy+b7A9cCxVcfVYh1+F/hr4PKqY2kx7nuBQ6qOo83YtwFvKp8/Azi46pjaqMMy4EGK+1pWHk9F9f8a8KPlPvzK3HYL+L+Aj5TPzwC2Vx13j+r5RuBDVcfaYT0X/f8JvBr4fNnmHwtcX3XMParnxLD9L5inDkvmFMO+P5usY8/35UAfKc7Ma4E9VcfRqszclZk3lc8fB+5gSEaNysJM+XLf8jE0Hc8jYhz4BeBjVcdSFxFxEMU/pvMBMvOJzHyk2qjacgLwtcy8r+pAKtLMENCnUvwAArgEOCEioo8xdkMthrpu4v/nqcAnyzb/X4CDI2JVf6LrnmHNE1rRZE4x1PtzUPKmgU6KR0F5evHFFEdch0LZ/eBmYDdwZWYOTezA+4F3AE9VHUgbErgiIqbLUcWGxfOBbwJ/WXZb+VhE7F91UG04A7iw6iAq1MwQ0E8vk5l7gUeB5/Qluu5pdqjrXylPQ18SEWvmmT/s6jTk98vKLoGfj4ifrDqYTiySU4zM/lwib+rpvjQp7qGIOAD4NPC2zHys6nialZlPZuZRFKNdHRMRL6w6pmZExGuA3Zk5XXUsbfqZzHwJ8CrgzRHx8qoDatJyitOXH87
MFwPfAX6on+YgKweyOAX4m6pj0UD4O2BdZv4UcCX/cXRcw+cmii5RLwL+F/C3FcfTtmHNKVqxRB17vi9NinskIval2LEXZOZnqo6nHeUp8KuBk6uOpUnHA6dExL0Up0RfERGfqjak5mXm/eXf3cClFKd5h8FOYGfDGYVLKJLkYfIq4KbMfKjqQCq05BDQjctExHLgIODbfYmue5oZ6vrbmfm98uXHgJf2KbZ+amZ/D73MfGy2S2AW9+jeNyIOqTisljWRUwz9/lyqjv3YlybFPVD2sTsfuCMz31d1PK2IiEMj4uDy+bOAE4E7q42qOZn5+5k5npnrKE6F/1Nm/nrFYTUlIvaPiANnnwMnAUNx15XMfBDYERFHlJNOAL5aYUjteD317joBzQ0BfRlwZvn8NIrv2NBcc1Basp5z+mKeQtG/cdRcBvxGedeCY4FHM3NX1UF1W0Q8d7bfe0QcQ5H3DNUPuSZziqHen83UsR/7sqMR7XotIi6kuNrwkIjYCfxhZp5fbVRNOR54A3Br2TcX4F3lL5tBtwrYFhHLKD5wF2fmwNzarDwK/KbM/N/zzPsExRHL/zZIcTVpDLi0/L4vB/46M/+hS+H1w38BLiiTjHuA36w4nqaVP0JOBDZVHUuVcoEhoCPij4AbM/Myin9afxURd1Nc3HRGdRG3p8l6viUiTgH2UtTzjZUF3Kb5/n9SXDhNZn6EYlTDVwN3A9+lD9/ZLrSTRMTngYsyc1v5urGeu4DnAm8GnirreRrwOxGxF/h3iqOpP0ZR72Exb04BrIXq9meXNVPHufvyjG7/KHdEO7UlIn4G+B/ATwJPUhxJeVtm3tDjcu+l4qR4vnK60dhLUj8MYvvdpe2vA74O7Fv++JkCPpWZH2tYJoHDM3OYkmL1yUAfKdZgiohnA5cDvwNcTHGvz58FvrfYepKkatl+SwuzT7Ha8Z8AMvPC8k4V/56ZV2TmLbMLRMRvRTGy2cMR8Y8R8byGeRkRb4mIeyLiWxHxPyNin3Lej0XEP0XEt8t5F8z2cW5VRLwmilFvHomI6yLipxrm3RsRm8tbLj0axShd+zXMf0dE7IqIByLiTWXML4jiVmm/BrwjImYi4u8aijxqoe1J0oAYuPa77N/9SMN2PhoRuxvm/1WUI01GxFSUIwxGcfvQybKseyjuUT+7zp9QJPsfKtvqDzUU+cqIuKss8y9m+6lKJsVqx78BT0bEtoh4VUSsaJwZEadS9AX6ZeBQ4J/54YuYfglYT3GXglOB35pdHfgz4DDgJyj6f7271QAj4sXAxyn6iT4HOA+4LCKe2bDY6yjurPF84Kco+w1GxMkUo+K9EngBRX81ADJzK3AB8D8y84DM/MWltidJA2Tg2u/M/DrwGMW9aaEYDGgmIn6ifP1zwDXzrPqfgdeU662n6HM6u83/WsZ+dtlWn92w3muAoyna6dcBP79UjKoHk2K1rLx34M9QDDbxUeCbEXFZRIyVi/w28GeZeUd5g/8/pTiK+ryGzbw3M/dk5jcoBtx4fbntuzPzysz8XmZ+E3gfRYPYqo3AeZl5fXk0ZBvF6cFjG5b5YGY+kJl7KO5LelQ5/XXAX2bm7Zn5XZpPyhfaniQNhAFuv68Bfi4inlu+vqR8/Xzg2RTDcc/1OuD9mbmjbHf/rMmyzs3MR8r4r8a2WiWTYrWlbDDfmJnjwAspjgy8v5z9POAD5ampRyiu3g5+cHSdxpF37ivXJyLGIuKiiLg/Ih4DPgW0cx/C5wFvn42hjGPNbDmlBxuefxc4oHx+2Jz4Gp8vZqHtSdLAGND2+xqKs3IvB64FpigS6p8D/jkz5xuldG5b3ewQ7bbVmpdJsTqWmXcCn6BoXKFopDZl5sENj2dl5nUNqzXeZHwt8ED5/E8pjmD8n5n5bODXKRrkVu0A/mRODD+Smc3ci3YXxY3P54uVMj5JGnoD1H5fQ9EHeKJ8/gWK23Qt1HUCirZ6biw/UL0my5YAk2K1ISJ+PCLeHhH
j5es1FKfP/qVc5CPA70c5LnlEHBQRvzpnM78XESvKdd8KbC+nHwjMAI9GxGrg99oM86PAb0fET0dh/4j4hSgHyFjCxcBvRsRPRMSPAP/PnPkPAT/aZlySVJlBbb8z8y6Ke8/+OnBN2c3jIeBXWDgpvpjintLjZd/oucPL21arJSbFasfjwE8D10fEdyga09uAtwNk5qXAe4GLylNot1EMo9vos8A0cDPw9xSDAgC8h+LijUfL6W0NkZ2ZN1JchPEh4GGKG5q/scl1Pw98kKKv2d38xz+L2VsWnQ8cWZ5e7PrY65LUQ4Pcfl8DfDszdzS8DuCmBZb/KMUgLF8pl5lb3geA08q7aHywxVhUQw7eob6LIbt5enkF9G3AM8sLTySploat/ZZa4ZFiaR4R8UsR8czylNx7gb8zIZYkaXSZFEvz2wTsBr5GMQzq71QbjiRJ6iW7T0iSJKn2PFIsSZKk2jMpliRJUu0t72dhhxxySK5bt66fRfKd73yH/fffv69lNmtQYzOu1g1qbIMaFwxubAvFNT09/a3MPLSCkCrTbps9qPu22+pQzzrUEaznKGmsY8vtdmb27fHSl740++3qq6/ue5nNGtTYjKt1gxrboMaVObixLRQXcGP2sb0chEe7bfag7ttuq0M961DHTOs5Shrr2Gq7bfcJSZIk1Z5JsSRJkmrPpFiSJEm1Z1IsSZKk2jMpliRJUu2ZFOuH3HLrrUREJY/lz9hvwXnT09M9LXv1mrVVv/WS1Der16xdsl1sp921LdWw6ut9ijUcvv/EE5x+3nWVlL1903ELlr1i5Z6exrV903E927YkDZoHdu5Ysk1tp921LdWw8kixJEmSas+kWJIkSbVnUixJkqTaMymWJElS7ZkUS5IkqfZMiiVJklR7JsWSJEmqPZNiSZIk1Z5JsSRJkmpvyaQ4ItZExNUR8dWIuD0i3lpOf3dE3B8RN5ePV/c+XEmSJKn7mhnmeS/w9sy8KSIOBKYj4spy3p9n5mTvwpMkSZJ6b8mkODN3AbvK549HxB3A6l4HJkmSJPVLS32KI2Id8GLg+nLS2RFxS0R8PCJWdDk2SZIkqS+a6T4BQEQcAHwaeFtmPhYRHwb+O5Dl3z5lkrcAABXfSURBVC3Ab82z3kZgI8DY2BhTU1NdCLt5MzMzfS+zWYMa2/j4OCuW7amk7KMnJ1mxcv6yD1q2l5MWmNetstvdH4O6Lwc1Lhjc2AY1LklSbzWVFEfEvhQJ8QWZ+RmAzHyoYf5HgcvnWzcztwJbAdavX58TExMdhtyaqakp+l1mswY1ti1btnDDgcdVUvb2zZs5/bzr5p130so9XLFnZU/Lzsy21h3UfTmoccHgxjaocUmSequZu08EcD5wR2a+r2H6qobFfgm4rfvhSZIkSb3XzJHi44E3ALdGxM3ltHcBr4+Ioyi6T9wLbOpJhJIkSVKPNXP3iS8AMc+sz3U/HEmSJKn/HNFOkiRJtWdSLEmSpNozKZYkSVLtmRRLkiSp9kyKJUmSVHsmxUtYvWYtEdGTx/T09ILzVq9ZW3XVJUmSaqPpYZ7r6oGdOxYcYa1TK1buWXDb2zdVM6KcpOEWEfsB1wLPpGjjL8nMP4yI5wMXAc8BpoE3ZOYT1UUqSYPFI8WSNFq+B7wiM18EHAWcHBHHAu8F/jwzXwA8DJxVYYySNHBMiiVphGRhpny5b/lI4BXAJeX0bcBrKwhPkgaW3SckacRExDKKLhIvAP4C+BrwSGbuLRfZCaxeYN2NwEaAsbExpqamWi5/ZmamrfWGzbDXc3JykhUr9yy6zEHL9nLSEsvMdfTk5NC9L8O+L5tVh3p2UkeTYkkaMZn5JHBURBwMXAr8eAvrbgW2Aqxfvz4nJiZaLn9qaop21hs2w17PDRs2LHnNzEkr93DFnpUtbXf75s1kZieh9d2w78tm1aGendTR7hOSNKIy8xHgauBlwMERMXsgZBy4v7LAJGkAmRRL0giJiEPLI8RExLOAE4E
7KJLj08rFzgQ+W02EkjSY7D4hSaNlFbCt7Fe8D3BxZl4eEV8FLoqIPwa+DJxfZZCSNGhMiiVphGTmLcCL55l+D3BM/yOSpOFg9wlJkiTVnkmxJEmSam/JpDgi1kTE1RHx1Yi4PSLeWk5fGRFXRsRd5d8VvQ9XkiRJ6r5mjhTvBd6emUcCxwJvjogjgXOAqzLzcOCq8rUkSZI0dJZMijNzV2beVD5/nOLWPquBUymGCgWHDJUkSdIQa+nuExGxjuKq5uuBsczcVc56EBhbYJ2OhwztRKdDGjYzDGa7Fhs+8+jJLWzZsqUn5S5lfHycFct6U+elHL3I+93OcKOtlt3uZ2VQh84c1LhgcGMb1LgkSb3VdFIcEQcAnwbelpmPRcTT8zIzI2LeMR27MWRoJzod0rCZYTDbtdjwmds3v71n5S5p53XccOBxlRS9ffPmBevdznCjrZbd7tCkgzp05qDGBYMb26DGJUnqrabuPhER+1IkxBdk5mfKyQ9FxKpy/ipgd29ClCRJknqrmbtPBMXIR3dk5vsaZl1GMVQoOGSoJEmShlgz3SeOB94A3BoRN5fT3gWcC1wcEWcB9wGv602IkiRJUm8tmRRn5heAWGD2Cd0NR5IkSeo/R7STJElS7ZkUS5IkqfZMiiVJklR7JsWSJEmqPZNiSZIk1Z5JsSRJkmrPpFiSpAG3es1aIqKrD0k/qJnBOyRJUoUe2LmD08+7rqvb3L7puK5uTxp2HimWJElS7ZkUSxXrxWnR2cf09PSi81evWVt19SVJGgh2n5Aq1ovTorNWrNyz6LY9fSpJUsEjxZIkSao9k2JJkiTVnkmxJEmSas+kWJIkSbVnUixJkqTaWzIpjoiPR8TuiLitYdq7I+L+iLi5fLy6t2FKkiRJvdPMkeJPACfPM/3PM/Oo8vG57oYlSWpHRKyJiKsj4qsRcXtEvLWcvjIiroyIu8q/K6qOVZIGyZJJcWZeC+zpQyySpM7tBd6emUcCxwJvjogjgXOAqzLzcOCq8rUkqdTJ4B1nR8RvADdSNMAPz7dQRGwENgKMjY0xNTXVQZGtm5mZ6ajMyclJVqzszW+Cg5bt5aQFtn10D8tdyv4HjbNiWTVlL1bvxd6vbpXd7melk89ZVZ8x6KzOner0u9krgxpXszJzF7CrfP54RNwBrAZOBSbKxbYBU8A7KwhRkgZSZObSC0WsAy7PzBeWr8eAbwEJ/HdgVWb+1lLbWb9+fd54442dxNuyqakpJiYm2l4/Ino22thJK/dwxZ6V887bvum4npW7lKMfv44bDqxmpLPF6r3Y+9Wtspv5Psynk89ZVZ8x6KzOner0u9krC8UVEdOZub7/EbWvbLuvBV4IfCMzDy6nB/Dw7Os56zQeyHjpRRdd1HK5MzMzHHDAAe0HPiT6Wc/p6WlWPO/Hu7rNh++7c8ltHrRsL48+2drxs4fvu5OXvvSlnYTWd35mR0djHTds2NBSu93WkeLMfGj2eUR8FLi8ne1IknojIg4APg28LTMfK/LgQmZmRMz7aygztwJboTiQ0c4Pl0H9wdNt/aznhg0buv7jefvmzUtus52DEds3b67sx3a7/MyOjk7q2NYt2SJiVcPLXwJuW2hZSVJ/RcS+FAnxBZn5mXLyQ7Ntd/l3d1XxSdIgWvJIcURcSNEP7ZCI2An8ITAREUdRdJ+4F9jUwxglSU0qu0acD9yRme9rmHUZcCZwbvn3sxWEJ0kDa8mkODNfP8/k83sQi1StfZbTeIq5FZOTk2zYsKHLAUltOR54A3BrRNxcTnsXRTJ8cUScBdwHvK6i+CRpIHVy9wlptDy1t+0+eytW7ml73e2bqrmoUaMpM78ALPTr7oR+xiJJw8RhniVJklR7JsWSJEmqPZNiSZI00FavWUtEdPWxes3aqqulAWOfYkmSNNAe2Lmj+/dp9noOzeGRYkmSJNWeSbEkSZJqz6RYkiRJtWdSLEmSpNozKZYkSVLtmRRLkiSp9kyKJUmSVHs
mxZIkSao9k2JJkiTVnkmxJEmSas+kWJIkSbW3ZFIcER+PiN0RcVvDtJURcWVE3FX+XdHbMCVJkqTeaeZI8SeAk+dMOwe4KjMPB64qX0uSJElDacmkODOvBfbMmXwqsK18vg14bZfjkiRpKK1es5aI6OpDUu8tb3O9sczcVT5/EBhbaMGI2AhsBBgbG2NqaqrNItszMzPTUZmTk5OsWDn3N0F3HLRsLyctsO2je1juUvY/aJwVy6ope7F6L/Z+9brspXQSWy/39VJxHT052ffv5KxOv5u9MqhxaXg8sHMHp593XVe3uX3TcV3dnqQf1m5S/LTMzIjIReZvBbYCrF+/PicmJjotsiVTU1N0UuaGDRu63rjNOmnlHq7Ys3Leeds3b+5ZuUs5+vE7ueHAahrgxeq92PvV67KX0klsvdzXS8W1ffNmMhf8+vZUp9/NXhnUuCRJvdVuUvxQRKzKzF0RsQrY3c2gJEnSkNpnuV0+NJTaTYovA84Ezi3/frZrEUmSpOH11F67j2goNXNLtguBLwJHRMTOiDiLIhk+MSLuAl5ZvpYkSZKGUjN3n3h9Zq7KzH0zczwzz8/Mb2fmCZl5eGa+MjN7elVWJ1fyTk9Pe8WvJEmSFtXxhXb90MmVvCtW7unoNI6nbCRJkkafwzxLkiSp9kyKJUmSVHsmxZIkSao9k2JJGjER8fGI2B0RtzVMWxkRV0bEXeXfFVXGOCh6cSG3pOE0FBfaSZJa8gngQ8AnG6adA1yVmedGxDnl63dWENtA6cWF3F6gLQ0njxRL0ojJzGuBubfKPBXYVj7fBry2r0FJ0oAzKZakehjLzF3l8weBsSqDkaRBY/cJSaqZzMyIyPnmRcRGYCPA2NgYU1NTLW9/ZmamrfWqMDk5yYqV7Y0/ddCyvZw0z7pHd7DNhVS1zYXq2Ol2W9Wrbc5+TofpM9uJOtSzkzqaFEtSPTwUEasyc1dErAJ2z7dQZm4FtgKsX78+JyYmWi5oamqKdtarwoYNG9ruU3zSyj1csWflD03fvnlzR4NGzaeqbS5Ux06326pebTOz+G04TJ/ZTtShnp3U0e4TklQPlwFnls/PBD5bYSySNHBMiiVpxETEhcAXgSMiYmdEnAWcC5wYEXcBryxfS/W1z/Ilb6/XzmP5M/br2rZmH6vXrK363aoFu09I0ojJzNcvMOuEvgYiDbKn9j7dJWOh2+u1Y/um47rf1cPb/PWFR4olSZJUeybFkiRJqj2TYkmVuOXWW7ve786+eZKkdnXUpzgi7gUeB54E9mbm+m4EJWn0ff+JJ7re764Z9s2TJM2nGxfabcjMb3VhO5IkSVIl7D4hSZKk2us0KU7gioiYLocGlSSp61avWduTPuaSNKvT7hM/k5n3R8T/AVwZEXdm5rWNC5TJ8kaAsbGxtsaj7sXY9M3qxXjrsxaLrZflLmX/g8ZZsayasherd6f7spOyl9JJbFV9xoqyt7Bly5aelL2U8fFqPmdHT04u2g7NzMy01U6ptx7YuaMnfdDtYy5pVkdJcWbeX/7dHRGXAscA185ZZiuwFWD9+vXZznjUvRibvlm9GG991mKx9bLcpRz9+J3ccGA1/ygWq3en+7KTspfSSWxVfcaKst9e2eeMnddV8jnbvnkzmbng/KmpKdpppyRJw63t7hMRsX9EHDj7HDgJuK1bgUmSJEn90smR4jHg0rJP1nLgrzPzH7oSlSRJktRHbSfFmXkP8KIuxiJJkiRVwluySZIkqfZMiiVJklR7JsWSJEmqPZNiSZIk1Z5JsSRJkmrPpFiSJEm1Z1IsSZKk2jMpliRJUu2ZFEuSJKn2TIolSZIG2T7LiYiOH9PT008/X71mbdW1GjhtD/MsSZKkPnhqL6efd13Hm1mxcs/T29m+6biOtzdqPFIsSZKk2jMpliRJqpsudclofAx7lwy7T0iSJNVNl7pkNBr2LhkeKZYkSVLtmRRLkiSp9jpKiiPi5Ij414i4OyLO6VZQkqTe6Ee
7fcutt3a9r6KkIdCDfsr97Kvcdp/iiFgG/AVwIrATuCEiLsvMr3YrOElS9/Sr3f7+E0/YV1Gqox70U4b+ff87OVJ8DHB3Zt6TmU8AFwGndicsSVIP2G5L0gI6SYpXAzsaXu8sp0mSBpPttiQtIDKzvRUjTgNOzsw3la/fAPx0Zp49Z7mNwMby5RHAv7YfblsOAb7V5zKbNaixGVfrBjW2QY0LBje2heJ6XmYe2u9guqmZdrtLbfag7ttuq0M961BHsJ6jpLGOLbXbndyn+H5gTcPr8XLaD8jMrcDWDsrpSETcmJnrqyp/MYMam3G1blBjG9S4YHBjG9S4umTJdrsbbfaIv4dPq0M961BHsJ6jpJM6dtJ94gbg8Ih4fkQ8AzgDuKyD7UmSest2W5IW0PaR4szcGxFnA/8ILAM+npm3dy0ySVJX2W5L0sI6GuY5Mz8HfK5LsfRKZV03mjCosRlX6wY1tkGNCwY3tkGNqyv61G6P9HvYoA71rEMdwXqOkrbr2PaFdpIkSdKocJhnSZIk1d5IJcURsSwivhwRl88z740R8c2IuLl8vKlPMd0bEbeWZd44z/yIiA+WQ67eEhEv6UdcTcY2ERGPNrxnf9CnuA6OiEsi4s6IuCMiXjZnfpXv2VKx9f09i4gjGsq7OSIei4i3zVmmkvesydiq+pz93xFxe0TcFhEXRsR+c+Y/MyK2l+/Z9RGxrh9xDYuIWBMRV0fEV8v38a3zLFPZd7UbmqxjJZ/fboqI/SLiSxHxlbKe75lnmaH/PjRZz0pyhW6LxfOhod+Xs5aoZ8v7sqM+xQPorcAdwLMXmL997n2U+2RDZi50X8BXAYeXj58GPlz+7ZfFYgP458x8Td+iKXwA+IfMPC2KK+R/ZM78Kt+zpWKDPr9nmfmvwFHw9DC+9wOXzlmskvesydigz+9ZRKwG3gIcmZn/HhEXU9yJ4RMNi50FPJyZL4iIM4D3Aqf3K8YhsBd4e2beFBEHAtMRceWcIaOrbt861UwdoZp2spu+B7wiM2ciYl/gCxHx+cz8l4ZlRuH70Ew9obpcoZsWy4dGYV/O6mreNzJHiiNiHPgF4GNVx9KiU4FPZuFfgIMjYlXVQVUlIg4CXg6cD5CZT2TmI3MWq+Q9azK2qp0AfC0z75szfRA+ZwvFVpXlwLMiYjnFj5sH5sw/FdhWPr8EOCEioo/xDbTM3JWZN5XPH6f4xzR3dLxB+Ny1rck6Dr1y/8yUL/ctH3MvOBr670OT9Rx6TeRDQ78voTd538gkxcD7gXcATy2yzK+Up/AuiYg1iyzXTQlcERHTUYwUNVeVw64uFRvAy8pTTZ+PiJ/sQ0zPB74J/GV5SuRjEbH/nGWqes+aiQ36/541OgO4cJ7pgzC870KxQZ/fs8y8H5gEvgHsAh7NzCvmLPb0e5aZe4FHgef0OrZhVJ5+fTFw/ZxZg/C564pF6gjVfue7ojwNfTOwG7gyMxfcl8P8fWiinlBNrtBNS+VDI7Ev6UHeNxJJcUS8BtidmdOLLPZ3wLrM/CngSv7jV1Kv/UxmvoTiNOKbI+LlfSq3GUvFdhPFEIkvAv4X8Ld9iGk58BLgw5n5YuA7wDl9KLcZzcRWxXsGQNmd4xTgb/pVZrOWiK3v71lErKA4WvJ84DBg/4j49V6XO4oi4gDg08DbMvOxquPphSXqWNl3vpsy88nMPIpilMNjIuKFVcfUC03Us6pcoSuazIeGXq/yvpFIioHjgVMi4l7gIuAVEfGpxgUy89uZ+b3y5ceAl/YjsPKIFJm5m6Iv5TFzFmlquOwqYsvMx2ZPNZX3Nt03Ig7pcVg7gZ0Nv94voUhEG1X1ni0ZW0Xv2axXATdl5kPzzKvsc1ZaMLaK3rNXAl/PzG9m5veBzwDHzVnm6fes7GJxEPDtHsc1VMp+mZ8GLsjMz8yzSNWfu44tVceKv/NdV3YJuxo4ec6skfo+LFTPqnKFLloyH2I09mVP8r6RSIoz8/czczwz11Gcov2
nzPyBoz5z+rGdQtE3rKciYv/y4gzK0+wnAbfNWewy4DeicCzFadxdgxBbRDx3tp9RRBxD8Xnp6RcnMx8EdkTEEeWkE4C5F7VU8p41E1sV71mD17Nw94RK3rMGC8ZW0Xv2DeDYiPiRsuwT+OE24TLgzPL5aRTtysj1P2xX+b6dD9yRme9bYLGqP3cdaaaOFX/nuyIiDo2Ig8vnzwJOBO6cs9jQfx+aqWcVuUI3NZMPMQL7sld536jdfeIHRMQfATdm5mXAWyLiFIqrifcAb+xDCGPApWV7uRz468z8h4j4bYDM/AjFyFKvBu4Gvgv8Zh/iaja204DfiYi9wL8DZ/Tpi/NfgAvKU+73AL85IO9ZM7FV8p6VP2xOBDY1TBuI96yJ2Pr+nmXm9RFxCcWp773Al4Gtc9qM84G/ioi7KdqMM3oZ0xA6HngDcGvZRxPgXcBaqP5z1yXN1LGqdrKbVgHborhDzD7AxZl5+Qh+H5qpZxW5Qs+N4L6cV6f70hHtJEmSVHsj0X1CkiRJ6oRJsSRJkmrPpFiSJEm1Z1IsSZKk2jMpliRJUu2ZFEuSJKn2TIolSZJUeybFkiRJqr3/H/4nDynD9Qs1AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xb-AFaG3PU0D" + }, + "source": [ + "## Importing alll the necessary packages to use the various classification algorithms\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cJVjbgAjF4M_" + }, + "source": [ + "from sklearn.linear_model import LogisticRegression # for Logistic Regression Algorithm\n", + "from sklearn import svm # for suport vector machine algorithm\n", + "from sklearn import metrics # for checking the model accuracy\n", + "from sklearn.tree import DecisionTreeClassifier # for using DTA" + ], + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "8LizCSuWF4NA" + }, + "source": [ + "df.shape" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WW5Hp1fFF4NC" + }, + "source": [ + "Now, when we train any algorithm, the number of features and their correlation plays an important role. If there are features and many of the features are highly correlated, then training an algorithm with all the featues will reduce the accuracy. Thus features selection should be done carefully. This dataset has less featues but still we will see the correlation.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YABeXMklF4ND", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 270 + }, + "outputId": "a9537d5f-a224-42a1-b4ae-f10891512872" + }, + "source": [ + "plt.figure(figsize=(8,4))\n", + "sns.heatmap(df.corr(), annot=True, cmap='cubehelix_r') # draws heatmap with input as correlation matrix calculated by df.corr() \n", + "plt.show()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gsd6QaoaF4NE" + }, + "source": [ + "Observation--->\n", + "The Sepal Width and Length are not correlated The Petal Width and Length are highly correlated\n", + "We will use all the features for training the algorithm and check the accuracy.\n", + "\n", + "Then we will use 1 Petal Feature and 1 Sepal Feature to check the accuracy of the algorithm as we are using only 2 features that are not correlated. Thus we can have a variance in the dataset which may help in better accuracy. We will check it later.\n", + "\n", + "Steps To Be followed When Applying an Algorithm\n", + "\n", + "Split the dataset into training and testing dataset. The testing dataset is generally smaller than training one as it will help in training the model better.\n", + "\n", + "Select any algorithm based on the problem (classification or regression) whatever you feel may be good.\n", + "Then pass the training dataset to the algorithm to train it. We use the .fit() method\n", + "Then pass the testing data to the trained algorithm to predict the outcome. We use the .predict() method.\n", + "We then check the accuracy by passing the predicted outcome and the actual output to the model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QAD_cNirF4NF" + }, + "source": [ + "# Splitting The Data into Training And Testing Dataset" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ZqSRd9GzF4NF", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "257d6b8c-654b-49d6-f840-d476bc46ae86" + }, + "source": [ + "from sklearn.model_selection import train_test_split\n", + "train, test = train_test_split(df, test_size=0.3) # our main data split into train and test\n", + "# the attribute test_size=0.3 splits the data into 70% and 30% ratio. 
train=70% and test=30%\n", + "print(train.shape)\n", + "print(test.shape)" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(105, 5)\n", + "(45, 5)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yO2J2FpjF4NG" + }, + "source": [ + "train_X = train[['sepal_length','sepal_width','petal_length','petal_width']] # taking the training data features\n", + "train_y = train.species # output of the training data\n", + "\n", + "test_X = test[['sepal_length','sepal_width','petal_length','petal_width']] # taking test data feature\n", + "test_y = test.species # output value of the test data" + ], + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "yR9D2qgQF4NG", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 205 + }, + "outputId": "0c62edeb-9629-49d0-c18e-bc232bb75be1" + }, + "source": [ + "train_X.head()" + ], + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
114.83.41.60.2
275.23.51.50.2
1436.83.25.92.3
124.83.01.40.1
34.63.11.50.2
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "11 4.8 3.4 1.6 0.2\n", + "27 5.2 3.5 1.5 0.2\n", + "143 6.8 3.2 5.9 2.3\n", + "12 4.8 3.0 1.4 0.1\n", + "3 4.6 3.1 1.5 0.2" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "KcGbNGkcF4NH", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 205 + }, + "outputId": "dcc6f9ba-8240-4f3f-8d74-ff21e5ac54e3" + }, + "source": [ + "test_X.head()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
34.63.11.50.2
454.83.01.40.3
1406.73.15.62.4
465.13.81.60.2
535.52.34.01.3
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "3 4.6 3.1 1.5 0.2\n", + "45 4.8 3.0 1.4 0.3\n", + "140 6.7 3.1 5.6 2.4\n", + "46 5.1 3.8 1.6 0.2\n", + "53 5.5 2.3 4.0 1.3" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "5sFmts-IF4NI", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "3a0b4cfb-4665-4f28-9de2-0f6540a403e7" + }, + "source": [ + "train_y.head()" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "11 setosa\n", + "27 setosa\n", + "143 virginica\n", + "12 setosa\n", + "3 setosa\n", + "Name: species, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S_w4Me2bF4NL" + }, + "source": [ + "## Logistic Regression " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gOQ5JrqrF4NL", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "f7c03398-1ca7-4a77-87de-3b3e3553b729" + }, + "source": [ + "model = LogisticRegression()\n", + "model.fit(train_X, train_y)\n", + "prediction = model.predict(test_X)\n", + "print('The accuracy of Logistic Regression is: ', metrics.accuracy_score(prediction, test_y))" + ], + "execution_count": 22, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The accuracy of Logistic Regression is: 0.9777777777777777\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e1NNX-EGF4NJ" + }, + "source": [ + "## Support Vector Machine SVM" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "zSJmVzqnF4NK", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c6618770-1391-4606-a015-64c020704bd8" + }, + "source": [ + "clf = svm.SVC(kernel='linear')\n", + "clf.fit(train_X, train_y)\n", + "\n", + "#Predict the response for test dataset\n", + "prediction = 
clf.predict(test_X)\n", + "\n", + "print('The accuracy of Support Vector Machine is: ', metrics.accuracy_score(prediction, test_y))" + ], + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The accuracy of Support Vector Machine is: 0.9777777777777777\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GWfemKzPF4NN" + }, + "source": [ + "## Decision Tree" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "iRXy3EZIF4NN", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c95f9bbc-8021-4b16-b52a-ebb1d641e8a7" + }, + "source": [ + "#implementing using Decision Tree\n", + "#code\n", + "\n", + "print('The accuracy of Decision Tree is: ', metrics.accuracy_score(prediction, test_y))" + ], + "execution_count": 24, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The accuracy of Decision Tree is: 0.9777777777777777\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uB2Co6f_F4NQ" + }, + "source": [ + "### We used all the features of iris in above models. 
Now we will use Petals and Sepals Separately" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1_v6cAZMF4NQ" + }, + "source": [ + "### Creating Petals And Sepals Training Data" + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "e1Q-1b9YF4NQ" + }, + "source": [ + "petal = df[['petal_length','petal_width','species']]\n", + "sepal = df[['sepal_length','sepal_width','species']]" + ], + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Lv-nab5oF4NQ" + }, + "source": [ + "### For Iris Petal" + ] + }, + { + "cell_type": "code", + "metadata": { + "collapsed": true, + "id": "DuOqLUWZF4NQ" + }, + "source": [ + "train_p,test_p = train_test_split(petal, test_size=0.3, random_state=0) #petals\n", + "train_x_p = train_p[['petal_width','petal_length']] # taking the training data's Petal features\n", + "train_y_p = train_p.species # output of the training data\n", + "\n", + "test_x_p = test_p[['petal_width','petal_length']] # taking the test data's Petal features\n", + "test_y_p = test_p.species # output of the test data" + ], + "execution_count": 29, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bgNB8kaNF4NU" + }, + "source": [ + "### For Iris Sepal" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6hVj5MW3F4NU" + }, + "source": [ + "#Similarly define the split for sepals\n", + "#define the training and test data's Sepal features followed by the output of the training and test data\n", + "\n", + "#use naming- train_s,test_s ; train_x_s, train_y_s; test_x_s, test_y_s\n", + "train_s,test_s = train_test_split(sepal, test_size=0.3, random_state=0) #sepals\n", + "train_x_s = train_s[['sepal_width','sepal_length']] # taking the training data's sepal features\n", + "train_y_s = train_s.species # output of the training data\n", + "\n", + "test_x_s = test_s[['sepal_width','sepal_length']] # taking the test data's sepal features\n", + "test_y_s =
test_s.species # output of the test data\n", + "#code" + ], + "execution_count": 30, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y08e1O6aU9mx" + }, + "source": [ + "Implementing the algorithms just like we did on the complete dataset but separately on sepals and petals and calculating accuracy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TeMWnQr6F4NV" + }, + "source": [ + "## SVM Algorithm" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jhlutJ78F4NV", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "58709533-b2cf-45b3-98cf-34556938a806" + }, + "source": [ + "clf = svm.SVC(kernel='linear')\n", + "clf.fit(train_x_p, train_y_p)\n", + "\n", + "#Predict the response for test dataset\n", + "prediction = clf.predict(test_x_p)\n", + "print('The accuracy of the SVM using Petals is:',metrics.accuracy_score(prediction,test_y_p))\n", + "\n", + "clf = svm.SVC(kernel='linear')\n", + "clf.fit(train_x_s, train_y_s)\n", + "\n", + "#Predict the response for test dataset\n", + "prediction = clf.predict(test_x_s)\n", + "#code\n", + "print('The accuracy of the SVM using Sepals is:',metrics.accuracy_score(prediction,test_y_s))" + ], + "execution_count": 34, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The accuracy of the SVM using Petals is: 0.9777777777777777\n", + "The accuracy of the SVM using Sepals is: 0.8\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Mli7zcq_F4NV" + }, + "source": [ + "## Logistic Regression" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2DqK_dFCF4NV", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "fba2975d-3919-44a5-caa9-bcecdac3646c" + }, + "source": [ + "model = LogisticRegression()\n", + "model.fit(train_x_p, train_y_p)\n", + "prediction = model.predict(test_x_p)\n", + "\n", + "print('The accuracy of the Logistic Regression using Petals 
is:',metrics.accuracy_score(prediction,test_y_p))\n", + "\n", + "model = LogisticRegression()\n", + "model.fit(train_x_s, train_y_s)\n", + "prediction = model.predict(test_x_s)\n", + "\n", + "print('The accuracy of the Logistic Regression using Sepals is:',metrics.accuracy_score(prediction,test_y_s))" + ], + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The accuracy of the Logistic Regression using Petals is: 0.9777777777777777\n", + "The accuracy of the Logistic Regression using Sepals is: 0.8222222222222222\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aM-7Zx95F4NW" + }, + "source": [ + "## Decision Tree" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "S8tXp-gMF4NW", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "7c084045-f710-4c98-dfbe-de4048897fe7" + }, + "source": [ + "clf = DecisionTreeClassifier()\n", + "clf.fit(train_x_p, train_y_p)\n", + "\n", + "#Predict the response for test dataset\n", + "prediction = clf.predict(test_x_p)\n", + "print('The accuracy of Decision Tree Classifier using Petals is:',metrics.accuracy_score(prediction,test_y_p))\n", + "\n", + "clf = DecisionTreeClassifier()\n", + "clf.fit(train_x_s, train_y_s)\n", + "\n", + "#Predict the response for test dataset\n", + "prediction = clf.predict(test_x_s)\n", + "#code\n", + "print('The accuracy of Decision Tree Classifier using Sepals is:',metrics.accuracy_score(prediction,test_y_s))" + ], + "execution_count": 40, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The accuracy of Decision Tree Classifier using Petals is: 0.9555555555555556\n", + "The accuracy of Decision Tree Classifier using Sepals is: 0.6444444444444445\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6ec0NUyJF4NW" + }, + "source": [ + "\n", + "\n", + "\n", + "### Question:\n", + "Does Using Petals over Sepals for training the data give a much 
better accuracy? Why?\n" + ] + } + ] +} \ No newline at end of file From 458d3861eb4b7220b22d59fe6659339ff209b376 Mon Sep 17 00:00:00 2001 From: Mani9550 <93199718+Mani9550@users.noreply.github.com> Date: Fri, 29 Oct 2021 00:22:24 +0530 Subject: [PATCH 08/11] Rename Classification_Task3_203174002 (1).ipynb to Classification_Task3_203174002.ipynb --- ..._203174002 (1).ipynb => Classification_Task3_203174002.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename Classification_Task3_203174002 (1).ipynb => Classification_Task3_203174002.ipynb (99%) diff --git a/Classification_Task3_203174002 (1).ipynb b/Classification_Task3_203174002.ipynb similarity index 99% rename from Classification_Task3_203174002 (1).ipynb rename to Classification_Task3_203174002.ipynb index 926d289..f709133 100644 --- a/Classification_Task3_203174002 (1).ipynb +++ b/Classification_Task3_203174002.ipynb @@ -1326,4 +1326,4 @@ ] } ] -} \ No newline at end of file +} From ac77d68e03b22efe66b06ba5e6518839f690c32e Mon Sep 17 00:00:00 2001 From: Mani9550 <93199718+Mani9550@users.noreply.github.com> Date: Fri, 29 Oct 2021 00:23:52 +0530 Subject: [PATCH 09/11] Updated the code as per your comments --- Linear_Regression_Task2_203174002 (1).ipynb | 1079 +++++++++++++++++++ 1 file changed, 1079 insertions(+) create mode 100644 Linear_Regression_Task2_203174002 (1).ipynb diff --git a/Linear_Regression_Task2_203174002 (1).ipynb b/Linear_Regression_Task2_203174002 (1).ipynb new file mode 100644 index 0000000..ed904a5 --- /dev/null +++ b/Linear_Regression_Task2_203174002 (1).ipynb @@ -0,0 +1,1079 @@ +{ + "nbformat": 4, + "nbformat_minor": 5, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + 
}, + "colab": { + "name": "Linear_Regression_Task2_203174002.ipynb", + "provenance": [], + "collapsed_sections": [] + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "89223f98" + }, + "source": [ + "\n", + "\n", + "```\n", + "Import libraries\n", + "```\n", + "\n", + "### Importing useful libraries \n" + ], + "id": "89223f98" + }, + { + "cell_type": "code", + "metadata": { + "id": "26f77ebe" + }, + "source": [ + "# This Python 3 environment comes with many helpful analytics libraries installed\n", + "# For example, here's several helpful packages to load in\n", + "import numpy as np # linear algebra\n", + "import matplotlib.pyplot as plt # data visualization\n", + "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", + "import seaborn as sns" + ], + "id": "26f77ebe", + "execution_count": 30, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "31c8220d" + }, + "source": [ + "### Loading the dataset \n", + "#### For implementation we will be using house prediction dataset . The dataset can be found [here](https://github.com/vrinda01go/Hellofoss/blob/main/Room_price_data.csv)" + ], + "id": "31c8220d" + }, + { + "cell_type": "code", + "metadata": { + "id": "1c5d873a" + }, + "source": [ + "df = pd.read_csv(\"Hostel_Linear-Dataset.csv\") #import text file \n" + ], + "id": "1c5d873a", + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "1ca9aba0", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 205 + }, + "outputId": "3b129ba4-1b3a-4288-9b62-fd3344787414" + }, + "source": [ + "df.head()" + ], + "id": "1ca9aba0", + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PriceHostel No.OccupancyRoom SizeFloor
02540.0316868
12900.0329665
2NaN317888
32362.0329242
4NaN3210985
\n", + "
" + ], + "text/plain": [ + " Price Hostel No. Occupancy Room Size Floor\n", + "0 2540.0 3 1 686 8\n", + "1 2900.0 3 2 966 5\n", + "2 NaN 3 1 788 8\n", + "3 2362.0 3 2 924 2\n", + "4 NaN 3 2 1098 5" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "af08f245" + }, + "source": [ + "# Visualizing and Cleaning the data\n", + "\n", + "We will now be removing the nan values and identical values from the dataset\n", + "\n", + "For seeing if there are nan values in the dataset we will use the isna() function and then to remove them we will use the dropna() function. We will need to set additional parameters like rows and columns in the dropna function depending on the number of nan values present for each column\n", + "\n", + "Using the sum() function with isna() function we can get to know the number of missing values in each column" + ], + "id": "af08f245" + }, + { + "cell_type": "code", + "metadata": { + "id": "2fd4babb", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "dd94b5ef-188f-4c3a-aec4-fe91cdc6a86d" + }, + "source": [ + "df.isna().sum()" + ], + "id": "2fd4babb", + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Price 1531\n", + "Hostel No. 0\n", + "Occupancy 0\n", + "Room Size 0\n", + "Floor 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "83ef03c3" + }, + "source": [ + "After this we will proceed to remove the nan values \n", + "\n", + "Since there are not many nan values in the column 'Price' as compared to the number of rows we will remove the rows which have nan values. 
\n", + "\n", + "Resetting the index after removing the nan values and dropping the old index will also be important" + ], + "id": "83ef03c3" + }, + { + "cell_type": "code", + "metadata": { + "id": "b65e4503" + }, + "source": [ + "df = df.dropna(subset = ['Price'],how= 'any')\n", + "df = df.reset_index(drop = True)\n", + "## df.isna().sum()" + ], + "id": "b65e4503", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "40784889" + }, + "source": [ + "Now we can use the drop_duplicates function to remove the duplicate values\n", + "\n", + "This function has a parameter called 'keep' where we specify which duplicate to keep and which ones to drop\n", + "\n", + "For this exercise we will keep the first values and drop the rest of the duplicates" + ], + "id": "40784889" + }, + { + "cell_type": "code", + "metadata": { + "id": "75fa3dc8" + }, + "source": [ + "df = df.drop_duplicates(keep = 'first')\n", + "df = df.reset_index(drop = True)\n", + "## df.duplicated().sum()" + ], + "id": "75fa3dc8", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a007a33f" + }, + "source": [ + "For visualizing the data we will first start with looking at the distribution of different columns to see if there are enough data points for each category in every column and dropping them if the data is biased for one category more than the other" + ], + "id": "a007a33f" + }, + { + "cell_type": "code", + "metadata": { + "id": "b325df62", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "cf878a76-66d3-4e68-f009-819d724a4eae" + }, + "source": [ + "columns = df.columns\n", + "for column in columns:\n", + " if(column== 'Price' or column=='Room Size'): \n", + " continue\n", + " fig = plt.figure(figsize=(5,5))\n", + " ax = fig.gca()\n", + " counts = df[column].value_counts()\n", + " counts.plot.bar(ax = ax, color='blue')\n", + " ax.set_title('No of rooms '+ column)\n", + " 
ax.set_xlabel(column)\n", + " ax.set_ylabel(\"No of rooms\")\n", + " plt.show()" + ], + "id": "b325df62", + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9811a731" + }, + "source": [ + "We can clearly notice that for the Occupancy column the (occupancy) = 4 has a really low set of data points as compared to others. Hence we can proceed in dropping those rows where the occupancy is 4" + ], + "id": "9811a731" + }, + { + "cell_type": "code", + "metadata": { + "id": "825783c0" + }, + "source": [ + "df = df[df['Occupancy'] != 4]\n", + "df = df.reset_index(drop= True)" + ], + "id": "825783c0", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "30c64310", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 205 + }, + "outputId": "bbb5d4dc-f7e5-4b8e-a248-6ecbd09568da" + }, + "source": [ + "df.head()" + ], + "id": "30c64310", + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PriceHostel No.OccupancyRoom SizeFloor
02540.0316868
12900.0329665
22362.0329242
31432.0217063
41702.02210383
\n", + "
" + ], + "text/plain": [ + " Price Hostel No. Occupancy Room Size Floor\n", + "0 2540.0 3 1 686 8\n", + "1 2900.0 3 2 966 5\n", + "2 2362.0 3 2 924 2\n", + "3 1432.0 2 1 706 3\n", + "4 1702.0 2 2 1038 3" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f333875b" + }, + "source": [ + "We will now split the columns into categorical and numerical\n", + "\n", + "categorical = Hostel No, occupancy, floor\n", + "\n", + "Numerical = price, occupancy, roomsize, floor, hostel No.\n", + "\n", + "Remember that we can treat Hostel Number and occupancy as numerical or categorical. For this notebook we will treat them as categorical for data visualization and numerical for the regression" + ], + "id": "f333875b" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0f34ca6a" + }, + "source": [ + "We will also plot the scatter plots and the correlation map to analyse the relationships between different numerical columns" + ], + "id": "0f34ca6a" + }, + { + "cell_type": "code", + "metadata": { + "scrolled": false, + "id": "f4a3ab6e" + }, + "source": [ + "categorical = ['Hostel No.', 'Occupancy', 'Floor']\n", + "numerical = [ 'Price', 'Room Size']" + ], + "id": "f4a3ab6e", + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "df2b588a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 791 + }, + "outputId": "0327277e-f4a5-4f0b-eae9-134ad15a1e86" + }, + "source": [ + "for column1 in numerical:\n", + " for column2 in numerical:\n", + " if(column1 != column2):\n", + " fig = plt.figure(figsize=(6,6))\n", + " ax = fig.gca()\n", + " df.plot.scatter(x=column1,y=column2,ax = ax)\n", + " ax.set_title('Scatter plot of '+ column1 + ' vs ' + column2)\n", + " ax.set_xlabel(column1)\n", + " ax.set_ylabel(column2)\n", + " plt.show()" + ], + "id": "df2b588a", + "execution_count": 28, + "outputs": [ + { + "output_type": "display_data", + "data": { + 
"image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "625b89a8" + }, + "source": [ + "We can notice that there is no linear relation present between the numerical columns. Hence no need to drop anything" + ], + "id": "625b89a8" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8a7f95fa" + }, + "source": [ + "Now we will plot box plots of categorical and numerical columns to get more information about the number of outliers and the distribution." + ], + "id": "8a7f95fa" + }, + { + "cell_type": "code", + "metadata": { + "id": "e73f0769", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "1004ad8d-4668-4006-b743-bb73bd0dc8ca" + }, + "source": [ + "for c in categorical:\n", + " for n in numerical:\n", + " sns.set_style(\"whitegrid\")\n", + " sns.boxplot(x= c, y= n, data=df)\n", + " plt.xlabel(c)\n", + " plt.ylabel(n)\n", + " plt.show()\n", + " " + ], + "id": "e73f0769", + "execution_count": 31, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a74d69fd" + }, + "source": [ + "Now that we have analysed our data we can proceed to normalizing our data and regression" + ], + "id": "a74d69fd" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "77d8c6ba" + }, + "source": [ + "### Importing useful libraries \n" + ], + "id": "77d8c6ba" + }, + { + "cell_type": "code", + "metadata": { + "id": "fffac537" + }, + "source": [ + "# This Python 3 environment comes with many helpful analytics libraries installed\n", + "# For example, here's several helpful packages to load in\n", + "import numpy as np # linear algebra\n", + "import matplotlib.pyplot as plt # data visualization\n", + "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)" + ], + "id": "fffac537", + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "04cd7e4e" + }, + "source": [ + "### Loading the dataset \n", + "#### For implementation we will be using house prediction dataset . 
The dataset can be found [here](https://github.com/vrinda01go/Hellofoss/blob/main/Insti_data.csv)" + ], + "id": "04cd7e4e" + }, + { + "cell_type": "code", + "metadata": { + "id": "4ac8e74b", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "f8967616-b263-4127-bcc1-e4ce5d4799f4" + }, + "source": [ + "df['Room Size']=df['Room Size']/max(df['Room Size']) \n", + "\n", + "data = np.array(df, dtype=float)\n", + "data" + ], + "id": "4ac8e74b", + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[2.54000000e+03, 3.00000000e+00, 1.00000000e+00, 3.92448513e-01,\n", + " 8.00000000e+00],\n", + " [2.90000000e+03, 3.00000000e+00, 2.00000000e+00, 5.52631579e-01,\n", + " 5.00000000e+00],\n", + " [ nan, 3.00000000e+00, 1.00000000e+00, 4.50800915e-01,\n", + " 8.00000000e+00],\n", + " ...,\n", + " [1.02000000e+03, 3.00000000e+00, 2.00000000e+00, 5.75514874e-01,\n", + " 3.00000000e+00],\n", + " [2.40000000e+03, 2.00000000e+00, 2.00000000e+00, 5.36613272e-01,\n", + " 1.00000000e+00],\n", + " [9.50000000e+02, 3.00000000e+00, 2.00000000e+00, 6.02402746e-01,\n", + " 2.00000000e+00]])" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "627a5a3a" + }, + "source": [ + "#### Since our dataset has four features, i.e. Hostel No., Occupancy, Room Size and Floor, our hypothesis function becomes\n", + "### hθ(x) = θ0 + θ1x1 + θ2x2 + θ3x3 + θ4x4\n", + "#### where x1, x2, x3 and x4 are the four features (i.e.
Hostel No., Occupancy, Room Size and Floor)" + ], + "id": "627a5a3a" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "58fdf0fe" + }, + "source": [ + "### So your task is to define a hypothesis function having 4 features and a corresponding cost function " + ], + "id": "58fdf0fe" + }, + { + "cell_type": "code", + "metadata": { + "id": "34dc5643" + }, + "source": [ + "def hypotheses_fn(theta,X):\n", + " return X.dot(theta)" + ], + "id": "34dc5643", + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "6fae141b" + }, + "source": [ + "def cost(theta,X,Y):\n", + " pred=hypotheses_fn(theta,X)\n", + " return (1/(2*len(Y))) * np.sum(np.square(pred-Y))" + ], + "id": "6fae141b", + "execution_count": 14, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "178eb11f" + }, + "source": [ + "### Gradient Descent \n", + "#### So we have our hypothesis function and we have a way of measuring how well it fits into the data. Now we need to estimate the parameters in the hypothesis function. 
That's where gradient descent comes in.\n", + "### Your next task is to define gradient descent function having some specific value of learning rate and number of epochs.\n", + "#### Note that learning rate should be neither very high nor very low .Why?\n", + "#### Check out exact reason [here](https://towardsdatascience.com/understanding-learning-rates-and-how-it-improves-performance-in-deep-learning-d0d4059c1c10)\n", + "\n" + ], + "id": "178eb11f" + }, + { + "cell_type": "code", + "metadata": { + "id": "167b5bdc" + }, + "source": [ + "\n", + "def gradient_descent(theta,X,Y,learning_rate,iterations):\n", + " len_y=len(Y)\n", + " costs=[]\n", + " for i in range(iterations):\n", + " pred=hypotheses_fn(theta,X)\n", + " theta-=(1/len_y)*learning_rate*(X.T.dot((pred-Y)))\n", + " costs.append(cost(theta,X,Y))\n", + " \n", + " plt.plot(costs)\n", + " plt.title(\"Costs changing with each epoch\")\n", + " plt.xlabel(\"Epoch\")\n", + " plt.ylabel(\"Cost\")\n", + " plt.show()\n", + " \n", + " return theta" + ], + "id": "167b5bdc", + "execution_count": 24, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "724e407a" + }, + "source": [ + "### Now we want to visualize how our cost function varies with number of epochs .So your next task is to plot graph of updated costs vs number of epochs " + ], + "id": "724e407a" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e82f6ebf" + }, + "source": [ + "#### After plotting above graph you will notice that your cost function decreases with epochs.\n", + "#### Perfect! This is all what we wanted to seek by doing linear regression. \n", + "\n", + "#### Now it's time to test our model on some test data. \n", + "\n", + "#### For this you will define a test function that will take as input Hostel No. , Occupancy, Room Size , Floor and the final theta vector that was returned by our linear regression model and will give us the price of the house. 
Compute it for any set of features given and final value of theta as given by gradient descent function" + ], + "id": "e82f6ebf" + }, + { + "cell_type": "code", + "metadata": { + "id": "c522bca3", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 295 + }, + "outputId": "2d0d53e2-9fab-4e5e-f4d1-6cbfd84647e0" + }, + "source": [ + "X=data[:,1:]\n", + "X=np.c_[np.ones((len(X),1)),X]\n", + "Y=data[:,0]\n", + "theta=np.zeros(5)\n", + "theta_ideal=gradient_descent(theta,X,Y,0.001,200)" + ], + "id": "c522bca3", + "execution_count": 22, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "947f35b1" + }, + "source": [ + "#### Now since we have defined all required functions , we can call functions one by one and get our final results .\n", + "#### Your final task is to use all functions defined above and predict the price of room for some input combinations to check how well your model works." + ], + "id": "947f35b1" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "06e54ef5" + }, + "source": [ + "#### You can try playing with different values of alpha and epochs and see which combination gives most accurate results but do lookout for overfitting \n" + ], + "id": "06e54ef5" + }, + { + "cell_type": "code", + "metadata": { + "id": "3fc631fd", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 422 + }, + "outputId": "7fe60643-2144-4929-ad41-d79bacb907f4" + }, + "source": [ + "def test(X,Y):\n", + " display(pd.DataFrame({\"Predicted values\":X.dot(theta_ideal),\"Actual values\":Y}))\n", + "\n", + "test(X,Y)" + ], + "id": "3fc631fd", + "execution_count": 23, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Predicted valuesActual values
0NaN2540.0
1NaN2900.0
2NaNNaN
3NaN2362.0
4NaNNaN
.........
6533NaN1624.0
6534NaN1470.0
6535NaN1020.0
6536NaN2400.0
6537NaN950.0
\n", + "

6538 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Predicted values Actual values\n", + "0 NaN 2540.0\n", + "1 NaN 2900.0\n", + "2 NaN NaN\n", + "3 NaN 2362.0\n", + "4 NaN NaN\n", + "... ... ...\n", + "6533 NaN 1624.0\n", + "6534 NaN 1470.0\n", + "6535 NaN 1020.0\n", + "6536 NaN 2400.0\n", + "6537 NaN 950.0\n", + "\n", + "[6538 rows x 2 columns]" + ] + }, + "metadata": {} + } + ] + } + ] +} \ No newline at end of file From fc8aa85838412fbdcfba72737b9b3541bfa145c4 Mon Sep 17 00:00:00 2001 From: Mani9550 <93199718+Mani9550@users.noreply.github.com> Date: Fri, 29 Oct 2021 00:25:00 +0530 Subject: [PATCH 10/11] Rename Linear_Regression_Task2_203174002 (1).ipynb to Linear_Regression_Task2_203174002.ipynb Created one pull request for each question. Please merge the PR --- ...3174002 (1).ipynb => Linear_Regression_Task2_203174002.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename Linear_Regression_Task2_203174002 (1).ipynb => Linear_Regression_Task2_203174002.ipynb (99%) diff --git a/Linear_Regression_Task2_203174002 (1).ipynb b/Linear_Regression_Task2_203174002.ipynb similarity index 99% rename from Linear_Regression_Task2_203174002 (1).ipynb rename to Linear_Regression_Task2_203174002.ipynb index ed904a5..c4373eb 100644 --- a/Linear_Regression_Task2_203174002 (1).ipynb +++ b/Linear_Regression_Task2_203174002.ipynb @@ -1076,4 +1076,4 @@ ] } ] -} \ No newline at end of file +} From 3d9bb16f2d7d870c658b08b57ad7d91e3ac45df2 Mon Sep 17 00:00:00 2001 From: Mani9550 <93199718+Mani9550@users.noreply.github.com> Date: Mon, 1 Nov 2021 09:50:46 +0530 Subject: [PATCH 11/11] Add files via upload Added separate PR for each task --- Linear_Regression_Task2_203174002 (1).ipynb | 1079 +++++++++++++++++++ 1 file changed, 1079 insertions(+) create mode 100644 Linear_Regression_Task2_203174002 (1).ipynb diff --git a/Linear_Regression_Task2_203174002 (1).ipynb b/Linear_Regression_Task2_203174002 (1).ipynb new file mode 100644 index 0000000..ed904a5 --- /dev/null +++ 
b/Linear_Regression_Task2_203174002 (1).ipynb @@ -0,0 +1,1079 @@ +{ + "nbformat": 4, + "nbformat_minor": 5, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "colab": { + "name": "Linear_Regression_Task2_203174002.ipynb", + "provenance": [], + "collapsed_sections": [] + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "89223f98" + }, + "source": [ + "\n", + "\n", + "```\n", + "Import libraries\n", + "```\n", + "\n", + "### Importing useful libraries \n" + ], + "id": "89223f98" + }, + { + "cell_type": "code", + "metadata": { + "id": "26f77ebe" + }, + "source": [ + "# This Python 3 environment comes with many helpful analytics libraries installed\n", + "# For example, here's several helpful packages to load in\n", + "import numpy as np # linear algebra\n", + "import matplotlib.pyplot as plt # data visualization\n", + "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", + "import seaborn as sns" + ], + "id": "26f77ebe", + "execution_count": 30, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "31c8220d" + }, + "source": [ + "### Loading the dataset \n", + "#### For implementation we will be using house prediction dataset . 
The dataset can be found [here](https://github.com/vrinda01go/Hellofoss/blob/main/Room_price_data.csv)" + ], + "id": "31c8220d" + }, + { + "cell_type": "code", + "metadata": { + "id": "1c5d873a" + }, + "source": [ + "df = pd.read_csv(\"Hostel_Linear-Dataset.csv\") #import text file \n" + ], + "id": "1c5d873a", + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "1ca9aba0", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 205 + }, + "outputId": "3b129ba4-1b3a-4288-9b62-fd3344787414" + }, + "source": [ + "df.head()" + ], + "id": "1ca9aba0", + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PriceHostel No.OccupancyRoom SizeFloor
02540.0316868
12900.0329665
2NaN317888
32362.0329242
4NaN3210985
\n", + "
" + ], + "text/plain": [ + " Price Hostel No. Occupancy Room Size Floor\n", + "0 2540.0 3 1 686 8\n", + "1 2900.0 3 2 966 5\n", + "2 NaN 3 1 788 8\n", + "3 2362.0 3 2 924 2\n", + "4 NaN 3 2 1098 5" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "af08f245" + }, + "source": [ + "# Visualizing and Cleaning the data\n", + "\n", + "We will now be removing the nan values and identical values from the dataset\n", + "\n", + "For seeing if there are nan values in the dataset we will use the isna() function and then to remove them we will use the dropna() function. We will need to set additional parameters like rows and columns in the dropna function depending on the number of nan values present for each column\n", + "\n", + "Using the sum() function with isna() function we can get to know the number of missing values in each column" + ], + "id": "af08f245" + }, + { + "cell_type": "code", + "metadata": { + "id": "2fd4babb", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "dd94b5ef-188f-4c3a-aec4-fe91cdc6a86d" + }, + "source": [ + "df.isna().sum()" + ], + "id": "2fd4babb", + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Price 1531\n", + "Hostel No. 0\n", + "Occupancy 0\n", + "Room Size 0\n", + "Floor 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "83ef03c3" + }, + "source": [ + "After this we will proceed to remove the nan values \n", + "\n", + "Since there are not many nan values in the column 'Price' as compared to the number of rows we will remove the rows which have nan values. 
\n", + "\n", + "Resetting the index after removing the nan values and dropping the old index will also be important" + ], + "id": "83ef03c3" + }, + { + "cell_type": "code", + "metadata": { + "id": "b65e4503" + }, + "source": [ + "df = df.dropna(subset = ['Price'],how= 'any')\n", + "df = df.reset_index(drop = True)\n", + "## df.isna().sum()" + ], + "id": "b65e4503", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "40784889" + }, + "source": [ + "Now we can use the drop_duplicates function to remove the duplicate rows\n", + "\n", + "This function has a parameter called 'keep' where we specify which of the duplicates to keep; all the others are dropped\n", + "\n", + "For this exercise we will keep the first occurrence and drop the rest of the duplicates" + ], + "id": "40784889" + }, + { + "cell_type": "code", + "metadata": { + "id": "75fa3dc8" + }, + "source": [ + "df = df.drop_duplicates(keep = 'first')\n", + "df = df.reset_index(drop = True)\n", + "## df.duplicated().sum()" + ], + "id": "75fa3dc8", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a007a33f" + }, + "source": [ + "To visualize the data we will first look at the distribution of the different columns to see whether there are enough data points for each category in every column, and drop the categories that have far fewer data points than the others" + ], + "id": "a007a33f" + }, + { + "cell_type": "code", + "metadata": { + "id": "b325df62", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "cf878a76-66d3-4e68-f009-819d724a4eae" + }, + "source": [ + "columns = df.columns\n", + "for column in columns:\n", + " if(column== 'Price' or column=='Room Size'): \n", + " continue\n", + " fig = plt.figure(figsize=(5,5))\n", + " ax = fig.gca()\n", + " counts = df[column].value_counts()\n", + " counts.plot.bar(ax = ax, color='blue')\n", + " ax.set_title('No of rooms '+ column)\n", + " 
ax.set_xlabel(column)\n", + " ax.set_ylabel(\"No of rooms\")\n", + " plt.show()" + ], + "id": "b325df62", + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9811a731" + }, + "source": [ + "We can clearly notice that in the Occupancy column the value 4 has very few data points compared to the others. Hence we can proceed to drop the rows where the occupancy is 4" + ], + "id": "9811a731" + }, + { + "cell_type": "code", + "metadata": { + "id": "825783c0" + }, + "source": [ + "df = df[df['Occupancy'] != 4]\n", + "df = df.reset_index(drop= True)" + ], + "id": "825783c0", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "30c64310", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 205 + }, + "outputId": "bbb5d4dc-f7e5-4b8e-a248-6ecbd09568da" + }, + "source": [ + "df.head()" + ], + "id": "30c64310", + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PriceHostel No.OccupancyRoom SizeFloor
02540.0316868
12900.0329665
22362.0329242
31432.0217063
41702.02210383
\n", + "
" + ], + "text/plain": [ + " Price Hostel No. Occupancy Room Size Floor\n", + "0 2540.0 3 1 686 8\n", + "1 2900.0 3 2 966 5\n", + "2 2362.0 3 2 924 2\n", + "3 1432.0 2 1 706 3\n", + "4 1702.0 2 2 1038 3" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f333875b" + }, + "source": [ + "We will now split the columns into categorical and numerical\n", + "\n", + "Categorical = Hostel No., Occupancy, Floor\n", + "\n", + "Numerical = Price, Occupancy, Room Size, Floor, Hostel No.\n", + "\n", + "Remember that we can treat Hostel No., Occupancy and Floor as either numerical or categorical. For this notebook we will treat them as categorical for data visualization and numerical for the regression" + ], + "id": "f333875b" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0f34ca6a" + }, + "source": [ + "We will also plot the scatter plots and the correlation map to analyse the relationships between the different numerical columns" + ], + "id": "0f34ca6a" + }, + { + "cell_type": "code", + "metadata": { + "scrolled": false, + "id": "f4a3ab6e" + }, + "source": [ + "categorical = ['Hostel No.', 'Occupancy', 'Floor']\n", + "numerical = [ 'Price', 'Room Size']" + ], + "id": "f4a3ab6e", + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "df2b588a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 791 + }, + "outputId": "0327277e-f4a5-4f0b-eae9-134ad15a1e86" + }, + "source": [ + "for column1 in numerical:\n", + " for column2 in numerical:\n", + " if(column1 != column2):\n", + " fig = plt.figure(figsize=(6,6))\n", + " ax = fig.gca()\n", + " df.plot.scatter(x=column1,y=column2,ax = ax)\n", + " ax.set_title('Scatter plot of '+ column1 + ' vs ' + column2)\n", + " ax.set_xlabel(column1)\n", + " ax.set_ylabel(column2)\n", + " plt.show()" + ], + "id": "df2b588a", + "execution_count": 28, + "outputs": [ + { + "output_type": "display_data", + "data": { + 
"image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "625b89a8" + }, + "source": [ + "We can notice that there is no linear relation present between the numerical columns. Hence there is no need to drop anything" + ], + "id": "625b89a8" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8a7f95fa" + }, + "source": [ + "Now we will plot box plots of categorical and numerical columns to get more information about the number of outliers and the distribution." + ], + "id": "8a7f95fa" + }, + { + "cell_type": "code", + "metadata": { + "id": "e73f0769", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "1004ad8d-4668-4006-b743-bb73bd0dc8ca" + }, + "source": [ + "for c in categorical:\n", + " for n in numerical:\n", + " sns.set_style(\"whitegrid\")\n", + " sns.boxplot(x= c, y= n, data=df)\n", + " plt.xlabel(c)\n", + " plt.ylabel(n)\n", + " plt.show()\n", + " " + ], + "id": "e73f0769", + "execution_count": 31, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a74d69fd" + }, + "source": [ + "Now that we have analysed our data we can proceed to normalizing our data and performing the regression" + ], + "id": "a74d69fd" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "77d8c6ba" + }, + "source": [ + "### Importing useful libraries \n" + ], + "id": "77d8c6ba" + }, + { + "cell_type": "code", + "metadata": { + "id": "fffac537" + }, + "source": [ + "# This Python 3 environment comes with many helpful analytics libraries installed\n", + "# For example, here's several helpful packages to load in\n", + "import numpy as np # linear algebra\n", + "import matplotlib.pyplot as plt # data visualization\n", + "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)" + ], + "id": "fffac537", + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "04cd7e4e" + }, + "source": [ + "### Loading the dataset \n", + "#### For the implementation we will be using the hostel room price dataset. 
The dataset can be found [here](https://github.com/vrinda01go/Hellofoss/blob/main/Insti_data.csv)" + ], + "id": "04cd7e4e" + }, + { + "cell_type": "code", + "metadata": { + "id": "4ac8e74b", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "f8967616-b263-4127-bcc1-e4ce5d4799f4" + }, + "source": [ + "df['Room Size']=df['Room Size']/max(df['Room Size']) \n", + "\n", + "data = np.array(df, dtype=float)\n", + "data" + ], + "id": "4ac8e74b", + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[2.54000000e+03, 3.00000000e+00, 1.00000000e+00, 3.92448513e-01,\n", + " 8.00000000e+00],\n", + " [2.90000000e+03, 3.00000000e+00, 2.00000000e+00, 5.52631579e-01,\n", + " 5.00000000e+00],\n", + " [ nan, 3.00000000e+00, 1.00000000e+00, 4.50800915e-01,\n", + " 8.00000000e+00],\n", + " ...,\n", + " [1.02000000e+03, 3.00000000e+00, 2.00000000e+00, 5.75514874e-01,\n", + " 3.00000000e+00],\n", + " [2.40000000e+03, 2.00000000e+00, 2.00000000e+00, 5.36613272e-01,\n", + " 1.00000000e+00],\n", + " [9.50000000e+02, 3.00000000e+00, 2.00000000e+00, 6.02402746e-01,\n", + " 2.00000000e+00]])" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "627a5a3a" + }, + "source": [ + "#### Since our dataset has four features, i.e. Hostel No., Occupancy, Room Size and Floor, our hypothesis function becomes\n", + "### hθ(x) = θ0 + θ1x1 + θ2x2 + θ3x3 + θ4x4\n", + "#### where x1, x2, x3 and x4 are the four features (i.e. 
Hostel No., Occupancy, Room Size and Floor)" + ], + "id": "627a5a3a" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "58fdf0fe" + }, + "source": [ + "### So your task is to define a hypothesis function having 4 features and a corresponding cost function " + ], + "id": "58fdf0fe" + }, + { + "cell_type": "code", + "metadata": { + "id": "34dc5643" + }, + "source": [ + "def hypotheses_fn(theta,X):\n", + " return X.dot(theta)" + ], + "id": "34dc5643", + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "6fae141b" + }, + "source": [ + "def cost(theta,X,Y):\n", + " pred=hypotheses_fn(theta,X)\n", + " return (1/(2*len(Y))) * np.sum(np.square(pred-Y))" + ], + "id": "6fae141b", + "execution_count": 14, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "178eb11f" + }, + "source": [ + "### Gradient Descent \n", + "#### So we have our hypothesis function and we have a way of measuring how well it fits the data. Now we need to estimate the parameters in the hypothesis function. 
That's where gradient descent comes in.\n", + "### Your next task is to define a gradient descent function that takes a specific learning rate and number of epochs.\n", + "#### Note that the learning rate should be neither very high nor very low. Why?\n", + "#### Check out the exact reason [here](https://towardsdatascience.com/understanding-learning-rates-and-how-it-improves-performance-in-deep-learning-d0d4059c1c10)\n", + "\n" + ], + "id": "178eb11f" + }, + { + "cell_type": "code", + "metadata": { + "id": "167b5bdc" + }, + "source": [ + "\n", + "def gradient_descent(theta,X,Y,learning_rate,iterations):\n", + " len_y=len(Y)\n", + " costs=[]\n", + " for i in range(iterations):\n", + " pred=hypotheses_fn(theta,X)\n", + " theta-=(1/len_y)*learning_rate*(X.T.dot((pred-Y)))\n", + " costs.append(cost(theta,X,Y))\n", + " \n", + " plt.plot(costs)\n", + " plt.title(\"Costs changing with each epoch\")\n", + " plt.xlabel(\"Epoch\")\n", + " plt.ylabel(\"Cost\")\n", + " plt.show()\n", + " \n", + " return theta" + ], + "id": "167b5bdc", + "execution_count": 24, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "724e407a" + }, + "source": [ + "### Now we want to visualize how our cost function varies with the number of epochs. So your next task is to plot a graph of the updated costs vs the number of epochs " + ], + "id": "724e407a" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e82f6ebf" + }, + "source": [ + "#### After plotting the above graph you will notice that your cost function decreases with epochs.\n", + "#### Perfect! This is all we wanted to achieve by doing linear regression. \n", + "\n", + "#### Now it's time to test our model on some test data. \n", + "\n", + "#### For this you will define a test function that will take as input Hostel No., Occupancy, Room Size, Floor and the final theta vector that was returned by our linear regression model and will give us the price of the room. 
Compute it for any given set of features using the final value of theta returned by the gradient descent function" + ], + "id": "e82f6ebf" + }, + { + "cell_type": "code", + "metadata": { + "id": "c522bca3", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 295 + }, + "outputId": "2d0d53e2-9fab-4e5e-f4d1-6cbfd84647e0" + }, + "source": [ + "X=data[:,1:]\n", + "X=np.c_[np.ones((len(X),1)),X]\n", + "Y=data[:,0]\n", + "theta=np.zeros(5)\n", + "theta_ideal=gradient_descent(theta,X,Y,0.001,200)" + ], + "id": "c522bca3", + "execution_count": 22, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "947f35b1" + }, + "source": [ + "#### Now that we have defined all the required functions, we can call them one by one and get our final results.\n", + "#### Your final task is to use all the functions defined above and predict the price of a room for some input combinations to check how well your model works." + ], + "id": "947f35b1" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "06e54ef5" + }, + "source": [ + "#### You can try playing with different values of alpha and epochs and see which combination gives the most accurate results, but do look out for overfitting \n" + ], + "id": "06e54ef5" + }, + { + "cell_type": "code", + "metadata": { + "id": "3fc631fd", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 422 + }, + "outputId": "7fe60643-2144-4929-ad41-d79bacb907f4" + }, + "source": [ + "def test(X,Y):\n", + " display(pd.DataFrame({\"Predicted values\":X.dot(theta_ideal),\"Actual values\":Y}))\n", + "\n", + "test(X,Y)" + ], + "id": "3fc631fd", + "execution_count": 23, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Predicted valuesActual values
0NaN2540.0
1NaN2900.0
2NaNNaN
3NaN2362.0
4NaNNaN
.........
6533NaN1624.0
6534NaN1470.0
6535NaN1020.0
6536NaN2400.0
6537NaN950.0
\n", + "

6538 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Predicted values Actual values\n", + "0 NaN 2540.0\n", + "1 NaN 2900.0\n", + "2 NaN NaN\n", + "3 NaN 2362.0\n", + "4 NaN NaN\n", + "... ... ...\n", + "6533 NaN 1624.0\n", + "6534 NaN 1470.0\n", + "6535 NaN 1020.0\n", + "6536 NaN 2400.0\n", + "6537 NaN 950.0\n", + "\n", + "[6538 rows x 2 columns]" + ] + }, + "metadata": {} + } + ] + } + ] +} \ No newline at end of file