From 56cfe7fc5a91d514315a6a0bd7efc5040b85cb30 Mon Sep 17 00:00:00 2001 From: Susan Li Date: Sun, 26 Aug 2018 22:01:43 -0400 Subject: [PATCH] Delete Hotel recommendation.ipynb --- Hotel recommendation.ipynb | 1412 ------------------------------------ 1 file changed, 1412 deletions(-) delete mode 100644 Hotel recommendation.ipynb diff --git a/Hotel recommendation.ipynb b/Hotel recommendation.ipynb deleted file mode 100644 index 40ba226..0000000 --- a/Hotel recommendation.ipynb +++ /dev/null @@ -1,1412 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import datetime\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "%matplotlib inline\n", - "\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.pipeline import make_pipeline\n", - "from sklearn import preprocessing\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn import svm" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Specifying dtypes helps reduce memory requirements for reading in csv file later." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# data_type={'is_booking':bool,'srch_ci' : np.str_, 'srch_co' : np.str_,\n", - " 'srch_adults_cnt' : np.int32, 'srch_children_cnt' : np.int32,\n", - " 'srch_rm_cnt' : np.int32, 'srch_destination_id':np.int32,\n", - " 'user_location_country' : np.int32, 'user_location_region' : np.int32,\n", - " 'user_location_city' : np.int32, 'hotel_cluster' : np.int32,\n", - " 'orig_destination_distance':np.float64, 'date_time':np.str_,\n", - " 'hotel_market':np.int32}\n", - "# d_type={'is_booking':bool, 'cnt':np.int32, 'hotel_cluster' : np.int32,'srch_destination_id':np.int32}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To be able to process locally, we randomly sample 1% of the records. After that, we still have a large number of records at 241,179." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(241179, 24)" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.read_csv('train.csv.gz', sep=',').dropna()\n", - "dest = pd.read_csv('destinations.csv.gz')\n", - "df = df.sample(frac=0.01, random_state=99)\n", - "df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
date_timesite_nameposa_continentuser_location_countryuser_location_regionuser_location_cityorig_destination_distanceuser_idis_mobileis_package...srch_children_cntsrch_rm_cntsrch_destination_idsrch_destination_type_idis_bookingcnthotel_continenthotel_countryhotel_markethotel_cluster
323521342014-05-22 11:40:072366174241032323.523280249901...011442301412517744
297960212013-06-29 12:24:372366311255382288.61218522900...11827210125065959
151851562014-10-30 13:58:32236629440046587.697075521701...011132110125064222
33019482014-08-22 20:14:342366332551212234.439416073301...011152111447150265
254291192014-03-25 18:47:43236631447869839.0087107849300...0182841042506856
\n", - "

5 rows × 24 columns

\n", - "
" - ], - "text/plain": [ - " date_time site_name posa_continent \\\n", - "32352134 2014-05-22 11:40:07 2 3 \n", - "29796021 2013-06-29 12:24:37 2 3 \n", - "15185156 2014-10-30 13:58:32 2 3 \n", - "3301948 2014-08-22 20:14:34 2 3 \n", - "25429119 2014-03-25 18:47:43 2 3 \n", - "\n", - " user_location_country user_location_region user_location_city \\\n", - "32352134 66 174 24103 \n", - "29796021 66 311 25538 \n", - "15185156 66 294 40046 \n", - "3301948 66 332 55121 \n", - "25429119 66 314 47869 \n", - "\n", - " orig_destination_distance user_id is_mobile is_package \\\n", - "32352134 2323.5232 802499 0 1 \n", - "29796021 2288.6121 85229 0 0 \n", - "15185156 587.6970 755217 0 1 \n", - "3301948 2234.4394 160733 0 1 \n", - "25429119 839.0087 1078493 0 0 \n", - "\n", - " ... srch_children_cnt srch_rm_cnt srch_destination_id \\\n", - "32352134 ... 0 1 1442 \n", - "29796021 ... 1 1 8272 \n", - "15185156 ... 0 1 11321 \n", - "3301948 ... 0 1 1152 \n", - "25429119 ... 0 1 8284 \n", - "\n", - " srch_destination_type_id is_booking cnt hotel_continent \\\n", - "32352134 3 0 1 4 \n", - "29796021 1 0 1 2 \n", - "15185156 1 0 1 2 \n", - "3301948 1 1 1 4 \n", - "25429119 1 0 4 2 \n", - "\n", - " hotel_country hotel_market hotel_cluster \n", - "32352134 125 177 44 \n", - "29796021 50 659 59 \n", - "15185156 50 642 22 \n", - "3301948 47 1502 65 \n", - "25429119 50 685 6 \n", - "\n", - "[5 rows x 24 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### EDA\n", - "\n", - "#### What are we predicting?\n", - "The objective is to predict which hotel_cluster a user will book given the information in their search. There are 100 clusters in total. In another word, we are dealing with a 100 class classification problem." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtMAAAF3CAYAAABnkcdUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3Xd43FeZ9vHvGY16771Yllzk3h07TnN6QhIgPZAAIWGBwC4ssOy+S1kueHezBVgg8FJCIAlphBAc0ntc4l7kbkuyeu+9znn/kBIcx0UaaTQz0v25Ll2WRr+ZecaWNfeceX7PMdZaRERERERk7BzeLkBERERExF8pTIuIiIiIuElhWkRERETETQrTIiIiIiJuUpgWEREREXGTwrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJoVpERERERE3Ob1dwFgkJCTYnJwcb5chIiIiIlPYrl27Gq21iaM51q/CdE5ODjt37vR2GSIiIiIyhRljykZ7rNo8RERERETcpDAtIiIiIuImhWkRERERETcpTIuIiIiIuElhWkRERETETQrTIiIiIiJuUpgWEREREXGTwrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJoVpERERERE3KUyLiIiIiLhJYVpERERExE1ObxcgIiIi4use21Y+6mNvX5XlwUrE12hlWkRERETETQrTIiIiIiJuUpgWEREREXGTwrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJoVpERERERE3KUyLiIiIiLhJYVpERERExE0K0yIiIiIiblKYFhERERFxk8K0iIiIiIibFKZFRERERNykMC0iIiIi4iaFaRERERERNylMi4iIiIi4SWFaRERERMRNCtMiIiIiIm5SmBYRERERcZPCtIiIiIiImxSmRURERETcpDAtIiIiIuImhWkRERERETcpTIuIiIiIuElhWkRERETETQrTIiIiIiJuUpgWEREREXGTwrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJoVpERERERE3KUyLiIiIiLhJYVpERERExE0K0yIiIiIiblKYFhERERFxk8K0iIiIiIibFKZFRERERNw0qjBtjLnSGHPUGFNkjPnmab4fbIx5cuT724wxOSOXX2aM2WWM2T/y5yUnXWfZyOVFxpifGGPMRD0oEREREZHJcM4wbYwJAB4ArgIKgNuMMQWnHHY30GKtzQN+BNw/cnkj8BFr7QLgLuCRk67zC+BeIH/k48pxPA4RERERkUk3mpXplUCRtbbEWtsPPAFcf8ox1wO/H/n8aWC9McZYa/dYa6tHLj8IhIysYqcCUdbad621FngYuGHcj0ZEREREZBKNJkynAxUnfV05ctlpj7HWDgJtQPwpx3wc2GOt7Rs5vvIctwmAMeZeY8xOY8zOhoaGUZQrIiIiIjI5RhOmT9fLbMdyjDFmHsOtH58bw20OX2jtr6y1y621yxMTE0dRroiIiIjI5BhNmK4EMk/6OgOoPtMxxhgnEA00j3ydAfwZuNNaW3zS8RnnuE0REREREZ82mjC9A8g3xswwxgQBtwIbTjlmA8MnGALcCLxhrbXGmBjgeeCfrbWb3zvYWlsDdBhjVo9M8bgT+Ms4H4uIiIiIyKQ6Z5ge6YG+D3gZOAw8Za09aIz5njHmupHDHgTijTFFwFeB98bn3QfkAd8yxuwd+Uga+d7ngd8ARUAx8OJEPSgRERERkcngHM1B1t
oXgBdOuezbJ33eC9x0mut9H/j+GW5zJzB/LMWKiIiIiPgS7YAoIiIiIuImhWkRERERETcpTIuIiIiIuElhWkRERETETQrTIiIiIiJuUpgWEREREXGTwrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJoVpERERERE3KUyLiIiIiLhJYVpERERExE0K0yIiIiIiblKYFhERERFxk8K0iIiIiIibFKZFRERERNykMC0iIiIi4iaFaRERERERNylMi4iIiIi4SWFaRERERMRNCtMiIiIiIm5SmBYRERERcZPCtIiIiIiImxSmRURERETc5PR2ASIiIt722LbyUR97+6osD1YiIv5GK9MiIiIiIm7SyrSIyElGu0Kp1UkREQGtTIuIiIiIuE1hWkRERETETQrTIiIiIiJuUpgWEREREXGTwrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJoVpERERERE3KUyLiIiIiLhJYVpERERExE0K0yIiIiIiblKYFhERERFxk8K0iIiIiIibFKZFRERERNykMC0iIiIi4iaFaRERERERNylMi4iIiIi4SWFaRERERMRNCtMiIiIiIm5SmBYRERERcZPCtIiIiIiImxSmRURERETcpDAtIiIiIuImp7cLEBGRsXtsW/moj719VZYHKxERmd60Mi0iIiIi4iaFaRERERERNylMi4iIiIi4SWFaRERERMRNCtMiIiIiIm5SmBYRERERcZPCtIiIiIiImxSmRURERETcNKowbYy50hhz1BhTZIz55mm+H2yMeXLk+9uMMTkjl8cbY940xnQaY352ynXeGrnNvSMfSRPxgEREREREJss5d0A0xgQADwCXAZXADmPMBmvtoZMOuxtosdbmGWNuBe4HbgF6gW8B80c+TnWHtXbnOB+DiIiIiIhXjGZleiVQZK0tsdb2A08A159yzPXA70c+fxpYb4wx1toua+0mhkO1iIiIiMiUMpownQ5UnPR15chlpz3GWjsItAHxo7jth0ZaPL5ljDGnO8AYc68xZqcxZmdDQ8MoblJEREREZHKMJkyfLuRaN4451R3W2gXAupGPT57uIGvtr6y1y621yxMTE89ZrIiIiIjIZBlNmK4EMk/6OgOoPtMxxhgnEA00n+1GrbVVI392AI8x3E4iIiIiIuI3RhOmdwD5xpgZxpgg4FZgwynHbADuGvn8RuANa+0ZV6aNMU5jTMLI54HAtcCBsRYvIiIiIuJN55zmYa0dNMbcB7wMBAC/tdYeNMZ8D9hprd0APAg8YowpYnhF+tb3rm+MKQWigCBjzA3A5UAZ8PJIkA4AXgN+PaGPTERERETEw84ZpgGstS8AL5xy2bdP+rwXuOkM1805w80uG12JIiIiIiK+STsgioiIiIi4SWFaRERERMRNCtMiIiIiIm5SmBYRERERcZPCtIiIiIiImxSmRURERETcpDAtIiIiIuImhWkRERERETcpTIuIiIiIuElhWkRERETETQrTIiIiIiJuUpgWEREREXGT09sFiIiIZz22rXxUx92+KsvDlYiITD0K0yIiIiJTxGhfPINeQE8UtXmIiIiIiLhJYVpERERExE1q85gi9LaOiIiIyOTTyrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJvVMi4yT+tVFRESmL61Mi4iIiIi4SWFaRERERMRNCtMiIiIiIm5Sz7SckXqBRUREPEfPs1ODVqZFRERERNykMC0iIiIi4ia1eYiInENTZx8vHKhlyOVibV4CeYkR3i7pfR29AxQ3dJEcFUxyVAgOY7xdkojItKIwLSJyBi5r2VLUyKuH63AYQ1CAg4c2l5IWHUJkaCBXz0/BGeC9N/gO17Tzp92VdPcPARDkdJARE0pmXBjz0qLIiA3zWm0iItOFwrSIyGnUtvfyzO5KKlt6mJMSyfWL0wkPCmBvRSvvHG/ky4/v4T9jQ/nPGxeyZmbCpNbWOzDEhn1VbC1pJjU6hNtXZdHeM0B5cw8Vzd1sPN
7A5qJG7lmXS2acArWIN5U2drFhXzUb9lVT197LBfmJrJ+bxEWzk7xdmkwQhWkRkVPsKW/hmd1VBAc6uGVFJgvTozEj7RPLc+JYmh1LUmQw//HSET738C7+/MW15CVNTuvHkdp2vvz4Ho7VdXJ+XgKXFyS/vzq+ODMWgM6+QX7xVhGPbi3jCxfnER0aOCm1ibhjtBMt/GmaRe/AEI9uLWPDvmoKK9sAWDkjjsWZMbx9rIHn99fgMJAZG8ay7FiWZseqRcuPKUyLiJyko3eADfuqyYgN5ROrswkP/vCvSYcxXD4vhYK0KG54YDOf/f0Onv3iWmLCgjxa27vFTdz10HaiQgL51JocZiVHnva4iGAnd56Xwy/eLubRrWXcsy6XIKfON59snhh7plFqvs9ay9efLuS5fdXMT4/iX66ew7UL00iLCQXA5bIcqG7j9cP1PLmjgmf2VLGzrIXrF6eRGh3q5erFHfrtKiJyklcO1jE4ZPnY0ozTBumTZcSG8ctPLqO6tZcv/GE3A0Muj9VV0dzNF/6wi6y4MF76h3VnDNLvSY4K4dblmVS39vCn3ZVYaz1Wm4j8zdYTzTy3r5qvXT6Lv35pHfdeMPP9IA3gcBgWZsTwlctm8aVL8rhxWQZNnX088GYRzxdW0zcw5MXqxR1amRY5g7GsAMnUsKushV3lLVyQn0hiZPCorrMsO45//9gC/vGP+/i35w7y/RsWTHhdnX2DfPb3OxlyWX5953ISIkZX25zUKK6Yl8JLB2tJjgrhkjnq0RTxpIrmbl4orOGSOUl84aK8cx5vjGFpVixzUiJ55WAdW4qb2F/Vxo3LMietdUzGTyvTIiLAkMvynQ0HiApxcvGcxDFd9+PLMvjchbk8urWch98tndC6XC7LV5/cy/H6Dh64YykzEsLHdP11+QksyYzhtcN1HKpum9DaRORvuvsGeXx7OZGhTn548yIcjtH3QIcFOblhSTqfu3AmIYEB/G7LCbadaPJgtTKRtDItIgI8saOcA1Xt3LIik2BnwDmPP/Wdi8zYMOakRPLdDQcpbez+wKrSeHpXf/z6cV45VMe3ri1gXf7YQj4Mr3zdsCSduo5eniusIT85kkAvjvMTmYpc1vLUrgo6+gb53AW5bp8/kRUXxucvnMkTOyr4y95qGjv6uGpBqk5O9HH6jSoi015LVz//9fJRVs2IY2F6tFu34TCGW5ZnkhgZzGPby2js6Bt3XS/sr+Enrx/npmUZfGZtjtu3Exjg4Kr5qbT1DLDtRPO46xKRD3rraD3H6jq5dmHquOe7BwcG8MnzslkzM57NxU08urVMfdQ+TmFaRKa9/3rlKB29g3zv+vnvj8BzR3BgAJ9cnYPDGB7eWkpPv/tPgLvKmvnqU3tZmhXD9z86vroAZiZGkJcUwVtH6/XELDKBatt6ef1wPYszY1iZEzcht+kwhmsXpnHdojSO1XXwy3dKaJiAF+jiGQrTIjKtHahq4/Ht5dx1Xg6zU84+IWM04sKDuGNVNi1dAzy+o5wh19inaByqbudTD+0gNTqUX35y+ajaTkbj8oJkuvuH2FTcOCG3JyKwqagRZ4Dh2oWp437Re6rVufHceV4OTV19fPnxPW79PhHPU5gWkWntF28XExHk5B8uy5+w25yREM71i9Moqu/khf01Y7ruicYu7vztdiKCnTxy98pRTxUZjYzY4W3GNx1vpKtvcMJuV2S66ugdYF9lK8uyYwkL8sxpaLOSI7l+UTrvljTxw1ePeuQ+ZHx0AqKITFtVrT28dKCWu8+fQVTIxO4SuDwnjvqOPjYVNfKHbWXcsSr7nNepaevhE7/ZhstaHrl79bh7L0/n0rnJHKpu551jDVy1IHXCb3+66eob5Hh9JwDvrUkaw5j/7TSK0z9tLWnG5bKsmZng0ftZmh2LM8DwwJvFLMuO5ZI5yR69PxkbhWkRmbZ+v6UUgLvW5Hjk9q+cn0J9Ry/fevYAh6rb+drls4kNP/1Z/s1d/Xzywe
209Qzw+D2rPTZjNjkqhCVZMbxb0sSavARtNe6muvZefrellD9sLaO998Or/A4D2fHhFKRGMS8tyuO7Y8rkGxhyse1EE3NSo0Y9+308vnvdPAor2/jKk/v465fOJzNu4l9si3sUpkVkWuoamQl75fwU0mM8s4WvwxhuW5lFWVM3j2wt46+FNXzt8lncviqbAIfBWsvB6naeK6xmw95qmrv6+f1nVrIgw72JIqO1fk4y+yraePNIPTcsSffofY3FaFdnvblNdl17L1//4z6e3VvFkMty5fwUrluUTpBzeF3aWnBZ2F/ZyhM7Knh+fw3P768hPSaUtXnxLMqImfC+WvGOPeWtdPcPsTYvflLu75ndVVw1P4UH3iri1l9t5XMX5OLUmEufoDAtItPSH3dW0NE7yN3nz/Do/QQ7A/judfO4bWUW391wkG/95SCPba/g4tmJvHSwlpKGLpwOw7r8BP7uwpmsyvX8E3NseBArZsSx/UQT6/ITiJ+EVbWpYOPxBl48UEtIoIPbVmZx9/kzyI4//SY6lxUkkxIdSmNHHwdr2tlX0cpTOyvZVtLMRxalfWB7afE/LmvZXNRIWkwIM87wM+AJ8RHB3Lg0g0e3lfP8/hquX+w7L4anM4VpEfkQf1ghHI8hl+WhLaUsyYphaVbspNzn7JRIHrtnFS8eqOUHzx/mF28Xs3pGPJ89P5er5qecsf3DUy6enciusmbeOtrAx5dlTOp9+6PNRY28eKCW+enRPPyZlcSN8t8rITKYCyMTWZefwO6yFl4+WMsDbxaxIieOywqSCQ/W07A/Ol7XQUNnHzcvz5j0dxoK0qI5Py+BTUWNLM2KVbuHD9D/YhGZdl4/XEdZUzdfv2L2pN6vMYarF6Ry6dxkevqHiA7zXr9yZEggy7Pj2HaiifVzk9TTexbvljTx/P4a5qVFccvyzFEH6ZM5jGF5Thzz0qJ5/UgdW0ua2F/Vxi0rMpmVPP6RjDK5NhU1EhXiZL6bmzyN1/o5Sewqa+HtYw18YvW5T24Wz1KYFpFp58FNJ0iPCeXKeSleuf8gp4Mg54d7HSd7osO6/AS2nWhiU1Ej1y5Mm9T79hfbTzTz3L5q5qZGceuKLAIc41uFDA0K4NqFaazIiePJHRU88m4ZNy3PYGFGzARVLJ5W09ZDcUMXV8xLwenwTs9ycGAAq3PjefNoPfXtvSRFhXilDhmmznURmVYOVLWx7UQzd63JnvYn78SEBbE4M4Ydpc10au70h+wsbebZvVXMTo7kthWZ4w7SJ0uOCuGedblkxIXy5I4KtpY0Tdhti2dtLmokMMBM2G6H7jpvZjyBAYaNx7UJk7dpZVpEJoWv9GH/dtMJwoICuGWFf/Z7T7QL8hPZU97Ku8WN3HtBrrfL8RkljZ38eU8V+UkR3L4qyyMvvEKDAvj0mhk8vr2cDfuq6e4f5OLZSZr24cN6+ofYV9nG8uxYQoMmZmdSd0UEO1meE8e2ErVqedv0XpYRkWmlrr2X5wqruXl5puYrj0iKCmFuahTvljTR0Tvg7XJ8Qv+gi2d2VxE7sjV8oAffwQhyOvjE6mwWZ8bw2uF6Xthfg7XaMtpXFVa1MuSyLM/27qr0e87PG94sZnORVqe9SWFaRKaNh98tZdBl+fTaHG+X4lMump1I74BLu/CNeOVQLc1d/Xxsafppe9snWoDDcOOyDM6bGc/m4ia2nWj2+H2Ke/ZWtJIYGUxajG/0KMeGBbEoI4btpc10q1XLa9TmITKJfKXVYTrq6R/iD9vKuXRu8hlnA09XGbFh5CVG8JtNJ7hrTQ4hgd59+9qbShu7eLe4idW58eQmeGYXytNxGMM1C1Jp6uzj+f01ZMWFaRa1j2nu6qesqZvLC5J9qhXnglmJ7KloZUtJE5fO1Tbj3qCVaRE3WGtp7urnRGMX+6vaeLe4kVcO1bLxeAPd/Vod8EXP7KmktXvA45u0+KsLZyfS0NHHn3ZXersUr+kfdPGn3ZXEhAVyxbzJDy
UOY7hpWSbhQQE8tr2c3oGhSa9BzmxvRQsAizJ9a/JK8nutWsVN9A3qZ8YbtDItMkadfYM8s7uSI7UdH7jcYYa3EX7tcB3Ls+NYm5fg1jxamXgul+W3m04wLy2KVTN8o9fR1+QmhLMoM4Zfvl3CLcszp+Wkk9cO19HU1c/d588g2Omd1fnwYCe3rsjiN5tK+POeKm5dkelTq6DTlbWWvRWtzEgIJ9YHT/S7cFYih2va2VnawtqRPmqZPArTMqnG0pPpi60Ox+s6eHpXJd0DQ1w6N5nMuFAigwOJCHESFhRAfXsfG483sO1EE1tLmpifHs36OUmaAeplbx9voLihix/evEjB5AyMMXzxopnc+8guntlTxc3LM71d0qQqb+5mc1EjK3PimJk4ee0dp5OTEM6lc5N55VAduYnhrJrh+S3m5ez2VbbR2NnPBfmJ3i7ltLLiwpiREM7m4kbWzIzX77lJpjAtMgoDQy5eOVjL5uImkiKD+dTaHFKjP9zPmBIdwk3LM7l8XgpbihrZXtrM0doOblyW4bWdsqay0b44e/FADUmRwdqY5BwuK0hmUUY0P371GNctSps2vdPvtXdEhQZy5XzvbORzqgtmJXKisYvnC2vIjFX/tLf9eXclTodhXprv/h5fmhXLn3ZXUt3aS3qsfl4mk8K0yDl09A7wuy2l1LT1sjo3nqvmp5xzVFZ0aCBXLUhlbX4Cf9haxmPby1k/N4mLZyfh0IrBpKpt72Xj8Ua+fsXsSZnM4M+MMfzTVXO4/dfbeOTdMu45y9zpqXQy7e+3lNLQ0cedq7N95gWEwxhuWp7Jz944zpM7KvjS+jyP7bbn7+8YetrAkIvnCmuYkxrl9dnSZzMnJRIDHKppU5ieZHpmETmLwSEXf9hWTmNnH3eel811i9LGNHM2KiSQz67LZUlmDK8frueJ7eX0D7o8WLGcaktRIyGBDm5fOf1CgDvWzEzgglmJPPBWEW09U3/udENHHz95/TizkiOYkxrl7XI+ICLYyUeXpNPQ2ce7xdoh0VveOdZAc1c/S3zsxMNThQc7yUkI51BNu7dLmXZGlQqMMVcaY44aY4qMMd88zfeDjTFPjnx/mzEmZ+TyeGPMm8aYTmPMz065zjJjzP6R6/zEqMFHfIy1lg37qilv7ubjSzOYk+LeE21ggIMbl2Vw1fwUDla388t3imnt7p/gauV0OvsG2VvRyseWZhCrk0FH7RtXzKa1e4Bfvl3s7VI87r9ePkLPwBDXLPDNFqDZKVHMTo7kjSP12lTHS57ZU0VsWCD5yd7tpR+NgtQo6tr7aOrs83Yp08o5w7QxJgB4ALgKKABuM8YUnHLY3UCLtTYP+BFw/8jlvcC3gK+d5qZ/AdwL5I98XOnOA5DJY62lf9CFa5rszrW1pImdZS1cNDuRhRnjW5EwxrAuP5E7z8uhuaufX20s0S+7SbDtRBODLstn1moc3ljMT4/m+sVp/HbzCerae71djscUVrbyx12VfHptDomRwd4u54yuWZDK4JDl1UN13i5l2mnvHeC1Q3V8ZFGax9psJlLByLsrWp2eXKP5yVgJFFlrS6y1/cATwPWnHHM98PuRz58G1htjjLW2y1q7ieFQ/T5jTCoQZa191w7vm/owcMN4Hoh4TnvPAO8ca+DHrx/nu88d5FvPHuB7fz3If750hJ+8fpwndpTz9rEGhlxTJ2RvKWrk+f01zE2JnNAh+LNTIvnsulz6B138emMJ9R1TN6h4W9/gEFtLmpmVHEFeku+vKPmaf7xsNkMuy49fO+7tUjzCWst3NxwkPjyIL63P93Y5Z5UQGcyamfHsKmuhqqXH2+VMKy/tr6Vv0MVHl6R7u5RRiQ0PIjU6hEPVCtOTaTRhOh2oOOnrypHLTnuMtXYQaAPONssnfeR2znabABhj7jXG7DTG7GxoaBhFuTIRXC7Lgao2fr+llPtfOsJLB2sJCwzg8oJkLpqdxJKsWHITw4kND6KovpO7frud8+9/g/
9++SiljV3eLn9cypu6+cJju0mICOam5ZkTfsJgekwon12Xi8vCrzeeoLZNgdoT3jnWSFffIJfMTvJ2KX4pKz6MO1Zl89TOCoobOr1dzoR7dm8Vu8tb+cYVc4gKCfR2Oed08ZwkwoKdPFdYjZ0m7w76gj/triQnPozFPt4vfbKC1CjKm7vVFjSJRhOmT5ckTv2fPJpj3DreWvsra+1ya+3yxETfnO841QwOufjaH/fx2PZyatp6uGBWIl+9dBafu3AmF81O4rKCZD6yMI0bl2XyydXZfPPKOfz8jqXMSYnk528VcdF/v8XfPbKL6lb/W0HpHRji3kd2Yi180oNn9qdEhXDPulwCDPx6Y4lWmyZYW88Am4oaWJAeTZa2DnfbfZfkEeJ08N8vH/V2KROqq2+Q/3jxCAszorlxWYa3yxmVkMAArihIpry5m32Vbd4uZ1ooa+pi24lmblruXxvnFKRFYeFDG4uJ54wmTFcCJ0/vzwCqz3SMMcYJRAPN57jNk3+Dne42xQv6Boe477E9PLOnivVzk/jGlXO4Yl4KCWfpJ3QGOLh6QSoPfXolW765nr9fn89bx+q59Idv86t3ihkY8p/pFf/zylGO1Hbw41sXEx/h2R7KxMhg7r1gJiGBDn6zqcTvV/R9yauH6nBZuGKeb8wM9lcJEcM/oy8eqOWF/TXeLmfCPPBmEXXtfXznI/NwOPwnJC3NjiU9JpSXDtRoKtAkeHpXJQ4DH1vqHy0e70mJCiE2LFCtHpNoNGF6B5BvjJlhjAkCbgU2nHLMBuCukc9vBN6wZ3kfylpbA3QYY1aPTPG4E/jLmKuXCdXTP8S9D+/ipYO1fOvaAtbPSR5zi0NKdAhfuWwWr37lQtbMjOf/vnCEj/x0EztLz/bayjdsK2niN5tOcMeqLC6epNaAuPAg7lmXS2RIIL/dfILDOmlk3Kpbe9hT3sKa3Hht5z4BPn/RTBZnxvBPTxdS3tTt7XLG7XBNO796p4SPLU1nWXast8sZE4cxfGRhKu29g7x9rN7b5UxpQy7L07squWBW4mk36PJlxhgKUqMobuikb2DI2+VMC+cM0yM90PcBLwOHgaestQeNMd8zxlw3ctiDQLwxpgj4KvD++DxjTCnwQ+BTxpjKkyaBfB74DVAEFAMvTsxDEnd09A5w10Pbeed4A//xsQXcff74ph9kxoXxm7tW8KtPLqOjd5Ab/9+7/PsLh332JMWuvkG+9vQ+MmPD+Jer507qfceEBfG5C3JJiQ7h0a1lfvHCw1dZa3nhQA0hgQFcpF7pCRHkdPDT25ZgDHzxsd30Dfrvk/PgkIt/+lMh0aGB/Os1pw6l8g9Z8eEszIhmU1GjemI9aFNRIzVtvdy8PPNWrFarAAAgAElEQVTcB/uggrRoBl2WY/VT73wHXzSqOS/W2hestbOstTOttT8Yuezb1toNI5/3WmtvstbmWWtXWmtLTrpujrU2zlobYa3NsNYeGrl8p7V2/sht3ne2lWzxrN6BIT7x4HZ2lbXw41sWc+sEbm5x+bwUXv3qBdyxKotfvlPCbzef8MkngB+8cJjKlh7+5+ZFhAdP/sag4cFO7j5/BnlJETyzp4oH3izSSUZuOFrbQUlDF+vnJvn0TmX+JjMujP+6aRH7q9r49xeOeLsctz20uZTCyja+e908v37X4rK5yQy5LG8e1eq0pzy1o4LYsEDWz/XPF+VZcWGEBQVwqFr99ZPB94cmisf96p0S9lW08tPblnD94onvDQsLcvKDjy7ghzcvorKlm5+9WURZk+/0B799rIHHtpVzz7pcVuTEea2OYGcAnzwvm4UZ0fzXy0f5t+cOMehH/ebeNuSyvHiglvjwIFbO8N6/41R1xbwUPrN2Br/bUspLB/yvf7q0sYv/efUol85N5tqFqd4uZ1ziI4JZnhPHjhMtNHdpA6iJ1tzVzyuHarlhSTrBTv98UR7gMMxNieJoXQeDLj2PeJrC9DRX0dzNA28Wcc2CVK5e4N
knmI8tzeDvLpxJUICDX28sYXNRo9dXX9u6B/jG0/vIT4rgq5fN8motAE6Hg5uXZ74fWm751VYqmv2/T3Uy7ChtpqGzj6vmp/jF5gr+6JtXzWFRRjRff7rQr0KctZZvPlNIoMPB92+Y71eTGc7kktlJOBzw2mFt5DLR/rK3ioEhy03L/LPF4z0FaVH0Drg4oZPbPU7PONPc9/56iACH4V+vnZw+4dToUL5wUR6zU6J4fn8Nz+6t9loftbWWb284QFNnPz+8ebHHxuCNlcMYvv2RAv731sUcq+3g6p9s5K+FGnZzNicau3hhfw25CeHMTXVv23c5tyCng5/dvhSAh98t9ZtdPJ/YUcHWkmb+5Zq5pESHeLucCREVGsh5ufHsq2jVrPoJZK3lyR0VLEiPpiDNv3+X5CVFEBhgNNVjEihMT2NvHqnn1UN1fOmS/Ek9Wzk0KIA7VmVx0axEdpQ288jWUq+ccfzHXZX8ZW81X16fz4KM6Em//3O5fnE6L/z9OvKSIrjvsT184+l9dPcPerssn1Pd2sPD75YSExbErSuzpsSqoy/LjAvjl58YPrH4gbeKOOLjE2hq23r5v88f5rzceG5d4d8rjae6YFYiwYEOXj1U6+1SpoyD1e0cqe3g5uX+MX/8bAIDHOQmRFDSoJVpT5v8M63EJ/QODPHd5w6Smxg+7skd7nAYw+XzUogJC2LDvip+vbGEO8/LISp0cnYiO17XwXf+cpA1M+P54sV5k3Kf7siMC+Opz53H/752nAfeKmJzURN/d2EuNy3PfH8l/bFt5aO6rdtXTdyJpTDco1xU30lRfSflzV109w/RO+Cib3D4T6fDkBkXSlZcONnxYfQNDk14/2FjZx8PbSklJDCAz6zNIcILJ49OR2vyEvjixXk8tq2Mh7eWcfHsJNbPTZrw3ULHq6tvkL97dBcDLhf//rEFU+6FVliQkwvyE3nlUB1lTV1ka4OicXtqZwVBTgfXLfKv2dJnkhMfxtG6Djr7BvX70YP0NztN/fqdEsqaunn07lUEOcf/BsVoA92pVs6IIzo0kMe3l/OLt4u5a00OKVGefRu2d2B4Y5qwoAB+fMtiAnx804bAAAdfu2I26/ITuP+lI3zrLwf539eL+Oy6GXxidbbH739gyEVjZx/1HX3Ut/fR0NFLfUcfTZ39DJ3S8x4U4CDY6SA40EH/oIv23g+upMeHBzEnJZI5qVFkx4eNq7e5pq2H324+gbWWz6zNJSbMf6cz+KO48CA+d+FMNuyt5s2j9VS1dnPjskyfecJ+bzfT/VVt/PyOpeQkTM2guWZmAluKm3jlUB2fPX/GlHvBMJl6B4Z4dk8VV85LITrM97eYH433fu7LmrqYl+Z778BOFb7xW08mVUXz8ESNaxakcn5+grfLYXZKJPdckMvDW0r55dvFfGJ1NjMTIzx2f//23CGO1nXw8GdWkuTh4D6RVuXG86fPr2HbiWYeeLOI/3jxCD9/s4g5KVHkJIQzIyGc6HGs7Lf1DFDa2MXx+k5eOlBLfUcvDR19NHf1815kNgyHqKTIYOamRnHdojTykyPISQgnIsj5od3k2roHKGvuoqypm+f2VVM6sj3v5uImgp0O8pMjKUiNZE5K1Jh61us7ernzwe309A/x2fNzSTzLDp3iOYEBDj62NJ3MuDCeK6zm/peOMDc1iuXZseQlRXhtpXpwyMWXH9/D5qIm/uemRVN6J8wgp4OLZyfyXGENx+s7mZUc6e2S/NbLB2tp7x3029nSp5MeE4rTYShtVJj2JIXpaegHzx/GYQz/55rJ3ZzkbNJjQvn8RTP53ZZSfre5lI+OcfvW0bYwPLevmse3l/P5i2ZywaxEd0r1KmMMq3PjWZ0bT2FlK798u4TXDtexfWSjl9iwQHLiw4kOCyTEGUBoYADBgcOrxc8X1tDZN0BH7yCdfYO0dPVT1dpDZUsPVS09dPT9bRU5wBjiI4JIjQllUWYMSZHBJEWGEB8RRGDA31aTP7
7s7H2F0WGBLAyLYWFGDB0jq9T9gy6KGzo5Ujvcm3igqo0AhyE/KYJ5adFcvSDltKvMQy7LxuMN/HFnJa8eqgMDd67OJj3Wv3Ynm2qMMaycEUdOQhg7TjSzp6KVA1VtRIcGsiQrhtzEcOanR0/airXLZfnGnwp55VAd3/1IwTl/RqeCFTPi2FzcxAv7a5iZGOHz77b5oiGX5RdvFZMTH8aamfHeLmfCOAMcZMSGUToFdi/1ZQrT00xTZx8vHazly5fkkRbjWyFkeCfAmfxhWxlP76qkpbufS2YnTdjblmVNXfzzM/tZlh3rE2PwxmthRgwP3LGUR94to7a9l9LGLkqbuiiq76Szb5BTZ6T8/t2yD3wdGewkPTaUjNhQVs2IIyM2jMy4MPKTI9hS1OSxJ+Qgp4O5qVHMTY3CZS0Vzd0crG7nQFUbR2o7eHZvFSlRIaTFhJAaHUrqyPSFDfuqqWnrJTYskDtWZ3H7yix2lLZ4pEYZu6TIEK5ZmMYV81M4UtPBzrJm3j7awFtHGzAG8hIjWJARzaKMGBZmRDM3dWzvRoyGtZbv/fUQz+yu4h8vm8Wn1k7++SDe4HQ4uHp+Ko9uK2PbiSbWzPT+O47+5tk9VRyp7eCnty350Dts/i4nIYx3jjV45LwVGaYwPc3sKG0hwGG4fZXne23dERoUwKfW5vDn3VW8frie1q4BbliSPu5gV9zQyZ0PbifAYfjJbUs+sLrq7wIchvSYUNJjQlmbN/wkaq2lf9BF76CLnoEhBgZd3LAknYgQJxHBTsKDAnCe5e9gW8nkbGnuMIbs+HCy48O5an4KVa09BAY4KG/uprq1h70Vrbx0oJdBl4sLZyXy7WsLuGRu0vtPCArTvsfpcDA/PZr56dF09g2SnxTBvspW9le28c6xRp7ZXTVynGF2SiQLM6JZOBKwh1zWrf/r1lq2FDfxo1ePsbOshXvWzeC+S3z3xGJPmJsaSV5iBK8drmNRRoxXdnL1V32DQ/zw1WMsSI/mGg/vt+ANOfHhvGUbKG/uJj9JbUCeoP9t08igy8Wu8hYumZPk07NWnQ4HNy7LICYsiDeP1tPU1c+NyzLc3v63sLKVTz20AwP84bOrSPexFXlPMMYQHBhAcGDA+33Us1N8+5eoMYaM2LAPtey4XJb+IZfPzAGX0YsIdnLxnCQunjO8JbO1lpq2Xgor2yisbGV/VRsv7K/l8e0VwHDATo0OIT02jIzY4ReIiZHBZ+293lLcyI9fPc720mZSokL4/g3zuWPV9BuRaIzhmoWp/PSN47x2uM4ju9lOVY9uLaeqtYf7P75wyq1Kw/DW4gYobVSY9hSF6WnkcE0HXX2D3L5yYkekeYIxhssKkkmICGLDvmp+8sZxrp6fyoqc2DE9SW463sjnHtlJbHgQj9y9ihlT9Ix+b3F3istYOByGEIeC9FRgjCEtJpS0mFCunD98UqC1lvLmbvZVtvHUjgoqW3rYXdbC1pImYLgtKC16uB0pyOmgb2CIwspWOvoGqRy5XnJUMP923TxuWZE5rV90JUeFsGpGPFtLmlg5I87b5fiF9t4BfvbGcdblJ/jECfmeEBIYQGpMCKVNmjftKQrT08iOE83EhAb61Yl3S7JiyUkI55ndlTy7t4qD1W18bGnGqKZWPF9Yw1ee3MuMhHAevnslyX40uUNkujAntfp0jpyk6rKWho4+qlp6qGztpqqlh60lTQy6LEFOB0UNnYQHO4kKCeQ7HyngtpVZ0zpEn2z93CT2VrTy18IavnrZrGm3Qj9Wv36nhJbuAf7pyjneLsWjcuLD2X6imUGXa1wjSeX0FKaniabOPooaOrl0bpLfnekdGxbEp9fOYPuJZl48UMP/vn6MlTlxzEyKICc+/AP9z02dfbx+uJ6XD9byxtF6lmXF8uBdK6bMzFCR6cBhDMlRISRHhbA0OxYYnrZgzPD3JnoDoqkkLMjJZQXJbNhXzUsHarlqCvYAT5T69l5+s/EE1y5MZX761B
4blxMfzpbiJqpbesjS5j4TTmF6mthR2oLDwLJs/3zrzzEyEi4/KYK/FtawuaiJd4434nQYsuLCKG/uZnd5CztLm3HZ4VF7967L5R8unUVokFasRPydvy0CeNOKnDi2n2jm+88f5uI5SX6zaj845KK5u5/mruGPzt7B4bn2USEkeWCW/E/eOM7AkIuvXT57wm/b12THhwFQ2tStMO0BCtPTwHsnHs5OiRrXph6+ID4imLvW5NA3OERpYzfFDZ0UN3Ty/94uZk5KJPddks/lBcnMS4vS25siMi0FOAzXLkzlN5tO8H/+fID/vmmhT/4+7B0Y4t2SJv6yt4rj9Z20nLRBFAxvEnXy17/ZWMKizBg+sTqbNTPjx/WYDla38cT2Cm5bmTVld8c8WWRIIAkRQZQ2dXEB/tPq6S8UpqeB9048XJnjn6vSpxPsDGB2SuT7Eyo+vixd8zPljCbjREkRX5KbGMFXLp3Fj147RkFaFHef7xszt+vae3n7aAOvH6lj4/FGuvuHCAww5CVFsiQzhrjwIOLDg4iLCCYsKICWrn7qO/qoa+8lLCiAjccbefFALXNSIrn7/BlctzhtzL/73znWwBf+sJv4iCC+vD7fQ4/U9+TEh3Owuh2XtV7bnXSqUpieBt478TA/2XNbdHubgrSIyAd96ZI8DtW08YPnDzE7OXJCplWM9oXpe33tg0Mu9lS08uaRet462sChmnYAUqJC+OiSdC4tSKa8qfuMs//jI4KJjwhmbmoUt6/KondgiA17q3lw0wm+/nQh9790lE+uzuYTq7OIjzh3K8jj28v512cPkJ8UwUOfXkGiB9pHfFVOfDg7y1qob+/z6fG4/khheor724mHyXolKiIyjTgchv+5eTEf//kWvvjYbjbct5bsSeiX7egd4I87K3jrWAMbjzXQ3jtIgMOwLDuWb1w5m4tmJTE3NfL9No2xvHMUEhjAzSsyuWl5BpuLmnhwUwk/eu0YD7xVxEcXp3P3uhnMSv7wLGWXy3L/y0f45dslXDgrkQfuWDppW9z7ivfaWUqbuhSmJ9j0+kmahv524mGst0sREZFJFhHs5Nd3Lue6BzZxz8M7eeYLayc8RLqspaK5m6N1HRyr66C6tReApMhgrpyfwkWzk1iblzCh5+wYYzh/ZDZ0UX0nD20+wZ92V/LkzgrW5SewKCMGi8VacFk4XNPO28cauGNVFv923byz7gA7VcWGBRIV4qS0qYvVufHeLmdKUZiewqy1HKhuIy8pwu9PPBQR/6R+de/Lig/jgduXcudvt/MPT+zhx7cuGXeg7u4b5MhIeD5e10nPwBAOA5lxYVxekMzfX5pPQerknAielxTBDz66gK9dPpvHtpfz6NYyNhc1YozBMDwNKsjp4F+vmcvd58/wyZMxJ8N7M91LG7uw1k7bvwdPUJiewuo7+mju6mfdFN3VSURERmdtXgLfvraA72w4yCX//Rb/cvVcrl+cNqZA1dE7wMHqdg5Vt1PS2InLQmSwk7mpUcxKjiA/KfL9UaTz0iZ/bnNseBBfvDiPL16cN+n37S9yEsLZX9VGS/cAceFB3i5nylCYnsKOjJzoMSclysuViIiIt921JoeFGdF8d8NB/uHJvTy6tYzvXjfvjMcPulxUNPdQ3NBJUX0nFc3dWCAhIoh1+YnMS4siLSZU5+P4kZz35013KUxPIIXpKexwbQdpMSFq8RAREQCWZMXy5y+s5eldldz/0hE+8rNN5CVGEBoUQKDDQaDT4HQ4qO/opbSxm/4hFwZIiwnlkrlJzEuLJjkyWC0Cfio5KoRgp4OK5m6WZulcqomiMD1FdfYNUtHczSVzkrxdioiI+BCHw3DzikyumJ/CT18/zvP7a2jp7mdgyNI/6GLQ5SI6NIglWTHkJUWQmxChnWSnCIcxpMeEUtnS4+1SphSF6SnqaG0HFpiTqhYPERH5sOjQQP712gJyE6fuHgTyYRmxYWwuamRwyOXtUqaM6TcbZpo4UttOVI
iTNM2SFBERkREZsaEMWUtNW6+3S5kytDI9BQ0MuThe18nirBj1tfkpjRMTERFPyIgNBaCypdvLlUwdCtNT0InGLvqHXMxN+fAuUCIi04lemIp8UHRoIBHBTvVNTyC1eUxBh2vaCQww6oMTERGRDzDGkBGrkxAnklampxhrLUdqO8hPiiRwGm6XKuKLtDoqMn5j+X90+6osD1bi/zJiQzla20FH7wCRIRqfO14K01NMTVsvbT0DXDr3zCPx9MQuIiIyfWXEhmGB/VVtrJmpXZLHS0uXU8zh2nYMMFu7HoqIiMhpZMQMn4S4r6LNy5VMDQrTU8yRmg4y48KICNabDiIiIvJhYcFO4sKDKKxs9XYpU4IS1xTS3jNAVWsPlxcke7uUSTfa1hX10YnIeKlVTqaC9JhQ9lUoTE8ErUxPIUdrOwDteigiIiJnlxkbSnVbLw0dfd4uxe8pTE8hxxs6iQ4NJDky2NuliIiIiA9Ljw0DUKvHBFCbxxThspaShk5mJ0dq10MRcYvaF0Smj/SYUBwG9lW0sn7u9GsPnUgK01NEXXsv3f1DzNRGLeLnFOhERDwvyOlgVnIk+yo10WO8FKaniJKGLgByE8O9XIlvU1ATEREZtjAjmlcO1WGt1bva46AwPUWUNHQSHx5ETFiQt0sRERHxKk14Gp1FmTE8tbOSiuYesuLDvF2O31KYngIGh1yUNHaxMCPG26WID9OqvIiInGzRSG7YV9mqMD0OmuYxBRysbqdv0MVMtXiIiIjIKM1OiSTI6dC86XHSyvQUsKW4CYAZCQrTIiIi3uYv7wQGBjiYlxZFoU5CHBetTE8BW4obSY4KJjIk0NuliIiIiB9ZlBHD/qo2Bodc3i7FbylM+7n+QRc7SpvJTdBIPBERERmbRZnR9AwMUdTQ6e1S/JbCtJ/bW9FK74D6pUVERGTs3hteUFihVg93KUz7uS3FjRgDM7QyLSIiImM0Iz6cyBAne7WtuNsUpv3cluIm5qdFExoU4O1SRERExM84HIaFGdEUKky7TWHaj/X0D7G3vJU1M+O9XYqIiIj4qYUZMRyp6aB3YMjbpfglhWk/tqushf4hF+cpTIuIiIibFmXEMOiyHKpp93Ypfklh2o9tKW7E6TCsyInzdikiIiLipxZlRgNQqM1b3KIw7ce2FDexODOG8GDtvSMiIiLuSYkKISkymH3avMUtSmF+qr13gMLKVu67OM/bpYiIiHyAv+wAKMOMMSzMiGGfTkJ0i1am/dTO0mZcFlarX1pERETGaVFGNCUNXbT1DHi7FL+jMO2ntpY0ExTgYGlWrLdLERERET+3KHN485YDVWr1GCu1efiprSVNLM6KISRQ86VFZHpRC4HIxFuYMXwS4r7KVtbmJXi5Gv+ilWk/1NE7wIGqNlbP0BQPERERGb+YsCBy4sPYp4keYzaqMG2MudIYc9QYU2SM+eZpvh9sjHly5PvbjDE5J33vn0cuP2qMueKky0uNMfuNMXuNMTsn4sFMFzvLWob7pXPVLy0iIiITY2FGDIWa6DFm5wzTxpgA4AHgKqAAuM0YU3DKYXcDLdbaPOBHwP0j1y0AbgXmAVcCPx+5vfdcbK1dbK1dPu5HMo1sLWkiMMCwRP3SIiIiMkEWZkRT09ZLfXuvt0vxK6NZmV4JFFlrS6y1/cATwPWnHHM98PuRz58G1htjzMjlT1hr+6y1J4CikduTcdhW0syijBhCg9QvLSIiIhNj8chJiJo3PTajCdPpQMVJX1eOXHbaY6y1g0AbEH+O61rgFWPMLmPMvWMvfXrq7Btkf1WbWjxERERkQs1LiybAYSjUvOkxGc00D3Oay+wojznbdddaa6uNMUnAq8aYI9badz5058NB+16ArKysUZQ7te0qa2HIZVmVq5MPRURExkOTYT4oNCiAWcmR7NVJiGMympXpSiDzpK8zgOozHWOMcQLRQPPZrmutfe/PeuDPnKH9w1r7K2vtcmvt8sTExFGUO7VtLWnC6T
Asy1a/tIiIiEysRRnR7K9qw9pT103lTEYTpncA+caYGcaYIIZPKNxwyjEbgLtGPr8ReMMO/ytsAG4dmfYxA8gHthtjwo0xkQDGmHDgcuDA+B/O1LetpImFGdGEBWlEuIiIiEyshRkxtHYPUN7c7e1S/MY5w/RID/R9wMvAYeApa+1BY8z3jDHXjRz2IBBvjCkCvgp8c+S6B4GngEPAS8AXrbVDQDKwyRizD9gOPG+tfWliH9rU090/SGGl+qVFRETEMxZlDm/eolaP0RvV8qa19gXghVMu+/ZJn/cCN53huj8AfnDKZSXAorEWO93tKmth0GVZpTAtIiIiHjArOZJgp4PCyjauX3zqvAk5He2A6Ee2ljQRoH5pERER8ZDAAAfz06O1E+IYKEz7kW0lzSxIjyYiWP3SIiIi4hkLM6I5UN3G4JDL26X4BYVpP9HTP8S+ylaNxBMRERGPWpIVS++Ai8M1Hd4uxS8oTPuJ3eUtDAxZnXwoIiIiHrUiZ7iddHtps5cr8Q8K035iW0kTDgPL1S8tIiIiHpQaHUpGbCg7TihMj4bCtJ/YOtIvHRkS6O1SREREZIpbmRPHzrJmbd4yCgrTfqB3YIi9Fa0aiSciIiKTYnlOHI2d/Zxo7PJ2KT5PYdoPbD/RTP+Qi/NmKkyLiIiI562cMdxWukN90+ekMO0HNhU1EhTgYNUMTfIQERERz5uZGEFsWCA7Slu8XYrPU5j2AxuPN7IsO5awIM2XFhEREc8zxrA8J04r06OgMO3jGjr6OFzTzvn5Cd4uRURERKaRlTlxlDV1U9/e6+1SfJrCtI/bXNQIwDqFaREREZlEK0baS9XqcXYK0z5u4/FGYsMCmZcW7e1SREREZBqZlxZFaGCAWj3OQWHah1lr2Xi8gTV5CQQ4jLfLERERkWkkMMDBkqwYtmvzlrNSmPZhx+s7qe/oY12eWjxERERk8q3IieNIbTvtvQPeLsVnKUz7sI3Hh/uldfKhiIiIeMOKnDhcFnaXqW/6TBSmfdim4w3kJoSTERvm7VJERERkGlqSFUOAw6hv+iwUpn1U3+AQW0uatSotIiIiXhMe7GR+WpQmepyFwrSP2l3WSs/AEOerX1pERES8aHlOHHsrWukbHPJ2KT5JYdpHbSpqIMBhOG9mvLdLERERkWlsRU4c/YMu9le2ebsUn6Qw7aM2Hm9kSWYMkSGB3i5FREREprEVObGANm85E4VpH9TS1c/+qjb1S4uIiIjXxUcEk5sYrpMQz0Bh2gdtKW7CWm0hLiIiIr5hdW4820qa1Dd9GgrTPmhTUQORwU4WZcR4uxQRERERLp2bRFf/8KQx+SCFaR9jreXtow2cNzMeZ4D+eURERMT71sxMIDQwgNcO1Xm7FJ+jtOZj9lS0Ut3Wy5XzU7xdioiIiAgAIYEBrMtP4LXDdVhrvV2OT3F6uwD5oOcLawgKcHBpQbK3SxERET/22LZyb5cgU8ylBcm8cqiOg9XtzE+P9nY5PkMr0z7E5bK8sL+GC2YlEKWReCIiIuJDLpmThDHw2mG1epxMYdqH7Klooaatl2sWpnq7FBEREZEPSIgIZmlWrML0KRSmfcjzhbUEOR1cOlctHiIiIuJ7Lp2bzIGqdmraerxdis9QmPYR77V4XDgrUbseioiIiE+6rCAJgNcO13u5Et+hMO0jdpe3UNvey7Vq8RAREREfNTMxgpz4MI3IO4nCtI/4a2ENQU4H69XiISIiIj7KGMOlc5N5t7iJzr5Bb5fjExSmfYDLZXnxQA0Xz04kIljTCkVERMR3XVqQTP+Qi43HGrxdik9QmPYBu8pbqGvv4+oFavEQERER37Y8O5bo0EBe1VQPQGHaJzxfWEOwWjxERETEDzgDHFwyJ4k3j9QzOOTydjlepzDtZUMjUzwunp2kFg8RERHxC5fOTaale4Dd5a3eLsXrFKa9bOf/b+/Og6yszjyOf3/sNotAC8gi2CyiQGSNgUAiOsYgY4mTwYBx4hIsK4
kZYWacaGYyE7WSSayhJpHEOOMWNWbQiEsIZhTjFg2isskygCJLB2h2wRakoeln/nhfxk5XY99uod974fepovq+55733Kf71Ol+OPfcc9bvYlt5hQ9qMTMzs4Lx+TNOoXlTMXfFlqxDyZyT6YzNfmszrZonb5eYmZmZFYK2rZpz/pmdmbVoIx8eOJR1OJlyMp2hnR9U8PiijVx8djdae4mHmZmZFZCvjS5h976DPLF4Y9ahZMrJdIYemLeeisoqvn5u76xDMTMzM6uXc0o6Mqh7O+5/dR1VVZF1OJlxMp2R8v0HeXDeei4c0PwVfqMAAAxWSURBVIW+ndtmHY6ZmZlZvUji2jG9eXf7Xl4+gfecdjKdkZlvlPL+/kq+ObZv1qGYmZmZNcj4T3WlS7uW3PfquqxDyYyT6QxUVB7i3lfWMbpvMYNPa591OGZmZmYN0qJZE64cdTqvrtnBqi3vZx1OJpxMZ+DxhZvYVl7BN871rLSZmZkVtis+05NWzZtw/wk6O+1kupEdqgr+6w/vcnaPkxndtzjrcMzMzMw+kfZFLZg4vAdPLdnM9vKKrMNpdE6mG9nvlpWxYec+vjm2D5KyDsfMzMzsE7tmdAkHKqt4eP6GrENpdE6mG1FE8POX3qV3p9ZcOODUrMMxMzMzOyr6dGrD+Wd25uH5G9h/8MQ6xMXJdCN6fuU2Vpa9z9fP7UOTJp6VNjMzs+PHlDEl7Nx7gP9+vTTrUBqVk+lGsq18Pzc/sYy+ndtw6ZDuWYdjZmZmdlR9tk8x557RidufWcXqLeVZh9NonEw3gkNVwbRHlvBBxUHu/MowWjTzj93MzMyOL5KYftlg2rZqxg0zF58wyz2c1TWCGc+/w7x3d3LbhEH0P9WnHZqZmdnxqVPblky/bDCrt5bzb79bmXU4jcLJ9DH2xzU7mPHCO3xpWHcuG94j63DMzMzMjqmx/Ttz7ZgSHnptA3NXbMk6nGPOyfQxtK18P1MfWUKfTm34/qWDvBWemZmZnRD+cVx/BnZrx7cfX8qWPfuzDueYcjJ9jBw8VMXUmR+tky5q0SzrkMzMzMwaRctmTZlx+VAqDlYx7dHFVB6qyjqkY8bJ9DGwafeHTL57Pq+t9TppMzMzOzH16dSGWycMZP7aXXzlnteP2xlqJ9NH2dwVWxh/xyus3lLOjMuH8uURp2UdkpmZmVkmvjziNH48aTDLN+9h/IxXeGn1tqxDOuqcTB8lFZWHuGX2Cq775UJ6dixizt+O4ZLB3bIOy8zMzCxTfzW0B7O/NYbObVty9S/e5PZnVh1Xyz68kPcT2rX3AE8v3czD80tZvbWcr40u4aaL+tOyWdOsQzMzMzPLC307t+Gp60dz629XcNdL7zJvzQ6+Oup0vjiwC21bNc86vE8kp2Ra0jjgDqApcG9E/KjG8y2Bh4DhwE5gUkSsT5/7DjAFOATcEBHP5tJmPttbUckLq7bx1OJNvPz2diqrgv5d2nLPlSP4woAuWYdnZmZmlndaNW/KD790NiN7FzN97mpufOwt/vnJJlxwVhcmDOnGuf07FeRkZJ3JtKSmwJ3AF4CNwJuSZkfE/1arNgV4LyL6SpoM3A5MkjQAmAwMBLoBv5d0RnpPXW3mhXlrdrBgw3ts2LmP0l17Wb9zH9vLKwA4tV0rpnyuhEuHdOesru0yjtTMzMws/00Y0p1LBndjUelufrNkE3OWlvH0sjKaCLqefBK9iovo2bGI0zoWMbxXB0b2Ls465I+Vy8z0OcCaiFgLIOkRYAJQPfGdANySPp4F/EzJpsoTgEciogJYJ2lN2h45tJkXfru0jJlvlHJqu1b0LC7ivP6d6FXcmqE92zOypJgmTbx3tJmZmVl9SGJ4rw4M79WBf7l4AK++s4PFpe9Rumsfpbv28fuVW9nxwQEuP6fncZFMdwf+VO16I/CZI9WJiEpJe4DitHx+jXu7p4/rahMASdcB16WXH0hanUPMR90G4PUsXjh3pwA7sg7CcuK+Kg
zup8Lgfioc7qs8c0XtxXnVTz9K/2WgV64Vc0mma5t6jRzrHKm8tl1EaraZFEbcDdz9cQEaSFoQESOyjsPq5r4qDO6nwuB+Khzuq8Lgfqq/XLbG2whU3yy5B7D5SHUkNQNOBnZ9zL25tGlmZmZmltdySabfBPpJKpHUguQDhbNr1JkNXJU+ngi8EBGRlk+W1FJSCdAPeCPHNs3MzMzM8lqdyzzSNdDfAp4l2cbu/ohYIek2YEFEzAbuA36ZfsBwF0lyTFrv1yQfLKwEro+IQwC1tXn0v70TipfCFA73VWFwPxUG91PhcF8VBvdTPSmZQDYzMzMzs/ryceJmZmZmZg3kZNrMzMzMrIGcTB8HJI2TtFrSGkk3Zx2PJSSdJulFSSslrZA0NS3vKOk5Se+kXztkHaslp71KWixpTnpdIun1tJ8eTT8sbRmT1F7SLEmr0rE1ymMq/0j6u/T33nJJMyW18pjKD5Lul7RN0vJqZbWOISVmpPnFUknDsos8fzmZLnDVjnu/CBgAXJ4e427ZqwT+ISLOAkYC16d9czPwfET0A55Pry17U4GV1a5vB36c9tN7wJRMorKa7gCeiYgzgcEkfeYxlUckdQduAEZExCCSjQYm4zGVLx4AxtUoO9IYuohkJ7Z+JAfo3dVIMRYUJ9OF7/+Pe4+IA8Dho9ktYxFRFhGL0sflJH/0u5P0z4NptQeBS7OJ0A6T1AP4S+De9FrA+cCstIr7KQ9Iagd8nmQHKSLiQETsxmMqHzUDTkrPnigCyvCYygsR8QeSndeqO9IYmgA8FIn5QHtJXRsn0sLhZLrw1Xbce/cj1LWMSDodGEpyKn2XiCiDJOEGOmcXmaV+AnwbqEqvi4HdEVGZXntc5YfewHbgF+mSnHsltcZjKq9ExCZgOlBKkkTvARbiMZXPjjSGnGPkwMl04cvluHfLkKQ2wOPAtIh4P+t47M9JuhjYFhELqxfXUtXjKnvNgGHAXRExFNiLl3TknXS97QSgBOgGtCZZLlCTx1T+8+/CHDiZLnw+mj2PSWpOkkj/KiKeSIu3Hn6bLP26Lav4DIDRwCWS1pMskzqfZKa6ffoWNXhc5YuNwMaIeD29nkWSXHtM5ZcLgHURsT0iDgJPAJ/FYyqfHWkMOcfIgZPpwuej2fNUuu72PmBlRPxHtadmA1elj68CftPYsdlHIuI7EdEjIk4nGT8vRMQVwIvAxLSa+ykPRMQW4E+S+qdFf0Fywq7HVH4pBUZKKkp/Dx7uJ4+p/HWkMTQbuDLd1WMksOfwchD7iE9APA5IGk8yk3b4aPYfZBySAZLGAK8Ay/hoLe4/kayb/jXQk+SPzmURUfPDIJYBSWOBGyPiYkm9SWaqOwKLgb+JiIos4zOQNITkg6ItgLXANSQTQx5TeUTSrcAkkl2NFgPXkqy19ZjKmKSZwFjgFGAr8D3gKWoZQ+l/hn5GsvvHPuCaiFiQRdz5zMm0mZmZmVkDeZmHmZmZmVkDOZk2MzMzM2sgJ9NmZmZmZg3kZNrMzMzMrIGcTJuZmZmZNZCTaTMzMzOzBnIybWZ2jEk6XdLyetS/WlK3HOo9IGliXfVq3DNW0pz63FPt3mmSihpyr5nZ8crJtJlZ/rkaqDOZzsA0oF7JtKSmxygWM7O84GTazKxxNJV0j6QVkuZKOknSEEnzJS2V9KSkDulM8wjgV5KWpPWGS3pZ0kJJz0rqmssLSvq0pHmS3pL0hqS2NZ6/RdKN1a6Xp7PorSU9nd63XNIkSTeQJPgvSnoxrX+hpNckLZL0mKQ2afl6Sf8q6VXgsqP08zMzy0tOps3MGkc/4M6IGAjsBv4aeAi4KSLOJjl2/nsRMQtYAFwREUNIjmP+KTAxIoYD9wM/qOvFJLUAHgWmRsRg4ALgwxxjHQdsjojBETEIeCYiZgCbgfMi4jxJpwDfBS6IiGFpzH9frY39ETEmIh7J8TXNzApSs6wDMDM7QayLiCXp44VAH6B9RLyclj0IPFbLff2BQc
from datetime import datetime

# Accepted input layouts, tried in this order: date only, then date plus time.
_DATE_FORMATS = ('%Y-%m-%d', '%Y-%m-%d %H:%M:%S')

# Fallback values returned when the input is missing (None or a float such as NaN).
_DEFAULT_YEAR = 2013
_DEFAULT_MONTH = 1


def _is_missing(x):
    """Return True for None and for any float (NaN included).

    `isinstance` is used instead of an exact type check so that float
    subclasses (for example numpy.float64) are also treated as missing
    rather than being handed to `strptime`.
    """
    return x is None or isinstance(x, float)


def _parse_date(x):
    """Parse `x` using the first format in `_DATE_FORMATS` that matches.

    Raises:
        ValueError: if `x` matches none of the formats.
    """
    try:
        return datetime.strptime(x, _DATE_FORMATS[0])
    except ValueError:
        # Only a format mismatch triggers the fallback; a second mismatch
        # propagates to the caller as ValueError.
        return datetime.strptime(x, _DATE_FORMATS[1])


def get_year(x):
    """Return the year of a date string, or 2013 when `x` is missing.

    Args:
        x: a string in 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS' form, or a
            missing value (None / float).

    Returns:
        int: the parsed year, or the default 2013 for missing input.

    Raises:
        ValueError: if `x` is a string in neither supported format.
    """
    if _is_missing(x):
        return _DEFAULT_YEAR
    return _parse_date(x).year


def get_month(x):
    """Return the month (1-12) of a date string, or 1 when `x` is missing.

    Args:
        x: a string in 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS' form, or a
            missing value (None / float).

    Returns:
        int: the parsed month, or the default 1 for missing input.

    Raises:
        ValueError: if `x` is a string in neither supported format.
    """
    if _is_missing(x):
        return _DEFAULT_MONTH
    return _parse_date(x).month


def left_merge_dataset(left_dframe, right_dframe, merge_column):
    """Left-join `right_dframe` onto `left_dframe` on `merge_column`.

    Every row of `left_dframe` is kept; thin wrapper around `pd.merge`
    with `how='left'`.
    """
    return pd.merge(left_dframe, right_dframe, on=merge_column, how='left')
# Replace each raw date column with two integer features, `<col>_year` and
# `<col>_month`, computed by the get_year / get_month helpers defined earlier
# in the notebook. One loop replaces three copy-pasted cells; the resulting
# columns and their order are unchanged.
for date_col in ('date_time', 'srch_ci', 'srch_co'):
    if date_col not in df.columns:
        # Already converted on a previous run of this cell; skipping keeps
        # the cell safe to re-run instead of raising KeyError.
        continue
    df[date_col + '_year'] = df[date_col].apply(get_year)
    df[date_col + '_month'] = df[date_col].apply(get_month)
    # The raw string column is no longer needed once the features exist.
    del df[date_col]
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
site_nameposa_continentuser_location_countryuser_location_regionuser_location_cityorig_destination_distanceuser_idis_mobileis_packagechannel...hotel_continenthotel_countryhotel_markethotel_clusterdate_time_yeardate_time_monthsrch_ci_yearsrch_ci_monthsrch_co_yearsrch_co_month
323521342366174241032323.5232802499019...412517744201452014720147
297960212366311255382288.612185229009...25065959201362013720137
15185156236629440046587.6970755217019...25064222201410201412201412
33019482366332551212234.4394160733019...447150265201482015120151
25429119236631447869839.00871078493009...2506856201432014420144
\n", - "

5 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " site_name posa_continent user_location_country \\\n", - "32352134 2 3 66 \n", - "29796021 2 3 66 \n", - "15185156 2 3 66 \n", - "3301948 2 3 66 \n", - "25429119 2 3 66 \n", - "\n", - " user_location_region user_location_city orig_destination_distance \\\n", - "32352134 174 24103 2323.5232 \n", - "29796021 311 25538 2288.6121 \n", - "15185156 294 40046 587.6970 \n", - "3301948 332 55121 2234.4394 \n", - "25429119 314 47869 839.0087 \n", - "\n", - " user_id is_mobile is_package channel ... \\\n", - "32352134 802499 0 1 9 ... \n", - "29796021 85229 0 0 9 ... \n", - "15185156 755217 0 1 9 ... \n", - "3301948 160733 0 1 9 ... \n", - "25429119 1078493 0 0 9 ... \n", - "\n", - " hotel_continent hotel_country hotel_market hotel_cluster \\\n", - "32352134 4 125 177 44 \n", - "29796021 2 50 659 59 \n", - "15185156 2 50 642 22 \n", - "3301948 4 47 1502 65 \n", - "25429119 2 50 685 6 \n", - "\n", - " date_time_year date_time_month srch_ci_year srch_ci_month \\\n", - "32352134 2014 5 2014 7 \n", - "29796021 2013 6 2013 7 \n", - "15185156 2014 10 2014 12 \n", - "3301948 2014 8 2015 1 \n", - "25429119 2014 3 2014 4 \n", - "\n", - " srch_co_year srch_co_month \n", - "32352134 2014 7 \n", - "29796021 2013 7 \n", - "15185156 2014 12 \n", - "3301948 2015 1 \n", - "25429119 2014 4 \n", - "\n", - "[5 rows x 27 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Any correlations?\n", - "We want to know if anything correlates well with hotel_cluster .This will tell us if we should pay more attention to any particular columns." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "srch_destination_type_id -0.036120\n", - "site_name -0.027497\n", - "hotel_country -0.023837\n", - "is_booking -0.022898\n", - "user_location_country -0.020239\n", - "srch_destination_id -0.016736\n", - "srch_co_month -0.005874\n", - "srch_rm_cnt -0.005570\n", - "srch_ci_month -0.005015\n", - "date_time_month -0.002142\n", - "channel -0.001386\n", - "date_time_year -0.000435\n", - "cnt 0.000378\n", - "hotel_continent 0.000422\n", - "user_location_city 0.001241\n", - "user_id 0.003891\n", - "orig_destination_distance 0.006084\n", - "user_location_region 0.006927\n", - "srch_ci_year 0.008562\n", - "is_mobile 0.008788\n", - "srch_co_year 0.009287\n", - "posa_continent 0.012180\n", - "srch_adults_cnt 0.012407\n", - "srch_children_cnt 0.014901\n", - "hotel_market 0.022149\n", - "is_package 0.047598\n", - "hotel_cluster 1.000000\n", - "Name: hotel_cluster, dtype: float64" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.corr()[\"hotel_cluster\"].sort_values()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "No column correlates linearly with hotel_cluster; this means that linear regression and logistic regression won't work well on our data." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(241179, 27)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.shape" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Known combinations of user location cities, origin-destination distances and search destinations will definitely help in finding the hotel cluster." 
# Columns that identify one search context; hotel_cluster is added as a
# fourth key when counting and is later spread into columns by the pivot.
GROUP_KEYS = ['srch_destination_id', 'hotel_country', 'hotel_market']

# For every (destination, country, market, cluster) combination: 'sum' is the
# sum of is_booking and 'count' is the number of rows.
pieces = [df.groupby(GROUP_KEYS + ['hotel_cluster'])['is_booking'].agg(['sum', 'count'])]
agg = pd.concat(pieces).groupby(level=[0, 1, 2, 3]).sum()
agg = agg.dropna()
agg.head()

# Blend the two statistics, weighting the is_booking sum (0.85) more heavily
# than the raw row count (0.15).
agg['sum_and_cnt'] = 0.85 * agg['sum'] + 0.15 * agg['count']

# Turn each column into a share of its (destination, country, market) group
# total. `transform('sum')` returns the group totals aligned to the original
# index, so the four-level index is preserved regardless of pandas version;
# the previous `groupby(...).apply(lambda x: x / x.sum())` can prepend the
# group keys to the index on newer pandas, which makes `reset_index` fail.
# A group whose total is 0 yields NaN, as before.
agg = agg.astype(float) / agg.groupby(level=[0, 1, 2]).transform('sum')
agg = agg.reset_index()
agg.head()

# One row per (destination, country, market), one column per hotel_cluster
# value holding that cluster's blended share; absent combinations are NaN.
agg_pivot = agg.pivot_table(
    index=GROUP_KEYS,
    columns='hotel_cluster',
    values='sum_and_cnt',
).reset_index()
agg_pivot.head()

# Attach the destination table and the per-cluster shares to every event
# row. Left joins keep all rows of df; unmatched values become NaN.
df = pd.merge(df, dest, how='left', on='srch_destination_id')
df = pd.merge(df, agg_pivot, how='left', on=GROUP_KEYS)

# Missing joined values (no destination row / cluster never seen in that
# context) are treated as 0.
df = df.fillna(0)
df.shape
# Keep booking events only.
df = df.loc[df['is_booking'] == 1]

# Features: everything except the user identifier, the target, and the
# (now constant) booking flag.
X = df.drop(['user_id', 'hotel_cluster', 'is_booking'], axis=1)
# The per-cluster share columns merged in earlier are labelled with the
# integer hotel_cluster values, while the remaining columns have string
# names. Newer scikit-learn versions reject frames with mixed-type column
# names, so normalise all labels to strings (values are untouched).
X.columns = X.columns.astype(str)
y = df.hotel_cluster
X.shape, y.shape

y.nunique()

# NOTE(review): the per-cluster share features were computed from
# hotel_cluster / is_booking over the same rows that are cross-validated
# below, so each fold's features already encode its own targets. The scores
# are therefore optimistic; computing the aggregates inside each training
# fold would give an honest estimate.

# Random forest
clf = make_pipeline(
    preprocessing.StandardScaler(),
    RandomForestClassifier(n_estimators=273, max_depth=10, random_state=0),
)
np.mean(cross_val_score(clf, X, y, cv=10))

# SVM (one-vs-one decision function). `svm` comes from the import cell at
# the top of the notebook; it is not re-imported here.
clf = make_pipeline(
    preprocessing.StandardScaler(),
    svm.SVC(decision_function_shape='ovo'),
)
np.mean(cross_val_score(clf, X, y, cv=10))
"kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}