diff --git a/PreprocessingFinal.ipynb b/PreprocessingFinal.ipynb
index 8e586cc..84ba3a5 100644
--- a/PreprocessingFinal.ipynb
+++ b/PreprocessingFinal.ipynb
@@ -5,7 +5,6 @@
     "colab": {
       "name": "PreprocessingFinal.ipynb",
       "provenance": [],
-      "authorship_tag": "ABX9TyMgZo25t5kp1nELT9FOxLtd",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -32,6 +31,8 @@
       "source": [
         "!pip install contractions\n",
         "!pip install emoji\n",
+        "!pip install ekphrasis\n",
+        "\n",
         "import pandas as pd\n",
         "import re\n",
         "import emoji\n",
@@ -64,7 +65,9 @@
         "import json\n",
         "import random\n",
         "import string\n",
-        "import sys"
+        "import sys\n",
+        "from ekphrasis.classes.segmenter import Segmenter\n",
+        "import itertools\n"
       ],
       "execution_count": null,
       "outputs": []
@@ -72,7 +75,126 @@
     {
       "cell_type": "code",
       "metadata": {
-        "id": "z70r1eYrnoB_"
+        "id": "vFOsh5pJq2QZ",
+        "outputId": "5e5b78f9-48e8-4757-a2a1-356258c3d736",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        }
+      },
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "execution_count": 3,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Mounted at /content/drive\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "BVXGLwXaq5I5",
+        "outputId": "874c4c78-14a8-49d2-f1bf-d68c621db7bd",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 204
+        }
+      },
+      "source": [
+        "df_comb = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Capstone/Labelled (Percentage change)/AAPL_label2.1.csv')\n",
+        "combine_ds = df_comb.sample(frac=1)\n",
+        "combine_ds.head()"
+      ],
+      "execution_count": 4,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>message</th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>563747</th>\n",
+              "      <td>$AAPL sold my 250 April puts! BAM</td>\n",
+              "      <td>-1.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>378786</th>\n",
+              "      <td>Stocks from the SMB Scanner: $FB $EXPE $FEYE $...</td>\n",
+              "      <td>-1.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>295238</th>\n",
+              "      <td>$AAPL wish apple would just buyback to 125 to ...</td>\n",
+              "      <td>0.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>110904</th>\n",
+              "      <td>@zspecs @SlopeOfHope @JRNavarro75 Options can ...</td>\n",
+              "      <td>1.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1117553</th>\n",
+              "      <td>$SPY $AAPL \\n\\nJesús Christ. That was some day!</td>\n",
+              "      <td>0.0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "                                                   message  label\n",
+              "563747                   $AAPL sold my 250 April puts! BAM   -1.0\n",
+              "378786   Stocks from the SMB Scanner: $FB $EXPE $FEYE $...   -1.0\n",
+              "295238   $AAPL wish apple would just buyback to 125 to ...    0.0\n",
+              "110904   @zspecs @SlopeOfHope @JRNavarro75 Options can ...    1.0\n",
+              "1117553    $SPY $AAPL \\n\\nJesús Christ. That was some day!    0.0"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 4
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "z70r1eYrnoB_",
+        "outputId": "bd2bd23f-5c39-4eb8-9169-5e18df89a7e9",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 54
+        }
       },
       "source": [
         "combine_ds['message'] = combine_ds['message'].str.lower()\n",
@@ -80,8 +202,16 @@
         "\n",
         "print(message[:10])\n"
       ],
-      "execution_count": null,
-      "outputs": []
+      "execution_count": 5,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "['$aapl sold my 250 april puts! bam', 'stocks from the smb scanner: $fb $expe $feye $blue $nvo $uso $aapl $expe', '$aapl wish apple would just buyback to 125 to get over the 120/700 mental barrier', '@zspecs @slopeofhope @jrnavarro75 options can also be used to reduce risk, as in these examples: http://stks.co/gne2 $aapl', '$spy $aapl \\n\\njesús christ. that was some day!', '$aapl forging ahead with an upward range every day...whether closing at new highs or just small gains..very bullish signal over all.', '$aapl trump supporters can&#39;t afford apple products anyway so this boycott isn&#39;t going to affect sales. rather, it will boost sales and attn', 'today&#39;s iphone event will be a test of trust for apple $aapl http://goo.gl/fpwxke', 'it&#39;s pretty clear that supply &gt;&gt; demand out there right now.. $spy $dia $qqq $aapl $nflx $nvda $pypl etc', 'the industry average profit margin is 1.64%. $aapl outperforms 97% of its industry peers. https://www.chartmill.com/stock/quote/aapl/fundamental-analysis?key=bb853040-a4ac-41c6-b549-d218d2f21b32&amp;utm_source=stocktwits&amp;utm_medium=fa&amp;utm_content=aapl&amp;utm_campaign=social_tracking']\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "code",
@@ -90,7 +220,7 @@
       },
       "source": [
         "def remove_stopwords(msg):\n",
-        "    filtered_sentence = [w for w in tokens if not w in stop_words]\n",
+        "    filtered_sentence = [w for w in msg if w not in stop_words]  # filter the tokens passed in, not a notebook global\n",
         "    return filtered_sentence\n",
         "\n",
         "def remove_punctuation_re(x):\n",
@@ -103,103 +233,126 @@
         "\n",
         "    x = ' '.join(re.sub(r'_',\" \",x).split())              #Removing _ from emojis text\n",
         "\n",
-        "    return x"
+        "    return x\n",
+        "\n",
+        "# replace repeating letter\n",
+        "def rpt_replace(match):\n",
+        "    \"\"\"Return capture group 1 of `match` doubled; used as the re.sub callback.\"\"\"\n",
+        "    return match.group(1) * 2\n",
+        "\n",
+        "# substitute original word with replaced word, if any\n",
+        "def processRepeatings(data):\n",
+        "    \"\"\"Apply `rpt_replace` to every `message_rpt` match in `data` and return the result.\"\"\"\n",
+        "    collapsed = re.sub(message_rpt, rpt_replace, data)\n",
+        "    return collapsed"
       ],
-      "execution_count": null,
+      "execution_count": 8,
       "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {
-        "id": "4LRFAxgcn_fi"
+        "id": "4LRFAxgcn_fi",
+        "outputId": "a10b4eae-a37d-4694-fe94-f127beeebfcf",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 51
+        }
       },
       "source": [
-        "message_er = []\n",
-        "ps = PorterStemmer()\n",
         "stop_words = sw.words(\"english\")\n",
         "tweet_tokenizer = TweetTokenizer()\n",
         "detokenizer = TreebankWordDetokenizer()\n",
         "message_p = []\n",
+        "\n",
+        "# runs of 3+ of the same letter; digits are excluded so numbers like 1000 are left intact\n",
+        "message_rpt = re.compile(r\"([^\\W\\d_])\\1{2,}\", re.IGNORECASE)\n",
+        "\n",
+        "# segmenter using the word statistics from Twitter\n",
+        "seg_tw = Segmenter(corpus=\"twitter\")\n",
+        "\n",
         "for msg in message:\n",
-        "    \n",
         "    # remove emojis\n",
         "    msg = emoji.demojize(msg)\n",
-        "    \n",
+        "\n",
+        "    # fix contractions\n",
+        "    msg = contractions.fix(msg)\n",
+        "\n",
+        "    # remove punctuations\n",
+        "    msg = remove_punctuation_re(msg)\n",
+        "    # no append here: message_p gets exactly one entry per message in the if/else below\n",
         "    #tokenize\n",
-        "    tokens = tweet_tokenizer.tokenize(msg)\n",
+        "    msg_tokens = tweet_tokenizer.tokenize(msg)\n",
+        "\n",
+        "    # split hashtags / run-together words with the word segmenter\n",
+        "    message_seg = []\n",
+        "    for w in msg_tokens:\n",
+        "      message_seg.append(seg_tw.segment(w))\n",
         "\n",
         "    # remove stopwords\n",
-        "    msg = remove_stopwords(msg)\n",
+        "    msg = remove_stopwords(message_seg)\n",
+        "\n",
         "    if 'rt' in msg:\n",
         "      # remove retweets\n",
         "      message_p.append('-1')\n",
         "    else: \n",
         "      # detokenize\n",
         "      msg = detokenizer.detokenize(msg)\n",
-        " \n",
-        "      # fix contractions\n",
-        "      msg = contractions.fix(msg)\n",
-        "\n",
-        "      # remove punctuations\n",
-        "      msg = remove_punctuation_re(msg) \n",
-        "      message_p.append(msg)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "pXWjHAcQoIEh"
-      },
-      "source": [
-        "#Removing repeating words like hurrrryyyyyy-- worrks on tokenized list\n",
-        "\n",
-        "strOfMsg = \" \".join(itertools.chain.from_iterable(message_tok))\n",
-        "message_rpt = re.compile(r\"(.)\\1{2,}\", re.IGNORECASE)\n",
-        "\n",
-        "def rpt_replace(match):\n",
-        "  return match.group(1)+match.group(1)\n",
         "\n",
-        "# t = 'amzn dip buyer fulll attack boooyaaaaaaaaaaaaaaaah'\n",
-        "re_t = ''\n",
-        "message_nrp = []\n",
+        "      # collapse elongated words like hurrrryyyyyy -- works on the detokenized string\n",
+        "      msg = processRepeatings(msg)\n",
         "\n",
-        "def processRepeatings(data):\n",
-        "    re_t= re.sub(message_rpt, rpt_replace, data )\n",
-        "    return message_nrp.append(re_t)\n",
-        "\n",
-        "processRepeatings(strOfMsg)"
+        "      message_p.append(msg)\n"
       ],
-      "execution_count": null,
-      "outputs": []
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Reading twitter - 1grams ...\n",
+            "Reading twitter - 2grams ...\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "code",
       "metadata": {
-        "id": "pwVS7SDnoaIt"
+        "id": "pXWjHAcQoIEh",
+        "outputId": "79b08153-5f77-4203-a1bf-308ca594ab36",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 187
+        }
       },
       "source": [
-        "#For Hashtags elongated words using Word segmenter\n",
-        "!pip install ekphrasis\n",
-        "from ekphrasis.classes.segmenter import Segmenter\n",
-        "\n",
-        "# segmenter using the word statistics from english Wikipedia\n",
-        "# seg_eng = Segmenter(corpus=\"english\") \n",
-        "message_seg = []\n",
-        "\n",
-        "# segmenter using the word statistics from Twitter\n",
-        "seg_tw = Segmenter(corpus=\"twitter\")\n",
-        "\n",
-        "# words = [\"exponentialbackoff\", \"gamedev\", \"retrogaming\", \"thewatercooler\", \"panpsychism\"]\n",
-        "for w in message_sw:\n",
-        "    # print(w)\n",
-        "    message_seg.append(seg_tw.segment(w))\n",
-        "    # print(\"(tw):\", seg_tw.segment(w))\n",
-        "    # print()"
+        "message_p[:10]"
       ],
-      "execution_count": null,
-      "outputs": []
+      "execution_count": 12,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "['aapl sold my 250 april puts bam',\n",
+              " 'aapl sold 250 april puts bam',\n",
+              " 'stocks from the smb scanner fb expe feye blue nvo uso aapl expe',\n",
+              " 'stocks smb scanner fb expe feye blue nvo uso aapl expe',\n",
+              " 'aapl wish apple would just buyback to 125 to get over the 120 700 mental barrier',\n",
+              " 'aapl wish apple would buyback 125 get 120 700 mental barrier',\n",
+              " 'options can also be used to reduce risk as in these examples aapl',\n",
+              " 'options also used reduce risk examples aapl',\n",
+              " 'spy aapl jesús christ that was some day',\n",
+              " 'spy aapl jesús christ day']"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 12
+        }
+      ]
     }
   ]
 }
\ No newline at end of file

	message	label
563747	$AAPL sold my 250 April puts! BAM	-1.0
378786	Stocks from the SMB Scanner: $FB $EXPE $FEYE $...	-1.0
295238	$AAPL wish apple would just buyback to 125 to ...	0.0
110904	@zspecs @SlopeOfHope @JRNavarro75 Options can ...	1.0
1117553	$SPY $AAPL \\n\\nJesús Christ. That was some day!	0.0