diff --git a/Financial_Market_News_Sentiment_Analysis b/Financial_Market_News_Sentiment_Analysis
new file mode 100644
index 0000000..cd03be1
--- /dev/null
+++ b/Financial_Market_News_Sentiment_Analysis
@@ -0,0 +1,1047 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Financial_Market_News.ipynb",
+ "provenance": [],
+ "collapsed_sections": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Financial_Market_News"
+ ],
+ "metadata": {
+ "id": "LCd6sgd3ZUdv"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Import Library**"
+ ],
+ "metadata": {
+ "id": "qSpaWpD5rkWT"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "FPQTgbYQovS5"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import numpy as np"
+ ],
+ "metadata": {
+ "id": "Cfbqcr6DrclI"
+ },
+ "execution_count": 2,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Import Dataset**"
+ ],
+ "metadata": {
+ "id": "pW65idRdruxT"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df = pd.read_csv('https://raw.githubusercontent.com/Lorddhaval/Dataset/main/Financial%20Market%20News.csv', encoding = \"ISO-8859-1\")"
+ ],
+ "metadata": {
+ "id": "yy9V_CDlri57"
+ },
+ "execution_count": 3,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 768
+ },
+ "id": "sXxA2tmssKPa",
+ "outputId": "0b37abc3-0b42-4823-c205-26a7e9e9355e"
+ },
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Date Label News 1 \\\n",
+ "0 01-01-2010 0 McIlroy's men catch cold from Gudjonsson \n",
+ "1 02-01-2010 0 Warning from history points to crash \n",
+ "2 03-01-2010 0 Comment: Why Israel's peaceniks feel betrayed \n",
+ "3 04-01-2010 1 £750,000-a-goal Weah aims parting shot \n",
+ "4 05-01-2010 1 Leeds arrive in Turkey to the silence of the fans \n",
+ "\n",
+ " News 2 \\\n",
+ "0 Obituary: Brian Walsh \n",
+ "1 Investors flee to dollar haven \n",
+ "2 Court deals blow to seizure of drug assets \n",
+ "3 Newcastle pay for Fletcher years \n",
+ "4 One woman's vision offers loan lifeline \n",
+ "\n",
+ " News 3 \\\n",
+ "0 Workplace blues leave employers in the red \n",
+ "1 Banks and tobacco in favour \n",
+ "2 An ideal target for spooks \n",
+ "3 Brown sent to the stands for Scotland qualifier \n",
+ "4 Working Lives: How world leaders worked \n",
+ "\n",
+ " News 4 \\\n",
+ "0 Classical review: Rattle \n",
+ "1 Review: Llama Farmers \n",
+ "2 World steps between two sides intent on war \n",
+ "3 Tourists wary of breaking new ground \n",
+ "4 Working Lives: Tricks of the trade \n",
+ "\n",
+ " News 5 \\\n",
+ "0 Dance review: Merce Cunningham \n",
+ "1 War jitters lead to sell-off \n",
+ "2 What the region's papers say \n",
+ "3 Canary Wharf climbs into the FTSE 100 \n",
+ "4 Working Lives: six-hour days, long lunches and... \n",
+ "\n",
+ " News 6 \\\n",
+ "0 Genetic tests to be used in setting premiums \n",
+ "1 Your not-so-secret history \n",
+ "2 Comment: Fear and rage in Palestine \n",
+ "3 Review: Bill Bailey \n",
+ "4 Pop review: We Love UK \n",
+ "\n",
+ " News 7 \\\n",
+ "0 Opera review: La Bohème \n",
+ "1 Review: The Northern Sinfonia \n",
+ "2 Poverty and resentment fuels Palestinian fury \n",
+ "3 Review: Classical \n",
+ "4 World music review: Marisa Monte \n",
+ "\n",
+ " News 8 ... \\\n",
+ "0 Pop review: Britney Spears ... \n",
+ "1 Review: Hysteria ... \n",
+ "2 Republican feud fear as dissident is killed ... \n",
+ "3 Review: New Contemporaries 2000 ... \n",
+ "4 Art review: Hollingsworth/Heyer ... \n",
+ "\n",
+ " News 16 News 17 \\\n",
+ "0 Finland 0 - 0 England Healy a marked man \n",
+ "1 Why Wenger will stick to his Gunners Out of luck England hit rock bottom \n",
+ "2 FTSE goes upwardly mobile At this price? BP Amoco \n",
+ "3 More cash on way for counties Cairns carries Kiwis to victory \n",
+ "4 Duisenberg in double trouble Pru to cut pension charges \n",
+ "\n",
+ " News 18 \\\n",
+ "0 Happy birthday Harpers & Queen \n",
+ "1 Wilkinson out of his depth \n",
+ "2 Go fish \n",
+ "3 Year of Blanchflower's flourish when Spurs sto... \n",
+ "4 Art review: Paul Graham \n",
+ "\n",
+ " News 19 \\\n",
+ "0 Win unlimited access to the Raindance film fes... \n",
+ "1 Kinsella sparks Irish power play \n",
+ "2 Bosnian Serb blows himself up to evade law \n",
+ "3 New direct approach brings only pay-per-blues \n",
+ "4 Shearer shot sparks Boro humiliation \n",
+ "\n",
+ " News 20 \\\n",
+ "0 Labour pledges £800m to bridge north-south divide \n",
+ "1 Brown banished as Scots rebound \n",
+ "2 Orange float delayed to 2001 \n",
+ "3 Third Division round-up \n",
+ "4 Ridsdale's lingering fears as Leeds revisit Tu... \n",
+ "\n",
+ " News 21 \\\n",
+ "0 Wales: Lib-Lab pact firm despite resignation \n",
+ "1 Battling Wales cling to lifeline \n",
+ "2 Angry factory workers root out fear, favours a... \n",
+ "3 Second Division round-up \n",
+ "4 Champions League: Rangers v Galatasaray \n",
+ "\n",
+ " News 22 \\\n",
+ "0 Donald Dewar \n",
+ "1 Ehiogu close to sealing Boro move \n",
+ "2 Smith defied advice on dome payout \n",
+ "3 First Division round-up \n",
+ "4 Champions League: Lazio v Arsenal \n",
+ "\n",
+ " News 23 \\\n",
+ "0 Regenerating homes regenerates well-being in ... \n",
+ "1 Man-to-man marking \n",
+ "2 Xerox takes the axe to jobs \n",
+ "3 McLean ends his career with a punch \n",
+ "4 Lazio 1 - 1 Arsenal \n",
+ "\n",
+ " News 24 \\\n",
+ "0 Win £100 worth of underwear \n",
+ "1 Match stats \n",
+ "2 Comment: Refugees in Britain \n",
+ "3 Heskey grabs triple crown \n",
+ "4 England in Pakistan \n",
+ "\n",
+ " News 25 \n",
+ "0 TV guide: Random views \n",
+ "1 French referee at centre of storm is no strang... \n",
+ "2 Maverick who sparked the new intifada \n",
+ "3 Weah on his way as City march on \n",
+ "4 England given olive-branch reception \n",
+ "\n",
+ "[5 rows x 27 columns]"
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Date | \n",
+ " Label | \n",
+ " News 1 | \n",
+ " News 2 | \n",
+ " News 3 | \n",
+ " News 4 | \n",
+ " News 5 | \n",
+ " News 6 | \n",
+ " News 7 | \n",
+ " News 8 | \n",
+ " ... | \n",
+ " News 16 | \n",
+ " News 17 | \n",
+ " News 18 | \n",
+ " News 19 | \n",
+ " News 20 | \n",
+ " News 21 | \n",
+ " News 22 | \n",
+ " News 23 | \n",
+ " News 24 | \n",
+ " News 25 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 01-01-2010 | \n",
+ " 0 | \n",
+ " McIlroy's men catch cold from Gudjonsson | \n",
+ " Obituary: Brian Walsh | \n",
+ " Workplace blues leave employers in the red | \n",
+ " Classical review: Rattle | \n",
+ " Dance review: Merce Cunningham | \n",
+ " Genetic tests to be used in setting premiums | \n",
+ " Opera review: La Bohème | \n",
+ " Pop review: Britney Spears | \n",
+ " ... | \n",
+ " Finland 0 - 0 England | \n",
+ " Healy a marked man | \n",
+ " Happy birthday Harpers & Queen | \n",
+ " Win unlimited access to the Raindance film fes... | \n",
+ " Labour pledges £800m to bridge north-south divide | \n",
+ " Wales: Lib-Lab pact firm despite resignation | \n",
+ " Donald Dewar | \n",
+ " Regenerating homes regenerates well-being in ... | \n",
+ " Win £100 worth of underwear | \n",
+ " TV guide: Random views | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 02-01-2010 | \n",
+ " 0 | \n",
+ " Warning from history points to crash | \n",
+ " Investors flee to dollar haven | \n",
+ " Banks and tobacco in favour | \n",
+ " Review: Llama Farmers | \n",
+ " War jitters lead to sell-off | \n",
+ " Your not-so-secret history | \n",
+ " Review: The Northern Sinfonia | \n",
+ " Review: Hysteria | \n",
+ " ... | \n",
+ " Why Wenger will stick to his Gunners | \n",
+ " Out of luck England hit rock bottom | \n",
+ " Wilkinson out of his depth | \n",
+ " Kinsella sparks Irish power play | \n",
+ " Brown banished as Scots rebound | \n",
+ " Battling Wales cling to lifeline | \n",
+ " Ehiogu close to sealing Boro move | \n",
+ " Man-to-man marking | \n",
+ " Match stats | \n",
+ " French referee at centre of storm is no strang... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 03-01-2010 | \n",
+ " 0 | \n",
+ " Comment: Why Israel's peaceniks feel betrayed | \n",
+ " Court deals blow to seizure of drug assets | \n",
+ " An ideal target for spooks | \n",
+ " World steps between two sides intent on war | \n",
+ " What the region's papers say | \n",
+ " Comment: Fear and rage in Palestine | \n",
+ " Poverty and resentment fuels Palestinian fury | \n",
+ " Republican feud fear as dissident is killed | \n",
+ " ... | \n",
+ " FTSE goes upwardly mobile | \n",
+ " At this price? BP Amoco | \n",
+ " Go fish | \n",
+ " Bosnian Serb blows himself up to evade law | \n",
+ " Orange float delayed to 2001 | \n",
+ " Angry factory workers root out fear, favours a... | \n",
+ " Smith defied advice on dome payout | \n",
+ " Xerox takes the axe to jobs | \n",
+ " Comment: Refugees in Britain | \n",
+ " Maverick who sparked the new intifada | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 04-01-2010 | \n",
+ " 1 | \n",
+ " £750,000-a-goal Weah aims parting shot | \n",
+ " Newcastle pay for Fletcher years | \n",
+ " Brown sent to the stands for Scotland qualifier | \n",
+ " Tourists wary of breaking new ground | \n",
+ " Canary Wharf climbs into the FTSE 100 | \n",
+ " Review: Bill Bailey | \n",
+ " Review: Classical | \n",
+ " Review: New Contemporaries 2000 | \n",
+ " ... | \n",
+ " More cash on way for counties | \n",
+ " Cairns carries Kiwis to victory | \n",
+ " Year of Blanchflower's flourish when Spurs sto... | \n",
+ " New direct approach brings only pay-per-blues | \n",
+ " Third Division round-up | \n",
+ " Second Division round-up | \n",
+ " First Division round-up | \n",
+ " McLean ends his career with a punch | \n",
+ " Heskey grabs triple crown | \n",
+ " Weah on his way as City march on | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 05-01-2010 | \n",
+ " 1 | \n",
+ " Leeds arrive in Turkey to the silence of the fans | \n",
+ " One woman's vision offers loan lifeline | \n",
+ " Working Lives: How world leaders worked | \n",
+ " Working Lives: Tricks of the trade | \n",
+ " Working Lives: six-hour days, long lunches and... | \n",
+ " Pop review: We Love UK | \n",
+ " World music review: Marisa Monte | \n",
+ " Art review: Hollingsworth/Heyer | \n",
+ " ... | \n",
+ " Duisenberg in double trouble | \n",
+ " Pru to cut pension charges | \n",
+ " Art review: Paul Graham | \n",
+ " Shearer shot sparks Boro humiliation | \n",
+ " Ridsdale's lingering fears as Leeds revisit Tu... | \n",
+ " Champions League: Rangers v Galatasaray | \n",
+ " Champions League: Lazio v Arsenal | \n",
+ " Lazio 1 - 1 Arsenal | \n",
+ " England in Pakistan | \n",
+ " England given olive-branch reception | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 27 columns
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 4
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.info()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "q1LK5umDscH9",
+ "outputId": "08189c23-78f5-41ef-f756-468c7e1bbfc2"
+ },
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 4101 entries, 0 to 4100\n",
+ "Data columns (total 27 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Date 4101 non-null object\n",
+ " 1 Label 4101 non-null int64 \n",
+ " 2 News 1 4101 non-null object\n",
+ " 3 News 2 4101 non-null object\n",
+ " 4 News 3 4101 non-null object\n",
+ " 5 News 4 4101 non-null object\n",
+ " 6 News 5 4101 non-null object\n",
+ " 7 News 6 4101 non-null object\n",
+ " 8 News 7 4101 non-null object\n",
+ " 9 News 8 4101 non-null object\n",
+ " 10 News 9 4101 non-null object\n",
+ " 11 News 10 4101 non-null object\n",
+ " 12 News 11 4101 non-null object\n",
+ " 13 News 12 4101 non-null object\n",
+ " 14 News 13 4101 non-null object\n",
+ " 15 News 14 4101 non-null object\n",
+ " 16 News 15 4101 non-null object\n",
+ " 17 News 16 4101 non-null object\n",
+ " 18 News 17 4101 non-null object\n",
+ " 19 News 18 4101 non-null object\n",
+ " 20 News 19 4101 non-null object\n",
+ " 21 News 20 4101 non-null object\n",
+ " 22 News 21 4101 non-null object\n",
+ " 23 News 22 4101 non-null object\n",
+ " 24 News 23 4100 non-null object\n",
+ " 25 News 24 4098 non-null object\n",
+ " 26 News 25 4098 non-null object\n",
+ "dtypes: int64(1), object(26)\n",
+ "memory usage: 865.2+ KB\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.shape"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Bzty2olPslQd",
+ "outputId": "f0c21199-d61e-4b2d-ef03-3cd4d6c73286"
+ },
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(4101, 27)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 6
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.columns"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "WR3yXJKgsplh",
+ "outputId": "fc019c08-9560-4a89-eac1-7aff77b12095"
+ },
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Index(['Date', 'Label', 'News 1', 'News 2', 'News 3', 'News 4', 'News 5',\n",
+ " 'News 6', 'News 7', 'News 8', 'News 9', 'News 10', 'News 11', 'News 12',\n",
+ " 'News 13', 'News 14', 'News 15', 'News 16', 'News 17', 'News 18',\n",
+ " 'News 19', 'News 20', 'News 21', 'News 22', 'News 23', 'News 24',\n",
+ " 'News 25'],\n",
+ " dtype='object')"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 7
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Combine Headlines into a Single Text Feature**"
+ ],
+ "metadata": {
+ "id": "L-4JOuPLsvMa"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "' '.join(str(x) for x in df.iloc[1,2:27])"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 122
+ },
+ "id": "mdBdzwXisstn",
+ "outputId": "f43ed1cf-a8ec-44b0-9364-4ae8bf9bbcc5"
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "\"Warning from history points to crash Investors flee to dollar haven Banks and tobacco in favour Review: Llama Farmers War jitters lead to sell-off Your not-so-secret history Review: The Northern Sinfonia Review: Hysteria Review: The Guardsman Opera: The Marriage of Figaro Review: The Turk in Italy Deutsche spells out its plans for diversification Traders' panic sends oil prices skyward TV sport chief leaves home over romance Leader: Hi-tech twitch Why Wenger will stick to his Gunners Out of luck England hit rock bottom Wilkinson out of his depth Kinsella sparks Irish power play Brown banished as Scots rebound Battling Wales cling to lifeline Ehiogu close to sealing Boro move Man-to-man marking Match stats French referee at centre of storm is no stranger to controversy\""
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ }
+ },
+ "metadata": {},
+ "execution_count": 8
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.index"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "C0cylcX0tHDw",
+ "outputId": "906ac52d-42de-4787-9c1b-c798655b8bed"
+ },
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "RangeIndex(start=0, stop=4101, step=1)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 9
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "len(df.index)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "xWKTW1W-tMRC",
+ "outputId": "7c342a75-afef-447b-ac99-45f9766bbe28"
+ },
+ "execution_count": 10,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "4101"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 10
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Join the 25 headline columns with spaces (skipping missing values) so adjacent headlines do not fuse into one token.\n",
+ "news = [' '.join(str(x) for x in df.iloc[row,2:27] if pd.notna(x))\n",
+ "        for row in range(len(df.index))]"
+ ],
+ "metadata": {
+ "id": "pzfFOBt6tSkn"
+ },
+ "execution_count": 11,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "type(news)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "dzDfQ_natvhz",
+ "outputId": "fb094103-3fcd-433c-bf6f-5b34dd9af619"
+ },
+ "execution_count": 12,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "list"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 12
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "news[0]"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 122
+ },
+ "id": "Mo2YOuzXt3P_",
+ "outputId": "6cf73e27-4821-4e8d-ac0d-8a589b3c75b8"
+ },
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "\"McIlroy's men catch cold from GudjonssonObituary: Brian WalshWorkplace blues leave employers in the redClassical review: RattleDance review: Merce CunninghamGenetic tests to be used in setting premiumsOpera review: La BohèmePop review: Britney SpearsTheatre review: The CircleWales face a fraught nightUnder-21 round-upSmith off to blot his copybookFinns taking the mickeyPraise wasted as Brown studies injury optionsIreland wary of minnowsFinland 0 - 0 EnglandHealy a marked manHappy birthday Harpers & QueenWin unlimited access to the Raindance film festivalLabour pledges £800m to bridge north-south divideWales: Lib-Lab pact firm despite resignationDonald DewarRegenerating homes regenerates well-being in peopleWin £100 worth of underwearTV guide: Random views\""
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ }
+ },
+ "metadata": {},
+ "execution_count": 13
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "X = news"
+ ],
+ "metadata": {
+ "id": "rL4C7mjZuC-n"
+ },
+ "execution_count": 14,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "type(X)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "FNl6k8kIuGbs",
+ "outputId": "f6ddce88-7313-4927-a716-b79003f03d50"
+ },
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "list"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 15
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Convert Text Features to Bag of Words**"
+ ],
+ "metadata": {
+ "id": "vZe5MKaduMPo"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.feature_extraction.text import CountVectorizer"
+ ],
+ "metadata": {
+ "id": "y2UXDAfOuXN9"
+ },
+ "execution_count": 16,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "cv = CountVectorizer(lowercase = True, ngram_range=(1,1))"
+ ],
+ "metadata": {
+ "id": "UmtqcBWsunng"
+ },
+ "execution_count": 17,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "X = cv.fit_transform(X)"
+ ],
+ "metadata": {
+ "id": "02JnEp24uzDN"
+ },
+ "execution_count": 18,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "X.shape"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "fDfSi1nvu5I5",
+ "outputId": "cc65b0c4-e196-4b29-d7b8-6448af50f7f0"
+ },
+ "execution_count": 19,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(4101, 108682)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 19
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "y = df['Label']"
+ ],
+ "metadata": {
+ "id": "YMmn6sNqu7qF"
+ },
+ "execution_count": 20,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "y.shape"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "XsUjnIOdvBcZ",
+ "outputId": "7180db54-08f7-4636-e616-7b39c9f91d67"
+ },
+ "execution_count": 21,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(4101,)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 21
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Get Train Test Split**"
+ ],
+ "metadata": {
+ "id": "s7NVkZ68vFsD"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.model_selection import train_test_split"
+ ],
+ "metadata": {
+ "id": "eK4mZSEtvSZm"
+ },
+ "execution_count": 22,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3, stratify = y, random_state=222529)"
+ ],
+ "metadata": {
+ "id": "uDpT5vYWvdOW"
+ },
+ "execution_count": 23,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.ensemble import RandomForestClassifier"
+ ],
+ "metadata": {
+ "id": "_F4fu-_9v7N7"
+ },
+ "execution_count": 24,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "rf = RandomForestClassifier(n_estimators=200, random_state=222529)  # fixed seed (same as the split) so the fitted forest and its metrics are reproducible"
+ ],
+ "metadata": {
+ "id": "EZNlsbQPwEp0"
+ },
+ "execution_count": 25,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "rf.fit(X_train, y_train)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "pA5plm8UwRG5",
+ "outputId": "b3d5378e-4ff0-4929-9761-6308f962414c"
+ },
+ "execution_count": 26,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "RandomForestClassifier(n_estimators=200)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 26
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "y_pred = rf.predict(X_test)"
+ ],
+ "metadata": {
+ "id": "GrAsi80swkQV"
+ },
+ "execution_count": 27,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.metrics import classification_report,confusion_matrix,accuracy_score"
+ ],
+ "metadata": {
+ "id": "NSFE9b00wxx0"
+ },
+ "execution_count": 28,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "confusion_matrix(y_test, y_pred)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "U9Rck6I-xBi9",
+ "outputId": "9193587b-1e3e-4583-bea2-f131c14fe50a"
+ },
+ "execution_count": 29,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([[117, 464],\n",
+ " [126, 524]])"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 29
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "print(classification_report(y_test, y_pred))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "9w4qC505xNun",
+ "outputId": "32cb80a3-4ac9-473f-9f05-e4f64ce75aca"
+ },
+ "execution_count": 30,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.48 0.20 0.28 581\n",
+ " 1 0.53 0.81 0.64 650\n",
+ "\n",
+ " accuracy 0.52 1231\n",
+ " macro avg 0.51 0.50 0.46 1231\n",
+ "weighted avg 0.51 0.52 0.47 1231\n",
+ "\n"
+ ]
+ }
+ ]
+ }
+ ]
+}