From 5a8253fbd678b5dfb2a3f40c5a8bc866d4cb3ac8 Mon Sep 17 00:00:00 2001 From: Omj69 <144990466+Omj69@users.noreply.github.com> Date: Thu, 17 Apr 2025 09:58:31 +0530 Subject: [PATCH] Add files via upload --- Fake_News_Detector_Improved.ipynb | 647 ++++++++++++++++++++++++++++++ 1 file changed, 647 insertions(+) create mode 100644 Fake_News_Detector_Improved.ipynb diff --git a/Fake_News_Detector_Improved.ipynb b/Fake_News_Detector_Improved.ipynb new file mode 100644 index 0000000..dbe7c65 --- /dev/null +++ b/Fake_News_Detector_Improved.ipynb @@ -0,0 +1,647 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "id": "b6edf3ba", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import re\n", + "import pickle\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.naive_bayes import MultinomialNB\n", + "from sklearn.metrics import accuracy_score, confusion_matrix\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a8b8f15a", + "metadata": {}, + "outputs": [], + "source": [ + "def preprocess(text):\n", + " text = text.lower()\n", + " text = re.sub(r'\\W', ' ', text)\n", + " text = re.sub(r'\\s+', ' ', text)\n", + " return text.strip()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "cb9f81aa", + "metadata": {}, + "outputs": [], + "source": [ + "# Load datasets\n", + "true = pd.read_csv('data/True.csv')\n", + "fake = pd.read_csv('data/Fake.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "610f29ba", + "metadata": {}, + "outputs": [], + "source": [ + "# Add labels\n", + "true['label'] = 1\n", + "fake['label'] = 0\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "e5bd1bae", + "metadata": {}, + "outputs": [], + "source": [ + "# Preprocess\n", + "true['text'] = true['text'].apply(preprocess)\n", + "fake['text'] = fake['text'].apply(preprocess)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "7013c671", + "metadata": {}, + "outputs": [], + "source": [ + "# Balance dataset\n", + "min_len = min(len(true), len(fake))\n", + "true = true.sample(min_len, random_state=42)\n", + "fake = fake.sample(min_len, random_state=42)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "90325fb1", + "metadata": {}, + "outputs": [], + "source": [ + "# Combine and shuffle\n", + "df = pd.concat([true, fake])\n", + "df = df.sample(frac=1, random_state=42).reset_index(drop=True)\n", + "\n", + "X = df['text']\n", + "y = df['label']" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "2565508c", + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# Improved vectorizer settings\n", + "vectorizer = TfidfVectorizer(stop_words='english', max_df=0.9, min_df=2, ngram_range=(1,2))\n", + "X_train_vec = vectorizer.fit_transform(X_train)\n", + "X_test_vec = vectorizer.transform(X_test)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "2fdcccc5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
MultinomialNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "MultinomialNB()" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = MultinomialNB()\n", + "model.fit(X_train_vec, y_train)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "dcb27ef0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.9540095716119995\n", + "Confusion Matrix:\n", + " [[3921 284]\n", + " [ 110 4252]]\n" + ] + } + ], + "source": [ + "y_pred = model.predict(X_test_vec)\n", + "print(\"Accuracy:\", accuracy_score(y_test, y_pred))\n", + "print(\"Confusion Matrix:\\n\", confusion_matrix(y_test, y_pred))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "066dc8e6", + "metadata": {}, + "outputs": [], + "source": [ + "pickle.dump(model, open(\"model.pkl\", \"wb\"))\n", + "pickle.dump(vectorizer, open(\"vectorizer.pkl\", \"wb\"))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed3218e5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'sample_news = [\\n \"NASA launches Artemis mission to return to the Moon.\",\\n \"Chocolate cures cancer, scientists claim in new study.\",\\n \"Government confirms alien contact in leaked documents.\",\\n \"Apple unveils new iPhone with revolutionary AI chip.\",\\n \"United Nations announces climate change mitigation fund.\",\\n \"Time traveler from 3030 visits Earth to warn of zombie apocalypse.\"\\n]\\n\\n# Preprocess and predict\\nprocessed = [preprocess(news) for news in sample_news]\\nvec = vectorizer.transform(processed)\\npreds = model.predict(vec)\\n\\nfor news, pred in zip(sample_news, preds):\\n print(f\"News: {news}\\nPrediction: {\\'Real News ✅\\' if pred == 1 else \\'Fake News ❌\\'}\\n\")\\n'" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Sample news list\n", + "\"\"\"sample_news = [\n", + " \"NASA launches Artemis mission to return to the Moon.\",\n", + " \"Chocolate cures cancer, scientists claim in new study.\",\n", + " \"Government confirms alien contact in leaked documents.\",\n", + " \"Apple unveils new iPhone with revolutionary AI chip.\",\n", + " \"United Nations announces climate change mitigation fund.\",\n", + " \"Time traveler from 3030 visits Earth to warn of zombie apocalypse.\"\n", + "]\n", + "\n", + "# Preprocess and predict\n", + "processed = [preprocess(news) for news in sample_news]\n", + "vec = vectorizer.transform(processed)\n", + "preds = model.predict(vec)\n", + "\n", + "for news, pred in zip(sample_news, preds):\n", + " print(f\"News: {news}\\nPrediction: {'Real News ✅' if pred == 1 else 'Fake News ❌'}\\n\")\n", + "\"\"\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}