UTSAVS26 · akashlogics · Oct 11, 2024 · ruhi47 · Nov 7, 2024
diff --git a/Beginner_Projects/quotes.toscrape.com/output.xlsx b/Beginner_Projects/quotes.toscrape.com/output.xlsx
diff --git a/Beginner_Projects/quotes.toscrape.com/readme.md b/Beginner_Projects/quotes.toscrape.com/readme.md
@@ -0,0 +1,53 @@
+This is a simple web scraping program written in Python that retrieves data from a website. For this example, we will scrape quotes from [quotes.toscrape.com](http://quotes.toscrape.com/), which is a site specifically designed for practicing web scraping.
+
+### Simple Web Scraping Program
+
+#### Requirements
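+You'll need to install the `requests`, `beautifulsoup4` (BeautifulSoup), and `pandas` libraries, plus `openpyxl`, which pandas uses to write `.xlsx` files. You can do this using pip:
+
+```bash
+pip install requests beautifulsoup4 pandas openpyxl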
+```
+
+### Explanation
+1. **Import Libraries**: The program imports the `requests` library to handle HTTP requests, `BeautifulSoup` from `bs4` to parse HTML content, and `pandas` to build the table that is written to Excel.
+
+2. **Function Definition**: The `scrape_quotes()` function:
+   - Defines the URL of the site to scrape.
+   - Sends a GET request to fetch the webpage content.
+   - Checks if the response status code is 200 (OK).
+   - Parses the HTML content using BeautifulSoup.
+   - Finds all quote elements by searching for `div` tags with the class `quote`.
+   - Loops through each quote element, extracting the text and the author, collects them into a pandas DataFrame, and writes it to `quotes.xlsx`.
+
+3. **Run the Scraper**: The last line calls the `scrape_quotes()` function to execute the scraping process.
+
+### How to Run the Program
+1. Ensure you have Python installed on your machine.
+2. Install the required libraries as mentioned above.
+3. Copy the code from the `scrap.ipynb` notebook into a Python file (e.g., `scrape_quotes.py`), or open the notebook in Jupyter and run its cell.
+4. Run the script from your terminal or command prompt:
+
+   ```bash
+   python scrape_quotes.py
+   ```
+
+### Output
+The program saves the quotes and their authors from the specified webpage to `quotes.xlsx` and prints `Quotes have been written to quotes.xlsx`. The saved data looks like this:
+
+```
+Quote: "The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking."
+Author: Albert Einstein
+
+Quote: "It is our choices, Harry, that show what we truly are, far more than our abilities."
+Author: J.K. Rowling
+
+...
+```
+
+### Note
+- The example above is designed for educational purposes and uses a public website that allows scraping.
+- Always check a website's `robots.txt` file and terms of service to ensure that scraping is allowed.
+
+If you have any questions or need further help with web scraping, feel free to ask!
+The script that scrapes the quotes and their authors from the website is in the `scrap.ipynb` notebook.
diff --git a/Beginner_Projects/quotes.toscrape.com/scrap.ipynb b/Beginner_Projects/quotes.toscrape.com/scrap.ipynb
@@ -0,0 +1,86 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Quotes have been written to quotes.xlsx\n"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "from bs4 import BeautifulSoup\n",
+    "import pandas as pd\n",
+    "\n",
+    "def scrape_quotes():\n",
+    "    \"\"\"Scrape the first page of quotes.toscrape.com into quotes.xlsx.\n",
+    "\n",
+    "    Downloads the landing page, pulls the text and author out of every\n",
+    "    quote block, and writes them to 'quotes.xlsx' (columns 'Quote' and\n",
+    "    'Author'). Prints a confirmation on success, or the HTTP status code\n",
+    "    when the response is not 200. Returns None in both cases.\n",
+    "    \"\"\"\n",
+    "    # URL of the site to scrape\n",
+    "    url = 'http://quotes.toscrape.com/'\n",
+    "\n",
+    "    # The timeout stops the call from blocking forever on a stalled server\n",
+    "    response = requests.get(url, timeout=10)\n",
+    "\n",
+    "    # Guard clause: report a non-200 response and stop\n",
+    "    if response.status_code != 200:\n",
+    "        print(f'Failed to retrieve webpage. Status code: {response.status_code}')\n",
+    "        return\n",
+    "\n",
+    "    # Parse the webpage content\n",
+    "    soup = BeautifulSoup(response.text, 'html.parser')\n",
+    "\n",
+    "    # One row per quote block; find() returns None for a missing tag, so skip those\n",
+    "    rows = []\n",
+    "    for quote in soup.find_all('div', class_='quote'):\n",
+    "        text_tag = quote.find('span', class_='text')\n",
+    "        author_tag = quote.find('small', class_='author')\n",
+    "        if text_tag is None or author_tag is None:\n",
+    "            continue\n",
+    "        rows.append({'Quote': text_tag.text, 'Author': author_tag.text})\n",
+    "\n",
+    "    # Explicit columns keep the header row even when no quotes were found\n",
+    "    quotes_df = pd.DataFrame(rows, columns=['Quote', 'Author'])\n",
+    "\n",
+    "    # Write the DataFrame to an Excel file\n",
+    "    quotes_df.to_excel('quotes.xlsx', index=False)\n",
+    "    print('Quotes have been written to quotes.xlsx')\n",
+    "\n",
+    "\n",
+    "# Run the scraper\n",
+    "scrape_quotes()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}