diff --git a/Computer Vision/Text_Extractor/EasyOCR/EasyOCR_output_pdf.pdf b/Computer Vision/Text_Extractor/EasyOCR/EasyOCR_output_pdf.pdf new file mode 100644 index 00000000..0d3d0a40 Binary files /dev/null and b/Computer Vision/Text_Extractor/EasyOCR/EasyOCR_output_pdf.pdf differ diff --git a/Computer Vision/Text_Extractor/EasyOCR/EasyOCR_output_text.txt b/Computer Vision/Text_Extractor/EasyOCR/EasyOCR_output_text.txt new file mode 100644 index 00000000..9b6172f5 --- /dev/null +++ b/Computer Vision/Text_Extractor/EasyOCR/EasyOCR_output_text.txt @@ -0,0 +1,4 @@ +SEE +LIGHT +In THE +DARKNESS \ No newline at end of file diff --git a/Computer Vision/Text_Extractor/EasyOCR/Easy_OCR_code.ipynb b/Computer Vision/Text_Extractor/EasyOCR/Easy_OCR_code.ipynb new file mode 100644 index 00000000..b3cca3eb --- /dev/null +++ b/Computer Vision/Text_Extractor/EasyOCR/Easy_OCR_code.ipynb @@ -0,0 +1,277 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": [ + "!pip install easyocr" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "lsSJ-LCbo2w-", + "outputId": "8c6ff71f-2692-4e10-961d-497001938cc3" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting easyocr\n", + " Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from easyocr) (2.5.0+cu121)\n", + "Requirement already satisfied: torchvision>=0.5 in /usr/local/lib/python3.10/dist-packages (from easyocr) (0.20.0+cu121)\n", + "Requirement already satisfied: opencv-python-headless in /usr/local/lib/python3.10/dist-packages (from easyocr) (4.10.0.84)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from easyocr) (1.13.1)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from easyocr) (1.26.4)\n", + "Requirement already satisfied: 
Pillow in /usr/local/lib/python3.10/dist-packages (from easyocr) (10.4.0)\n", + "Requirement already satisfied: scikit-image in /usr/local/lib/python3.10/dist-packages (from easyocr) (0.24.0)\n", + "Collecting python-bidi (from easyocr)\n", + " Downloading python_bidi-0.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", + "Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from easyocr) (6.0.2)\n", + "Requirement already satisfied: Shapely in /usr/local/lib/python3.10/dist-packages (from easyocr) (2.0.6)\n", + "Collecting pyclipper (from easyocr)\n", + " Downloading pyclipper-1.3.0.post6-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (9.0 kB)\n", + "Collecting ninja (from easyocr)\n", + " Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (3.16.1)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (4.12.2)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (3.1.4)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (2024.10.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch->easyocr) (1.3.0)\n", + "Requirement already satisfied: imageio>=2.33 in /usr/local/lib/python3.10/dist-packages (from scikit-image->easyocr) (2.36.0)\n", + "Requirement already satisfied: tifffile>=2022.8.12 in 
/usr/local/lib/python3.10/dist-packages (from scikit-image->easyocr) (2024.9.20)\n", + "Requirement already satisfied: packaging>=21 in /usr/local/lib/python3.10/dist-packages (from scikit-image->easyocr) (24.1)\n", + "Requirement already satisfied: lazy-loader>=0.4 in /usr/local/lib/python3.10/dist-packages (from scikit-image->easyocr) (0.4)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->easyocr) (3.0.2)\n", + "Downloading easyocr-1.7.2-py3-none-any.whl (2.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.9/2.9 MB\u001b[0m \u001b[31m48.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m22.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pyclipper-1.3.0.post6-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (912 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m912.2/912.2 kB\u001b[0m \u001b[31m44.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading python_bidi-0.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (286 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m286.8/286.8 kB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: python-bidi, pyclipper, ninja, easyocr\n", + "Successfully installed easyocr-1.7.2 ninja-1.11.1.1 pyclipper-1.3.0.post6 python-bidi-0.6.3\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install fpdf" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RzslMvBT3m8B", + "outputId": "42040908-5521-4b4e-e453-f29357b1017b" + }, + 
"execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting fpdf\n", + " Downloading fpdf-1.7.2.tar.gz (39 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Building wheels for collected packages: fpdf\n", + " Building wheel for fpdf (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sha256=ba17c004c11322432e0424a6f6b30cf73adc4f92d3b4827fb4634d993ea06273\n", + " Stored in directory: /root/.cache/pip/wheels/f9/95/ba/f418094659025eb9611f17cbcaf2334236bf39a0c3453ea455\n", + "Successfully built fpdf\n", + "Installing collected packages: fpdf\n", + "Successfully installed fpdf-1.7.2\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "vj6ee7edoVGP" + }, + "outputs": [], + "source": [ + "from easyocr import Reader\n", + "from PIL import Image\n", + "from io import BytesIO\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uCLVTVS6oVGR", + "outputId": "8230340d-b821-40da-cc6e-cbeaca35f28b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:easyocr.easyocr:Downloading detection model, please wait. This may take several minutes depending upon your network connection.\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Progress: |██████████████████████████████████████████████████| 100.0% Complete" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:easyocr.easyocr:Downloading recognition model, please wait. 
This may take several minutes depending upon your network connection.\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Progress: |██████████████████████████████████████████████████| 100.0% Complete" + ] + } + ], + "source": [ + "# Loading model for the English language\n", + "language_reader = Reader([\"en\"])" + ] + }, + { + "cell_type": "code", + "source": [ + "def extract_txt_from_img(file_path):\n", + "    # Run EasyOCR on one image file; returns the detected strings joined by newlines.\n", + "    # The with-block closes the file handle; convert(\"RGB\") normalises palette,\n", + "    # alpha and CMYK images to the 3-channel array that readtext handles reliably.\n", + "    with Image.open(file_path) as image:\n", + "        image_np = np.array(image.convert(\"RGB\"))\n", + "    detections = language_reader.readtext(image_np)\n", + "    # Each detection is (bounding box, text, confidence); keep only the text.\n", + "    return \"\\n\".join(result[1] for result in detections)" + ], + "metadata": { + "id": "RypAbeHmsmfc" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "filepath = \"testimg_easyOCR.jfif\"\n", + "img_to_text = extract_txt_from_img(filepath)\n", + "print(img_to_text)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TJGr3qmvtLmJ", + "outputId": "6ebf19ca-1c31-41e3-8bb0-c458d3993ad7" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "SEE\n", + "LIGHT\n", + "In THE\n", + "DARKNESS\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Write the extracted text to a file, always as UTF-8 (OCR output is often non-ASCII)\n", + "def save_text_as_file(text, text_file_path):\n", + "    with open(text_file_path, 'w', encoding='utf-8') as file:\n", + "        file.write(text)" + ], + "metadata": { + "id": "d7Cs1Hvw3Rfq" + }, + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "save_text_as_file(img_to_text, \"EasyOCR_output_text.txt\")" + ], + "metadata": { + "id": "k3VnYn-d3W67" + }, + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# making pdf file\n", + "from fpdf import FPDF\n", + "def save_text_as_pdf(text, pdf_file_path):\n", + "    pdf = FPDF()\n", + "    pdf.set_auto_page_break(auto=True, margin=15)\n", + "    pdf.add_page()\n", + 
"    pdf.set_font(\"Arial\", size=12)\n", + "    # fpdf 1.7.2 core fonts are Latin-1 only: replace anything else with '?'\n", + "    # instead of letting output() raise UnicodeEncodeError.\n", + "    pdf.multi_cell(0, 10, text.encode(\"latin-1\", \"replace\").decode(\"latin-1\"))\n", + "    pdf.output(pdf_file_path)" + ], + "metadata": { + "id": "ifn93tls3r7Z" + }, + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "save_text_as_pdf(img_to_text, \"EasyOCR_output_pdf.pdf\")" + ], + "metadata": { + "id": "Cg6zo_Ew3v6l" + }, + "execution_count": 12, + "outputs": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "accelerator": "GPU", + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/Computer Vision/Text_Extractor/EasyOCR/easyOCR_output.png b/Computer Vision/Text_Extractor/EasyOCR/easyOCR_output.png new file mode 100644 index 00000000..c06bc6c3 Binary files /dev/null and b/Computer Vision/Text_Extractor/EasyOCR/easyOCR_output.png differ diff --git a/Computer Vision/Text_Extractor/EasyOCR/testimg_easyOCR.jfif b/Computer Vision/Text_Extractor/EasyOCR/testimg_easyOCR.jfif new file mode 100644 index 00000000..70478498 Binary files /dev/null and b/Computer Vision/Text_Extractor/EasyOCR/testimg_easyOCR.jfif differ diff --git a/Computer Vision/Text_Extractor/Tesseract/Tesseract_output_pdf.pdf b/Computer Vision/Text_Extractor/Tesseract/Tesseract_output_pdf.pdf new file mode 100644 index 00000000..c0a67625 Binary files /dev/null and b/Computer Vision/Text_Extractor/Tesseract/Tesseract_output_pdf.pdf differ diff --git a/Computer Vision/Text_Extractor/Tesseract/Tesseract_output_text.txt b/Computer Vision/Text_Extractor/Tesseract/Tesseract_output_text.txt new file mode 100644 index 00000000..295f545d --- /dev/null +++ b/Computer Vision/Text_Extractor/Tesseract/Tesseract_output_text.txt @@ -0,0 +1,4 @@ +The Quick Brown +Fox Jumps Over +The Lazy Dog + \ No newline at end of file diff --git a/Computer 
Vision/Text_Extractor/Tesseract/Test_img_tessseract.png b/Computer Vision/Text_Extractor/Tesseract/Test_img_tessseract.png new file mode 100644 index 00000000..504cadb9 Binary files /dev/null and b/Computer Vision/Text_Extractor/Tesseract/Test_img_tessseract.png differ diff --git a/Computer Vision/Text_Extractor/Tesseract/pyTesseract_code.ipynb b/Computer Vision/Text_Extractor/Tesseract/pyTesseract_code.ipynb new file mode 100644 index 00000000..3d079480 --- /dev/null +++ b/Computer Vision/Text_Extractor/Tesseract/pyTesseract_code.ipynb @@ -0,0 +1,204 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "source": [ + "!apt update\n", + "!apt install -y tesseract-ocr\n", + "!pip install pytesseract" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5gr0jxeKxBTH", + "outputId": "b4169741-3631-47b0-cb58-6ef999d07a0a" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[33m\r0% [Working]\u001b[0m\r \rHit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease\n", + "Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease\n", + "Hit:3 http://security.ubuntu.com/ubuntu jammy-security InRelease\n", + "Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease\n", + "Hit:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease\n", + "Hit:6 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease\n", + "Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease\n", + "Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease\n", + "Hit:9 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease\n", + "Hit:10 
https://r2u.stat.illinois.edu/ubuntu jammy InRelease\n", + "Reading package lists... Done\n", + "Building dependency tree... Done\n", + "Reading state information... Done\n", + "52 packages can be upgraded. Run 'apt list --upgradable' to see them.\n", + "\u001b[1;33mW: \u001b[0mSkipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)\u001b[0m\n", + "Reading package lists... Done\n", + "Building dependency tree... Done\n", + "Reading state information... Done\n", + "tesseract-ocr is already the newest version (4.1.1-2.1build1).\n", + "0 upgraded, 0 newly installed, 0 to remove and 52 not upgraded.\n", + "Requirement already satisfied: pytesseract in /usr/local/lib/python3.10/dist-packages (0.3.13)\n", + "Requirement already satisfied: packaging>=21.3 in /usr/local/lib/python3.10/dist-packages (from pytesseract) (24.1)\n", + "Requirement already satisfied: Pillow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from pytesseract) (10.4.0)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install fpdf" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mc-mJHTe1fZ7", + "outputId": "694a3e50-4fc8-449e-aed7-d5731fff9ba8" + }, + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting fpdf\n", + " Downloading fpdf-1.7.2.tar.gz (39 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Building wheels for collected packages: fpdf\n", + " Building wheel for fpdf (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sha256=c65af2b316c1958222f6599226e3149893b294e54f4676f444a06654e0838f9f\n", + " Stored in directory: /root/.cache/pip/wheels/f9/95/ba/f418094659025eb9611f17cbcaf2334236bf39a0c3453ea455\n", + "Successfully built fpdf\n", + "Installing collected packages: fpdf\n", + "Successfully installed fpdf-1.7.2\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import pytesseract\n", + "from PIL import Image\n", + "\n", + "pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'\n", + "\n", + "def extract_text_with_pytesseract(filepath):\n", + "    # Run Tesseract on one image file; the with-block closes the file handle afterwards.\n", + "    with Image.open(filepath) as image:\n", + "        return pytesseract.image_to_string(image)" + ], + "metadata": { + "id": "TA_Nn1S4zAJh" + }, + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Example usage\n", + "filepath = \"Test_img_tessseract.png\"\n", + "img_to_text = extract_text_with_pytesseract(filepath)\n", + "print(img_to_text)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vfjdPeVmzIl7", + "outputId": "7e5e0afc-3f5b-447d-9097-f26a5ac40b3f" + }, + "execution_count": 20, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The Quick Brown\n", + "Fox Jumps Over\n", + "The Lazy Dog\n", + "\f\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Write the extracted text to a file, always as UTF-8 (OCR output is often non-ASCII)\n", + "def save_text_as_file(text, text_file_path):\n", + "    with open(text_file_path, 'w', encoding='utf-8') as file:\n", + "        file.write(text)" + ], + "metadata": { + "id": "FjO67ZpTzqv_" + }, + "execution_count": 26, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "save_text_as_file(img_to_text, \"Tesseract_output_text.txt\")" + ], + "metadata": { + "id": "1WnfddBA0J2W" + }, + "execution_count": 25, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# making pdf file\n", + "from fpdf import FPDF\n", + "def 
save_text_as_pdf(text, pdf_file_path):\n", + "    pdf = FPDF()\n", + "    pdf.set_auto_page_break(auto=True, margin=15)\n", + "    pdf.add_page()\n", + "    pdf.set_font(\"Arial\", size=12)\n", + "    # fpdf 1.7.2 core fonts are Latin-1 only: replace anything else with '?'\n", + "    # instead of letting output() raise UnicodeEncodeError.\n", + "    pdf.multi_cell(0, 10, text.encode(\"latin-1\", \"replace\").decode(\"latin-1\"))\n", + "    pdf.output(pdf_file_path)" + ], + "metadata": { + "id": "SotTZ6KC1Sqo" + }, + "execution_count": 30, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "save_text_as_pdf(img_to_text, \"Tesseract_output_pdf.pdf\")" + ], + "metadata": { + "id": "KtZa-7581r3m" + }, + "execution_count": 32, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/Computer Vision/Text_Extractor/Tesseract/tesseract_output.png b/Computer Vision/Text_Extractor/Tesseract/tesseract_output.png new file mode 100644 index 00000000..2d44c9ed Binary files /dev/null and b/Computer Vision/Text_Extractor/Tesseract/tesseract_output.png differ diff --git a/Computer Vision/Text_Extractor/readme.md b/Computer Vision/Text_Extractor/readme.md new file mode 100644 index 00000000..0555bc59 --- /dev/null +++ b/Computer Vision/Text_Extractor/readme.md @@ -0,0 +1,23 @@ +This folder contains a text extractor that pulls text out of images in common formats (PNG, JPG, JPEG, etc.). +It is implemented using two different approaches: + +1. PyTesseract: It is a Python wrapper for Google's Tesseract-OCR Engine. + It's widely used for text extraction from clear, high-quality images. + PyTesseract works well with images where the text is easily distinguishable from the background. + + Key Features: + - Works best with clear, high-contrast images. + - Good for images with clean and well-spaced text. + - Simple and fast for high-quality documents. + +2. EasyOCR : It is a deep learning-based OCR library that supports over 80 languages. + It’s great for noisy, low-quality images where PyTesseract may struggle. + EasyOCR uses more advanced algorithms, making it better at handling blurry text, noisy backgrounds, and images with distorted or handwritten text. 
+ + Key Features: + - Works better on noisy or complex images. + - Supports multiple languages and scripts. + - Handles skewed, rotated, and non-uniform text layouts better. + - Can extract text from both printed and handwritten sources. + +