Add Swarmauri PytesseractImg2Text community package

swarmauri · Jan 14, 2025 · 58f440a · 58f440a
1 parent 877c8f5
commit 58f440a
Show file tree

Hide file tree

Showing 6 changed files with 398 additions and 2 deletions.
diff --git a/pkgs/community/swarmauri_community/pyproject.toml b/pkgs/community/swarmauri_community/pyproject.toml
@@ -30,7 +30,7 @@ captcha = "^0.6.0"
 #folium = { version = "^0.18.0", optional = true }
 #gensim = { version = "^4.3.3", optional = true }
 #gradio = { version = "^5.4.0", optional = true }
-leptonai = { version = "^0.22.0", optional = true }
+#leptonai = { version = "^0.22.0", optional = true }
 
 neo4j = { version = "^5.25.0", optional = true }
 #nltk = { version = "^3.9.1", optional = true }
@@ -48,7 +48,7 @@ typing_extensions = "^4.12.2"
 #tiktoken = { version = "^0.8.0", optional = true }
 PyMuPDF = { version = "^1.24.12", optional = true }
 #qdrant-client = { version = "^1.12.0", optional = true }
-pinecone-client = { version = "^5.0.1", optional = true, extras = ["grpc"] }
+#pinecone-client = { version = "^5.0.1", optional = true, extras = ["grpc"] }
 pypdf = { version = "^5.0.1", optional = true }
 pypdftk = { version = "^0.5", optional = true }
 weaviate-client = { version = "^4.9.2", optional = true }

diff --git a/pkgs/community/swarmauri_llm_communitypytesseractImg2text/README.md b/pkgs/community/swarmauri_llm_communitypytesseractImg2text/README.md
@@ -0,0 +1 @@
+# Swarmauri PytesseractImg2Text Community Package
diff --git a/pkgs/community/swarmauri_llm_communitypytesseractImg2text/pyproject.toml b/pkgs/community/swarmauri_llm_communitypytesseractImg2text/pyproject.toml
@@ -0,0 +1,57 @@
+[tool.poetry]
+name = "swarmauri_llm_communitypytesseractImg2text"
+version = "0.6.0.dev1"
+description = "PytesseractImg2Text Model"
+authors = ["Jacob Stewart <[email protected]>"]
+license = "Apache-2.0"
+readme = "README.md"
+repository = "http://github.com/swarmauri/swarmauri-sdk"
+classifiers = [
+    "License :: OSI Approved :: Apache Software License",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12"
+]
+
+[tool.poetry.dependencies]
+python = ">=3.10,<3.13"
+
+# Swarmauri
+swarmauri_core = { path = "../../core" }
+swarmauri_base = { path = "../../base" }
+
+# Dependencies
+pytesseract = "^0.3.13"
+
+
+[tool.poetry.group.dev.dependencies]
+flake8 = "^7.0"
+pytest = "^8.0"
+pytest-asyncio = ">=0.24.0"
+pytest-xdist = "^3.6.1"
+pytest-json-report = "^1.5.0"
+python-dotenv = "*"
+requests = "^2.32.3"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+norecursedirs = ["combined", "scripts"]
+
+markers = [
+    "test: standard test",
+    "unit: Unit tests",
+    "integration: Integration tests",
+    "acceptance: Acceptance tests",
+    "experimental: Experimental tests"
+]
+log_cli = true
+log_cli_level = "INFO"
+log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
+log_cli_date_format = "%Y-%m-%d %H:%M:%S"
+asyncio_default_fixture_loop_scope = "function"
+
+# Entry point registering the model class shipped in this package.
+# NOTE(review): the group name "swarmauri.llms" is assumed from the class's
+# LLMBase parent — confirm against the plugin loader's expected groups.
+[tool.poetry.plugins."swarmauri.llms"]
+PytesseractImg2TextModel = "swarmauri_llm_communitypytesseractImg2text.PytesseractImg2TextModel:PytesseractImg2TextModel"
diff --git a/...ytesseractImg2text/swarmauri_llm_communitypytesseractImg2text/PytesseractImg2TextModel.py b/...ytesseractImg2text/swarmauri_llm_communitypytesseractImg2text/PytesseractImg2TextModel.py
@@ -0,0 +1,162 @@
+import os
+import asyncio
+from typing import List, Literal, Union
+from pydantic import Field, ConfigDict
+from PIL import Image
+import pytesseract
+from io import BytesIO
+from swarmauri_base.llms.LLMBase import LLMBase
+
+
+class PytesseractImg2TextModel(LLMBase):
+    """
+    A model for performing OCR (Optical Character Recognition) using Pytesseract.
+    It can process both local images and image bytes, returning extracted text.
+    Requires Tesseract-OCR to be installed on the system.
+    """
+
+    tesseract_cmd: str = Field(
+        default_factory=lambda: os.environ.get(
+            "TESSERACT_CMD",
+            ("/usr/bin/tesseract" if os.path.exists("/usr/bin/tesseract") else None),
+        )
+    )
+    type: Literal["PytesseractImg2TextModel"] = "PytesseractImg2TextModel"
+    language: str = Field(default="eng")
+    config: str = Field(default="")  # Custom configuration string
+    model_config = ConfigDict(protected_namespaces=())
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        pytesseract.pytesseract.tesseract_cmd = self.tesseract_cmd
+
+    def _process_image(self, image: Union[str, bytes, Image.Image], **kwargs) -> str:
+        """Process an image and return extracted text."""
+        try:
+            # Handle different input types
+            if isinstance(image, str):
+                # If image is a file path
+                img = Image.open(image)
+            elif isinstance(image, bytes):
+                # If image is bytes
+                img = Image.open(BytesIO(image))
+            elif isinstance(image, Image.Image):
+                # If image is already a PIL Image
+                img = image
+            else:
+                raise ValueError("Unsupported image format")
+
+            # Extract text using pytesseract
+            custom_config = kwargs.get("config", self.config)
+            lang = kwargs.get("language", self.language)
+
+            text = pytesseract.image_to_string(img, lang=lang, config=custom_config)
+
+            return text.strip()
+
+        except Exception as e:
+            raise Exception(f"OCR processing failed: {str(e)}")
+
+    def extract_text(self, image: Union[str, bytes, Image.Image], **kwargs) -> str:
+        """
+        Extracts text from an image.
+
+        Args:
+            image: Can be a file path, bytes, or PIL Image
+            **kwargs: Additional arguments for OCR processing
+                     - language: OCR language (e.g., 'eng', 'fra', etc.)
+                     - config: Custom Tesseract configuration string
+
+        Returns:
+            Extracted text as string
+        """
+        return self._process_image(image, **kwargs)
+
+    async def aextract_text(
+        self, image: Union[str, bytes, Image.Image], **kwargs
+    ) -> str:
+        """
+        Asynchronously extracts text from an image.
+        """
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(None, self.extract_text, image, **kwargs)
+
+    def batch(
+        self, images: List[Union[str, bytes, Image.Image]], **kwargs
+    ) -> List[str]:
+        """
+        Process multiple images in batch.
+
+        Args:
+            images: List of images (file paths, bytes, or PIL Images)
+            **kwargs: Additional arguments for OCR processing
+
+        Returns:
+            List of extracted texts
+        """
+        results = []
+        for image in images:
+            text = self.extract_text(image=image, **kwargs)
+            results.append(text)
+        return results
+
+    async def abatch(
+        self,
+        images: List[Union[str, bytes, Image.Image]],
+        max_concurrent: int = 5,
+        **kwargs,
+    ) -> List[str]:
+        """
+        Asynchronously process multiple images in batch.
+
+        Args:
+            images: List of images (file paths, bytes, or PIL Images)
+            max_concurrent: Maximum number of concurrent operations
+            **kwargs: Additional arguments for OCR processing
+
+        Returns:
+            List of extracted texts
+        """
+        semaphore = asyncio.Semaphore(max_concurrent)
+
+        async def process_image(image):
+            async with semaphore:
+                return await self.aextract_text(image=image, **kwargs)
+
+        tasks = [process_image(image) for image in images]
+        return await asyncio.gather(*tasks)
+
+    def get_supported_languages(self) -> List[str]:
+        """
+        Returns a list of supported languages by executing 'tesseract --list-langs' command.
+
+        Returns:
+            List[str]: List of available language codes (e.g., ['eng', 'osd'])
+
+        Raises:
+            Exception: If the command execution fails or returns unexpected output
+        """
+        try:
+            # Execute tesseract command to list languages
+            import subprocess
+
+            result = subprocess.run(
+                [self.tesseract_cmd, "--list-langs"],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+
+            # Parse the output
+            output_lines = result.stdout.strip().split("\n")
+
+            # Skip the first line which is the directory info
+            # and filter out empty lines
+            languages = [lang.strip() for lang in output_lines[1:] if lang.strip()]
+
+            return languages
+
+        except subprocess.CalledProcessError as e:
+            raise Exception(f"Failed to get language list from Tesseract: {e.stderr}")
+        except Exception as e:
+            raise Exception(f"Error getting supported languages: {str(e)}")
diff --git a/...i_llm_communitypytesseractImg2text/swarmauri_llm_communitypytesseractImg2text/__init__.py b/...i_llm_communitypytesseractImg2text/swarmauri_llm_communitypytesseractImg2text/__init__.py
@@ -0,0 +1,12 @@
+from .PytesseractImg2TextModel import PytesseractImg2TextModel
+
+# Version string exposed by the package at import time.
+# NOTE(review): this package's pyproject.toml declares version "0.6.0.dev1" —
+# confirm which of the two values is intended and keep them in sync.
+__version__ = "0.6.0.dev26"
+# Markdown-formatted description of the plugin with project links.
+__long_desc__ = """
+
+# Swarmauri PytesseractImg2Text Model Plugin
+
+Visit us at: https://swarmauri.com
+Follow us at: https://github.com/swarmauri
+Star us at: https://github.com/swarmauri/swarmauri-sdk
+
+"""