-
Notifications
You must be signed in to change notification settings - Fork 43
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Swarmauri PytesseractImg2Text community package
- Loading branch information
1 parent
877c8f5
commit 58f440a
Showing
6 changed files
with
398 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 change: 1 addition & 0 deletions
1
pkgs/community/swarmauri_llm_communitypytesseractImg2text/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Swarmauri PytesseractImg2Text Community Package |
57 changes: 57 additions & 0 deletions
57
pkgs/community/swarmauri_llm_communitypytesseractImg2text/pyproject.toml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
[tool.poetry] | ||
name = "swarmauri_llm_communitypytesseractImg2text" | ||
version = "0.6.0.dev1" | ||
description = "PytesseractImg2Text Model" | ||
authors = ["Jacob Stewart <[email protected]>"] | ||
license = "Apache-2.0" | ||
readme = "README.md" | ||
repository = "http://github.com/swarmauri/swarmauri-sdk" | ||
classifiers = [ | ||
"License :: OSI Approved :: Apache Software License", | ||
"Programming Language :: Python :: 3.10", | ||
"Programming Language :: Python :: 3.11", | ||
"Programming Language :: Python :: 3.12" | ||
] | ||
|
||
[tool.poetry.dependencies] | ||
python = ">=3.10,<3.13" | ||
|
||
# Swarmauri | ||
swarmauri_core = { path = "../../core" } | ||
swarmauri_base = { path = "../../base" } | ||
|
||
# Dependencies | ||
pytesseract = "^0.3.13" | ||
|
||
|
||
[tool.poetry.group.dev.dependencies] | ||
flake8 = "^7.0" | ||
pytest = "^8.0" | ||
pytest-asyncio = ">=0.24.0" | ||
pytest-xdist = "^3.6.1" | ||
pytest-json-report = "^1.5.0" | ||
python-dotenv = "*" | ||
requests = "^2.32.3" | ||
|
||
[build-system] | ||
requires = ["poetry-core>=1.0.0"] | ||
build-backend = "poetry.core.masonry.api" | ||
|
||
[tool.pytest.ini_options] | ||
norecursedirs = ["combined", "scripts"] | ||
|
||
markers = [ | ||
"test: standard test", | ||
"unit: Unit tests", | ||
"integration: Integration tests", | ||
"acceptance: Acceptance tests", | ||
"experimental: Experimental tests" | ||
] | ||
log_cli = true | ||
log_cli_level = "INFO" | ||
log_cli_format = "%(asctime)s [%(levelname)s] %(message)s" | ||
log_cli_date_format = "%Y-%m-%d %H:%M:%S" | ||
asyncio_default_fixture_loop_scope = "function" | ||
|
||
# Entry point that registers this package's model class.
# The previous entry was a template leftover pointing at
# swm_example_community_package.ExampleCommunityAgent, which this package does not ship.
# NOTE(review): group name "swarmauri.llms" is inferred from the package name
# (swarmauri_llm_*) — confirm against the SDK's plugin registry.
[tool.poetry.plugins."swarmauri.llms"]
PytesseractImg2TextModel = "swarmauri_llm_communitypytesseractImg2text.PytesseractImg2TextModel:PytesseractImg2TextModel"
162 changes: 162 additions & 0 deletions
162
...ytesseractImg2text/swarmauri_llm_communitypytesseractImg2text/PytesseractImg2TextModel.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
import os | ||
import asyncio | ||
from typing import List, Literal, Union | ||
from pydantic import Field, ConfigDict | ||
from PIL import Image | ||
import pytesseract | ||
from io import BytesIO | ||
from swarmauri_base.llms.LLMBase import LLMBase | ||
|
||
|
||
class PytesseractImg2TextModel(LLMBase):
    """
    OCR (Optical Character Recognition) model backed by Pytesseract.

    Accepts a file path, raw image bytes, or an already-loaded PIL image and
    returns the text Tesseract extracts from it. Synchronous, asynchronous
    and batch entry points are provided.

    Requires the Tesseract-OCR executable to be installed on the system.
    """

    # Path of the Tesseract executable. Resolved from the TESSERACT_CMD
    # environment variable, then from /usr/bin/tesseract if that file exists.
    # May be None when neither is available, hence the optional annotation.
    tesseract_cmd: Union[str, None] = Field(
        default_factory=lambda: os.environ.get(
            "TESSERACT_CMD",
            ("/usr/bin/tesseract" if os.path.exists("/usr/bin/tesseract") else None),
        )
    )
    type: Literal["PytesseractImg2TextModel"] = "PytesseractImg2TextModel"
    # Default Tesseract language code passed as ``lang``.
    language: str = Field(default="eng")
    # Default custom Tesseract configuration string passed as ``config``.
    config: str = Field(default="")
    model_config = ConfigDict(protected_namespaces=())

    def __init__(self, **data):
        """
        Build the model and point pytesseract at the configured executable.

        Args:
            **data: Field values forwarded to the base model constructor.
        """
        super().__init__(**data)
        # This assigns a module-level attribute of pytesseract, so it affects
        # every user of pytesseract in the process. Only override it when a
        # command was actually resolved; otherwise keep pytesseract's own
        # default instead of replacing it with None.
        if self.tesseract_cmd:
            pytesseract.pytesseract.tesseract_cmd = self.tesseract_cmd

    def _process_image(self, image: Union[str, bytes, Image.Image], **kwargs) -> str:
        """
        Run OCR on a single image and return the stripped text.

        Args:
            image: File path, raw image bytes, or a PIL image.
            **kwargs: Optional per-call overrides:
                - language: OCR language code (defaults to ``self.language``)
                - config: Tesseract configuration string (defaults to
                  ``self.config``)

        Returns:
            The extracted text with leading/trailing whitespace removed.

        Raises:
            Exception: If the input type is unsupported or OCR fails. The
                underlying error is chained as ``__cause__``.
        """
        try:
            custom_config = kwargs.get("config", self.config)
            lang = kwargs.get("language", self.language)

            if isinstance(image, Image.Image):
                # Caller-owned image: use it as-is and leave it open.
                text = pytesseract.image_to_string(
                    image, lang=lang, config=custom_config
                )
            elif isinstance(image, (str, bytes)):
                source = image if isinstance(image, str) else BytesIO(image)
                # Images opened here are closed here so that file handles
                # are not leaked (matters for large batches).
                with Image.open(source) as img:
                    text = pytesseract.image_to_string(
                        img, lang=lang, config=custom_config
                    )
            else:
                raise ValueError("Unsupported image format")

            return text.strip()

        except Exception as e:
            # Keep the original exception type and message for callers,
            # but chain the cause so the real failure stays visible.
            raise Exception(f"OCR processing failed: {str(e)}") from e

    def extract_text(self, image: Union[str, bytes, Image.Image], **kwargs) -> str:
        """
        Extract text from an image.

        Args:
            image: Can be a file path, bytes, or PIL Image.
            **kwargs: Additional arguments for OCR processing:
                - language: OCR language (e.g., 'eng', 'fra', etc.)
                - config: Custom Tesseract configuration string

        Returns:
            Extracted text as string.

        Raises:
            Exception: If OCR processing fails.
        """
        return self._process_image(image, **kwargs)

    async def aextract_text(
        self, image: Union[str, bytes, Image.Image], **kwargs
    ) -> str:
        """
        Asynchronously extract text from an image.

        The blocking OCR call is run in a worker thread so the event loop is
        not blocked.

        Args:
            image: Can be a file path, bytes, or PIL Image.
            **kwargs: Same options as :meth:`extract_text`.

        Returns:
            Extracted text as string.
        """
        # loop.run_in_executor() only forwards positional arguments, so
        # passing **kwargs to it raised TypeError. asyncio.to_thread forwards
        # keyword arguments and uses the running loop's default executor.
        return await asyncio.to_thread(self.extract_text, image, **kwargs)

    def batch(
        self, images: List[Union[str, bytes, Image.Image]], **kwargs
    ) -> List[str]:
        """
        Process multiple images sequentially.

        Args:
            images: List of images (file paths, bytes, or PIL Images).
            **kwargs: Additional arguments for OCR processing.

        Returns:
            List of extracted texts, in the same order as ``images``.
        """
        return [self.extract_text(image=image, **kwargs) for image in images]

    async def abatch(
        self,
        images: List[Union[str, bytes, Image.Image]],
        max_concurrent: int = 5,
        **kwargs,
    ) -> List[str]:
        """
        Asynchronously process multiple images.

        Args:
            images: List of images (file paths, bytes, or PIL Images).
            max_concurrent: Maximum number of concurrent operations; must be
                at least 1.
            **kwargs: Additional arguments for OCR processing.

        Returns:
            List of extracted texts, in the same order as ``images``.

        Raises:
            ValueError: If ``max_concurrent`` is less than 1.
        """
        # A semaphore initialised with 0 can never be acquired, which would
        # make every task wait forever; fail fast instead.
        if max_concurrent < 1:
            raise ValueError("max_concurrent must be at least 1")

        semaphore = asyncio.Semaphore(max_concurrent)

        async def process_image(image):
            async with semaphore:
                return await self.aextract_text(image=image, **kwargs)

        tasks = [process_image(image) for image in images]
        return await asyncio.gather(*tasks)

    def get_supported_languages(self) -> List[str]:
        """
        Return the languages reported by ``tesseract --list-langs``.

        Returns:
            List[str]: Available language codes (e.g., ['eng', 'osd']).

        Raises:
            Exception: If the command cannot be run or exits with an error.
                The underlying error is chained as ``__cause__``.
        """
        import subprocess

        # Fall back to pytesseract's configured command when no explicit
        # path was resolved for this instance.
        command = self.tesseract_cmd or pytesseract.pytesseract.tesseract_cmd

        try:
            result = subprocess.run(
                [command, "--list-langs"],
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            raise Exception(
                f"Failed to get language list from Tesseract: {e.stderr}"
            ) from e
        except Exception as e:
            raise Exception(f"Error getting supported languages: {str(e)}") from e

        # The first output line is the tessdata directory header, not a
        # language; skip it and drop blank lines.
        output_lines = result.stdout.strip().splitlines()
        return [lang.strip() for lang in output_lines[1:] if lang.strip()]
12 changes: 12 additions & 0 deletions
12
...i_llm_communitypytesseractImg2text/swarmauri_llm_communitypytesseractImg2text/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
from .PytesseractImg2TextModel import PytesseractImg2TextModel | ||
|
||
# Package version string exposed at import time.
# NOTE(review): pyproject.toml in this same commit declares version
# "0.6.0.dev1" — confirm which value is authoritative and align them.
__version__ = "0.6.0.dev26"
# Long-form package description (Markdown text with project links).
__long_desc__ = """
# Swarmauri PytesseractImg2Text Model Plugin
Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk
"""
Oops, something went wrong.