
Commit

Merge pull request #16 from pythons/feature/support-simple-output
Refactor OCR Result Processing: Simplified and Detailed Output Support
straussmaximilian authored Oct 22, 2024
2 parents 6ba7d9c + 4efb960 commit a445319
Showing 1 changed file with 22 additions and 11 deletions.
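
The new detail flag controls the shape of the output: detail=True (the default) keeps the existing (text, confidence, bounding box) tuples, while detail=False returns plain strings. A minimal usage sketch of the changed helper, assuming ocrmac is installed on macOS with the Vision framework available ("receipt.png" is a placeholder path):

    from ocrmac.ocrmac import text_from_image

    # Default behaviour is unchanged: (text, confidence, [x, y, w, h]) tuples.
    detailed = text_from_image("receipt.png")

    # New in this PR: detail=False returns only the recognized strings.
    simple = text_from_image("receipt.png", detail=False)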
33 changes: 22 additions & 11 deletions ocrmac/ocrmac.py
@@ -55,7 +55,7 @@ def convert_coordinates_pil(bbox, im_width, im_height):
 
 
 def text_from_image(
-    image, recognition_level="accurate", language_preference=None, confidence_threshold=0.0
+    image, recognition_level="accurate", language_preference=None, confidence_threshold=0.0, detail=True
 ) -> List[Tuple[str, float, Tuple[float, float, float, float]]]:
     """
     Helper function to call VNRecognizeTextRequest from Apple's vision framework.
@@ -64,6 +64,7 @@ def text_from_image(
     :param recognition_level: Recognition level. Defaults to 'accurate'.
     :param language_preference: Language preference. Defaults to None.
     :param confidence_threshold: Confidence threshold. Defaults to 0.0.
+    :param detail: Whether to return the confidence and bounding box with each result. Defaults to True.
     :returns: List of tuples containing the text, the confidence and the bounding box.
         Each tuple looks like (text, confidence, (x, y, width, height))
@@ -117,26 +118,29 @@ def text_from_image(
     res = []
     if success:
         for result in req.results():
-            bbox = result.boundingBox()
-            w, h = bbox.size.width, bbox.size.height
-            x, y = bbox.origin.x, bbox.origin.y
-
-            if result.confidence() >= confidence_threshold:
-                res.append((result.text(), result.confidence(), [x, y, w, h]))
+            confidence = result.confidence()
+            if confidence >= confidence_threshold:
+                if detail:
+                    bbox = result.boundingBox()
+                    x, y = bbox.origin.x, bbox.origin.y
+                    w, h = bbox.size.width, bbox.size.height
+                    res.append((result.text(), confidence, [x, y, w, h]))
+                else:
+                    res.append(result.text())
 
     return res
 
 
 class OCR:
-    def __init__(self, image, recognition_level="accurate", language_preference=None, confidence_threshold=0.0):
+    def __init__(self, image, recognition_level="accurate", language_preference=None, confidence_threshold=0.0, detail=True):
         """OCR class to extract text from images.
         Args:
             image (str or PIL image): Path to image or PIL image.
             recognition_level (str, optional): Recognition level. Defaults to 'accurate'.
             language_preference (list, optional): Language preference. Defaults to None.
             confidence_threshold (float, optional): Confidence threshold. Defaults to 0.0.
+            detail (bool, optional): Whether to return the confidence and bounding box with each result. Defaults to True.
         """
 
         if isinstance(image, str):
@@ -151,12 +155,13 @@ def __init__(self, image, recognition_level="accurate", language_preference=None
         self.language_preference = language_preference
         self.confidence_threshold = confidence_threshold
         self.res = None
+        self.detail = detail
 
     def recognize(
         self, px=False
     ) -> List[Tuple[str, float, Tuple[float, float, float, float]]]:
         res = text_from_image(
-            self.image, self.recognition_level, self.language_preference, self.confidence_threshold
+            self.image, self.recognition_level, self.language_preference, self.confidence_threshold, detail=self.detail
         )
         self.res = res
 
@@ -185,6 +190,9 @@ def annotate_matplotlib(
             raise ImportError(
                 "Matplotlib is not available. Please install matplotlib to use this feature."
             )
+
+        if not self.detail:
+            raise ValueError("Please set detail=True to use this feature.")
 
         if self.res is None:
             self.recognize()
@@ -214,6 +222,9 @@ def annotate_PIL(self, color="red", fontsize=12) -> Image.Image:
         Returns:
             Image.Image: Annotated image.
         """
+
+        if not self.detail:
+            raise ValueError("Please set detail=True to use this feature.")
 
         annotated_image = self.image.copy()
 
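
The flag also propagates through the OCR class, and the annotation helpers now guard against the simplified output, since they need bounding boxes to draw. A sketch of the class-based usage under the same assumptions ("receipt.png" is a placeholder path):

    from ocrmac.ocrmac import OCR

    ocr = OCR("receipt.png", detail=False)
    print(ocr.recognize())  # list of plain strings, no confidence or boxes

    # annotate_PIL / annotate_matplotlib require detail=True and now raise:
    # ValueError: Please set detail=True to use this feature.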
