Skip to content

Commit

Permalink
Add optional parameters for Tesseract OCR (#1154)
Browse files Browse the repository at this point in the history
  • Loading branch information
axel-m-d authored Dec 6, 2024
1 parent 445c1fa commit 283bb92
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 8 deletions.
2 changes: 2 additions & 0 deletions packages/core/src/RPA/core/locators/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ class OcrLocator(Locator):
confidence: Optional[float] = None
"""3-character ISO 639-2 language code. Passed to pytesseract lang parameter."""
language: Optional[str] = None
"""Tesseract-specific parameters (like psm or oem). Passed to pytesseract config parameter."""
configuration: Optional[str] = None

def __post_init__(self):
self.text = str(self.text)
Expand Down
5 changes: 4 additions & 1 deletion packages/main/src/RPA/Desktop/keywords/finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,12 +199,14 @@ def _find_ocr(self, base: Geometry, locator: OcrLocator) -> List[Region]:

confidence = locator.confidence or self.confidence
language = locator.language
configuration = locator.configuration
self.logger.info(
"Searching for text '%s' (region: %s, confidence: %.1f, language: %s)",
"Searching for text '%s' (region: %s, confidence: %.1f, language: %s, configuration: %s)",
locator.text,
region or "display",
confidence,
language or "Not set",
configuration or "Not set",
)

def finder(image: Image.Image) -> List[Region]:
Expand All @@ -214,6 +216,7 @@ def finder(image: Image.Image) -> List[Region]:
confidence=confidence,
region=region,
language=language,
configuration=configuration,
)

return [match["region"] for match in matches]
Expand Down
22 changes: 18 additions & 4 deletions packages/main/src/RPA/Desktop/keywords/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,19 @@ class TextKeywords(LibraryContext):
"""Keywords for reading screen information and content."""

@keyword
def read_text(self, locator: Optional[str] = None, invert: bool = False):
def read_text(self, locator: Optional[str] = None, invert: bool = False, language: str = None, configuration: str = None):
"""Read text using OCR from the screen, or an area of the
screen defined by the given locator.
:param locator: Location of element to read text from
:param invert: Invert image colors, useful for reading white text
on dark background
:param language: 3-character ISO 639-2 language code of the text.
This is passed directly to the pytesseract lib in the lang parameter.
See https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html#using-one-language
:param configuration: Tesseract-specific parameters such as Page Segmentation Mode (psm) or OCR Engine Mode (oem).
This is passed directly to the pytesseract lib in the config parameter.
See https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html
Usage examples:
Expand All @@ -51,19 +57,27 @@ def read_text(self, locator: Optional[str] = None, invert: bool = False):
if not isinstance(element, Region):
raise ValueError("Locator must resolve to a region")

self.logger.info("Reading text from element: %s", element)
area = "element: %s" % element
image = screen.grab(element)
else:
self.logger.info("Reading text from screen")
area = "screen"
image = screen.grab()

screen.log_image(image)

if invert:
image = ImageOps.invert(image)

self.logger.info(
"Reading text from %s (invert: %s, language: %s, configuration: %s)",
area,
invert or "Not set",
language or "Not set",
configuration or "Not set",
)

start_time = time.time()
text = ocr.read(image)
text = ocr.read(image, language, configuration)
self.logger.info("Read text in %.2f seconds", time.time() - start_time)

return text
17 changes: 14 additions & 3 deletions packages/recognition/src/RPA/recognition/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,25 @@
DEFAULT_CONFIDENCE = 80.0


def read(image: Union[Image.Image, Path]):
def read(
image: Union[Image.Image, Path],
language: Optional[str] = None,
configuration: Optional[str] = None
):
"""Scan image for text and return it as one string.
:param image: Path to image or Image object
:param language: 3-character ISO 639-2 language code of the text.
This is passed directly to the pytesseract lib in the lang parameter.
See https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html#using-one-language
:param configuration: Tesseract-specific parameters such as Page Segmentation Mode (psm) or OCR Engine Mode (oem).
This is passed directly to the pytesseract lib in the config parameter.
See https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html
"""
image = to_image(image)

try:
return pytesseract.image_to_string(image).strip()
return pytesseract.image_to_string(image, lang=language, config=configuration).strip()
except TesseractNotFoundError as err:
raise EnvironmentError(INSTALL_PROMPT) from err

Expand All @@ -42,6 +52,7 @@ def find(
confidence: float = DEFAULT_CONFIDENCE,
region: Optional[Region] = None,
language: Optional[str] = None,
configuration: Optional[str] = None
):
"""Scan image for text and return a list of regions
that contain it (or something close to it).
Expand All @@ -67,7 +78,7 @@ def find(

try:
data = pytesseract.image_to_data(
image, lang=language, output_type=pytesseract.Output.DICT
image, lang=language, config=configuration, output_type=pytesseract.Output.DICT
)
except TesseractNotFoundError as err:
raise EnvironmentError(INSTALL_PROMPT) from err
Expand Down

0 comments on commit 283bb92

Please sign in to comment.