From 283bb928b26a4c768cc037b572d172eaf73fb7ba Mon Sep 17 00:00:00 2001 From: axel-m-d <64596049+axel-m-d@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:57:20 +0100 Subject: [PATCH] Add optional parameters for Tesseract OCR (#1154) --- .../core/src/RPA/core/locators/containers.py | 2 ++ .../main/src/RPA/Desktop/keywords/finder.py | 5 ++++- .../main/src/RPA/Desktop/keywords/text.py | 22 +++++++++++++++---- .../recognition/src/RPA/recognition/ocr.py | 17 +++++++++++--- 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/packages/core/src/RPA/core/locators/containers.py b/packages/core/src/RPA/core/locators/containers.py index e6cda2341f..ba388412be 100644 --- a/packages/core/src/RPA/core/locators/containers.py +++ b/packages/core/src/RPA/core/locators/containers.py @@ -124,6 +124,8 @@ class OcrLocator(Locator): confidence: Optional[float] = None """3-character ISO 639-2 language code. Passed to pytesseract lang parameter.""" language: Optional[str] = None + """Tesseract specific parameters (like psm or oem). 
Passed to pytesseract config parameter.""" + configuration: Optional[str] = None def __post_init__(self): self.text = str(self.text) diff --git a/packages/main/src/RPA/Desktop/keywords/finder.py b/packages/main/src/RPA/Desktop/keywords/finder.py index 3fac2ffa0c..71345044f7 100644 --- a/packages/main/src/RPA/Desktop/keywords/finder.py +++ b/packages/main/src/RPA/Desktop/keywords/finder.py @@ -199,12 +199,14 @@ def _find_ocr(self, base: Geometry, locator: OcrLocator) -> List[Region]: confidence = locator.confidence or self.confidence language = locator.language + configuration = locator.configuration self.logger.info( - "Searching for text '%s' (region: %s, confidence: %.1f, language: %s)", + "Searching for text '%s' (region: %s, confidence: %.1f, language: %s, configuration: %s)", locator.text, region or "display", confidence, language or "Not set", + configuration or "Not set", ) def finder(image: Image.Image) -> List[Region]: @@ -214,6 +216,7 @@ def finder(image: Image.Image) -> List[Region]: confidence=confidence, region=region, language=language, + configuration=configuration, ) return [match["region"] for match in matches] diff --git a/packages/main/src/RPA/Desktop/keywords/text.py b/packages/main/src/RPA/Desktop/keywords/text.py index 65d19fbb61..abf251e9ea 100644 --- a/packages/main/src/RPA/Desktop/keywords/text.py +++ b/packages/main/src/RPA/Desktop/keywords/text.py @@ -21,13 +21,19 @@ class TextKeywords(LibraryContext): """Keywords for reading screen information and content.""" @keyword - def read_text(self, locator: Optional[str] = None, invert: bool = False): + def read_text(self, locator: Optional[str] = None, invert: bool = False, language: str = None, configuration: str = None): """Read text using OCR from the screen, or an area of the screen defined by the given locator. 
:param locator: Location of element to read text from :param invert: Invert image colors, useful for reading white text on dark background + :param language: 3-character ISO 639-2 language code of the text. + This is passed directly to the pytesseract lib in the lang parameter. + See https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html#using-one-language + :param configuration: Tesseract specific parameters like Page Segmentation Mode (psm) or OCR Engine Mode (oem). + This is passed directly to the pytesseract lib in the config parameter. + See https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html Usage examples: @@ -51,10 +57,10 @@ def read_text(self, locator: Optional[str] = None, invert: bool = False): if not isinstance(element, Region): raise ValueError("Locator must resolve to a region") - self.logger.info("Reading text from element: %s", element) + area = "element: %s" % element image = screen.grab(element) else: - self.logger.info("Reading text from screen") + area = "screen" image = screen.grab() screen.log_image(image) @@ -62,8 +68,16 @@ def read_text(self, locator: Optional[str] = None, invert: bool = False): if invert: image = ImageOps.invert(image) + self.logger.info( + "Reading text from %s (invert: %s, language: %s, configuration: %s)", + area, + invert or "Not set", + language or "Not set", + configuration or "Not set", + ) + start_time = time.time() - text = ocr.read(image) + text = ocr.read(image, language, configuration) self.logger.info("Read text in %.2f seconds", time.time() - start_time) return text diff --git a/packages/recognition/src/RPA/recognition/ocr.py b/packages/recognition/src/RPA/recognition/ocr.py index b277b7c26b..fabee1ffa5 100644 --- a/packages/recognition/src/RPA/recognition/ocr.py +++ b/packages/recognition/src/RPA/recognition/ocr.py @@ -23,15 +23,25 @@ DEFAULT_CONFIDENCE = 80.0 -def read(image: Union[Image.Image, Path]): +def read( + image: Union[Image.Image, Path], + language: Optional[str] = None, + 
configuration: Optional[str] = None +): """Scan image for text and return it as one string. :param image: Path to image or Image object + :param language: 3-character ISO 639-2 language code of the text. + This is passed directly to the pytesseract lib in the lang parameter. + See https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html#using-one-language + :param configuration: Tesseract specific parameters like Page Segmentation Mode (psm) or OCR Engine Mode (oem). + This is passed directly to the pytesseract lib in the config parameter. + See https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html """ image = to_image(image) try: - return pytesseract.image_to_string(image).strip() + return pytesseract.image_to_string(image, lang=language, config=configuration).strip() except TesseractNotFoundError as err: raise EnvironmentError(INSTALL_PROMPT) from err @@ -42,6 +52,7 @@ def find( confidence: float = DEFAULT_CONFIDENCE, region: Optional[Region] = None, language: Optional[str] = None, + configuration: Optional[str] = None ): """Scan image for text and return a list of regions that contain it (or something close to it). @@ -67,7 +78,7 @@ def find( try: data = pytesseract.image_to_data( - image, lang=language, config=configuration, output_type=pytesseract.Output.DICT ) except TesseractNotFoundError as err: raise EnvironmentError(INSTALL_PROMPT) from err