From 2726cd26dde0178c71e3fe9673411ba79957e8b6 Mon Sep 17 00:00:00 2001 From: Sivakumar Mahalingam Date: Tue, 7 Jan 2025 00:01:16 +0400 Subject: [PATCH] added numpy input --- README.md | 14 +++++++------- fastmrz/fastmrz.py | 18 +++++------------- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 5ad24ab..3bc2b2c 100644 --- a/README.md +++ b/README.md @@ -12,13 +12,13 @@ This repository extracts the Machine Readable Zone (MRZ) from document images. The MRZ typically contains important information such as the document holder's name, nationality, document number, date of birth, etc. -**️Features:** +## ️✨Features: -- Detects and extracts the MRZ region from document images -- Contour detection to accurately identify the MRZ area -- Custom trained models using ONNX -- Contains checksum logics for data validation -- Outputs the extracted MRZ region as text/json for further processing or analysis +- 👁️Detects and extracts the MRZ region from document images +- ️🔍Contour detection to accurately identify the MRZ area +- 🪄Custom trained models using ONNX +- 🆗Contains checksum logics for data validation +- 📤Outputs the extracted MRZ region as text/json ## 🛠️Built With @@ -118,7 +118,7 @@ Now, based on the example of a national passport, let us take a closer look at t - [x] Include mrva and mrvb documents - [x] Add wiki page -- [ ] Support numpy array as input +- [x] Support numpy array as input - [x] Support mrz text as input - [x] Support base64 as input - [ ] Support pdf as input diff --git a/fastmrz/fastmrz.py b/fastmrz/fastmrz.py index 26e4e2d..0ec33e9 100644 --- a/fastmrz/fastmrz.py +++ b/fastmrz/fastmrz.py @@ -138,13 +138,6 @@ def _base64_to_image_array(self, base64_string): return image_array - def get_details_old(self, image, ignore_parse=False, include_checkdigit=True): - if not self._is_valid(image): - return {"status": "FAILURE", "message": "Invalid input image"} - mrz_text = self._get_mrz(image) - - return mrz_text if ignore_parse else self._parse_mrz(mrz_text) - def _parse_mrz(self, mrz_text): if not mrz_text: return {"status": "FAILURE", "message": "No MRZ detected"} @@ -269,7 +262,7 @@ def validate_mrz(self, mrz_text): def get_details(self, input_data, input_type="imagepath", ignore_parse=False, include_checkdigit=True): if input_type == "imagepath": if not self._is_valid(input_data): - return {"status": "FAILURE", "message": "Invalid input image"} + raise ValueError("Input is not a valid image file.") image_file = open(input_data, "rb") image_data = image_file.read() image_file.close() @@ -279,12 +272,11 @@ def get_details(self, input_data, input_type="imagepath", ignore_parse=False, in return mrz_text if ignore_parse else self._parse_mrz(mrz_text) elif input_type == "numpy": - # get_details_from_numpy(input_data, ignore_parse=False, include_checkdigit=True) - if isinstance(input_data, np.ndarray): - image = input_data - else: + if not self._is_valid(input_data): raise ValueError("Input is not a valid NumPy array.") - pass + mrz_text = self._get_mrz(input_data) + + return mrz_text if ignore_parse else self._parse_mrz(mrz_text) elif input_type == "base64": image_array = self._base64_to_image_array(input_data) mrz_text = self._get_mrz(image_array)