diff --git a/deploy/hubserving/readme.md b/deploy/hubserving/readme.md index 183a25912c..2264e6eaa4 100755 --- a/deploy/hubserving/readme.md +++ b/deploy/hubserving/readme.md @@ -59,6 +59,7 @@ pip3 install paddlehub==2.1.0 --upgrade -i https://mirror.baidu.com/pypi/simple 检测模型:./inference/ch_PP-OCRv3_det_infer/ 识别模型:./inference/ch_PP-OCRv3_rec_infer/ 方向分类器:./inference/ch_ppocr_mobile_v2.0_cls_infer/ +版面分析模型: 表格结构识别模型:./inference/en_ppocr_mobile_v2.0_table_structure_infer/ ``` @@ -172,7 +173,7 @@ hub serving start -c deploy/hubserving/ocr_system/config.json ## 3. 发送预测请求 配置好服务端,可使用以下命令发送预测请求,获取预测结果: -```python tools/test_hubserving.py server_url image_path``` +```python tools/test_hubserving.py --server_url=server_url --image_dir=image_path``` 需要给脚本传递2个参数: - **server_url**:服务地址,格式为 diff --git a/deploy/hubserving/readme_en.md b/deploy/hubserving/readme_en.md index 27eccbb5e9..6463fcb54b 100755 --- a/deploy/hubserving/readme_en.md +++ b/deploy/hubserving/readme_en.md @@ -61,6 +61,7 @@ Before installing the service module, you need to prepare the inference model an text detection model: ./inference/ch_PP-OCRv3_det_infer/ text recognition model: ./inference/ch_PP-OCRv3_rec_infer/ text angle classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/ +layout parse model: tanle recognition: ./inference/en_ppocr_mobile_v2.0_table_structure_infer/ ``` @@ -177,7 +178,7 @@ hub serving start -c deploy/hubserving/ocr_system/config.json ## 3. Send prediction requests After the service starts, you can use the following command to send a prediction request to obtain the prediction result: ```shell -python tools/test_hubserving.py server_url image_path +python tools/test_hubserving.py --server_url=server_url --image_dir=image_path ``` Two parameters need to be passed to the script: diff --git a/deploy/hubserving/structure_system/module.py b/deploy/hubserving/structure_system/module.py index 92846edc66..61c93bb146 100644 --- a/deploy/hubserving/structure_system/module.py +++ b/deploy/hubserving/structure_system/module.py @@ -119,7 +119,7 @@ def predict(self, images=[], paths=[]): all_results.append([]) continue starttime = time.time() - res = self.table_sys(img) + res, _ = self.table_sys(img) elapse = time.time() - starttime logger.info("Predict time: {}".format(elapse)) @@ -144,6 +144,6 @@ def serving_method(self, images, **kwargs): if __name__ == '__main__': structure_system = StructureSystem() structure_system._initialize() - image_path = ['./doc/table/1.png'] + image_path = ['./ppstructure/docs/table/1.png'] res = structure_system.predict(paths=image_path) print(res) diff --git a/deploy/hubserving/structure_system/params.py b/deploy/hubserving/structure_system/params.py index 3cc6a2794f..fe691fbc2d 100755 --- a/deploy/hubserving/structure_system/params.py +++ b/deploy/hubserving/structure_system/params.py @@ -23,8 +23,10 @@ def read_params(): cfg = table_read_params() # params for layout parser model - cfg.layout_path_model = 'lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config' - cfg.layout_label_map = None + cfg.layout_model_dir = '' + cfg.layout_dict_path = './ppocr/utils/dict/layout_publaynet_dict.txt' + cfg.layout_score_threshold = 0.5 + cfg.layout_nms_threshold = 0.5 cfg.mode = 'structure' cfg.output = './output' diff --git a/deploy/hubserving/structure_table/module.py b/deploy/hubserving/structure_table/module.py index 00393daa03..b4432b2d7b 100644 --- a/deploy/hubserving/structure_table/module.py +++ b/deploy/hubserving/structure_table/module.py @@ -118,11 +118,11 @@ def predict(self, images=[], paths=[]): all_results.append([]) continue starttime = time.time() - pred_html = self.table_sys(img) + res, _ = self.table_sys(img) elapse = time.time() - starttime logger.info("Predict time: {}".format(elapse)) - all_results.append({'html': pred_html}) + all_results.append({'html': res['html']}) return all_results @serving @@ -138,6 +138,6 @@ def serving_method(self, images, **kwargs): if __name__ == '__main__': table_system = TableSystem() table_system._initialize() - image_path = ['./doc/table/table.jpg'] + image_path = ['./ppstructure/docs/table/table.jpg'] res = table_system.predict(paths=image_path) print(res) diff --git a/doc/doc_en/quickstart_en.md b/doc/doc_en/quickstart_en.md index c678dc4762..9e1de839ff 100644 --- a/doc/doc_en/quickstart_en.md +++ b/doc/doc_en/quickstart_en.md @@ -3,14 +3,14 @@ **Note:** This tutorial mainly introduces the usage of PP-OCR series models, please refer to [PP-Structure Quick Start](../../ppstructure/docs/quickstart_en.md) for the quick use of document analysis related functions. - [1. Installation](#1-installation) - - [1.1 Install PaddlePaddle](#11-install-paddlepaddle) - - [1.2 Install PaddleOCR Whl Package](#12-install-paddleocr-whl-package) + - [1.1 Install PaddlePaddle](#11-install-paddlepaddle) + - [1.2 Install PaddleOCR Whl Package](#12-install-paddleocr-whl-package) - [2. Easy-to-Use](#2-easy-to-use) - - [2.1 Use by Command Line](#21-use-by-command-line) - - [2.1.1 Chinese and English Model](#211-chinese-and-english-model) - - [2.1.2 Multi-language Model](#212-multi-language-model) - - [2.2 Use by Code](#22-use-by-code) - - [2.2.1 Chinese & English Model and Multilingual Model](#221-chinese--english-model-and-multilingual-model) + - [2.1 Use by Command Line](#21-use-by-command-line) + - [2.1.1 Chinese and English Model](#211-chinese-and-english-model) + - [2.1.2 Multi-language Model](#212-multi-language-model) + - [2.2 Use by Code](#22-use-by-code) + - [2.2.1 Chinese & English Model and Multilingual Model](#221-chinese--english-model-and-multilingual-model) - [3. Summary](#3-summary) @@ -51,12 +51,6 @@ pip install "paddleocr>=2.0.1" # Recommend to use version 2.0.1+ Reference: [Solve shapely installation on windows](https://stackoverflow.com/questions/44398265/install-shapely-oserror-winerror-126-the-specified-module-could-not-be-found) -- **For layout analysis users**, run the following command to install **Layout-Parser** - - ```bash - pip3 install -U https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl - ``` - ## 2. Easy-to-Use diff --git a/ppstructure/docs/installation.md b/ppstructure/docs/installation.md index 155baf29de..3f564cb2dd 100644 --- a/ppstructure/docs/installation.md +++ b/ppstructure/docs/installation.md @@ -1,8 +1,7 @@ - [快速安装](#快速安装) - [1. PaddlePaddle 和 PaddleOCR](#1-paddlepaddle-和-paddleocr) - [2. 安装其他依赖](#2-安装其他依赖) - - [2.1 版面分析所需 Layout-Parser](#21-版面分析所需--layout-parser) - - [2.2 VQA所需依赖](#22--vqa所需依赖) + - [2.1 VQA所需依赖](#21--vqa所需依赖) # 快速安装 @@ -12,14 +11,7 @@ ## 2. 安装其他依赖 -### 2.1 版面分析所需 Layout-Parser - -Layout-Parser 可通过如下命令安装 - -```bash -pip3 install -U https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl -``` -### 2.2 VQA所需依赖 +### 2.1 VQA所需依赖 * paddleocr ```bash diff --git a/ppstructure/docs/quickstart.md b/ppstructure/docs/quickstart.md index 31e5941624..d206d1d521 100644 --- a/ppstructure/docs/quickstart.md +++ b/ppstructure/docs/quickstart.md @@ -1,21 +1,21 @@ # PP-Structure 快速开始 -- [1. 安装依赖包](#1) -- [2. 便捷使用](#2) - - [2.1 命令行使用](#21) - - [2.1.1 版面分析+表格识别](#211) - - [2.1.2 版面分析](#212) - - [2.1.3 表格识别](#213) - - [2.1.4 DocVQA](#214) - - [2.2 代码使用](#22) - - [2.2.1 版面分析+表格识别](#221) - - [2.2.2 版面分析](#222) - - [2.2.3 表格识别](#223) - - [2.2.4 DocVQA](#224) - - [2.3 返回结果说明](#23) - - [2.3.1 版面分析+表格识别](#231) - - [2.3.2 DocVQA](#232) - - [2.4 参数说明](#24) +- [1. 安装依赖包](#1-安装依赖包) +- [2. 便捷使用](#2-便捷使用) + - [2.1 命令行使用](#21-命令行使用) + - [2.1.1 版面分析+表格识别](#211-版面分析表格识别) + - [2.1.2 版面分析](#212-版面分析) + - [2.1.3 表格识别](#213-表格识别) + - [2.1.4 DocVQA](#214-docvqa) + - [2.2 代码使用](#22-代码使用) + - [2.2.1 版面分析+表格识别](#221-版面分析表格识别) + - [2.2.2 版面分析](#222-版面分析) + - [2.2.3 表格识别](#223-表格识别) + - [2.2.4 DocVQA](#224-docvqa) + - [2.3 返回结果说明](#23-返回结果说明) + - [2.3.1 版面分析+表格识别](#231-版面分析表格识别) + - [2.3.2 DocVQA](#232-docvqa) + - [2.4 参数说明](#24-参数说明) @@ -24,8 +24,6 @@ ```bash # 安装 paddleocr,推荐使用2.5+版本 pip3 install "paddleocr>=2.5" -# 安装 版面分析依赖包layoutparser(如不需要版面分析功能,可跳过) -pip3 install -U https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl # 安装 DocVQA依赖包paddlenlp(如不需要DocVQA功能,可跳过) pip install paddlenlp diff --git a/ppstructure/docs/quickstart_en.md b/ppstructure/docs/quickstart_en.md index 1f78b43ea3..98d8d2fc3f 100644 --- a/ppstructure/docs/quickstart_en.md +++ b/ppstructure/docs/quickstart_en.md @@ -1,21 +1,21 @@ # PP-Structure Quick Start -- [1. Install package](#1) -- [2. Use](#2) - - [2.1 Use by command line](#21) - - [2.1.1 layout analysis + table recognition](#211) - - [2.1.2 layout analysis](#212) - - [2.1.3 table recognition](#213) - - [2.1.4 DocVQA](#214) - - [2.2 Use by code](#22) - - [2.2.1 layout analysis + table recognition](#221) - - [2.2.2 layout analysis](#222) - - [2.2.3 table recognition](#223) - - [2.2.4 DocVQA](#224) - - [2.3 Result description](#23) - - [2.3.1 layout analysis + table recognition](#231) - - [2.3.2 DocVQA](#232) - - [2.4 Parameter Description](#24) +- [1. Install package](#1-install-package) +- [2. Use](#2-use) + - [2.1 Use by command line](#21-use-by-command-line) + - [2.1.1 layout analysis + table recognition](#211-layout-analysis--table-recognition) + - [2.1.2 layout analysis](#212-layout-analysis) + - [2.1.3 table recognition](#213-table-recognition) + - [2.1.4 DocVQA](#214-docvqa) + - [2.2 Use by code](#22-use-by-code) + - [2.2.1 layout analysis + table recognition](#221-layout-analysis--table-recognition) + - [2.2.2 layout analysis](#222-layout-analysis) + - [2.2.3 table recognition](#223-table-recognition) + - [2.2.4 DocVQA](#224-docvqa) + - [2.3 Result description](#23-result-description) + - [2.3.1 layout analysis + table recognition](#231-layout-analysis--table-recognition) + - [2.3.2 DocVQA](#232-docvqa) + - [2.4 Parameter Description](#24-parameter-description) @@ -24,8 +24,6 @@ ```bash # Install paddleocr, version 2.5+ is recommended pip3 install "paddleocr>=2.5" -# Install layoutparser (if you do not use the layout analysis, you can skip it) -pip3 install -U https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl # Install the DocVQA dependency package paddlenlp (if you do not use the DocVQA, you can skip it) pip install paddlenlp diff --git a/ppstructure/predict_system.py b/ppstructure/predict_system.py index 075d914461..608f4d2fb3 100644 --- a/ppstructure/predict_system.py +++ b/ppstructure/predict_system.py @@ -43,6 +43,7 @@ class StructureSystem(object): def __init__(self, args): self.mode = args.mode + self.recovery = args.recovery if self.mode == 'structure': if not args.show_log: logger.setLevel(logging.INFO) @@ -110,7 +111,7 @@ def __call__(self, img, return_ocr_result_in_table=False): time_dict['rec'] += table_time_dict['rec'] else: if self.text_system is not None: - if args.recovery: + if self.recovery: wht_im = np.ones(ori_im.shape, dtype=ori_im.dtype) wht_im[y1:y2, x1:x2, :] = roi_img filter_boxes, filter_rec_res, ocr_time_dict = self.text_system( @@ -133,7 +134,7 @@ def __call__(self, img, return_ocr_result_in_table=False): for token in style_token: if token in rec_str: rec_str = rec_str.replace(token, '') - if not args.recovery: + if not self.recovery: box += [x1, y1] res.append({ 'text': rec_str, diff --git a/ppstructure/table/predict_table.py b/ppstructure/table/predict_table.py index 35ce8890cf..f580213753 100644 --- a/ppstructure/table/predict_table.py +++ b/ppstructure/table/predict_table.py @@ -101,7 +101,7 @@ def __call__(self, img, return_ocr_result_in_table=False): start = time.time() structure_res, elapse = self._structure(copy.deepcopy(img)) - result['cell_bbox'] = structure_res[1] + result['cell_bbox'] = structure_res[1].tolist() time_dict['table'] = elapse dt_boxes, rec_res, det_elapse, rec_elapse = self._ocr( diff --git a/ppstructure/utility.py b/ppstructure/utility.py index 390736cda9..597d978516 100644 --- a/ppstructure/utility.py +++ b/ppstructure/utility.py @@ -38,14 +38,17 @@ def init_args(): parser.add_argument( "--layout_dict_path", type=str, - default="../ppocr/utils/dict/layout_pubalynet_dict.txt") + default="../ppocr/utils/dict/layout_publaynet_dict.txt") parser.add_argument( "--layout_score_threshold", type=float, default=0.5, help="Threshold of score.") parser.add_argument( - "--layout_nms_threshold", type=float, default=0.5, help="Threshold of nms.") + "--layout_nms_threshold", + type=float, + default=0.5, + help="Threshold of nms.") # params for vqa parser.add_argument("--vqa_algorithm", type=str, default='LayoutXLM') parser.add_argument("--ser_model_dir", type=str)