
Commit

update doc
an1018 committed Aug 22, 2022
2 parents 36f1745 + d41d046 commit 27215c6
Showing 30 changed files with 208 additions and 186 deletions.
4 changes: 2 additions & 2 deletions deploy/android_demo/app/src/main/cpp/native.cpp
@@ -47,7 +47,7 @@ str_to_cpu_mode(const std::string &cpu_mode) {
std::string upper_key;
std::transform(cpu_mode.cbegin(), cpu_mode.cend(), upper_key.begin(),
::toupper);
auto index = cpu_mode_map.find(upper_key);
auto index = cpu_mode_map.find(upper_key.c_str());
if (index == cpu_mode_map.end()) {
LOGE("cpu_mode not found %s", upper_key.c_str());
return paddle::lite_api::LITE_POWER_HIGH;
@@ -116,4 +116,4 @@ Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_release(
ppredictor::OCR_PPredictor *ppredictor =
(ppredictor::OCR_PPredictor *)java_pointer;
delete ppredictor;
}
}
@@ -54,7 +54,7 @@ public static class Config {
}

public void destory() {
if (nativePointer > 0) {
if (nativePointer != 0) {
release(nativePointer);
nativePointer = 0;
}
6 changes: 4 additions & 2 deletions deploy/cpp_infer/docs/windows_vs2019_build.md
@@ -109,8 +109,10 @@ CUDA_LIB、CUDNN_LIB、TENSORRT_DIR、WITH_GPU、WITH_TENSORRT

Before running, copy the following files to the `build/Release/` folder:
1. `paddle_inference/paddle/lib/paddle_inference.dll`
2. `opencv/build/x64/vc15/bin/opencv_world455.dll`
3. If you use the openblas version of the inference library, also copy `paddle_inference/third_party/install/openblas/lib/openblas.dll`
2. `paddle_inference/third_party/install/onnxruntime/lib/onnxruntime.dll`
3. `paddle_inference/third_party/install/paddle2onnx/lib/paddle2onnx.dll`
4. `opencv/build/x64/vc15/bin/opencv_world455.dll`
5. If you use the openblas version of the inference library, also copy `paddle_inference/third_party/install/openblas/lib/openblas.dll`

### Step4: Prediction

2 changes: 1 addition & 1 deletion deploy/slim/quantization/README_en.md
@@ -73,4 +73,4 @@ python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_
The parameters of the quantized model exported in the above steps are still stored as FP32, but their numerical range has been quantized to int8.
The exported model can be converted with the `opt` tool of PaddleLite.

For quantized model deployment, please refer to [Mobile terminal model deployment](../../lite/readme_en.md)
For quantized model deployment, please refer to [Mobile terminal model deployment](../../lite/readme.md)
2 changes: 2 additions & 0 deletions paddleocr.py
@@ -636,4 +636,6 @@ def main():

for item in result:
item.pop('img')
item.pop('res')
logger.info(item)
logger.info('result save to {}'.format(args.output))
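
The added `item.pop('res')` drops the full recognition payload (alongside the already-dropped cropped image) so only a compact summary is logged. A minimal sketch with a made-up result item — the dict keys and values here are assumptions for illustration, and the `pop(..., None)` default is a defensive variant, not the committed code:

```python
# Illustrative only: trim bulky fields from a structure result item
# before logging it.
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

result = [{"type": "table",
           "bbox": [12, 34, 560, 780],
           "img": "<cropped-region ndarray>",
           "res": {"html": "<table>...</table>"}}]

for item in result:
    item.pop("img", None)  # default avoids KeyError if a field is missing
    item.pop("res", None)
    logger.info(item)
```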
6 changes: 5 additions & 1 deletion ppocr/data/imaug/copy_paste.py
@@ -35,10 +35,12 @@ def __call__(self, data):
point_num = data['polys'].shape[1]
src_img = data['image']
src_polys = data['polys'].tolist()
src_texts = data['texts']
src_ignores = data['ignore_tags'].tolist()
ext_data = data['ext_data'][0]
ext_image = ext_data['image']
ext_polys = ext_data['polys']
ext_texts = ext_data['texts']
ext_ignores = ext_data['ignore_tags']

indexs = [i for i in range(len(ext_ignores)) if not ext_ignores[i]]
@@ -53,7 +55,7 @@ def __call__(self, data):
src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB)
ext_image = cv2.cvtColor(ext_image, cv2.COLOR_BGR2RGB)
src_img = Image.fromarray(src_img).convert('RGBA')
for poly, tag in zip(select_polys, select_ignores):
for idx, poly, tag in zip(select_idxs, select_polys, select_ignores):
box_img = get_rotate_crop_image(ext_image, poly)

src_img, box = self.paste_img(src_img, box_img, src_polys)
@@ -62,6 +64,7 @@ def __call__(self, data):
for _ in range(len(box), point_num):
box.append(box[-1])
src_polys.append(box)
src_texts.append(ext_texts[idx])
src_ignores.append(tag)
src_img = cv2.cvtColor(np.array(src_img), cv2.COLOR_RGB2BGR)
h, w = src_img.shape[:2]
@@ -70,6 +73,7 @@ def __call__(self, data):
src_polys[:, :, 1] = np.clip(src_polys[:, :, 1], 0, h)
data['image'] = src_img
data['polys'] = src_polys
data['texts'] = src_texts
data['ignore_tags'] = np.array(src_ignores)
return data
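
The change above threads `select_idxs` through the paste loop so that each pasted polygon carries its text label into `data['texts']`. A minimal sketch of that bookkeeping with made-up data (the variable values are illustrative, not the real augmentation pipeline):

```python
# Illustrative only: keep text labels aligned with the polygons that get
# pasted. select_idxs indexes back into the donor sample's lists.
ext_polys = [[[0, 0], [10, 0], [10, 5], [0, 5]],
             [[20, 0], [30, 0], [30, 5], [20, 5]]]
ext_texts = ["foo", "bar"]
ext_ignores = [False, True]  # the second box is ignored

select_idxs = [i for i in range(len(ext_ignores)) if not ext_ignores[i]]
select_polys = [ext_polys[i] for i in select_idxs]
select_ignores = [ext_ignores[i] for i in select_idxs]

src_polys, src_texts, src_ignores = [], [], []
for idx, poly, tag in zip(select_idxs, select_polys, select_ignores):
    src_polys.append(poly)
    src_texts.append(ext_texts[idx])  # idx maps the poly back to its text
    src_ignores.append(tag)

assert len(src_polys) == len(src_texts) == len(src_ignores)
print(src_texts)  # ['foo']
```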

5 changes: 2 additions & 3 deletions ppocr/metrics/rec_metric.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import Levenshtein
from rapidfuzz.distance import Levenshtein
import string


@@ -46,8 +46,7 @@ def __call__(self, pred_label, *args, **kwargs):
if self.is_filter:
pred = self._normalize_text(pred)
target = self._normalize_text(target)
norm_edit_dis += Levenshtein.distance(pred, target) / max(
len(pred), len(target), 1)
norm_edit_dis += Levenshtein.normalized_distance(pred, target)
if pred == target:
correct_num += 1
all_num += 1
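
For reference, the old metric divided the raw edit distance by the longer string's length, and rapidfuzz's `Levenshtein.normalized_distance` computes the same ratio (returning 0.0 when both strings are empty). A quick sanity check, assuming `rapidfuzz` is installed:

```python
# Sanity check: the new normalized_distance matches the old
# distance / max(len(pred), len(target), 1) formula.
from rapidfuzz.distance import Levenshtein

pairs = [("hello", "hallo"), ("PaddleOCR", "PadleOCR"), ("", "abc"), ("", "")]
for pred, target in pairs:
    old = Levenshtein.distance(pred, target) / max(len(pred), len(target), 1)
    new = Levenshtein.normalized_distance(pred, target)
    assert abs(old - new) < 1e-9
    print(f"{pred!r} vs {target!r}: {new:.4f}")
```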
4 changes: 4 additions & 0 deletions ppocr/utils/dict/kie_dict/xfund_class_list.txt
@@ -0,0 +1,4 @@
OTHER
QUESTION
ANSWER
HEADER
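
The new dictionary file lists the four XFUND entity classes used by the SER task (it is passed via `--ser_dict_path` in the KIE example later in this commit). A minimal sketch of reading such a class list into label maps — illustrative only; PaddleOCR's own SER pipeline may additionally expand the classes into B-/I- tags:

```python
# Illustrative only: turn a class-list file like xfund_class_list.txt
# into label <-> id maps.
def load_class_list(path):
    with open(path, "r", encoding="utf-8") as f:
        classes = [line.strip() for line in f if line.strip()]
    label2id = {label: i for i, label in enumerate(classes)}
    id2label = {i: label for label, i in label2id.items()}
    return label2id, id2label

label2id, id2label = load_class_list(
    "ppocr/utils/dict/kie_dict/xfund_class_list.txt")
print(label2id)  # {'OTHER': 0, 'QUESTION': 1, 'ANSWER': 2, 'HEADER': 3}
```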
3 changes: 3 additions & 0 deletions ppocr/utils/save_load.py
@@ -194,6 +194,9 @@ def save_model(model,
_mkdir_if_not_exist(model_path, logger)
model_prefix = os.path.join(model_path, prefix)
paddle.save(optimizer.state_dict(), model_prefix + '.pdopt')

is_nlp_model = config['Architecture']["model_type"] == 'kie' and config[
"Architecture"]["algorithm"] not in ["SDMGR"]
if is_nlp_model is not True:
paddle.save(model.state_dict(), model_prefix + '.pdparams')
metric_prefix = model_prefix
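
The new condition skips the extra `paddle.save(model.state_dict(), ...)` only for NLP-style KIE models (which are saved elsewhere), while SDMGR — a KIE algorithm that is not NLP-based — keeps the old behaviour. A small sketch of how the flag evaluates for a few hypothetical configs:

```python
# Illustrative only: evaluate the new is_nlp_model condition against
# hypothetical Architecture configs.
def is_nlp_model(config):
    arch = config["Architecture"]
    return arch["model_type"] == "kie" and arch["algorithm"] not in ["SDMGR"]

examples = {
    "det / DB":        {"Architecture": {"model_type": "det", "algorithm": "DB"}},
    "kie / SDMGR":     {"Architecture": {"model_type": "kie", "algorithm": "SDMGR"}},
    "kie / LayoutXLM": {"Architecture": {"model_type": "kie", "algorithm": "LayoutXLM"}},
}
for name, cfg in examples.items():
    print(f"{name}: skip .pdparams save -> {is_nlp_model(cfg)}")
# det / DB        -> False (still saved)
# kie / SDMGR     -> False (still saved)
# kie / LayoutXLM -> True  (skipped)
```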
6 changes: 3 additions & 3 deletions ppstructure/README.md
@@ -106,9 +106,9 @@ PP-Structure Series Model List (Updating)

|model name|description|model size|download|
| --- | --- | --- | --- |
|ch_PP-OCRv3_det_slim|[New] slim quantization with distillation lightweight model, supporting Chinese, English, multilingual text detection| 1.1M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_distill_train.tar)|
|ch_PP-OCRv3_rec_slim |[New] Slim quantization with distillation lightweight model, supporting Chinese, English text recognition| 4.9M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_train.tar) |
|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model trained on PubTabNet dataset based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
|ch_PP-OCRv3_det| [New] Lightweight model, supporting Chinese, English, multilingual text detection | 3.8M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar)|
|ch_PP-OCRv3_rec| [New] Lightweight model, supporting Chinese, English, multilingual text recognition | 12.4M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) |
|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |

### 7.3 KIE model

6 changes: 3 additions & 3 deletions ppstructure/README_ch.md
@@ -120,9 +120,9 @@ PP-Structure series model list (updating)

|model name|description|model size|download|
| --- | --- | --- | --- |
|ch_PP-OCRv3_det_slim|[New] slim quantization with distillation lightweight model, supporting Chinese, English, multilingual text detection| 1.1M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_distill_train.tar)|
|ch_PP-OCRv3_rec_slim |[New] slim quantization lightweight model, supporting Chinese, English and digit recognition| 4.9M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_train.tar) |
|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model trained on the PubTabNet dataset based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
|ch_PP-OCRv3_det| [New] lightweight model, supporting Chinese, English, multilingual text detection | 3.8M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar)|
|ch_PP-OCRv3_rec|[New] lightweight model, supporting Chinese, English and digit recognition|12.4M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) |
|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |


<a name="73"></a>
59 changes: 34 additions & 25 deletions ppstructure/docs/inference.md
@@ -4,7 +4,7 @@
- [1.1 Layout analysis + table recognition](#1.1)
- [1.2 Layout analysis](#1.2)
- [1.3 Table recognition](#1.3)
- [2. DocVQA](#2)
- [2. Key information extraction](#2)

<a name="1"></a>
## 1. Structure
@@ -16,23 +16,26 @@ cd ppstructure
Download the models
```bash
mkdir inference && cd inference
# Download the PP-OCRv2 text detection model and unpack it
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
# Download the PP-OCRv2 text recognition model and unpack it
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
# Download the ultra-lightweight English table prediction model and unpack it
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
# Download the PP-Structurev2 layout analysis model and unpack it
wget https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_layout_infer.tar && tar xf picodet_lcnet_x1_0_layout_infer.tar
# Download the PP-OCRv3 text detection model and unpack it
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_det_infer.tar
# Download the PP-OCRv3 text recognition model and unpack it
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar
# Download the PP-Structurev2 table recognition model and unpack it
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
cd ..
```
<a name="1.1"></a>
### 1.1 Layout analysis + table recognition
```bash
python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
--rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
--table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
--rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
--layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
--image_dir=./docs/table/1.png \
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
--output=../output \
--vis_font_path=../doc/fonts/simfang.ttf
```
@@ -41,40 +44,46 @@ python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_i
<a name="1.2"></a>
### 1.2 Layout analysis
```bash
python3 predict_system.py --image_dir=./docs/table/1.png --table=false --ocr=false --output=../output/
python3 predict_system.py --layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
--image_dir=./docs/table/1.png \
--output=../output \
--table=false \
--ocr=false
```
After running, each image gets a directory of the same name under the `structure` directory inside the directory specified by the `output` field. Each detected region is cropped and saved there, with the image named after its coordinates in the original image. The layout analysis result is stored in the `res.txt` file.

<a name="1.3"></a>
### 1.3 Table recognition
```bash
python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
--rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
--table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
--rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
--table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
--image_dir=./docs/table/table.jpg \
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
--output=../output \
--vis_font_path=../doc/fonts/simfang.ttf \
--layout=false
```
After running, each image gets a directory of the same name under the `structure` directory inside the directory specified by the `output` field. Each table is saved as an Excel file whose name is `[0,0,img_h,img_w]`.

<a name="2"></a>
## 2. DocVQA
## 2. Key information extraction

```bash
cd ppstructure
# Download the model
mkdir inference && cd inference
# Download the SER xfun model and unpack it
wget https://paddleocr.bj.bcebos.com/pplayout/PP-Layout_v1.0_ser_pretrained.tar && tar xf PP-Layout_v1.0_ser_pretrained.tar
# Download the SER XFUND model and unpack it
wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/ser_vi_layoutxlm_xfund_infer.tar && tar -xf ser_vi_layoutxlm_xfund_infer.tar
cd ..
python3 predict_system.py --model_name_or_path=kie/PP-Layout_v1.0_ser_pretrained/ \
--mode=kie \
--image_dir=kie/images/input/zh_val_0.jpg \
--vis_font_path=../doc/fonts/simfang.ttf
python3 kie/predict_kie_token_ser.py \
--kie_algorithm=LayoutXLM \
--ser_model_dir=../inference/ser_vi_layoutxlm_xfund_infer \
--image_dir=./docs/kie/input/zh_val_42.jpg \
--ser_dict_path=../ppocr/utils/dict/kie_dict/xfund_class_list.txt \
--vis_font_path=../doc/fonts/simfang.ttf \
--ocr_order_method="tb-yx"
```

After running, the visualized result for each image is saved under the `kie` directory inside the directory specified by the `output` field, with the same file name as the input image.