update

vance-coder · Apr 22, 2020 · 0b7d1bb · 0b7d1bb
1 parent 9f7862a
commit 0b7d1bb
Show file tree

Hide file tree

Showing 4 changed files with 66 additions and 8 deletions.
diff --git a/README.md b/README.md
@@ -1,2 +1,37 @@
 # OCR
 OCR demo
+
+#### Installation
+```bash
+# install the latest version of tesseract for MacOS
+brew install tesseract --head
+
+# install pytesseract via pip
+pip install pytesseract
+
+# download language models from https://github.com/tesseract-ocr/tessdata
+# and move them to /usr/local/share/tessdata/, e.g.:
+#   chi_sim.traineddata (Simplified Chinese)
+#   chi_tra.traineddata (Traditional Chinese)
+```
+
+```markdown
+OCR language: 指定图像中文字所使用的语言；在命令行和 pytesseract 中使用 -l 选项。
+OCR Engine Mode(oem): tesseract4 有 2 个 OCR 引擎（legacy、lstm），用 --oem 选项去设置。
+0 Legacy engine only.
+1 Neural nets LSTM engine only.
+2 Legacy + LSTM engines.
+3 Default, based on what is available.
+Page Segmentation Mode(psm): 当已知文本结构的额外信息时，psm 会非常有用。python 和命令行工具的默认值都是 3。
+0 只有方向和脚本检测（OSD）。
+1 带OSD的自动页面分割。
+2 自动页面分割，但没有OSD或OCR。
+3 全自动页面分割，但没有OSD。（默认）
+4 假设一列可变大小的文本。
+5 假设一个统一的垂直排列文本块。
+6 假设一个统一的文本块。
+7 将图像作为单个文本行处理。
+8 将图像视为一个单词。
+9 将图像视为一个圆圈中的单个单词。
+10 将图像视为单个字符。
+```
diff --git a/ctpn_detect_v1/demo.py b/ctpn_detect_v1/demo.py
@@ -22,7 +22,7 @@ def demo():
     '''
     result, img, angle = model.model(
         img, model='crnn', adjust=True, detectAngle=True)
-    print(result, img, angle)
+    # print(result, img, angle)
     print("It takes time:{}s".format(time.time() - t))
     print("---------------------------------------")
     for key in result:

diff --git a/ctpn_detect_v1/model.py b/ctpn_detect_v1/model.py
@@ -2,6 +2,7 @@
 ##添加文本方向 检测模型，自动检测文字方向，0、90、180、270
 import sys
 from math import *
+from concurrent.futures import ProcessPoolExecutor, as_completed
 
 import cv2
 import pytesseract
@@ -12,6 +13,10 @@
 from ctpn.text_detect import text_detect
 
 
+def img_to_string(image):
+    return pytesseract.image_to_string(image, config='-l eng+chi_sim --oem 3 --psm 3')
+
+
 def crnnRec(im, text_recs, ocrMode='keras', adjust=False):
     """
     crnn模型，ocr识别
@@ -21,7 +26,7 @@ def crnnRec(im, text_recs, ocrMode='keras', adjust=False):
     @@text_recs:text box
 
     """
-    index = 0
+    images = []
     results = {}
     xDim, yDim = im.shape[1], im.shape[0]
 
@@ -43,16 +48,30 @@ def crnnRec(im, text_recs, ocrMode='keras', adjust=False):
             pt3 = (min(rec[6], xDim - 2), min(yDim - 2, rec[7]))
             pt4 = (rec[4], rec[5])
 
-        degree = degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0]))  ##图像倾斜角度
+        degree = degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0]))  # 图像倾斜角度
 
         partImg = dumpRotateImage(im, degree, pt1, pt2, pt3, pt4)
         # 根据ctpn进行识别出的文字区域，进行不同文字区域的crnn识别
         image = Image.fromarray(partImg).convert('L')
+
+        # 图片的长宽如果小于30px，则按比例放大
+        w, h = image.size
+        factor = 30 / min(image.size)
+        if factor > 1:
+            print('turn size...')
+            image = image.resize((int(w * factor), int(h * factor)))
+
+        images.append(image)
         # image.save(f'./temp/{index}.png')
 
         # 进行识别出的文字识别
-        sim_pred = pytesseract.image_to_string(image, config='-l eng+chi_sim --oem 3 --psm 3')
-        results[index].append(sim_pred)
+        # sim_pred = pytesseract.image_to_string(image, config='-l eng+chi_sim --oem 3 --psm 3')
+        # results[index].append(sim_pred)
+
+    with ProcessPoolExecutor() as executor:
+        res = [executor.submit(img_to_string, img) for img in images]
+    for idx, r in enumerate(res):
+        results[idx].append(r.result())
 
     return results
 

diff --git a/tesseract_demo.py b/tesseract_demo.py
@@ -2,14 +2,18 @@
 from PIL import Image
 
 """
-tesseract delete alpha channel
+tesseract delete alpha channel 
 the best dpi for tesseract is at least 300dpi
-image.convert("L")  
+image.convert("L")  Done 
 """
 
 
 def demo():
-    image = Image.open('/Users/liuliangjun/Downloads/test2.jpg')
+    image = Image.open('/Users/liuliangjun/Downloads/pic_3.png')
+    w, h = image.size
+    factor = 30 / min(image.size)
+    if factor > 1:
+        image = image.resize((int(w * factor), int(h * factor)))
 
     # default config --oem 3 --psm 3
     text = pytesseract.image_to_string(image, config='-l eng+chi_sim --oem 3 --psm 3')