-
Notifications
You must be signed in to change notification settings - Fork 0
/
fitz_blocks_rect_drawer.py
34 lines (30 loc) · 1.02 KB
/
fitz_blocks_rect_drawer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import cv2
import os
import fitz
# Hard-coded locations used by the script driver at the bottom of the file.
# PDF whose first page is passed to extract_data().
pdf_path = "/mnt/D/текст_отчета_14-7.pdf"
# Image that draw_rects() reads and draws the rectangles on.
image_path = "/mnt/D/текст_отчета_14-7.png"
# Directory draw_rects() writes the annotated image into.
rect_dir = "/mnt/D/rect_images/"
def extract_data(pdf_path: str) -> list:
    """Return the text blocks of the first page of a PDF.

    Opens ``pdf_path`` with PyMuPDF, loads page 0 and returns the result of
    ``page.get_text("blocks")``.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The list produced by ``get_text("blocks")``.  Per the PyMuPDF
        documentation each item is a tuple
        ``(x0, y0, x1, y1, text, block_no, block_type)``.
    """
    # The context manager closes the document even when loading the page or
    # extracting the text raises; the original only closed it on success.
    with fitz.open(pdf_path) as doc:
        page = doc.load_page(0)
        return page.get_text("blocks")
def draw_rects(image_path: str, rect_dir: str, text_list: list) -> None:
    """Draw a green rectangle around each text block and save the image.

    Args:
        image_path: Path of the image to annotate; read with ``cv2.imread``.
        rect_dir: Directory the annotated image is written to, under the
            same file name as ``image_path``.  Created if it is missing.
        text_list: Block tuples whose first four items are the
            ``x0, y0, x1, y1`` corners of the block, e.g. the result of
            ``page.get_text("blocks")``.  The coordinates are truncated to
            ints and used directly as pixel positions (no rescaling), so
            the image is assumed to share the blocks' coordinate scale.

    Raises:
        FileNotFoundError: If ``image_path`` cannot be read as an image.
        OSError: If the annotated image cannot be written.
    """
    print('img path', image_path)
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError(f"cannot read image: {image_path}")
    print(text_list)

    for block in text_list:
        # Skip image blocks by their type field (index 6, 1 == image per the
        # PyMuPDF docs) instead of blindly dropping the last entry, which
        # loses a real text block whenever no image block is present.
        if len(block) > 6 and block[6] == 1:
            continue
        x0, y0, x1, y1 = (int(v) for v in block[:4])
        p0, p1 = (x0, y0), (x1, y1)
        print(p0, p1)
        cv2.rectangle(img, p0, p1, (0, 255, 0), thickness=3)

    # Keep the input's file name, but place it in the output directory.
    os.makedirs(rect_dir, exist_ok=True)
    output_path = os.path.join(rect_dir, os.path.basename(image_path))
    print(output_path)
    # cv2.imwrite returns False on failure rather than raising.
    if not cv2.imwrite(output_path, img):
        raise OSError(f"cannot write image: {output_path}")
def main() -> None:
    """Extract the blocks of the configured PDF and draw them on the image.

    Uses the module-level ``pdf_path``, ``image_path`` and ``rect_dir``.
    """
    text_list = extract_data(pdf_path)
    print(text_list)
    draw_rects(image_path, rect_dir, text_list)


# Run the pipeline only when executed as a script, so importing this module
# for its functions does not touch the hard-coded paths.
if __name__ == "__main__":
    main()