Skip to content

Commit

Permalink
Use pydantic in rectangle
Browse files Browse the repository at this point in the history
  • Loading branch information
gabriel-piles committed Feb 4, 2025
1 parent 03a10ee commit baca720
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 37 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "pdf-document-layout-analysis"
version = "2025.02.04.01"
version = "2025.02.04.02"
description = "This tool is for PDF document layout analysis"
license = { file = "LICENSE" }
authors = [{ name = "HURIDOCS" }]
Expand Down
65 changes: 37 additions & 28 deletions src/pdf_features/Rectangle.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,18 @@
import sys

from lxml.etree import ElementBase
from pydantic import BaseModel

sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))


class Rectangle:
def __init__(self, left: int, top: int, right: int, bottom: int):
self.left = left
self.top = top
self.right = right
self.bottom = bottom
self.fix_wrong_areas()
self.width = self.right - self.left
self.height = self.bottom - self.top
class Rectangle(BaseModel):
left: int
top: int
right: int
bottom: int
width: int
height: int

@staticmethod
def from_poppler_tag_etree(tag: ElementBase) -> "Rectangle":
Expand All @@ -26,7 +25,7 @@ def from_poppler_tag_etree(tag: ElementBase) -> "Rectangle":
y_max = y_min + int(tag.attrib["height"])

if len(content) <= 1:
return Rectangle(x_min, y_min, x_max, y_max)
return Rectangle.from_coordinates(x_min, y_min, x_max, y_max)

one_character_length = max(int((x_max - x_min) / len(content)), 2)
if content[0] == " ":
Expand All @@ -35,22 +34,7 @@ def from_poppler_tag_etree(tag: ElementBase) -> "Rectangle":
if content[-1] == " ":
x_max -= one_character_length

return Rectangle(x_min, y_min, x_max, y_max)

def fix_wrong_areas(self):
if self.right == self.left:
self.left -= 1
self.right += 1

if self.top == self.bottom:
self.top -= 1
self.bottom += 1

if self.right < self.left:
self.right, self.left = self.left, self.right

if self.bottom < self.top:
self.top, self.bottom = self.bottom, self.top
return Rectangle.from_coordinates(x_min, y_min, x_max, y_max)

def get_intersection_percentage(self, rectangle: "Rectangle") -> float:
x1 = max(self.left, rectangle.left)
Expand Down Expand Up @@ -91,8 +75,33 @@ def merge_rectangles(rectangles: list["Rectangle"]) -> "Rectangle":
right = max([rectangle.right for rectangle in rectangles])
bottom = max([rectangle.bottom for rectangle in rectangles])

return Rectangle(left, top, right, bottom)
return Rectangle.from_coordinates(left, top, right, bottom)

@staticmethod
def from_width_height(left: int, top: int, width: int, height: int):
return Rectangle(left, top, left + width, top + height)
return Rectangle.from_coordinates(left, top, left + width, top + height)

@staticmethod
def from_coordinates(left: float, top: float, right: float, bottom: float):
    """Build a Rectangle from its four edge coordinates.

    The edges are first passed through ``Rectangle.fix_wrong_areas``; the
    ``width`` and ``height`` fields are then derived from the values it
    returns, so they always agree with the stored edges.
    """
    fixed_left, fixed_top, fixed_right, fixed_bottom = Rectangle.fix_wrong_areas(left, top, right, bottom)
    return Rectangle(
        left=fixed_left,
        top=fixed_top,
        right=fixed_right,
        bottom=fixed_bottom,
        width=fixed_right - fixed_left,
        height=fixed_bottom - fixed_top,
    )

@staticmethod
def fix_wrong_areas(left: float, top: float, right: float, bottom: float):
if right == left:
left -= 1
right += 1

if top == bottom:
top -= 1
bottom += 1

if right < left:
right, left = left, right

if bottom < top:
top, bottom = bottom, top

return int(left), int(top), int(right), int(bottom)
2 changes: 1 addition & 1 deletion src/pdf_token_type_labels/Label.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Label(BaseModel):
metadata: str = ""

def intersection_percentage(self, token_bounding_box: Rectangle):
label_bounding_box = Rectangle(
label_bounding_box = Rectangle.from_coordinates(
left=self.left, top=self.top, right=self.left + self.width, bottom=self.top + self.height
)
return label_bounding_box.get_intersection_percentage(token_bounding_box)
Expand Down
2 changes: 1 addition & 1 deletion src/pdf_tokens_type_trainer/PdfTrainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def get_padding_token(segment_number: int, page_number: int):
"",
PdfFont("pad_font_id", False, False, 0.0, "#000000"),
segment_number,
Rectangle(0, 0, 0, 0),
Rectangle.from_coordinates(0, 0, 0, 0),
TokenType.TEXT,
)

Expand Down
4 changes: 2 additions & 2 deletions src/test_end_to_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,9 +269,9 @@ def test_ocr_english(self):
self.assertEqual(1, len(results_list))
self.assertEqual("Test text OCR", results_list[0]["text"])
self.assertEqual(248, results_list[0]["left"])
self.assertEqual(263, results_list[0]["top"])
self.assertEqual(264, results_list[0]["top"])
self.assertEqual(313, results_list[0]["width"])
self.assertEqual(52, results_list[0]["height"])
self.assertEqual(50, results_list[0]["height"])
self.assertEqual(1, results_list[0]["page_number"])
self.assertEqual(842, results_list[0]["page_width"])
self.assertEqual(595, results_list[0]["page_height"])
Expand Down
8 changes: 4 additions & 4 deletions src/vgt/create_word_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ def get_words_positions(text: str, rectangle: Rectangle):

width_per_letter = rectangle.width / text_len

words_bboxes = [Rectangle(rectangle.left, rectangle.top, rectangle.left + 5, rectangle.bottom)]
words_bboxes = [Rectangle.from_coordinates(rectangle.left, rectangle.top, rectangle.left + 5, rectangle.bottom)]
words_bboxes[-1].width = 0
words_bboxes[-1].right = words_bboxes[-1].left

for letter in text:
if letter == " ":
left = words_bboxes[-1].right + width_per_letter
words_bboxes.append(Rectangle(left, words_bboxes[-1].top, left + 5, words_bboxes[-1].bottom))
words_bboxes.append(Rectangle.from_coordinates(left, words_bboxes[-1].top, left + 5, words_bboxes[-1].bottom))
words_bboxes[-1].width = 0
words_bboxes[-1].right = words_bboxes[-1].left
else:
Expand All @@ -52,11 +52,11 @@ def get_subwords_positions(word: str, rectangle: Rectangle):
ids = [x[-2] for x in tokenizer(word_tokens)["input_ids"]]

right = rectangle.left + len(word_tokens[0]) * width_per_letter
bboxes = [Rectangle(rectangle.left, rectangle.top, right, rectangle.bottom)]
bboxes = [Rectangle.from_coordinates(rectangle.left, rectangle.top, right, rectangle.bottom)]

for subword in word_tokens[1:]:
right = bboxes[-1].right + len(subword) * width_per_letter
bboxes.append(Rectangle(bboxes[-1].right, rectangle.top, right, rectangle.bottom))
bboxes.append(Rectangle.from_coordinates(bboxes[-1].right, rectangle.top, right, rectangle.bottom))

return ids, bboxes

Expand Down

0 comments on commit baca720

Please sign in to comment.