Skip to content

Commit

Permalink
sanitise loggers: no points, but warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
bertsky committed May 6, 2024
1 parent eb3b635 commit a290322
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 15 deletions.
6 changes: 3 additions & 3 deletions ocrd_tesserocr/crop.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,17 +140,17 @@ def estimate_bounds(self, page, page_image, tessapi, zoom=1.0):
bin_bbox = image.getbbox()
if not bin_bbox:
# this does happen!
- self.logger.info("Ignoring region '%s' because its binarization is empty", ID)
+ self.logger.warning("Ignoring region '%s' because its binarization is empty", ID)
continue
width = bin_bbox[2]-bin_bbox[0]
if width < 25 / zoom:
# we must be conservative here: page numbers are tiny regions, too!
- self.logger.info("Ignoring region '%s' because its width is too small (%d)", ID, width)
+ self.logger.warning("Ignoring region '%s' because its width is too small (%d)", ID, width)
continue
height = bin_bbox[3]-bin_bbox[1]
if height < 25 / zoom:
# we must be conservative here: page numbers are tiny regions, too!
- self.logger.debug("Ignoring region '%s' because its height is too small (%d)", ID, height)
+ self.logger.warning("Ignoring region '%s' because its height is too small (%d)", ID, height)
continue
all_left = min(all_left, left)
all_top = min(all_top, top)
Expand Down
23 changes: 11 additions & 12 deletions ocrd_tesserocr/recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ def _process_regions_in_page(self, result_it, page, page_coords, mapping, dpi):
coords = CoordsType(points=points)
# plausibilise candidate
if polygon2 is None:
- self.logger.info('Ignoring extant region: %s', points)
+ self.logger.warning('Ignoring extant region: %s', points)
continue
block_type = it.BlockType()
if block_type in [
Expand All @@ -548,20 +548,19 @@ def _process_regions_in_page(self, result_it, page, page_coords, mapping, dpi):
PT.TABLE] and (
xywh['w'] < 20 / 300.0*(dpi or 300) or
xywh['h'] < 10 / 300.0*(dpi or 300)):
- self.logger.info('Ignoring too small region: %s', points)
+ self.logger.warning('Ignoring too small region: %s', points)
continue
region_image_bin = it.GetBinaryImage(RIL.BLOCK)
if not region_image_bin or not region_image_bin.getbbox():
- self.logger.info('Ignoring binary-empty region: %s', points)
+ self.logger.warning('Ignoring binary-empty region: %s', points)
continue
#
# keep and annotate new region
ID = "region%04d" % index
#
# region type switch
block_type = it.BlockType()
- self.logger.info("Detected region '%s': %s (%s)",
- ID, points, membername(PT, block_type))
+ self.logger.info("Detected region '%s' (%s)", ID, membername(PT, block_type))
if block_type in [PT.FLOWING_TEXT,
PT.HEADING_TEXT,
PT.PULLOUT_TEXT,
Expand Down Expand Up @@ -671,10 +670,10 @@ def _process_cells_in_table(self, result_it, region, rogroup, page_coords, mappi
points = points_from_polygon(polygon)
coords = CoordsType(points=points)
if polygon2 is None:
- self.logger.info('Ignoring extant cell: %s', points)
+ self.logger.warning('Ignoring extant cell: %s', points)
continue
ID = region.id + "_cell%04d" % index
- self.logger.info("Detected cell '%s': %s", ID, points)
+ self.logger.info("Detected cell '%s'", ID)
cell = TextRegionType(id=ID, Coords=coords)
region.add_TextRegion(cell)
self._add_orientation(it, cell, page_coords)
Expand Down Expand Up @@ -720,10 +719,10 @@ def _process_lines_in_region(self, result_it, region, page_coords, mapping, pare
points = points_from_polygon(polygon)
coords = CoordsType(points=points)
if polygon2 is None:
- self.logger.info('Ignoring extant line: %s', points)
+ self.logger.warning('Ignoring extant line: %s', points)
continue
ID = region.id + "_line%04d" % index
- self.logger.info("Detected line '%s': %s", ID, points)
+ self.logger.info("Detected line '%s'", ID)
line = TextLineType(id=ID, Coords=coords)
region.add_TextLine(line)
if self.parameter['textequiv_level'] != 'line':
Expand Down Expand Up @@ -755,7 +754,7 @@ def _process_words_in_line(self, result_it, line, coords, mapping):
polygon = polygon2
points = points_from_polygon(polygon)
if polygon2 is None:
- self.logger.info('Ignoring extant word: %s', points)
+ self.logger.warning('Ignoring extant word: %s', points)
continue
ID = line.id + "_word%04d" % index
self.logger.debug("Detected word '%s': %s", ID, points)
Expand All @@ -779,7 +778,7 @@ def _process_glyphs_in_word(self, result_it, word, coords, mapping):
polygon = polygon2
points = points_from_polygon(polygon)
if polygon2 is None:
- self.logger.info('Ignoring extant glyph: %s', points)
+ self.logger.warning('Ignoring extant glyph: %s', points)
continue
ID = word.id + '_glyph%04d' % index
#self.logger.debug("Detected glyph '%s': %s", ID, points)
Expand Down Expand Up @@ -1142,7 +1141,7 @@ def _add_orientation(self, result_it, region, coords):
# defined as 'how many radians does one have to rotate the block anti-clockwise'
# i.e. positive amount to be applied counter-clockwise for deskewing:
deskew_angle *= 180 / math.pi
- self.logger.debug('orientation/deskewing for %s: %s / %s / %s / %.3f°', region.id,
+ self.logger.info('orientation/deskewing for %s: %s / %s / %s / %.3f°', region.id,
membername(Orientation, orientation),
membername(WritingDirection, writing_direction),
membername(TextlineOrder, textline_order),
Expand Down

0 comments on commit a290322

Please sign in to comment.