Skip to content

Commit

Permalink
tests/test_general.py: test_2553(): cope with mupdf 1.23.4 and earlier.
Browse files Browse the repository at this point in the history
mupdf-1.23.4 gives us orphaned surrogate characters, so we need to change
whether we expect 0xFFFD replacements depending on the mupdf version.

Also print out the various lists we are testing, useful for debugging.
  • Loading branch information
julian-smith-artifex-com committed Oct 26, 2023
1 parent 7c0f95e commit 676b3a7
Showing 1 changed file with 31 additions and 8 deletions.
39 changes: 31 additions & 8 deletions tests/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,20 +572,43 @@ def test_2553():
page = doc[0]

# extract plain text, build set of all characters
set1 = set(page.get_text())
list1 = page.get_text()
set1 = set(list1)

# extract text blocks, build set of all characters
set2 = set(page.get_text(sort=True)) # internally uses "blocks"

list2 = page.get_text(sort=True) # internally uses "blocks"
set2 = set(list2)

# extract textbox content, build set of all characters
set3 = set(page.get_textbox(page.rect))

list3 = page.get_textbox(page.rect)
set3 = set(list3)

def show(l):
    """Return an ASCII-only rendering of the text *l* for debug output.

    The result starts with a ``len=<number of characters>`` line.
    Printable ASCII characters (codes 32..126) and newlines are copied
    unchanged; every other character is rendered as `` [0x<hex code>]``.
    """
    parts = [f'len={len(l)}\n']
    for c in l:
        cc = ord(c)
        if 32 <= cc < 127 or c == '\n':
            parts.append(c)
        else:
            # Format the bare hex digits: hex(cc) already carries a '0x'
            # prefix, so combining it with a literal '0x' gave '0x0x...'.
            parts.append(f' [0x{cc:x}]')
    return ''.join(parts)
print(f'list1:\n{show(list1)}')
print(f'list2:\n{show(list2)}')
print(f'list3:\n{show(list3)}')

# all sets must be equal
assert set1 == set2
assert set1 == set3

# this special page contains no invalid Unicodes!
assert chr(0xFFFD) not in set1
# With mupdf later than 1.23.4, this special page contains no invalid
# Unicodes.
#
if fitz.mupdf_version_tuple > (1, 23, 4):
print(f'Checking no occurrence of 0xFFFD, {fitz.mupdf_version_tuple=}.')
assert chr(0xFFFD) not in set1
else:
print(f'Checking occurrence of 0xFFFD, {fitz.mupdf_version_tuple=}.')
assert chr(0xFFFD) in set1


def test_2635():
Expand All @@ -596,4 +619,4 @@ def test_2635():

page.clean_contents() # clean page
pix2 = page.get_pixmap() # pixmap after cleaning
assert pix1.samples == pix2.samples # assert equality
assert pix1.samples == pix2.samples # assert equality

0 comments on commit 676b3a7

Please sign in to comment.