From c7a9582eab34bc46b74c245aa137424b9cfaee74 Mon Sep 17 00:00:00 2001 From: Tom Flanagan Date: Tue, 19 Nov 2024 20:40:14 -0800 Subject: [PATCH 1/5] Fix file position desync when calling into libtiff --- Tests/test_libtiff_file_position.py | 23 +++++++++++++++++++++++ src/PIL/TiffImagePlugin.py | 17 +++++++++-------- 2 files changed, 32 insertions(+), 8 deletions(-) create mode 100644 Tests/test_libtiff_file_position.py diff --git a/Tests/test_libtiff_file_position.py b/Tests/test_libtiff_file_position.py new file mode 100644 index 00000000000..d614ea8eca1 --- /dev/null +++ b/Tests/test_libtiff_file_position.py @@ -0,0 +1,23 @@ +from __future__ import annotations + + +import pytest + +from PIL import Image + + +@pytest.mark.parametrize('test_file', [ + 'Tests/images/old-style-jpeg-compression-no-samplesperpixel.tif', + 'Tests/images/old-style-jpeg-compression.tif', +]) +def test_libtiff_exif_loading(test_file) -> None: + # loading image before exif + im1 = Image.open(open(test_file, 'rb', buffering=1048576)) + im1.load() + exif1 = dict(im1.getexif()) + + # loading exif before image + im2 = Image.open(open(test_file, 'rb', buffering=1048576)) + exif2 = dict(im2.getexif()) + + assert exif1 == exif2 diff --git a/src/PIL/TiffImagePlugin.py b/src/PIL/TiffImagePlugin.py index 6bf39b75a5f..72628677278 100644 --- a/src/PIL/TiffImagePlugin.py +++ b/src/PIL/TiffImagePlugin.py @@ -1216,10 +1216,6 @@ def seek(self, frame: int) -> None: def _seek(self, frame: int) -> None: self.fp = self._fp - # reset buffered io handle in case fp - # was passed to libtiff, invalidating the buffer - self.fp.tell() - while len(self._frame_pos) <= frame: if not self.__next: msg = "no more images in TIFF file" @@ -1303,10 +1299,6 @@ def load_end(self) -> None: if not self.is_animated: self._close_exclusive_fp_after_loading = True - # reset buffered io handle in case fp - # was passed to libtiff, invalidating the buffer - self.fp.tell() - # load IFD data from fp before it is closed exif = 
self.getexif() for key in TiffTags.TAGS_V2_GROUPS: @@ -1381,8 +1373,17 @@ def _load_libtiff(self) -> Image.core.PixelAccess | None: logger.debug("have fileno, calling fileno version of the decoder.") if not close_self_fp: self.fp.seek(0) + # Save and restore the file position, because libtiff will move it + # outside of the python runtime, and that will confuse + # io.BufferedReader and possible others. + # NOTE: This must use os.lseek(), and not fp.tell()/fp.seek(), + # because the buffer read head already may not equal the actual + # file position, and fp.seek() may just adjust its internal + # pointer and not actually seek the OS file handle. + pos = os.lseek(fp, 0, os.SEEK_CUR) # 4 bytes, otherwise the trace might error out n, err = decoder.decode(b"fpfp") + os.lseek(fp, pos, os.SEEK_SET) else: # we have something else. logger.debug("don't have fileno or getvalue. just reading") From 44cc0bebb8998897f03d77479df750f208d9d00e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Nov 2024 05:52:50 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- Tests/test_libtiff_file_position.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Tests/test_libtiff_file_position.py b/Tests/test_libtiff_file_position.py index d614ea8eca1..43ed82c30e1 100644 --- a/Tests/test_libtiff_file_position.py +++ b/Tests/test_libtiff_file_position.py @@ -1,23 +1,25 @@ from __future__ import annotations - import pytest from PIL import Image -@pytest.mark.parametrize('test_file', [ - 'Tests/images/old-style-jpeg-compression-no-samplesperpixel.tif', - 'Tests/images/old-style-jpeg-compression.tif', -]) +@pytest.mark.parametrize( + "test_file", + [ + "Tests/images/old-style-jpeg-compression-no-samplesperpixel.tif", + "Tests/images/old-style-jpeg-compression.tif", + ], +) def test_libtiff_exif_loading(test_file) -> 
None: # loading image before exif - im1 = Image.open(open(test_file, 'rb', buffering=1048576)) + im1 = Image.open(open(test_file, "rb", buffering=1048576)) im1.load() exif1 = dict(im1.getexif()) # loading exif before image - im2 = Image.open(open(test_file, 'rb', buffering=1048576)) + im2 = Image.open(open(test_file, "rb", buffering=1048576)) exif2 = dict(im2.getexif()) assert exif1 == exif2 From cb1653f6272ebc543530ae03cd7cf8577ed13231 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Wed, 20 Nov 2024 22:33:23 +1100 Subject: [PATCH 3/5] Updated comment --- src/PIL/TiffImagePlugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PIL/TiffImagePlugin.py b/src/PIL/TiffImagePlugin.py index 72628677278..08bb7891512 100644 --- a/src/PIL/TiffImagePlugin.py +++ b/src/PIL/TiffImagePlugin.py @@ -1374,7 +1374,7 @@ def _load_libtiff(self) -> Image.core.PixelAccess | None: if not close_self_fp: self.fp.seek(0) # Save and restore the file position, because libtiff will move it - # outside of the python runtime, and that will confuse + # outside of the Python runtime, and that will confuse # io.BufferedReader and possible others. 
# NOTE: This must use os.lseek(), and not fp.tell()/fp.seek(), # because the buffer read head already may not equal the actual From 925db4552658ac0d8c037b0ac0b39bd7eb96f8f9 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Wed, 20 Nov 2024 22:40:29 +1100 Subject: [PATCH 4/5] Moved test --- Tests/test_file_libtiff.py | 19 +++++++++++++++++++ Tests/test_libtiff_file_position.py | 25 ------------------------- 2 files changed, 19 insertions(+), 25 deletions(-) delete mode 100644 Tests/test_libtiff_file_position.py diff --git a/Tests/test_file_libtiff.py b/Tests/test_file_libtiff.py index 62f8719af53..9c49b1534ed 100644 --- a/Tests/test_file_libtiff.py +++ b/Tests/test_file_libtiff.py @@ -1098,6 +1098,25 @@ def test_exif_transpose(self) -> None: assert_image_similar(base_im, im, 0.7) + @pytest.mark.parametrize( + "test_file", + [ + "Tests/images/old-style-jpeg-compression-no-samplesperpixel.tif", + "Tests/images/old-style-jpeg-compression.tif", + ], + ) + def test_buffering(self, test_file: str) -> None: + # load exif first + with Image.open(open(test_file, "rb", buffering=1048576)) as im: + exif = dict(im.getexif()) + + # load image before exif + with Image.open(open(test_file, "rb", buffering=1048576)) as im2: + im2.load() + exif_after_load = dict(im2.getexif()) + + assert exif == exif_after_load + @pytest.mark.valgrind_known_error(reason="Backtrace in Python Core") def test_sampleformat_not_corrupted(self) -> None: # Assert that a TIFF image with SampleFormat=UINT tag is not corrupted diff --git a/Tests/test_libtiff_file_position.py b/Tests/test_libtiff_file_position.py deleted file mode 100644 index 43ed82c30e1..00000000000 --- a/Tests/test_libtiff_file_position.py +++ /dev/null @@ -1,25 +0,0 @@ -from __future__ import annotations - -import pytest - -from PIL import Image - - -@pytest.mark.parametrize( - "test_file", - [ - "Tests/images/old-style-jpeg-compression-no-samplesperpixel.tif", - "Tests/images/old-style-jpeg-compression.tif", - ], -) -def 
test_libtiff_exif_loading(test_file) -> None: - # loading image before exif - im1 = Image.open(open(test_file, "rb", buffering=1048576)) - im1.load() - exif1 = dict(im1.getexif()) - - # loading exif before image - im2 = Image.open(open(test_file, "rb", buffering=1048576)) - exif2 = dict(im2.getexif()) - - assert exif1 == exif2 From 82dfbc35b190a818a1cbdbee6524eaf31605b7bd Mon Sep 17 00:00:00 2001 From: Tom Flanagan Date: Wed, 20 Nov 2024 05:21:14 -0800 Subject: [PATCH 5/5] Update src/PIL/TiffImagePlugin.py Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com> --- src/PIL/TiffImagePlugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PIL/TiffImagePlugin.py b/src/PIL/TiffImagePlugin.py index 72628677278..08bb7891512 100644 --- a/src/PIL/TiffImagePlugin.py +++ b/src/PIL/TiffImagePlugin.py @@ -1374,7 +1374,7 @@ def _load_libtiff(self) -> Image.core.PixelAccess | None: if not close_self_fp: self.fp.seek(0) # Save and restore the file position, because libtiff will move it - # outside of the python runtime, and that will confuse + # outside of the Python runtime, and that will confuse # io.BufferedReader and possible others. # NOTE: This must use os.lseek(), and not fp.tell()/fp.seek(), # because the buffer read head already may not equal the actual