From b05e75699014d4feeeb12ede82478eba7bce91f6 Mon Sep 17 00:00:00 2001 From: Steven Maude Date: Fri, 21 Jul 2023 16:22:32 +0000 Subject: [PATCH 1/6] Fix build for poppler-21.11.0 See: * https://gitlab.freedesktop.org/poppler/poppler/-/commit/609992087a1e9ba85e24e76f59235b06149c7354 * https://gitlab.freedesktop.org/poppler/poppler/-/commit/94d9d102f77312d808b7b65112d0a530a7ca4a12 --- src/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 8887bec..bde6422 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -140,7 +140,7 @@ static std::string fmt(const Object &o, const UnicodeMap *uMap) { char buf[9]; Unicode *u; - auto len = TextStringToUCS4(s, &u); + auto len = TextStringToUCS4(s->toStr(), &u); std::string out; out.reserve(static_cast(len)); @@ -179,7 +179,7 @@ void dump_font_info(PDFDoc *doc) { packer.pack(fontTypeNames[font->getType()]); packer.pack("Encoding"); - packer.pack(font->getEncoding()->toStr()); + packer.pack(font->getEncoding()); packer.pack("Embedded"); packer.pack(font->getEmbedded()); From 07393c060e5a8fedff6cff02fb4878e39a34e295 Mon Sep 17 00:00:00 2001 From: Steven Maude Date: Fri, 21 Jul 2023 16:33:56 +0000 Subject: [PATCH 2/6] Fix build for poppler-21.12.0 --- src/main.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index bde6422..e75d6ba 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -517,8 +517,10 @@ void dump_document(PDFDoc *doc, const Options &options) { } BaseStream *open_file(const std::string filename) { - GooString goo_filename(filename.c_str()); - auto file = GooFile::open(&goo_filename); + // Original + // GooString goo_filename(filename.c_str()); + // auto file = GooFile::open(&goo_filename); + auto file = GooFile::open(filename); if (file == NULL) { std::cerr << "Failed to open " << filename << std::endl; exit(5); From cef3e95f428c74535448d7372b027bc9373a5b92 Mon Sep 17 00:00:00 2001 From: Steven Maude Date: Tue, 25 Oct 
2022 22:50:34 +0100 Subject: [PATCH 3/6] Use `c++17` standard Poppler changed to this as of 21.12.0. --- wscript | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wscript b/wscript index 3cadc0d..854107c 100644 --- a/wscript +++ b/wscript @@ -37,14 +37,14 @@ def options(opt): def configure(ctx): ctx.load('compiler_cxx') - ctx.check(features='cxx cxxprogram', cxxflags="--std=c++14") + ctx.check(features='cxx cxxprogram', cxxflags="--std=c++17") ctx.env.append_value("CXXFLAGS", [ "-g", "-Wall", "-Werror", "-ansi", - "--std=c++14", + "--std=c++17", "-DMSGPACK_NO_BOOST", ]) From 80455cf8fb3c4ada9cb5aefc3eafd8fb5da91b28 Mon Sep 17 00:00:00 2001 From: Steven Maude Date: Mon, 27 Nov 2023 11:13:39 +0000 Subject: [PATCH 4/6] Update Poppler to 22.03.0 This uses the sensiblecodeio fork with two reverted commits: * right-to-left text ordering * rotated text handling Both of these change outputs of pdf2msgpack substantially. They can be reviewed later to see if the pdf2msgpack code can be changed to retain this existing behaviour, while removing the reverts. 
--- vendor/anongit.freedesktop.org/git/poppler/poppler.git | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/anongit.freedesktop.org/git/poppler/poppler.git b/vendor/anongit.freedesktop.org/git/poppler/poppler.git index 2886ada..3654607 160000 --- a/vendor/anongit.freedesktop.org/git/poppler/poppler.git +++ b/vendor/anongit.freedesktop.org/git/poppler/poppler.git @@ -1 +1 @@ -Subproject commit 2886ada73fdb6cb02acaa95da747106d61a8aa2d +Subproject commit 3654607528517e511d42345d8cdfcf698ba8f004 From d65ef22f0af366d6cb94fe08c9d0685565278294 Mon Sep 17 00:00:00 2001 From: Steven Maude Date: Fri, 21 Jul 2023 16:59:51 +0000 Subject: [PATCH 5/6] Fix build for poppler-22.03.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `std::optional` for font name: https://gitlab.freedesktop.org/poppler/poppler/-/commit/996dfb015f5567cdaf191c127c2cf804f852d80b With C++17, `std::optional::value_or` returns the contained name or the given fallback, so we don't need the `?` if…then…else construct. `Catalog::embeddedFile`: https://gitlab.freedesktop.org/poppler/poppler/-/commit/beb5519f723cd84d8d4afa989118df5fffbe50fb?merge_request_iid=1064 --- src/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index e75d6ba..a8e5928 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -173,7 +173,7 @@ void dump_font_info(PDFDoc *doc) { packer.pack_map(6); packer.pack("Name"); - packer.pack(font->getName() ? 
font->getName()->toStr() : "[none]"); + packer.pack(font->getName().value_or("[none]")); packer.pack("Type"); packer.pack(fontTypeNames[font->getType()]); @@ -239,7 +239,7 @@ void dump_meta_xfa(Catalog *catalog, const UnicodeMap *uMap) { void dump_meta_embedded_files(Catalog *catalog) { packer.pack_array(catalog->numEmbeddedFiles()); for (int i = 0; i < catalog->numEmbeddedFiles(); i++) { - FileSpec *spec = catalog->embeddedFile(i); + std::unique_ptr spec = catalog->embeddedFile(i); EmbFile *file = spec->getEmbeddedFile(); packer.pack_array(6); From e7ebe4e4db61ab74c0104936ade0b875bf72d4b5 Mon Sep 17 00:00:00 2001 From: Steven Maude Date: Mon, 27 Nov 2023 15:02:38 +0000 Subject: [PATCH 6/6] Override Poppler column spacing The default value was changed in Poppler commit f20d9e5f739b7c8dce74ebc60a6dd1e06106c12e. It is overridable, so we should change it back to what it was for now, to keep outputs unchanged. --- src/main.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main.cpp b/src/main.cpp index a8e5928..0d110c5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -320,6 +320,9 @@ typedef std::unique_ptr TextPagePtr; TextPagePtr page_to_text_page(Page *page) { auto dev = std::make_unique(nullptr, true, 0, false, false); + // This value was changed in Poppler commit f20d9e5f739b7c8dce74ebc60a6dd1e06106c12e + // We retain the original value to keep outputs unchanged for now. + dev->setMinColSpacing1(0.3); auto gfx = std::unique_ptr( page->createGfx(dev.get(), 72.0, 72.0, 0, false, /* useMediaBox */