From 60aa62011505efeeaf36d8983188044e5134edbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Menu?= Date: Mon, 18 Nov 2024 15:18:40 +0100 Subject: [PATCH] Extract the table of contents from an LCP-protected PDF (#480) --- CHANGELOG.md | 13 +++- .../EPUB/EPUBNavigatorViewController.swift | 16 +++-- Sources/Shared/Publication/Publication.swift | 4 +- .../Services/Search/StringSearchService.swift | 2 +- .../TableOfContentsService.swift | 39 ++++++++++++ .../LCPDFTableOfContentsService.swift | 62 +++++++++++++++++++ .../Parser/Readium/ReadiumWebPubParser.swift | 1 + Support/Carthage/.xcodegen | 3 + .../Readium.xcodeproj/project.pbxproj | 16 +++++ .../Common/Outline/OutlineTableView.swift | 31 ++++++---- 10 files changed, 165 insertions(+), 22 deletions(-) create mode 100644 Sources/Shared/Publication/Services/Table Of Contents/TableOfContentsService.swift create mode 100644 Sources/Streamer/Parser/PDF/Services/LCPDFTableOfContentsService.swift diff --git a/CHANGELOG.md b/CHANGELOG.md index 17eea7dfe..e46081afb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,18 @@ All notable changes to this project will be documented in this file. Take a look **Warning:** Features marked as *alpha* may change or be removed in a future release without notice. Use with caution. - +## [Unreleased] + +### Added + +#### Shared + +* `TableOfContentsService` can now be used to customize the computation of `publication.tableOfContents()`. + +#### LCP + +* The table of contents of an LCP-protected PDF is now extracted directly from the PDF if the `tableOfContents` property in `manifest.json` is empty. + ## [3.0.0-alpha.3] diff --git a/Sources/Navigator/EPUB/EPUBNavigatorViewController.swift b/Sources/Navigator/EPUB/EPUBNavigatorViewController.swift index 249d2bd88..104ba6b65 100644 --- a/Sources/Navigator/EPUB/EPUBNavigatorViewController.swift +++ b/Sources/Navigator/EPUB/EPUBNavigatorViewController.swift @@ -449,7 +449,9 @@ open class EPUBNavigatorViewController: UIViewController, } /// Mapping between reading order hrefs and the table of contents title. - private lazy var tableOfContentsTitleByHref: [AnyURL: String] = { + private lazy var tableOfContentsTitleByHref = memoize(computeTableOfContentsTitleByHref) + + private func computeTableOfContentsTitleByHref() async -> [AnyURL: String] { func fulfill(linkList: [Link]) -> [AnyURL: String] { var result = [AnyURL: String]() @@ -465,8 +467,12 @@ open class EPUBNavigatorViewController: UIViewController, return result } - return fulfill(linkList: publication.tableOfContents) - }() + guard let toc = try? await publication.tableOfContents().get() else { + return [:] + } + + return fulfill(linkList: toc) + } /// Goes to the next or previous page in the given scroll direction. private func go(to direction: EPUBSpreadView.Direction, options: NavigatorGoOptions) async -> Bool { @@ -642,8 +648,8 @@ open class EPUBNavigatorViewController: UIViewController, { // Gets the current locator from the positionList, and fill its missing data. let positionIndex = Int(ceil(progression * Double(positionList.count - 1))) - return positionList[positionIndex].copy( - title: tableOfContentsTitleByHref[equivalent: href], + return await positionList[positionIndex].copy( + title: tableOfContentsTitleByHref()[equivalent: href], locations: { $0.progression = progression } ) } else { diff --git a/Sources/Shared/Publication/Publication.swift b/Sources/Shared/Publication/Publication.swift index cb2fa1485..0c91ff6ae 100644 --- a/Sources/Shared/Publication/Publication.swift +++ b/Sources/Shared/Publication/Publication.swift @@ -9,7 +9,7 @@ import Foundation /// Shared model for a Readium Publication. public class Publication: Closeable, Loggable { - private var manifest: Manifest + public var manifest: Manifest private let container: Container private let services: [PublicationService] @@ -20,8 +20,6 @@ public class Publication: Closeable, Loggable { public var readingOrder: [Link] { manifest.readingOrder } /// Identifies resources that are necessary for rendering the publication. public var resources: [Link] { manifest.resources } - /// Identifies the collection that contains a table of contents. - public var tableOfContents: [Link] { manifest.tableOfContents } public var subcollections: [String: [PublicationCollection]] { manifest.subcollections } public init( diff --git a/Sources/Shared/Publication/Services/Search/StringSearchService.swift b/Sources/Shared/Publication/Services/Search/StringSearchService.swift index c350bcaf2..ea626faa8 100644 --- a/Sources/Shared/Publication/Services/Search/StringSearchService.swift +++ b/Sources/Shared/Publication/Services/Search/StringSearchService.swift @@ -143,7 +143,7 @@ public class StringSearchService: SearchService { return [] } - let title = publication.tableOfContents.titleMatchingHREF(link.href) + let title = await publication.tableOfContents().getOrNil()?.titleMatchingHREF(link.href) resourceLocator = resourceLocator.copy( title: Optional(title ?? link.title) ) diff --git a/Sources/Shared/Publication/Services/Table Of Contents/TableOfContentsService.swift b/Sources/Shared/Publication/Services/Table Of Contents/TableOfContentsService.swift new file mode 100644 index 000000000..e2bc6342a --- /dev/null +++ b/Sources/Shared/Publication/Services/Table Of Contents/TableOfContentsService.swift @@ -0,0 +1,39 @@ +// +// Copyright 2024 Readium Foundation. All rights reserved. +// Use of this source code is governed by the BSD-style license +// available in the top-level LICENSE file of the project. +// + +import Foundation + +public typealias TableOfContentsServiceFactory = (PublicationServiceContext) -> TableOfContentsService? + +/// Returns or computes a table of contents for the publication. +public protocol TableOfContentsService: PublicationService { + func tableOfContents() async -> ReadResult<[Link]> +} + +// MARK: Publication Helpers + +public extension Publication { + /// Returns the table of contents for this publication. + func tableOfContents() async -> ReadResult<[Link]> { + if let service = findService(TableOfContentsService.self) { + return await service.tableOfContents() + } else { + return .success(manifest.tableOfContents) + } + } +} + +// MARK: PublicationServicesBuilder Helpers + +public extension PublicationServicesBuilder { + mutating func setTableOfContentsServiceFactory(_ factory: TableOfContentsServiceFactory?) { + if let factory = factory { + set(TableOfContentsService.self, factory) + } else { + remove(TableOfContentsService.self) + } + } +} diff --git a/Sources/Streamer/Parser/PDF/Services/LCPDFTableOfContentsService.swift b/Sources/Streamer/Parser/PDF/Services/LCPDFTableOfContentsService.swift new file mode 100644 index 000000000..d2ab388cd --- /dev/null +++ b/Sources/Streamer/Parser/PDF/Services/LCPDFTableOfContentsService.swift @@ -0,0 +1,62 @@ +// +// Copyright 2024 Readium Foundation. All rights reserved. +// Use of this source code is governed by the BSD-style license +// available in the top-level LICENSE file of the project. +// + +import Foundation +import ReadiumInternal +import ReadiumShared + +/// This ``TableOfContentsService`` will load the table of contents of the +/// single PDF resource in an LCPDF package, if the table of contents is missing +/// from the `manifest.json` file. +final class LCPDFTableOfContentsService: TableOfContentsService, PDFPublicationService, Loggable { + private let manifest: Manifest + private let container: Container + var pdfFactory: PDFDocumentFactory + + init( + manifest: Manifest, + container: Container, + pdfFactory: PDFDocumentFactory + ) { + self.manifest = manifest + self.container = container + self.pdfFactory = pdfFactory + } + + func tableOfContents() async -> ReadResult<[Link]> { + await _tableOfContents() + } + + private lazy var _tableOfContents = memoize(makeTableOfContents) + + private func makeTableOfContents() async -> ReadResult<[Link]> { + guard + manifest.tableOfContents.isEmpty, + manifest.readingOrder.count == 1, + let url = manifest.readingOrder.first?.url(), + let resource = container[url] + else { + return .success(manifest.tableOfContents) + } + + do { + let toc = try await pdfFactory.open(resource: resource, at: url, password: nil).tableOfContents() + return .success(toc.linksWithDocumentHREF(url)) + } catch { + return .failure(.decoding(error)) + } + } + + static func makeFactory(pdfFactory: PDFDocumentFactory) -> (PublicationServiceContext) -> LCPDFTableOfContentsService? { + { context in + LCPDFTableOfContentsService( + manifest: context.manifest, + container: context.container, + pdfFactory: pdfFactory + ) + } + } +} diff --git a/Sources/Streamer/Parser/Readium/ReadiumWebPubParser.swift b/Sources/Streamer/Parser/Readium/ReadiumWebPubParser.swift index 791126c63..92d181978 100644 --- a/Sources/Streamer/Parser/Readium/ReadiumWebPubParser.swift +++ b/Sources/Streamer/Parser/Readium/ReadiumWebPubParser.swift @@ -114,6 +114,7 @@ public class ReadiumWebPubParser: PublicationParser, Loggable { $0.setLocatorServiceFactory(AudioLocatorService.makeFactory()) } else if manifest.conforms(to: .pdf), format.conformsTo(.lcp), let pdfFactory = pdfFactory { + $0.setTableOfContentsServiceFactory(LCPDFTableOfContentsService.makeFactory(pdfFactory: pdfFactory)) $0.setPositionsServiceFactory(LCPDFPositionsService.makeFactory(pdfFactory: pdfFactory)) } diff --git a/Support/Carthage/.xcodegen b/Support/Carthage/.xcodegen index 3756e3f21..b852e6146 100644 --- a/Support/Carthage/.xcodegen +++ b/Support/Carthage/.xcodegen @@ -13841,6 +13841,8 @@ ../../Sources/Shared/Publication/Services/Search ../../Sources/Shared/Publication/Services/Search/SearchService.swift ../../Sources/Shared/Publication/Services/Search/StringSearchService.swift +../../Sources/Shared/Publication/Services/Table Of Contents +../../Sources/Shared/Publication/Services/Table Of Contents/TableOfContentsService.swift ../../Sources/Shared/Publication/Subject.swift ../../Sources/Shared/Publication/User Settings ../../Sources/Shared/Publication/User Settings/UserProperties.swift @@ -14011,6 +14013,7 @@ ../../Sources/Streamer/Parser/PDF/PDFParser.swift ../../Sources/Streamer/Parser/PDF/Services ../../Sources/Streamer/Parser/PDF/Services/LCPDFPositionsService.swift +../../Sources/Streamer/Parser/PDF/Services/LCPDFTableOfContentsService.swift ../../Sources/Streamer/Parser/PDF/Services/PDFPositionsService.swift ../../Sources/Streamer/Parser/PublicationParser.swift ../../Sources/Streamer/Parser/Readium diff --git a/Support/Carthage/Readium.xcodeproj/project.pbxproj b/Support/Carthage/Readium.xcodeproj/project.pbxproj index 00ee088e2..a98c86aec 100644 --- a/Support/Carthage/Readium.xcodeproj/project.pbxproj +++ b/Support/Carthage/Readium.xcodeproj/project.pbxproj @@ -24,6 +24,7 @@ 0A6BF62D6FE0C04DA8B8D3CA /* AnyURL.swift in Sources */ = {isa = PBXBuildFile; fileRef = AE350D88BC82408491D8B516 /* AnyURL.swift */; }; 0AF2BBF12939AFBF6173E333 /* Observable.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BC6AE42A31D77B548CB0BB4 /* Observable.swift */; }; 0B9AC6EF44DA518E9F37FB49 /* ContentService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 18E809378D79D09192A0AAE1 /* ContentService.swift */; }; + 0BFCDAEC82CFF09AFC53A5D0 /* LCPDFTableOfContentsService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 94414130EC3731CD9920F27D /* LCPDFTableOfContentsService.swift */; }; 0ECE94F27E005FC454EA9D12 /* DecorableNavigator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 626CFFF131E0E840B76428F1 /* DecorableNavigator.swift */; }; 0F1AAB56A6ADEDDE2AD7E41E /* Content.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1039900AC78465AD989D7464 /* Content.swift */; }; 1004CE1C72C85CC3702C09C0 /* Asset.swift in Sources */ = {isa = PBXBuildFile; fileRef = AC811653B33761089E270C4A /* Asset.swift */; }; @@ -314,6 +315,7 @@ C9DAA3C193FA36B843113EC6 /* HTTPContainer.swift in Sources */ = {isa = PBXBuildFile; fileRef = C4BFD453E8BF6FA24F340EE0 /* HTTPContainer.swift */; }; C9FBD23E459FB395377E149E /* ReadiumWebPubParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = E6E97CCA91F910315C260373 /* ReadiumWebPubParser.swift */; }; CAEBD6BA3F2F88E8752CB987 /* KeyEvent.swift in Sources */ = {isa = PBXBuildFile; fileRef = 422C1DA91ED351C9ABA139DF /* KeyEvent.swift */; }; + CB95F5EAA4D0DB5177FED4F7 /* TableOfContentsService.swift in Sources */ = {isa = PBXBuildFile; fileRef = C5BCDE636CED5B883CC5F2B4 /* TableOfContentsService.swift */; }; CC85122A71D3145940827338 /* Comparable.swift in Sources */ = {isa = PBXBuildFile; fileRef = F90C4D94134D9F741D38D8AA /* Comparable.swift */; }; CCAF8FB4DBD81448C99D589A /* Language.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BB152578CBA091A41A51B25 /* Language.swift */; }; CD0243B5EB8B408E34786214 /* ReadiumInternal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 42FD63C2720614E558522675 /* ReadiumInternal.framework */; }; @@ -688,6 +690,7 @@ 925CDE3176715EBEBF40B21F /* GeneratedCoverService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GeneratedCoverService.swift; sourceTree = ""; }; 93BF3947EBA8736BF20F36FB /* WebView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WebView.swift; sourceTree = ""; }; 9407E818636BEA4550E57F57 /* ReadiumNavigator.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = ReadiumNavigator.framework; sourceTree = BUILT_PRODUCTS_DIR; }; + 94414130EC3731CD9920F27D /* LCPDFTableOfContentsService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LCPDFTableOfContentsService.swift; sourceTree = ""; }; 9627A9AFF7C08010248E1700 /* Publication+Deprecated.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Publication+Deprecated.swift"; sourceTree = ""; }; 968B4EB4AD29DFA430C8A563 /* LicenseDocument.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LicenseDocument.swift; sourceTree = ""; }; 97BC822B36D72EF548162129 /* ReadiumShared.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = ReadiumShared.framework; sourceTree = BUILT_PRODUCTS_DIR; }; @@ -764,6 +767,7 @@ C51C74A5990A3BA93B3DC587 /* ZIPArchiveOpener.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ZIPArchiveOpener.swift; sourceTree = ""; }; C57EC6B0ADED2B0D395F2AEA /* ContentProtection.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentProtection.swift; sourceTree = ""; }; C59803AADFCF32C93C9D9D29 /* ExplodedArchive.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ExplodedArchive.swift; sourceTree = ""; }; + C5BCDE636CED5B883CC5F2B4 /* TableOfContentsService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TableOfContentsService.swift; sourceTree = ""; }; C5E7CEDF6EA681FE8119791B /* Feed.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Feed.swift; sourceTree = ""; }; C96FD34093B3C3E83827B70C /* FileSystemError.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FileSystemError.swift; sourceTree = ""; }; CAD79372361D085CA0500CF4 /* Properties+OPDS.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Properties+OPDS.swift"; sourceTree = ""; }; @@ -1085,6 +1089,7 @@ isa = PBXGroup; children = ( 47B9196192A22B8AB80E6B2F /* LCPDFPositionsService.swift */, + 94414130EC3731CD9920F27D /* LCPDFTableOfContentsService.swift */, D0C2A38D366CE8560BCBAC8B /* PDFPositionsService.swift */, ); path = Services; @@ -1247,6 +1252,14 @@ path = Toolkit; sourceTree = ""; }; + 402E67E11F98508D372AC2BA /* Table Of Contents */ = { + isa = PBXGroup; + children = ( + C5BCDE636CED5B883CC5F2B4 /* TableOfContentsService.swift */, + ); + path = "Table Of Contents"; + sourceTree = ""; + }; 40D18A37080F5B1D114CE2E1 /* Extensions */ = { isa = PBXGroup; children = ( @@ -1288,6 +1301,7 @@ 3118D7E15D685347720A0651 /* Locator */, 5BC52D8F4F854FDA56D10A8E /* Positions */, F818D082B369A3D4BE617D46 /* Search */, + 402E67E11F98508D372AC2BA /* Table Of Contents */, ); path = Services; sourceTree = ""; @@ -2391,6 +2405,7 @@ 914DEDFE5594761D3F180491 /* EPUBPositionsService.swift in Sources */, EF15E9163EBC82672B22F6E0 /* ImageParser.swift in Sources */, FCFFE5305127D9FC72549EAA /* LCPDFPositionsService.swift in Sources */, + 0BFCDAEC82CFF09AFC53A5D0 /* LCPDFTableOfContentsService.swift in Sources */, C1A94B2A9C446CB03650DC47 /* NCXParser.swift in Sources */, 01AD628D6DE82E1C1C4C281D /* NavigationDocumentParser.swift in Sources */, 2B8BC06B6B366E67C716DDA1 /* OPFMeta.swift in Sources */, @@ -2667,6 +2682,7 @@ 4DB4C10CB9AB5D38C56C1609 /* StringEncoding.swift in Sources */, E6AC10CCF9711168BE2BE85C /* StringSearchService.swift in Sources */, 3E9F244ACDA938D330B9EAEA /* Subject.swift in Sources */, + CB95F5EAA4D0DB5177FED4F7 /* TableOfContentsService.swift in Sources */, 96048047B4205636ABB66DC9 /* TextTokenizer.swift in Sources */, 40A44414CC911BF49BB5EE60 /* Tokenizer.swift in Sources */, 035807359AFA2EE23E00F8AB /* TransformingContainer.swift in Sources */, diff --git a/TestApp/Sources/Reader/Common/Outline/OutlineTableView.swift b/TestApp/Sources/Reader/Common/Outline/OutlineTableView.swift index 2bb14bced..44eae5996 100644 --- a/TestApp/Sources/Reader/Common/Outline/OutlineTableView.swift +++ b/TestApp/Sources/Reader/Common/Outline/OutlineTableView.swift @@ -25,28 +25,28 @@ struct OutlineTableView: View { @State private var selectedSection: OutlineSection = .tableOfContents // Outlines (list of links) to display for each section. - private var outlines: [OutlineSection: [(level: Int, link: ReadiumShared.Link)]] = [:] + @State private var outlines: [OutlineSection: [(level: Int, link: ReadiumShared.Link)]] = [:] init(publication: Publication, bookId: Book.Id, bookmarkRepository: BookmarkRepository, highlightRepository: HighlightRepository) { self.publication = publication bookmarksModel = BookmarksViewModel(bookId: bookId, repository: bookmarkRepository) highlightsModel = HighlightsViewModel(bookId: bookId, repository: highlightRepository) - func flatten(_ links: [ReadiumShared.Link], level: Int = 0) -> [(level: Int, link: ReadiumShared.Link)] { - links.flatMap { [(level, $0)] + flatten($0.children, level: level + 1) } - } - outlines = [ - .tableOfContents: flatten( - !publication.tableOfContents.isEmpty - ? publication.tableOfContents - : publication.readingOrder - ), + .tableOfContents: [], .landmarks: flatten(publication.landmarks), .pageList: flatten(publication.pageList), ] } + private func loadTableOfContents() async { + guard let toc = try? await publication.tableOfContents().get() else { + return + } + + outlines[.tableOfContents] = flatten(!toc.isEmpty ? toc : publication.readingOrder) + } + var body: some View { VStack { OutlineTablePicker(selectedSection: $selectedSection) @@ -67,8 +67,6 @@ struct OutlineTableView: View { } } } - } else { - preconditionFailure("Outline \(selectedSection) can't be nil!") } case .bookmarks: @@ -93,6 +91,11 @@ struct OutlineTableView: View { } } .frame(maxWidth: .infinity, maxHeight: .infinity, alignment: .top) + .onAppear { + Task { + await loadTableOfContents() + } + } } private let locatorSubject = PassthroughSubject() @@ -123,3 +126,7 @@ enum OutlineTableViewConstants { static let tabLandmarks = NSLocalizedString("reader_outline_tab_landmarks", comment: "Outline landmarks tab name") static let tabHighlights = NSLocalizedString("reader_outline_tab_highlights", comment: "Outline highlights tab name") } + +private func flatten(_ links: [ReadiumShared.Link], level: Int = 0) -> [(level: Int, link: ReadiumShared.Link)] { + links.flatMap { [(level, $0)] + flatten($0.children, level: level + 1) } +}