From f8ba1f5631b00bb66be1f10f0111fe4ebfd33399 Mon Sep 17 00:00:00 2001 From: Paul LeMarquand Date: Thu, 30 Jan 2025 09:18:09 -0500 Subject: [PATCH] Handle paginated registry metadata responses (#8219) In [4.1 List Package Releases](https://github.com/swiftlang/swift-package-manager/blob/main/Documentation/PackageRegistry/Registry.md#41-list-package-releases) it is stated that a server may respond with a `Link` header that contains a pointer to a subsequent page of results. SwiftPM is not checking for the `next` link in the `Link` header and so if a registry returns paginated results, only the first page of versions is searched when resolving. This would result in a "version not found" error when in reality the version is present in a subsequent page of results. Respect the `next` link in the `Link` header by loading the next page of results and building up a list of versions, continuing until there is no `next` link present in the `Link` header of the last result. Registry servers that serve paginated results now have all their results read. Issue: #8215 --- Sources/PackageMetadata/PackageMetadata.swift | 2 +- Sources/PackageRegistry/RegistryClient.swift | 329 ++++++++++++------ .../RegistryClientTests.swift | 195 ++++++++++- 3 files changed, 421 insertions(+), 105 deletions(-) diff --git a/Sources/PackageMetadata/PackageMetadata.swift b/Sources/PackageMetadata/PackageMetadata.swift index 7d5dac640ee..fb1c6253147 100644 --- a/Sources/PackageMetadata/PackageMetadata.swift +++ b/Sources/PackageMetadata/PackageMetadata.swift @@ -352,7 +352,7 @@ public struct PackageSearchClient { ) { result in do { let metadata = try result.get() - let alternateLocations = metadata.alternateLocations ?? [] + let alternateLocations = metadata.alternateLocations return completion(.success(Set(alternateLocations))) } catch { return completion(.failure(error)) diff --git a/Sources/PackageRegistry/RegistryClient.swift b/Sources/PackageRegistry/RegistryClient.swift index 838af6af76f..74f4b8bc6bd 100644 --- a/Sources/PackageRegistry/RegistryClient.swift +++ b/Sources/PackageRegistry/RegistryClient.swift @@ -151,129 +151,183 @@ public final class RegistryClient: Cancellable { observabilityScope: ObservabilityScope, callbackQueue: DispatchQueue ) async throws -> PackageMetadata { - try await withCheckedThrowingContinuation { continuation in - self.getPackageMetadata( - package: package, - timeout: timeout, - observabilityScope: observabilityScope, - callbackQueue: callbackQueue, - completion: { - continuation.resume(with: $0) - } - ) - } - } - - @available(*, noasync, message: "Use the async alternative") - public func getPackageMetadata( - package: PackageIdentity, - timeout: DispatchTimeInterval? = .none, - observabilityScope: ObservabilityScope, - callbackQueue: DispatchQueue, - completion: @escaping (Result) -> Void - ) { - let completion = self.makeAsync(completion, on: callbackQueue) - guard let registryIdentity = package.registry else { - return completion(.failure(RegistryError.invalidPackageIdentity(package))) + throw RegistryError.invalidPackageIdentity(package) } guard let registry = self.configuration.registry(for: registryIdentity.scope) else { - return completion(.failure(RegistryError.registryNotConfigured(scope: registryIdentity.scope))) + throw RegistryError.registryNotConfigured(scope: registryIdentity.scope) } - observabilityScope.emit(debug: "registry for \(package): \(registry)") - let underlying = { - self._getPackageMetadata( + try await self._getPackageMetadata( registry: registry, package: registryIdentity, timeout: timeout, observabilityScope: observabilityScope, - callbackQueue: callbackQueue, - completion: completion + callbackQueue: callbackQueue ) } if registry.supportsAvailability { - self.withAvailabilityCheck( + // The only value that will return is .available, otherwise this check throws an error. + if case .available = try await self.withAvailabilityCheck( registry: registry, observabilityScope: observabilityScope, callbackQueue: callbackQueue - ) { error in - if let error { - return completion(.failure(error)) - } - underlying() + ) { + return try await underlying() } - } else { - underlying() } + return try await underlying() } - // marked internal for testing - func _getPackageMetadata( - registry: Registry, - package: PackageIdentity.RegistryIdentity, - timeout: DispatchTimeInterval?, + @available(*, noasync, message: "Use the async alternative") + public func getPackageMetadata( + package: PackageIdentity, + timeout: DispatchTimeInterval? = .none, observabilityScope: ObservabilityScope, callbackQueue: DispatchQueue, completion: @escaping (Result) -> Void ) { let completion = self.makeAsync(completion, on: callbackQueue) + _ = Task { + do { + let result = try await self.getPackageMetadata( + package: package, + timeout: timeout, + observabilityScope: observabilityScope, + callbackQueue: callbackQueue + ) + completion(.success(result)) + } catch { + completion(.failure(error)) + } + } + } + // marked internal for testing + func _getPackageMetadata( + registry: Registry, + package: PackageIdentity.RegistryIdentity, + timeout: DispatchTimeInterval?, + observabilityScope: ObservabilityScope, + callbackQueue: DispatchQueue + ) async throws -> PackageMetadata { guard var components = URLComponents(url: registry.url, resolvingAgainstBaseURL: true) else { - return completion(.failure(RegistryError.invalidURL(registry.url))) + throw RegistryError.invalidURL(registry.url) } components.appendPathComponents("\(package.scope)", "\(package.name)") guard let url = components.url else { - return completion(.failure(RegistryError.invalidURL(registry.url))) + throw RegistryError.invalidURL(registry.url) } - let request = LegacyHTTPClient.Request( - method: .get, + // If the responses are paginated then iterate until we've exasuasted all the pages and have a full versions list. + func iterateResponses(url: URL, existingMetadata: PackageMetadata) async throws -> PackageMetadata { + try Task.checkCancellation() + + let metadata = try await self._getIndividualPackageMetadata( + url: url, + registry: registry, + package: package, + timeout: timeout, + observabilityScope: observabilityScope, + callbackQueue: callbackQueue + ) + + let mergedMetadata = PackageMetadata( + registry: registry, + versions: existingMetadata.versions + metadata.versions, + alternateLocations: existingMetadata.alternateLocations.count > 0 + ? existingMetadata.alternateLocations + : metadata.alternateLocations, + nextPage: metadata.nextPage + ) + if let nextPage = mergedMetadata.nextPage?.url { + return try await iterateResponses( + url: nextPage, + existingMetadata: mergedMetadata + ) + } else { + return PackageMetadata( + registry: registry, + versions: mergedMetadata.versions.sorted(by: >), + alternateLocations: mergedMetadata.alternateLocations, + nextPage: mergedMetadata.nextPage + ) + } + } + + return try await iterateResponses( url: url, - headers: [ - "Accept": self.acceptHeader(mediaType: .json), - ], - options: self.defaultRequestOptions(timeout: timeout, callbackQueue: callbackQueue) + existingMetadata: PackageMetadata( + registry: registry, + versions: [], + alternateLocations: [], + nextPage: nil + ) ) + } + private func _getIndividualPackageMetadata( + url: URL, + registry: Registry, + package: PackageIdentity.RegistryIdentity, + timeout: DispatchTimeInterval?, + observabilityScope: ObservabilityScope, + callbackQueue: DispatchQueue + ) async throws -> PackageMetadata { let start = DispatchTime.now() - observabilityScope.emit(info: "retrieving \(package) metadata from \(request.url)") - self.httpClient.execute(request, observabilityScope: observabilityScope, progress: nil) { result in - completion( - result.tryMap { response in - observabilityScope - .emit( - debug: "server response for \(request.url): \(response.statusCode) in \(start.distance(to: .now()).descriptionInSeconds)" - ) - switch response.statusCode { - case 200: - let packageMetadata = try response.parseJSON( - Serialization.PackageMetadata.self, - decoder: self.jsonDecoder - ) + observabilityScope.emit(info: "retrieving \(package) metadata from \(url)") + + let response: LegacyHTTPClient.Response + do { + response = try await self.httpClient.get( + url, + headers: [ + "Accept": self.acceptHeader(mediaType: .json), + ], + options: self.defaultRequestOptions(timeout: timeout, callbackQueue: callbackQueue), + observabilityScope: observabilityScope + ) + } catch let error where !(error is _Concurrency.CancellationError) { + throw RegistryError.failedRetrievingReleases(registry: registry, package: package.underlying, error: error) + } + observabilityScope + .emit( + debug: "server response for \(url): \(response.statusCode) in \(start.distance(to: .now()).descriptionInSeconds)" + ) - let versions = packageMetadata.releases.filter { $0.value.problem == nil } - .compactMap { Version($0.key) } - .sorted(by: >) + switch response.statusCode { + case 200: + let packageMetadata = try response.parseJSON( + Serialization.PackageMetadata.self, + decoder: self.jsonDecoder + ) - let alternateLocations = try response.headers.parseAlternativeLocationLinks() + let versions = packageMetadata.releases.filter { $0.value.problem == nil } + .compactMap { Version($0.key) } - return PackageMetadata( - registry: registry, - versions: versions, - alternateLocations: alternateLocations?.map(\.url) - ) - case 404: - throw RegistryError.packageNotFound - default: - throw self.unexpectedStatusError(response, expectedStatus: [200, 404]) - } - }.mapError { - RegistryError.failedRetrievingReleases(registry: registry, package: package.underlying, error: $0) - } + let alternateLocations = response.headers.parseAlternativeLocationLinks() + let paginationLinks = response.headers.parsePaginationLinks() + + return PackageMetadata( + registry: registry, + versions: versions, + alternateLocations: alternateLocations.map(\.url), + nextPage: paginationLinks.first { $0.kind == .next }?.url + ) + case 404: + throw RegistryError.failedRetrievingReleases( + registry: registry, + package: package.underlying, + error: RegistryError.packageNotFound + ) + default: + throw RegistryError.failedRetrievingReleases( + registry: registry, + package: package.underlying, + error: self.unexpectedStatusError(response, expectedStatus: [200, 404]) ) } } @@ -1767,6 +1821,28 @@ public final class RegistryClient: Cancellable { } } + private func withAvailabilityCheck( + registry: Registry, + observabilityScope: ObservabilityScope, + callbackQueue: DispatchQueue + ) async throws -> AvailabilityStatus { + try await withCheckedThrowingContinuation { continuation in + self.withAvailabilityCheck( + registry: registry, + observabilityScope: observabilityScope, + callbackQueue: callbackQueue, + next: { + if let error = $0 { + continuation.resume(throwing: error) + } else { + continuation.resume(returning: .available) + } + } + ) + } + } + + @available(*, noasync, message: "Use the async alternative") private func withAvailabilityCheck( registry: Registry, observabilityScope: ObservabilityScope, @@ -2071,7 +2147,8 @@ extension RegistryClient { public struct PackageMetadata { public let registry: Registry public let versions: [Version] - public let alternateLocations: [SourceControlURL]? + public let alternateLocations: [SourceControlURL] + public let nextPage: SourceControlURL? } public struct PackageVersionMetadata: Sendable { @@ -2142,6 +2219,17 @@ extension RegistryClient { case alternate } } + + fileprivate struct NextLocationLink { + let url: SourceControlURL + let kind: Kind + + enum Kind: String { + // Currently we only care about `next` for pagination, but there are several other values: + // https://github.com/swiftlang/swift-package-manager/blob/0340bb12a56f9696b3966ad82c2aee1594135377/Documentation/PackageRegistry/Registry.md?plain=1#L403-L411 + case next + } + } } extension RegistryClient { @@ -2263,20 +2351,16 @@ extension HTTPClientResponse { } extension HTTPClientHeaders { - /* - ; rel="canonical", - ; rel="alternate", - */ - fileprivate func parseAlternativeLocationLinks() throws -> [RegistryClient.AlternativeLocationLink]? { - try self.get("Link").map { header -> [RegistryClient.AlternativeLocationLink] in + fileprivate func parseLink(_ factory: (String) throws -> T?) rethrows -> [T] { + return try self.get("Link").map { header -> [T] in let linkLines = header.split(separator: ",").map(String.init).map { $0.spm_chuzzle() ?? $0 } return try linkLines.compactMap { linkLine in - try parseAlternativeLocationLine(linkLine) + try factory(linkLine) } }.flatMap { $0 } } - private func parseAlternativeLocationLine(_ value: String) throws -> RegistryClient.AlternativeLocationLink? { + fileprivate func parseLocationLine(_ value: String, _ factory: (String, String) -> T?) -> T? { let fields = value.split(separator: ";") .map(String.init) .map { $0.spm_chuzzle() ?? $0 } @@ -2290,16 +2374,60 @@ extension HTTPClientHeaders { return nil } - guard let rel = fields.first(where: { $0.hasPrefix("rel=") }).flatMap({ parseLinkFieldValue($0) }), - let kind = RegistryClient.AlternativeLocationLink.Kind(rawValue: rel) + guard let rel = fields.first(where: { $0.hasPrefix("rel=") }).flatMap({ parseLinkFieldValue($0) }) else { return nil } - return RegistryClient.AlternativeLocationLink( - url: SourceControlURL(link), - kind: kind - ) + return factory(link, rel) + } +} + +extension HTTPClientHeaders { + /* + https://github.com/swiftlang/swift-package-manager/blob/0340bb12a56f9696b3966ad82c2aee1594135377/Documentation/PackageRegistry/Registry.md?plain=1#L395C1-L401C39 + ; rel="canonical", + ; rel="alternate", + */ + fileprivate func parseAlternativeLocationLinks() -> [RegistryClient.AlternativeLocationLink] { + self.parseLink(self.parseAlternativeLocationLine(_:)) + } + + private func parseAlternativeLocationLine(_ value: String) -> RegistryClient.AlternativeLocationLink? { + return parseLocationLine(value) { link, rel in + guard let kind = RegistryClient.AlternativeLocationLink.Kind(rawValue: rel) else { + return nil + } + + return RegistryClient.AlternativeLocationLink( + url: SourceControlURL(link), + kind: kind + ) + } + } +} + +extension HTTPClientHeaders { + /* + https://github.com/swiftlang/swift-package-manager/blob/0340bb12a56f9696b3966ad82c2aee1594135377/Documentation/PackageRegistry/Registry.md?plain=1#L403-L411 + ; rel="next", + ; rel="last", + */ + fileprivate func parsePaginationLinks() -> [RegistryClient.NextLocationLink] { + self.parseLink(self.parsePaginationLine(_:)) + } + + private func parsePaginationLine(_ value: String) -> RegistryClient.NextLocationLink? { + return parseLocationLine(value) { link, rel in + guard let kind = RegistryClient.NextLocationLink.Kind(rawValue: rel) else { + return nil + } + + return RegistryClient.NextLocationLink( + url: SourceControlURL(link), + kind: kind + ) + } } } @@ -2308,12 +2436,7 @@ extension HTTPClientHeaders { ; rel="alternate"; filename="Package@swift-4.swift"; swift-tools-version="4.0" */ fileprivate func parseManifestLinks() throws -> [RegistryClient.ManifestLink] { - try self.get("Link").map { header -> [RegistryClient.ManifestLink] in - let linkLines = header.split(separator: ",").map(String.init).map { $0.spm_chuzzle() ?? $0 } - return try linkLines.compactMap { linkLine in - try parseManifestLinkLine(linkLine) - } - }.flatMap { $0 } + try self.parseLink(self.parseManifestLinkLine(_:)) } private func parseManifestLinkLine(_ value: String) throws -> RegistryClient.ManifestLink? { diff --git a/Tests/PackageRegistryTests/RegistryClientTests.swift b/Tests/PackageRegistryTests/RegistryClientTests.swift index 8d350b89c4d..14ef5c0f473 100644 --- a/Tests/PackageRegistryTests/RegistryClientTests.swift +++ b/Tests/PackageRegistryTests/RegistryClientTests.swift @@ -88,12 +88,205 @@ final class RegistryClientTests: XCTestCase { let registryClient = makeRegistryClient(configuration: configuration, httpClient: httpClient) let metadata = try await registryClient.getPackageMetadata(package: identity) XCTAssertEqual(metadata.versions, ["1.1.1", "1.0.0"]) - XCTAssertEqual(metadata.alternateLocations!, [ + XCTAssertEqual(metadata.alternateLocations, [ SourceControlURL("https://github.com/mona/LinkedList"), SourceControlURL("ssh://git@github.com:mona/LinkedList.git"), SourceControlURL("git@github.com:mona/LinkedList.git"), SourceControlURL("https://gitlab.com/mona/LinkedList"), ]) + + let metadataSync = try await withCheckedThrowingContinuation { continuation in + return registryClient.getPackageMetadata( + package: identity, + timeout: nil, + observabilityScope: ObservabilitySystem.NOOP, + callbackQueue: .sharedConcurrent, + completion: { continuation.resume(with: $0) } + ) + } + XCTAssertEqual(metadataSync.versions, ["1.1.1", "1.0.0"]) + XCTAssertEqual(metadataSync.alternateLocations, [ + SourceControlURL("https://github.com/mona/LinkedList"), + SourceControlURL("ssh://git@github.com:mona/LinkedList.git"), + SourceControlURL("git@github.com:mona/LinkedList.git"), + SourceControlURL("https://gitlab.com/mona/LinkedList"), + ]) + } + + func testGetPackageMetadataPaginated() async throws { + let registryURL = URL("https://packages.example.com") + let identity = PackageIdentity.plain("mona.LinkedList") + let releasesURL = URL("\(registryURL)/\(identity.registry!.scope)/\(identity.registry!.name)") + let releasesURLPage2 = URL("\(registryURL)/\(identity.registry!.scope)/\(identity.registry!.name)?page=2") + + let handler: LegacyHTTPClient.Handler = { request, _, completion in + guard case .get = request.method else { + return completion(.failure(StringError("method should be `get`"))) + } + + XCTAssertEqual(request.headers.get("Accept").first, "application/vnd.swift.registry.v1+json") + let links: String + let data: Data + switch request.url { + case releasesURL: + data = #""" + { + "releases": { + "1.1.1": { + "url": "https://packages.example.com/mona/LinkedList/1.1.1" + }, + "1.1.0": { + "url": "https://packages.example.com/mona/LinkedList/1.1.0", + "problem": { + "status": 410, + "title": "Gone", + "detail": "this release was removed from the registry" + } + } + } + } + """#.data(using: .utf8)! + + links = """ + ; rel="canonical", + ; rel="alternate", + ; rel="alternate", + ; rel="alternate", + <\(releasesURLPage2)>; rel="next" + """ + case releasesURLPage2: + data = #""" + { + "releases": { + "1.0.0": { + "url": "https://packages.example.com/mona/LinkedList/1.0.0" + } + } + } + """#.data(using: .utf8)! + + links = """ + ; rel="canonical", + ; rel="alternate", + ; rel="alternate", + ; rel="alternate" + """ + default: + return completion(.failure(StringError("method and url should match"))) + } + + completion(.success(.init( + statusCode: 200, + headers: .init([ + .init(name: "Content-Length", value: "\(data.count)"), + .init(name: "Content-Type", value: "application/json"), + .init(name: "Content-Version", value: "1"), + .init(name: "Link", value: links), + ]), + body: data + ))) + } + + let httpClient = LegacyHTTPClient(handler: handler) + httpClient.configuration.circuitBreakerStrategy = .none + httpClient.configuration.retryStrategy = .none + + var configuration = RegistryConfiguration() + configuration.defaultRegistry = Registry(url: registryURL, supportsAvailability: false) + + let registryClient = makeRegistryClient(configuration: configuration, httpClient: httpClient) + let metadata = try await registryClient.getPackageMetadata(package: identity) + XCTAssertEqual(metadata.versions, ["1.1.1", "1.0.0"]) + XCTAssertEqual(metadata.alternateLocations, [ + SourceControlURL("https://github.com/mona/LinkedList"), + SourceControlURL("ssh://git@github.com:mona/LinkedList.git"), + SourceControlURL("git@github.com:mona/LinkedList.git"), + SourceControlURL("https://gitlab.com/mona/LinkedList"), + ]) + } + + func testGetPackageMetadataPaginated_Cancellation() async throws { + let registryURL = URL("https://packages.example.com") + let identity = PackageIdentity.plain("mona.LinkedList") + let releasesURL = URL("\(registryURL)/\(identity.registry!.scope)/\(identity.registry!.name)") + let releasesURLPage2 = URL("\(registryURL)/\(identity.registry!.scope)/\(identity.registry!.name)?page=2") + + var task: Task? = nil + let handler: LegacyHTTPClient.Handler = { request, _, completion in + guard case .get = request.method else { + return completion(.failure(StringError("method should be `get`"))) + } + + XCTAssertEqual(request.headers.get("Accept").first, "application/vnd.swift.registry.v1+json") + let links: String + let data: Data + switch request.url { + case releasesURLPage2: + // Cancel during the second iteration + task?.cancel() + fallthrough + case releasesURL: + data = #""" + { + "releases": { + "1.1.1": { + "url": "https://packages.example.com/mona/LinkedList/1.1.1" + }, + "1.1.0": { + "url": "https://packages.example.com/mona/LinkedList/1.1.0", + "problem": { + "status": 410, + "title": "Gone", + "detail": "this release was removed from the registry" + } + } + } + } + """#.data(using: .utf8)! + + links = """ + ; rel="canonical", + ; rel="alternate", + ; rel="alternate", + ; rel="alternate", + <\(releasesURLPage2)>; rel="next" + """ + default: + return completion(.failure(StringError("method and url should match"))) + } + + completion(.success(.init( + statusCode: 200, + headers: .init([ + .init(name: "Content-Length", value: "\(data.count)"), + .init(name: "Content-Type", value: "application/json"), + .init(name: "Content-Version", value: "1"), + .init(name: "Link", value: links), + ]), + body: data + ))) + } + + let httpClient = LegacyHTTPClient(handler: handler) + httpClient.configuration.circuitBreakerStrategy = .none + httpClient.configuration.retryStrategy = .none + + var configuration = RegistryConfiguration() + configuration.defaultRegistry = Registry(url: registryURL, supportsAvailability: false) + + let registryClient = makeRegistryClient(configuration: configuration, httpClient: httpClient) + task = Task { + do { + _ = try await registryClient.getPackageMetadata(package: identity) + XCTFail("Task completed without being cancelled") + } catch let error where error is _Concurrency.CancellationError { + // OK + } catch { + XCTFail("Task failed with unexpected error: \(error)") + } + } + + try await task?.value } func testGetPackageMetadata_NotFound() async throws {