From 22237358fcfad18b1d3eba4a4706326fbc7861e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Menu?= Date: Wed, 18 Dec 2024 15:22:20 +0100 Subject: [PATCH] Add data source (#4) --- .../Archive+BackingConfiguration.swift | 85 ++++------ Sources/ZIPFoundation/Archive+Helpers.swift | 24 +-- Sources/ZIPFoundation/Archive+Reading.swift | 10 +- Sources/ZIPFoundation/Archive+Writing.swift | 65 ++++---- Sources/ZIPFoundation/Archive.swift | 104 ++++++------ Sources/ZIPFoundation/DataSource.swift | 47 ++++++ Sources/ZIPFoundation/FileDataSource.swift | 151 ++++++++++++++++++ ZIPFoundation.xcodeproj/project.pbxproj | 8 + 8 files changed, 342 insertions(+), 152 deletions(-) create mode 100644 Sources/ZIPFoundation/DataSource.swift create mode 100644 Sources/ZIPFoundation/FileDataSource.swift diff --git a/Sources/ZIPFoundation/Archive+BackingConfiguration.swift b/Sources/ZIPFoundation/Archive+BackingConfiguration.swift index 167cbd7c..2e432c87 100644 --- a/Sources/ZIPFoundation/Archive+BackingConfiguration.swift +++ b/Sources/ZIPFoundation/Archive+BackingConfiguration.swift @@ -13,27 +13,27 @@ import Foundation extension Archive { struct BackingConfiguration { - let file: FILEPointer + let dataSource: DataSource let endOfCentralDirectoryRecord: EndOfCentralDirectoryRecord let zip64EndOfCentralDirectory: ZIP64EndOfCentralDirectory? #if swift(>=5.0) let memoryFile: MemoryFile? - init(file: FILEPointer, + init(dataSource: DataSource, endOfCentralDirectoryRecord: EndOfCentralDirectoryRecord, zip64EndOfCentralDirectory: ZIP64EndOfCentralDirectory? = nil, memoryFile: MemoryFile? = nil) { - self.file = file + self.dataSource = dataSource self.endOfCentralDirectoryRecord = endOfCentralDirectoryRecord self.zip64EndOfCentralDirectory = zip64EndOfCentralDirectory self.memoryFile = memoryFile } #else - init(file: FILEPointer, + init(dataSource: DataSource, endOfCentralDirectoryRecord: EndOfCentralDirectoryRecord, zip64EndOfCentralDirectory: ZIP64EndOfCentralDirectory?) 
{ - self.file = file + self.dataSource = dataSource self.endOfCentralDirectoryRecord = endOfCentralDirectoryRecord self.zip64EndOfCentralDirectory = zip64EndOfCentralDirectory } @@ -42,19 +42,10 @@ extension Archive { static func makeBackingConfiguration(for url: URL, mode: AccessMode) throws -> BackingConfiguration { - let fileManager = FileManager() + let dataSource: DataSource switch mode { case .read: - let fileSystemRepresentation = fileManager.fileSystemRepresentation(withPath: url.path) - guard let archiveFile = fopen(fileSystemRepresentation, "rb") else { - throw POSIXError(errno, path: url.path) - } - guard let (eocdRecord, zip64EOCD) = Archive.scanForEndOfCentralDirectoryRecord(in: archiveFile) else { - throw ArchiveError.missingEndOfCentralDirectoryRecord - } - return BackingConfiguration(file: archiveFile, - endOfCentralDirectoryRecord: eocdRecord, - zip64EndOfCentralDirectory: zip64EOCD) + dataSource = try FileDataSource(url: url, mode: .read) case .create: let endOfCentralDirectoryRecord = EndOfCentralDirectoryRecord(numberOfDisk: 0, numberOfDiskStart: 0, totalNumberOfEntriesOnDisk: 0, @@ -66,18 +57,19 @@ extension Archive { try endOfCentralDirectoryRecord.data.write(to: url, options: .withoutOverwriting) fallthrough case .update: - let fileSystemRepresentation = fileManager.fileSystemRepresentation(withPath: url.path) - guard let archiveFile = fopen(fileSystemRepresentation, "rb+") else { - throw POSIXError(errno, path: url.path) - } - guard let (eocdRecord, zip64EOCD) = Archive.scanForEndOfCentralDirectoryRecord(in: archiveFile) else { - throw ArchiveError.missingEndOfCentralDirectoryRecord - } - fseeko(archiveFile, 0, SEEK_SET) - return BackingConfiguration(file: archiveFile, - endOfCentralDirectoryRecord: eocdRecord, - zip64EndOfCentralDirectory: zip64EOCD) + dataSource = try FileDataSource(url: url, mode: .write) } + + guard let (eocdRecord, zip64EOCD) = try Archive.scanForEndOfCentralDirectoryRecord(in: dataSource) else { + throw 
ArchiveError.missingEndOfCentralDirectoryRecord + } + try dataSource.seek(to: 0) + + return BackingConfiguration( + dataSource: dataSource, + endOfCentralDirectoryRecord: eocdRecord, + zip64EndOfCentralDirectory: zip64EOCD + ) } #if swift(>=5.0) @@ -93,18 +85,10 @@ extension Archive { guard let archiveFile = memoryFile.open(mode: posixMode) else { throw ArchiveError.unreadableArchive } + + let dataSource = FileDataSource(file: archiveFile) - switch mode { - case .read: - guard let (eocdRecord, zip64EOCD) = Archive.scanForEndOfCentralDirectoryRecord(in: archiveFile) else { - throw ArchiveError.missingEndOfCentralDirectoryRecord - } - - return BackingConfiguration(file: archiveFile, - endOfCentralDirectoryRecord: eocdRecord, - zip64EndOfCentralDirectory: zip64EOCD, - memoryFile: memoryFile) - case .create: + if mode == .create { let endOfCentralDirectoryRecord = EndOfCentralDirectoryRecord(numberOfDisk: 0, numberOfDiskStart: 0, totalNumberOfEntriesOnDisk: 0, totalNumberOfEntriesInCentralDirectory: 0, @@ -112,21 +96,18 @@ extension Archive { offsetToStartOfCentralDirectory: 0, zipFileCommentLength: 0, zipFileCommentData: Data()) - _ = endOfCentralDirectoryRecord.data.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) in - fwrite(buffer.baseAddress, buffer.count, 1, archiveFile) // Errors handled during read - } - fallthrough - case .update: - guard let (eocdRecord, zip64EOCD) = Archive.scanForEndOfCentralDirectoryRecord(in: archiveFile) else { - throw ArchiveError.missingEndOfCentralDirectoryRecord - } - - fseeko(archiveFile, 0, SEEK_SET) - return BackingConfiguration(file: archiveFile, - endOfCentralDirectoryRecord: eocdRecord, - zip64EndOfCentralDirectory: zip64EOCD, - memoryFile: memoryFile) + try dataSource.write(endOfCentralDirectoryRecord.data) } + + guard let (eocdRecord, zip64EOCD) = try Archive.scanForEndOfCentralDirectoryRecord(in: dataSource) else { + throw ArchiveError.missingEndOfCentralDirectoryRecord + } + + try dataSource.seek(to: 0) + return 
BackingConfiguration(dataSource: dataSource, + endOfCentralDirectoryRecord: eocdRecord, + zip64EndOfCentralDirectory: zip64EOCD, + memoryFile: memoryFile) } #endif } diff --git a/Sources/ZIPFoundation/Archive+Helpers.swift b/Sources/ZIPFoundation/Archive+Helpers.swift index 3b89e0e5..048cfadc 100644 --- a/Sources/ZIPFoundation/Archive+Helpers.swift +++ b/Sources/ZIPFoundation/Archive+Helpers.swift @@ -20,7 +20,7 @@ extension Archive { guard size <= .max else { throw ArchiveError.invalidEntrySize } return try Data.consumePart(of: Int64(size), chunkSize: bufferSize, skipCRC32: skipCRC32, provider: { (_, chunkSize) -> Data in - return try Data.readChunk(of: chunkSize, from: self.archiveFile) + return try self.dataSource.read(length: chunkSize) }, consumer: { (data) in if progress?.isCancelled == true { throw ArchiveError.cancelledOperation } try consumer(data) @@ -34,7 +34,7 @@ extension Archive { guard size <= .max else { throw ArchiveError.invalidEntrySize } return try Data.decompress(size: Int64(size), bufferSize: bufferSize, skipCRC32: skipCRC32, provider: { (_, chunkSize) -> Data in - return try Data.readChunk(of: chunkSize, from: self.archiveFile) + return try self.dataSource.read(length: chunkSize) }, consumer: { (data) in if progress?.isCancelled == true { throw ArchiveError.cancelledOperation } try consumer(data) @@ -110,7 +110,7 @@ extension Archive { fileNameLength: UInt16(fileNameData.count), extraFieldLength: extraFieldLength, fileNameData: fileNameData, extraFieldData: zip64ExtendedInformation?.data ?? Data()) - _ = try Data.write(chunk: localFileHeader.data, to: self.archiveFile) + try writableDataSource.write(localFileHeader.data) return localFileHeader } @@ -151,7 +151,7 @@ extension Archive { relativeOffset: relativeOffsetOfCD, extraField: (extraFieldLength, zip64ExtendedInformation?.data ?? 
Data())) - _ = try Data.write(chunk: centralDirectory.data, to: self.archiveFile) + try writableDataSource.write(centralDirectory.data) return centralDirectory } @@ -202,7 +202,7 @@ extension Archive { numberOfEntriesInCentralDirectory: numberOfTotalEntriesForEOCD, updatedSizeOfCentralDirectory: sizeOfCDForEOCD, startOfCentralDirectory: offsetOfCDForEOCD) - _ = try Data.write(chunk: record.data, to: self.archiveFile) + try writableDataSource.write(record.data) return (record, zip64EOCD) } @@ -216,7 +216,8 @@ extension Archive { let readSize = (size - position) >= bufferSize ? bufferSize : Int(size - position) let entryChunk = try provider(position, readSize) checksum = entryChunk.crc32(checksum: checksum) - sizeWritten += Int64(try Data.write(chunk: entryChunk, to: self.archiveFile)) + try writableDataSource.write(entryChunk) + sizeWritten += Int64(entryChunk.count) position += Int64(bufferSize) progress?.completedUnitCount = sizeWritten } @@ -226,7 +227,10 @@ extension Archive { func writeCompressed(size: Int64, bufferSize: Int, progress: Progress? = nil, provider: Provider) throws -> (sizeWritten: Int64, checksum: CRC32) { var sizeWritten: Int64 = 0 - let consumer: Consumer = { data in sizeWritten += Int64(try Data.write(chunk: data, to: self.archiveFile)) } + let consumer: Consumer = { data in + try self.writableDataSource.write(data) + sizeWritten += Int64(data.count) + } let checksum = try Data.compress(size: size, bufferSize: bufferSize, provider: { (position, size) -> Data in if progress?.isCancelled == true { throw ArchiveError.cancelledOperation } @@ -241,8 +245,8 @@ extension Archive { // The reported size of a symlink is the number of characters in the path it points to. 
let linkData = try provider(0, size) let checksum = linkData.crc32(checksum: 0) - let sizeWritten = try Data.write(chunk: linkData, to: self.archiveFile) - return (sizeWritten, checksum) + try writableDataSource.write(linkData) + return (linkData.count, checksum) } func writeZIP64EOCD(totalNumberOfEntries: UInt64, @@ -274,7 +278,7 @@ extension Archive { let updatedLocator = ZIP64EndOfCentralDirectoryLocator(locator: zip64EOCD.locator, offsetOfZIP64EOCDRecord: offsetOfEndOfCentralDirectory) zip64EOCD = ZIP64EndOfCentralDirectory(record: updatedRecord, locator: updatedLocator) - _ = try Data.write(chunk: zip64EOCD.data, to: self.archiveFile) + try writableDataSource.write(zip64EOCD.data) return zip64EOCD } } diff --git a/Sources/ZIPFoundation/Archive+Reading.swift b/Sources/ZIPFoundation/Archive+Reading.swift index ef58d22c..f8a84373 100644 --- a/Sources/ZIPFoundation/Archive+Reading.swift +++ b/Sources/ZIPFoundation/Archive+Reading.swift @@ -91,7 +91,7 @@ extension Archive { var checksum = CRC32(0) let localFileHeader = entry.localFileHeader guard entry.dataOffset <= .max else { throw ArchiveError.invalidLocalHeaderDataOffset } - fseeko(self.archiveFile, off_t(entry.dataOffset), SEEK_SET) + try dataSource.seek(to: entry.dataOffset) progress?.totalUnitCount = self.totalUnitCountForReading(entry) switch entry.type { case .file: @@ -110,7 +110,7 @@ extension Archive { case .symlink: let localFileHeader = entry.localFileHeader let size = Int(localFileHeader.compressedSize) - let data = try Data.readChunk(of: size, from: self.archiveFile) + let data = try dataSource.read(length: size) checksum = data.crc32(checksum: 0) try consumer(data) progress?.completedUnitCount = self.totalUnitCountForReading(entry) @@ -167,14 +167,14 @@ extension Archive { bufferSize: Int, consumer: Consumer ) throws { - fseeko(archiveFile, off_t(entry.dataOffset + range.lowerBound), SEEK_SET) + try dataSource.seek(to: entry.dataOffset + range.lowerBound) _ = try Data.consumePart( of: 
Int64(range.count), chunkSize: bufferSize, skipCRC32: true, provider: { pos, chunkSize -> Data in - try Data.readChunk(of: chunkSize, from: self.archiveFile) + try dataSource.read(length: chunkSize) }, consumer: consumer ) @@ -191,7 +191,7 @@ extension Archive { var bytesRead: UInt64 = 0 do { - fseeko(archiveFile, off_t(entry.dataOffset), SEEK_SET) + try dataSource.seek(to: entry.dataOffset) _ = try readCompressed( entry: entry, diff --git a/Sources/ZIPFoundation/Archive+Writing.swift b/Sources/ZIPFoundation/Archive+Writing.swift index 4274face..3c09fec8 100644 --- a/Sources/ZIPFoundation/Archive+Writing.swift +++ b/Sources/ZIPFoundation/Archive+Writing.swift @@ -117,20 +117,20 @@ extension Archive { modificationDate: Date = Date(), permissions: UInt16? = nil, compressionMethod: CompressionMethod = .none, bufferSize: Int = defaultWriteChunkSize, progress: Progress? = nil, provider: Provider) throws { - guard self.accessMode != .read else { throw ArchiveError.unwritableArchive } + guard self.accessMode != .read, let dataSource = dataSource as? WritableDataSource else { throw ArchiveError.unwritableArchive } // Directories and symlinks cannot be compressed let compressionMethod = type == .file ? compressionMethod : .none progress?.totalUnitCount = type == .directory ? 
defaultDirectoryUnitCount : uncompressedSize let (eocdRecord, zip64EOCD) = (self.endOfCentralDirectoryRecord, self.zip64EndOfCentralDirectory) guard self.offsetToStartOfCentralDirectory <= .max else { throw ArchiveError.invalidCentralDirectoryOffset } - var startOfCD = Int64(self.offsetToStartOfCentralDirectory) - fseeko(self.archiveFile, off_t(startOfCD), SEEK_SET) + var startOfCD = self.offsetToStartOfCentralDirectory + try dataSource.seek(to: startOfCD) let existingSize = self.sizeOfCentralDirectory - let existingData = try Data.readChunk(of: Int(existingSize), from: self.archiveFile) - fseeko(self.archiveFile, off_t(startOfCD), SEEK_SET) - let fileHeaderStart = Int64(ftello(self.archiveFile)) + let existingData = try dataSource.read(length: Int(existingSize)) + try dataSource.seek(to: startOfCD) + let fileHeaderStart = try dataSource.position() let modDateTime = modificationDate.fileModificationDateTime - defer { fflush(self.archiveFile) } + defer { try? dataSource.flush() } do { // Local File Header var localFileHeader = try self.writeLocalFileHeader(path: path, compressionMethod: compressionMethod, @@ -140,22 +140,22 @@ extension Archive { let (written, checksum) = try self.writeEntry(uncompressedSize: uncompressedSize, type: type, compressionMethod: compressionMethod, bufferSize: bufferSize, progress: progress, provider: provider) - startOfCD = Int64(ftello(self.archiveFile)) + startOfCD = try dataSource.position() // Write the local file header a second time. Now with compressedSize (if applicable) and a valid checksum. 
- fseeko(self.archiveFile, off_t(fileHeaderStart), SEEK_SET) + try dataSource.seek(to: fileHeaderStart) localFileHeader = try self.writeLocalFileHeader(path: path, compressionMethod: compressionMethod, size: (UInt64(uncompressedSize), UInt64(written)), checksum: checksum, modificationDateTime: modDateTime) // Central Directory - fseeko(self.archiveFile, off_t(startOfCD), SEEK_SET) - _ = try Data.writeLargeChunk(existingData, size: existingSize, bufferSize: bufferSize, to: archiveFile) + try dataSource.seek(to: startOfCD) + try dataSource.writeLargeChunk(existingData, size: existingSize, bufferSize: bufferSize) let permissions = permissions ?? (type == .directory ? defaultDirectoryPermissions : defaultFilePermissions) let externalAttributes = FileManager.externalFileAttributesForEntry(of: type, permissions: permissions) let centralDir = try self.writeCentralDirectoryStructure(localFileHeader: localFileHeader, relativeOffset: UInt64(fileHeaderStart), externalFileAttributes: externalAttributes) // End of Central Directory Record (including ZIP64 End of Central Directory Record/Locator) - let startOfEOCD = UInt64(ftello(self.archiveFile)) + let startOfEOCD = try dataSource.position() let eocd = try self.writeEndOfCentralDirectory(centralDirectoryStructure: centralDir, startOfCentralDirectory: UInt64(startOfCD), startOfEndOfCentralDirectory: startOfEOCD, operation: .add) @@ -174,7 +174,7 @@ extension Archive { /// - progress: A progress object that can be used to track or cancel the remove operation. /// - Throws: An error if the `Entry` is malformed or the receiver is not writable. public func remove(_ entry: Entry, bufferSize: Int = defaultReadChunkSize, progress: Progress? = nil) throws { - guard self.accessMode != .read else { throw ArchiveError.unwritableArchive } + guard self.accessMode != .read, let dataSource = dataSource as? 
WritableDataSource else { throw ArchiveError.unwritableArchive } let (tempArchive, tempDir) = try self.makeTempArchive() defer { tempDir.map { try? FileManager().removeItem(at: $0) } } progress?.totalUnitCount = self.totalUnitCountForRemoving(entry) @@ -184,13 +184,13 @@ extension Archive { let cds = currentEntry.centralDirectoryStructure if currentEntry != entry { let entryStart = cds.effectiveRelativeOffsetOfLocalHeader - fseeko(self.archiveFile, off_t(entryStart), SEEK_SET) + try dataSource.seek(to: entryStart) let provider: Provider = { (_, chunkSize) -> Data in - return try Data.readChunk(of: chunkSize, from: self.archiveFile) + try dataSource.read(length: chunkSize) } let consumer: Consumer = { if progress?.isCancelled == true { throw ArchiveError.cancelledOperation } - _ = try Data.write(chunk: $0, to: tempArchive.archiveFile) + try tempArchive.writableDataSource.write($0) progress?.completedUnitCount += Int64($0.count) } guard currentEntry.localSize <= .max else { throw ArchiveError.invalidLocalHeaderSize } @@ -201,9 +201,9 @@ extension Archive { centralDirectoryData.append(updatedCentralDirectory.data) } else { offset = currentEntry.localSize } } - let startOfCentralDirectory = UInt64(ftello(tempArchive.archiveFile)) - _ = try Data.write(chunk: centralDirectoryData, to: tempArchive.archiveFile) - let startOfEndOfCentralDirectory = UInt64(ftello(tempArchive.archiveFile)) + let startOfCentralDirectory = try tempArchive.dataSource.position() + try tempArchive.writableDataSource.write(centralDirectoryData) + let startOfEndOfCentralDirectory = try tempArchive.dataSource.position() tempArchive.endOfCentralDirectoryRecord = self.endOfCentralDirectoryRecord tempArchive.zip64EndOfCentralDirectory = self.zip64EndOfCentralDirectory let ecodStructure = try @@ -213,12 +213,13 @@ extension Archive { operation: .remove) (tempArchive.endOfCentralDirectoryRecord, tempArchive.zip64EndOfCentralDirectory) = ecodStructure (self.endOfCentralDirectoryRecord, 
self.zip64EndOfCentralDirectory) = ecodStructure - fflush(tempArchive.archiveFile) + try tempArchive.writableDataSource.flush() try self.replaceCurrentArchive(with: tempArchive) } func replaceCurrentArchive(with archive: Archive) throws { - fclose(self.archiveFile) + try dataSource.close() + if self.isMemoryArchive { #if swift(>=5.0) guard let data = archive.data else { @@ -226,7 +227,7 @@ extension Archive { } let config = try Archive.makeBackingConfiguration(for: data, mode: .update) - self.archiveFile = config.file + self.dataSource = config.dataSource self.memoryFile = config.memoryFile self.endOfCentralDirectoryRecord = config.endOfCentralDirectoryRecord self.zip64EndOfCentralDirectory = config.zip64EndOfCentralDirectory @@ -244,9 +245,7 @@ extension Archive { _ = try fileManager.removeItem(at: self.url) _ = try fileManager.moveItem(at: archive.url, to: self.url) #endif - let fileSystemRepresentation = fileManager.fileSystemRepresentation(withPath: self.url.path) - guard let file = fopen(fileSystemRepresentation, "rb+") else { throw ArchiveError.unreadableArchive } - self.archiveFile = file + self.dataSource = try FileDataSource(url: self.url, mode: .write) } } } @@ -268,16 +267,16 @@ private extension Archive { func rollback(_ localFileHeaderStart: UInt64, _ existingCentralDirectory: (data: Data, size: UInt64), _ bufferSize: Int, _ endOfCentralDirRecord: EndOfCentralDirectoryRecord, _ zip64EndOfCentralDirectory: ZIP64EndOfCentralDirectory?) 
throws { - fflush(self.archiveFile) - ftruncate(fileno(self.archiveFile), off_t(localFileHeaderStart)) - fseeko(self.archiveFile, off_t(localFileHeaderStart), SEEK_SET) - _ = try Data.writeLargeChunk(existingCentralDirectory.data, size: existingCentralDirectory.size, - bufferSize: bufferSize, to: archiveFile) - _ = try Data.write(chunk: existingCentralDirectory.data, to: self.archiveFile) + try writableDataSource.flush() + try writableDataSource.truncate(to: localFileHeaderStart) + try writableDataSource.seek(to: localFileHeaderStart) + try writableDataSource.writeLargeChunk(existingCentralDirectory.data, size: existingCentralDirectory.size, + bufferSize: bufferSize) + try writableDataSource.write(existingCentralDirectory.data) if let zip64EOCD = zip64EndOfCentralDirectory { - _ = try Data.write(chunk: zip64EOCD.data, to: self.archiveFile) + try writableDataSource.write(zip64EOCD.data) } - _ = try Data.write(chunk: endOfCentralDirRecord.data, to: self.archiveFile) + try writableDataSource.write(endOfCentralDirRecord.data) } func makeTempArchive() throws -> (Archive, URL?) { diff --git a/Sources/ZIPFoundation/Archive.swift b/Sources/ZIPFoundation/Archive.swift index 92a345ba..87c83688 100644 --- a/Sources/ZIPFoundation/Archive.swift +++ b/Sources/ZIPFoundation/Archive.swift @@ -20,7 +20,7 @@ public let defaultFilePermissions = UInt16(0o644) public let defaultDirectoryPermissions = UInt16(0o755) let defaultPOSIXBufferSize = defaultReadChunkSize let defaultDirectoryUnitCount = Int64(1) -let minEndOfCentralDirectoryOffset = Int64(22) +let minEndOfCentralDirectoryOffset = UInt64(22) let endOfCentralDirectoryStructSignature = 0x06054b50 let localFileHeaderStructSignature = 0x04034b50 let dataDescriptorStructSignature = 0x08074b50 @@ -133,11 +133,16 @@ public final class Archive: Sequence { public let url: URL /// Access mode for an archive file. 
public let accessMode: AccessMode - var archiveFile: FILEPointer + var dataSource: DataSource var endOfCentralDirectoryRecord: EndOfCentralDirectoryRecord var zip64EndOfCentralDirectory: ZIP64EndOfCentralDirectory? var pathEncoding: String.Encoding? + var writableDataSource: WritableDataSource { + precondition(accessMode != .read) + return dataSource as! WritableDataSource + } + var totalNumberOfEntriesInCentralDirectory: UInt64 { zip64EndOfCentralDirectory?.record.totalNumberOfEntriesInCentralDirectory ?? UInt64(endOfCentralDirectoryRecord.totalNumberOfEntriesInCentralDirectory) @@ -172,10 +177,9 @@ public final class Archive: Sequence { self.accessMode = mode self.pathEncoding = pathEncoding let config = try Archive.makeBackingConfiguration(for: url, mode: mode) - self.archiveFile = config.file + self.dataSource = config.dataSource self.endOfCentralDirectoryRecord = config.endOfCentralDirectoryRecord self.zip64EndOfCentralDirectory = config.zip64EndOfCentralDirectory - setvbuf(self.archiveFile, nil, _IOFBF, Int(defaultPOSIXBufferSize)) } #if swift(>=5.0) @@ -204,54 +208,52 @@ public final class Archive: Sequence { self.accessMode = mode self.pathEncoding = pathEncoding let config = try Archive.makeBackingConfiguration(for: data, mode: mode) - self.archiveFile = config.file + self.dataSource = config.dataSource self.memoryFile = config.memoryFile self.endOfCentralDirectoryRecord = config.endOfCentralDirectoryRecord self.zip64EndOfCentralDirectory = config.zip64EndOfCentralDirectory } #endif - deinit { - fclose(self.archiveFile) - } - public func makeIterator() -> AnyIterator { let totalNumberOfEntriesInCD = self.totalNumberOfEntriesInCentralDirectory var directoryIndex = self.offsetToStartOfCentralDirectory var index = 0 - return AnyIterator { + return AnyIterator { [self] in guard index < totalNumberOfEntriesInCD else { return nil } - guard let centralDirStruct: CentralDirectoryStructure = Data.readStruct(from: self.archiveFile, - at: directoryIndex) else { - 
return nil - } - let offset = UInt64(centralDirStruct.effectiveRelativeOffsetOfLocalHeader) - guard let localFileHeader: LocalFileHeader = Data.readStruct(from: self.archiveFile, - at: offset) else { return nil } - var dataDescriptor: DataDescriptor? - var zip64DataDescriptor: ZIP64DataDescriptor? - if centralDirStruct.usesDataDescriptor { - let additionalSize = UInt64(localFileHeader.fileNameLength) + UInt64(localFileHeader.extraFieldLength) - let isCompressed = centralDirStruct.compressionMethod != CompressionMethod.none.rawValue - let dataSize = isCompressed - ? centralDirStruct.effectiveCompressedSize - : centralDirStruct.effectiveUncompressedSize - let descriptorPosition = offset + UInt64(LocalFileHeader.size) + additionalSize + dataSize - if centralDirStruct.isZIP64 { - zip64DataDescriptor = Data.readStruct(from: self.archiveFile, at: descriptorPosition) - } else { - dataDescriptor = Data.readStruct(from: self.archiveFile, at: descriptorPosition) + do { + guard let centralDirStruct: CentralDirectoryStructure = try dataSource.readStruct(at: directoryIndex) else { + return nil } + let offset = UInt64(centralDirStruct.effectiveRelativeOffsetOfLocalHeader) + guard let localFileHeader: LocalFileHeader = try dataSource.readStruct(at: offset) else { return nil } + var dataDescriptor: DataDescriptor? + var zip64DataDescriptor: ZIP64DataDescriptor? + if centralDirStruct.usesDataDescriptor { + let additionalSize = UInt64(localFileHeader.fileNameLength) + UInt64(localFileHeader.extraFieldLength) + let isCompressed = centralDirStruct.compressionMethod != CompressionMethod.none.rawValue + let dataSize = isCompressed + ? 
centralDirStruct.effectiveCompressedSize + : centralDirStruct.effectiveUncompressedSize + let descriptorPosition = offset + UInt64(LocalFileHeader.size) + additionalSize + dataSize + if centralDirStruct.isZIP64 { + zip64DataDescriptor = try dataSource.readStruct(at: descriptorPosition) + } else { + dataDescriptor = try dataSource.readStruct(at: descriptorPosition) + } + } + defer { + directoryIndex += UInt64(CentralDirectoryStructure.size) + directoryIndex += UInt64(centralDirStruct.fileNameLength) + directoryIndex += UInt64(centralDirStruct.extraFieldLength) + directoryIndex += UInt64(centralDirStruct.fileCommentLength) + index += 1 + } + return Entry(centralDirectoryStructure: centralDirStruct, localFileHeader: localFileHeader, + dataDescriptor: dataDescriptor, zip64DataDescriptor: zip64DataDescriptor) + } catch { + return nil } - defer { - directoryIndex += UInt64(CentralDirectoryStructure.size) - directoryIndex += UInt64(centralDirStruct.fileNameLength) - directoryIndex += UInt64(centralDirStruct.extraFieldLength) - directoryIndex += UInt64(centralDirStruct.fileCommentLength) - index += 1 - } - return Entry(centralDirectoryStructure: centralDirStruct, localFileHeader: localFileHeader, - dataDescriptor: dataDescriptor, zip64DataDescriptor: zip64DataDescriptor) } } @@ -272,22 +274,20 @@ public final class Archive: Sequence { // MARK: - Helpers - static func scanForEndOfCentralDirectoryRecord(in file: FILEPointer) - -> EndOfCentralDirectoryStructure? { + static func scanForEndOfCentralDirectoryRecord(in dataSource: DataSource) + throws -> EndOfCentralDirectoryStructure? 
{ var eocdOffset: UInt64 = 0 var index = minEndOfCentralDirectoryOffset - fseeko(file, 0, SEEK_END) - let archiveLength = Int64(ftello(file)) + let archiveLength = try dataSource.length() while eocdOffset == 0 && index <= archiveLength { - fseeko(file, off_t(archiveLength - index), SEEK_SET) - var potentialDirectoryEndTag: UInt32 = UInt32() - fread(&potentialDirectoryEndTag, 1, MemoryLayout.size, file) + try dataSource.seek(to: archiveLength - index) + let potentialDirectoryEndTag = try dataSource.readInt() if potentialDirectoryEndTag == UInt32(endOfCentralDirectoryStructSignature) { eocdOffset = UInt64(archiveLength - index) - guard let eocd: EndOfCentralDirectoryRecord = Data.readStruct(from: file, at: eocdOffset) else { + guard let eocd: EndOfCentralDirectoryRecord = try dataSource.readStruct(at: eocdOffset) else { return nil } - let zip64EOCD = scanForZIP64EndOfCentralDirectory(in: file, eocdOffset: eocdOffset) + let zip64EOCD = try scanForZIP64EndOfCentralDirectory(in: dataSource, eocdOffset: eocdOffset) return (eocd, zip64EOCD) } index += 1 @@ -295,8 +295,8 @@ public final class Archive: Sequence { return nil } - private static func scanForZIP64EndOfCentralDirectory(in file: FILEPointer, eocdOffset: UInt64) - -> ZIP64EndOfCentralDirectory? { + private static func scanForZIP64EndOfCentralDirectory(in dataSource: DataSource, eocdOffset: UInt64) + throws -> ZIP64EndOfCentralDirectory? 
{ guard UInt64(ZIP64EndOfCentralDirectoryLocator.size) < eocdOffset else { return nil } @@ -306,8 +306,8 @@ public final class Archive: Sequence { return nil } let recordOffset = locatorOffset - UInt64(ZIP64EndOfCentralDirectoryRecord.size) - guard let locator: ZIP64EndOfCentralDirectoryLocator = Data.readStruct(from: file, at: locatorOffset), - let record: ZIP64EndOfCentralDirectoryRecord = Data.readStruct(from: file, at: recordOffset) else { + guard let locator: ZIP64EndOfCentralDirectoryLocator = try dataSource.readStruct(at: locatorOffset), + let record: ZIP64EndOfCentralDirectoryRecord = try dataSource.readStruct(at: recordOffset) else { return nil } return ZIP64EndOfCentralDirectory(record: record, locator: locator) diff --git a/Sources/ZIPFoundation/DataSource.swift b/Sources/ZIPFoundation/DataSource.swift new file mode 100644 index 00000000..bf62a005 --- /dev/null +++ b/Sources/ZIPFoundation/DataSource.swift @@ -0,0 +1,47 @@ +// +// DataSource.swift +// ZIPFoundation +// +// Created by Mickaël on 12/17/24. +// + +import Foundation + +/// A ``DataSource`` abstracts access to the ZIP data. +protocol DataSource { + + /// Gets the total length of the source, if known. + func length() throws -> UInt64 + + /// Gets the current offset position. + func position() throws -> UInt64 + + /// Moves to the given offset position. + func seek(to position: UInt64) throws + + /// Reads the requested `length` amount of data. + func read(length: Int) throws -> Data + + /// Reads a single `UInt32` from the data. + func readInt() throws -> UInt32 + + /// Reads a full serializable structure from the data. + func readStruct(at position: UInt64) throws -> T? + + /// Closes the underlying handles. + func close() throws +} + +protocol WritableDataSource: DataSource { + + /// Writes the given `data` at the current position. 
+ func write(_ data: Data) throws + + func writeLargeChunk(_ data: Data, size: UInt64, bufferSize: Int) throws + + /// Truncates the data source to the given `length`. + func truncate(to length: UInt64) throws + + /// Commits any pending writing operations to the data source. + func flush() throws +}
diff --git a/Sources/ZIPFoundation/FileDataSource.swift b/Sources/ZIPFoundation/FileDataSource.swift
new file mode 100644
index 00000000..5199aff8
--- /dev/null
+++ b/Sources/ZIPFoundation/FileDataSource.swift
@@ -0,0 +1,258 @@
+//
+//  FileDataSource.swift
+//  ZIPFoundation
+//
+//  Created by Mickaël on 12/17/24.
+//
+
+import Foundation
+
+/// A `DataSource` working with a ZIP file on the file system.
+///
+/// Wraps a C `FILE` stream. Failures are detected from the result of each libc
+/// call and reported with the `errno` read right after it: `ferror` only says
+/// that the stream error indicator is set, it is not an error code, and
+/// `fseeko`, `ftello`, `ftruncate` and `fclose` are not guaranteed to set that
+/// indicator at all.
+final class FileDataSource: WritableDataSource {
+
+    /// How the file is opened. The raw value is the mode string given to `fopen`.
+    enum AccessMode: String {
+        case read = "rb"
+        case write = "rb+"
+    }
+
+    let file: FILEPointer
+    var isClosed: Bool = false
+
+    /// Opens the file at `url` and positions the stream at its beginning.
+    ///
+    /// - Parameters:
+    ///   - url: A file URL; any other kind of URL is a programmer error.
+    ///   - mode: The `fopen` mode used to open the file.
+    /// - Throws: `POSIXError` if the file cannot be opened or rewound.
+    convenience init(url: URL, mode: AccessMode) throws {
+        precondition(url.isFileURL)
+
+        let fsRepr = FileManager.default.fileSystemRepresentation(withPath: url.path)
+        guard let file = fopen(fsRepr, mode.rawValue) else {
+            throw POSIXError(errno, path: url.path)
+        }
+
+        self.init(file: file)
+
+        // Full buffering is only an optimisation: if `setvbuf` fails the stream
+        // keeps its default buffer and stays usable, so the result is ignored.
+        _ = setvbuf(file, nil, _IOFBF, Int(defaultPOSIXBufferSize))
+
+        try seek(to: 0)
+    }
+
+    /// Wraps an already opened stream, which is closed by `close()` or when the
+    /// data source is deallocated.
+    init(file: FILEPointer) {
+        self.file = file
+    }
+
+    deinit {
+        // Errors cannot be reported from `deinit`; call `close()` explicitly to
+        // observe them.
+        try? close()
+    }
+
+    /// Returns the size of the file in bytes. The current position is restored
+    /// before returning.
+    func length() throws -> UInt64 {
+        let initialPosition = try position()
+        guard fseeko(file, 0, SEEK_END) == 0 else {
+            throw lastError()
+        }
+
+        let endPosition: UInt64
+        do {
+            endPosition = try position()
+        } catch {
+            // Best effort: do not leave the stream parked at the end.
+            try? seek(to: initialPosition)
+            throw error
+        }
+
+        try seek(to: initialPosition)
+        return endPosition
+    }
+
+    /// Returns the current offset from the beginning of the file.
+    func position() throws -> UInt64 {
+        let offset = ftello(file)
+        // `ftello` reports failure as -1, which must never reach `UInt64(_:)`.
+        guard offset >= 0 else {
+            throw lastError()
+        }
+        return UInt64(offset)
+    }
+
+    /// Moves the current offset to `position` bytes from the beginning of the file.
+    func seek(to position: UInt64) throws {
+        guard let offset = off_t(exactly: position) else {
+            throw POSIXError(.EOVERFLOW)
+        }
+        guard fseeko(file, offset, SEEK_SET) == 0 else {
+            throw lastError()
+        }
+    }
+
+    /// Reads up to `length` bytes from the current position.
+    ///
+    /// - Returns: The bytes read, fewer than `length` if the end of the file is
+    ///   reached first.
+    /// - Throws: `POSIXError` if the stream reports a read error.
+    func read(length: Int) throws -> Data {
+        precondition(length >= 0, "Cannot read a negative number of bytes")
+        guard length > 0 else {
+            return Data()
+        }
+
+        let alignment = MemoryLayout<UInt8>.alignment
+        let bytes = UnsafeMutableRawPointer.allocate(byteCount: length, alignment: alignment)
+        let bytesRead = fread(bytes, 1, length, file)
+        if bytesRead < length, ferror(file) != 0 {
+            // No `Data` owns the buffer yet: release it, but only after `errno`
+            // has been captured.
+            let error = lastError()
+            bytes.deallocate()
+            throw error
+        }
+
+        return Data(
+            bytesNoCopy: bytes,
+            count: bytesRead,
+            deallocator: .custom({ buf, _ in buf.deallocate() })
+        )
+    }
+
+    /// Reads the next four bytes as a `UInt32` in host byte order.
+    ///
+    /// Reaching the end of the file early is not an error: the bytes that could
+    /// not be read are left as zero.
+    func readInt() throws -> UInt32 {
+        var value: UInt32 = 0
+        let size = MemoryLayout<UInt32>.size
+        let bytesRead = fread(&value, 1, size, file)
+        if bytesRead < size, ferror(file) != 0 {
+            throw lastError()
+        }
+        return value
+    }
+
+    /// Reads a `T` starting at `position`.
+    ///
+    /// - Returns: The decoded value, or `nil` if `T` cannot be created from the
+    ///   bytes found there.
+    func readStruct<T>(at position: UInt64) throws -> T? where T : DataSerializable {
+        try seek(to: position)
+
+        return T(
+            data: try read(length: T.size),
+            additionalDataProvider: { additionalDataSize -> Data in
+                try read(length: additionalDataSize)
+            }
+        )
+    }
+
+    /// Writes all of `data` at the current position.
+    func write(_ data: Data) throws {
+        try data.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) -> Void in
+            guard let base = buffer.baseAddress, buffer.count > 0 else {
+                return
+            }
+            // A short write means the stream failed part-way through.
+            guard fwrite(base, 1, buffer.count, file) == buffer.count else {
+                throw lastError()
+            }
+        }
+    }
+
+    /// Writes the first `size` bytes of `data` in chunks of at most
+    /// `bufferSize` bytes.
+    ///
+    /// No more than `data.count` bytes are written, so a `size` larger than
+    /// `data` cannot make `fwrite` read past the end of the buffer.
+    ///
+    /// - Throws: `POSIXError` with `EINVAL` if there is something to write and
+    ///   `bufferSize` is not positive, or the error of the failed write.
+    func writeLargeChunk(_ data: Data, size: UInt64, bufferSize: Int) throws {
+        try data.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) -> Void in
+            // Clamp as `UInt64` so that the conversion to `Int` cannot trap.
+            let total = Int(Swift.min(size, UInt64(buffer.count)))
+            guard let base = buffer.baseAddress, total > 0 else {
+                return
+            }
+            // A chunk size of zero would never make progress.
+            guard bufferSize > 0 else {
+                throw POSIXError(.EINVAL)
+            }
+
+            var offset = 0
+            while offset < total {
+                let chunkSize = Swift.min(total - offset, bufferSize)
+                guard fwrite(base + offset, 1, chunkSize, file) == chunkSize else {
+                    throw lastError()
+                }
+                offset += chunkSize
+            }
+        }
+    }
+
+    /// Truncates the data source to the given `length`.
+    func truncate(to length: UInt64) throws {
+        guard let newLength = off_t(exactly: length) else {
+            throw POSIXError(.EOVERFLOW)
+        }
+        // `ftruncate` works on the file descriptor, below the stream buffer, so
+        // buffered writes have to reach the file first.
+        try flush()
+        guard ftruncate(fileno(file), newLength) == 0 else {
+            throw lastError()
+        }
+    }
+
+    /// Commits any pending writing operations to the data source.
+    func flush() throws {
+        // Some C libraries fail with `EBADF` when the stream is not open for
+        // writing; there is nothing to flush then. A failed write is told apart
+        // by the stream error indicator.
+        if fflush(file) != 0, ferror(file) != 0 {
+            throw lastError()
+        }
+    }
+
+    /// Closes the stream. Calling it again does nothing.
+    ///
+    /// - Throws: `POSIXError` if closing fails. The stream is released either
+    ///   way and must not be used afterwards.
+    func close() throws {
+        guard !isClosed else {
+            return
+        }
+        // `fclose` invalidates the stream even when it fails, so it must never
+        // be attempted twice (for example again from `deinit`).
+        isClosed = true
+        guard fclose(file) == 0 else {
+            throw lastError()
+        }
+    }
+
+    /// Builds the error for the libc call that has just failed.
+    ///
+    /// Call it before anything else can overwrite `errno`.
+    private func lastError() -> POSIXError {
+        let code = errno
+        if !isClosed {
+            // Reset the indicator so the same failure is not reported twice.
+            clearerr(file)
+        }
+        // `errno` may be 0 or a value unknown to `POSIXError.Code`; report a
+        // generic I/O error in that case.
+        return POSIXError(POSIXError.Code(rawValue: code) ?? .EIO)
+    }
+}
diff --git a/ZIPFoundation.xcodeproj/project.pbxproj b/ZIPFoundation.xcodeproj/project.pbxproj index 1711299d..f09e49e0 100644 --- a/ZIPFoundation.xcodeproj/project.pbxproj +++ b/ZIPFoundation.xcodeproj/project.pbxproj @@ -33,6 +33,8 @@ BACE20B826F7CE6C003BA312 /* Archive+BackingConfiguration.swift in Sources */ = {isa = PBXBuildFile; fileRef = BACE20B726F7CE6C003BA312 /* Archive+BackingConfiguration.swift */; }; BACE20BA26F7D18A003BA312 /* Archive+Helpers.swift in Sources */ = {isa = PBXBuildFile; fileRef = BACE20B926F7D18A003BA312 /* Archive+Helpers.swift */; }; BACE20BD26F7D545003BA312 /* Entry+Serialization.swift in Sources */ = {isa = PBXBuildFile; fileRef = BACE20BC26F7D545003BA312 /* Entry+Serialization.swift */; }; + CAF35E4D2D1187060042C460 /* DataSource.swift in Sources */ = {isa = PBXBuildFile; fileRef = CAF35E4B2D1187040042C460 /* DataSource.swift */; }; + CAF35E502D11A4290042C460 /* FileDataSource.swift in Sources */ = {isa = PBXBuildFile; fileRef = CAF35E4E2D11A4260042C460 /* FileDataSource.swift */; }; OBJ_33 /* ZIPFoundationDataSerializationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = OBJ_18 /* ZIPFoundationDataSerializationTests.swift */; }; OBJ_34 /* ZIPFoundationEntryTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = OBJ_19 /* ZIPFoundationEntryTests.swift */; }; OBJ_35 /* ZIPFoundationFileManagerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = OBJ_20 /* ZIPFoundationFileManagerTests.swift */; }; @@ -100,6 +102,8 @@ BACE20B726F7CE6C003BA312 /* Archive+BackingConfiguration.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Archive+BackingConfiguration.swift"; sourceTree = ""; }; BACE20B926F7D18A003BA312 /* Archive+Helpers.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Archive+Helpers.swift"; sourceTree = ""; }; BACE20BC26F7D545003BA312 /* Entry+Serialization.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path
= "Entry+Serialization.swift"; sourceTree = ""; }; + CAF35E4B2D1187040042C460 /* DataSource.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DataSource.swift; sourceTree = ""; }; + CAF35E4E2D11A4260042C460 /* FileDataSource.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FileDataSource.swift; sourceTree = ""; }; OBJ_10 /* Archive+Writing.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Archive+Writing.swift"; sourceTree = ""; }; OBJ_11 /* Archive.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Archive.swift; sourceTree = ""; }; OBJ_12 /* Data+Compression.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Data+Compression.swift"; sourceTree = ""; }; @@ -218,6 +222,8 @@ 95B5A3D32366731B00D4D8FD /* Archive+MemoryFile.swift */, BA643D7B2648131C00018273 /* Archive+Progress.swift */, OBJ_9 /* Archive+Reading.swift */, + CAF35E4B2D1187040042C460 /* DataSource.swift */, + CAF35E4E2D11A4260042C460 /* FileDataSource.swift */, BA0CE5072746B369004D8DD4 /* Archive+ReadingDeprecated.swift */, OBJ_10 /* Archive+Writing.swift */, BA0CE5032746AF0C004D8DD4 /* Archive+WritingDeprecated.swift */, @@ -375,7 +381,9 @@ BA0CE5042746AF0C004D8DD4 /* Archive+WritingDeprecated.swift in Sources */, OBJ_54 /* FileManager+ZIP.swift in Sources */, BA643D7C2648131C00018273 /* Archive+Progress.swift in Sources */, + CAF35E502D11A4290042C460 /* FileDataSource.swift in Sources */, BA0CE5082746B369004D8DD4 /* Archive+ReadingDeprecated.swift in Sources */, + CAF35E4D2D1187060042C460 /* DataSource.swift in Sources */, BACE20BD26F7D545003BA312 /* Entry+Serialization.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0;