From f9fcb5816ab6def0920b25787341342bc88803e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Thu, 6 Jun 2024 09:49:16 +0200 Subject: [PATCH] Add ZipStreamWriter to stream-write zip archives on PHP 7.2+ (#103) This pull request introduces the `ZipStreamWriter` class, enabling efficient file streaming and compression when writing ZIP archives. Key features include: 1. **Streaming File into ZIP**: Implements a method to stream a file from disk directly into a ZIP archive without loading the entire file into memory. 2. **Handling Central Directory**: Implements a method to write the central directory entries and the end-of-central-directory record, finalizing the ZIP archive. 3. **Deflate Compression**: Supports optional deflate compression for files being added to the ZIP archive. Closes https://github.com/WordPress/blueprints-library/issues/88 ### Major Changes 1. **`writeFileFromPath` Method**: - When deflate compression is enabled, reads the source file from disk in two passes: - First pass: Computes CRC32, uncompressed size, and compressed size without buffering the entire file. - Second pass: Streams the file's compressed data directly into the ZIP archive. - Supports deflate compression using `deflate_add`. 2. **`finish` Method** (delegates to the private `flushCentralDirectory`): - Collects and writes central directory entries to the ZIP stream. - Writes the end-of-central-directory record to finalize the ZIP structure. 
### Example Usage ```php use WordPress\Zip\ZipStreamWriter; // File paths $sourcePathOnDisk = '/path/to/source/file.txt'; $targetPathInZip = 'archive/file.txt'; // Create a file pointer for the output ZIP file $zipFilePointer = fopen('output.zip', 'wb'); // Instantiate the ZipStreamWriter $zipWriter = new ZipStreamWriter($zipFilePointer); // Write a file from the filesystem into the ZIP archive (target path in the ZIP first, then the source path on disk) $zipWriter->writeFileFromPath($targetPathInZip, $sourcePathOnDisk, true); // Use 'false' for no compression // Finalize the ZIP file by writing the central directory $zipWriter->finish(); fclose($zipFilePointer); ``` ### How to Test 1. Clone the repository and check out the branch with these changes. 2. Ensure PHPUnit and the PHP `zip` extension are installed (the tests verify the generated archives with `ZipArchive`). 3. Run the test suite using the command: ```sh vendor/bin/phpunit ``` 4. Verify that all tests pass, indicating the functionality works as expected. ### Notes - This implementation aims to handle large files efficiently by streaming data in chunks. - The `finish` method should be called once all file entries have been written to ensure the ZIP archive is finalized correctly. Feel free to provide any feedback or request further modifications as needed. 
--------- Co-authored-by: Michael Reichardt <30837295+reimic@users.noreply.github.com> --- .github/workflows/phpunit-tests-run.yml | 1 + .github/workflows/phpunit-tests.yml | 2 +- .../Zip/ZipCentralDirectoryEntry.php | 14 +- src/WordPress/Zip/ZipFileEntry.php | 19 +- src/WordPress/Zip/ZipStreamWriter.php | 424 ++++++++++++++++++ tests/unit/zip/ZipStreamWriterTest.php | 110 +++++ 6 files changed, 565 insertions(+), 5 deletions(-) create mode 100644 src/WordPress/Zip/ZipStreamWriter.php create mode 100644 tests/unit/zip/ZipStreamWriterTest.php diff --git a/.github/workflows/phpunit-tests-run.yml b/.github/workflows/phpunit-tests-run.yml index b993d84f..11878a4d 100644 --- a/.github/workflows/phpunit-tests-run.yml +++ b/.github/workflows/phpunit-tests-run.yml @@ -36,6 +36,7 @@ jobs: with: php-version: '${{ inputs.php }}' tools: phpunit-polyfills + extensions: zip - name: Install Composer dependencies uses: ramsey/composer-install@v3 diff --git a/.github/workflows/phpunit-tests.yml b/.github/workflows/phpunit-tests.yml index 2fee4b6b..4e3c3d4e 100644 --- a/.github/workflows/phpunit-tests.yml +++ b/.github/workflows/phpunit-tests.yml @@ -17,7 +17,7 @@ jobs: fail-fast: false matrix: os: [ ubuntu-latest, macos-latest, windows-latest ] - php: [ '7.0', '7.1', '7.2', '7.3', '7.4', '8.0', '8.1', '8.2', '8.3' ] + php: [ '7.2', '7.3', '7.4', '8.0', '8.1', '8.2', '8.3' ] with: os: ${{ matrix.os }} diff --git a/src/WordPress/Zip/ZipCentralDirectoryEntry.php b/src/WordPress/Zip/ZipCentralDirectoryEntry.php index 1747e95c..c14629e9 100644 --- a/src/WordPress/Zip/ZipCentralDirectoryEntry.php +++ b/src/WordPress/Zip/ZipCentralDirectoryEntry.php @@ -4,6 +4,8 @@ class ZipCentralDirectoryEntry { + const HEADER_SIZE = 46; + public $isDirectory; public $firstByteAt; public $versionCreated; @@ -18,7 +20,6 @@ class ZipCentralDirectoryEntry { public $diskNumber; public $internalAttributes; public $externalAttributes; - public $lastByteAt; public $path; public $extra; public $fileComment; @@ 
-37,7 +38,6 @@ public function __construct( int $internalAttributes, int $externalAttributes, int $firstByteAt, - int $lastByteAt, string $path, string $extra, string $fileComment @@ -45,7 +45,6 @@ public function __construct( $this->fileComment = $fileComment; $this->extra = $extra; $this->path = $path; - $this->lastByteAt = $lastByteAt; $this->externalAttributes = $externalAttributes; $this->internalAttributes = $internalAttributes; $this->diskNumber = $diskNumber; @@ -65,4 +64,13 @@ public function __construct( public function isFileEntry() { return false; } + + public function size() { + return ( + self::HEADER_SIZE + + strlen($this->path) + + strlen($this->extra) + + strlen($this->fileComment) + ); + } } diff --git a/src/WordPress/Zip/ZipFileEntry.php b/src/WordPress/Zip/ZipFileEntry.php index e8b8ff32..fd09c6e8 100644 --- a/src/WordPress/Zip/ZipFileEntry.php +++ b/src/WordPress/Zip/ZipFileEntry.php @@ -3,6 +3,14 @@ namespace WordPress\Zip; class ZipFileEntry { + + /** + * The size of the ZIP file entry header in bytes. + * + * @var int + */ + const HEADER_SIZE = 30; + /** * @var bool */ @@ -58,7 +66,7 @@ public function __construct( int $compressionMethod, int $lastModifiedTime, int $lastModifiedDate, - int $crc, + $crc, int $compressedSize, int $uncompressedSize, string $path, @@ -82,4 +90,13 @@ public function __construct( public function isFileEntry() { return true; } + + public function size() { + return ( + self::HEADER_SIZE + + strlen($this->path) + + strlen($this->extra) + + $this->compressedSize + ); + } } diff --git a/src/WordPress/Zip/ZipStreamWriter.php b/src/WordPress/Zip/ZipStreamWriter.php new file mode 100644 index 00000000..848d8cc1 --- /dev/null +++ b/src/WordPress/Zip/ZipStreamWriter.php @@ -0,0 +1,424 @@ +fp = $output_stream; + } + + /** + * Streams a file from disk and writes it into a ZIP archive. 
+ * + * This method reads the source file from the given path, computes necessary + * metadata (CRC32 checksum, uncompressed size, and compressed size using Deflate), + * and then writes the appropriate file entry header and data into the ZIP archive + * stream. The file data is read and compressed in two passes: first to compute + * the CRC32 and sizes, and second to write the actual compressed data. + * + * @param string $sourcePathOnDisk The filesystem path to the source file to be included in the ZIP archive. + * @param string $targetPathInZip The desired path (including filename) of the file within the ZIP archive. + * @return number The number of bytes written to the ZIP archive stream. + * + * @note This function is designed to handle large files without loading them entirely + * into memory. It reads and compresses the file in chunks, making it suitable for streaming + * large files effectively. + */ + public function writeFileFromPath($targetPathInZip, $sourcePathOnDisk, $should_deflate = true) { + $uncompressedSize = 0; + $compressedSize = 0; + if (!$should_deflate) { + $uncompressedSize = filesize($sourcePathOnDisk); + // Create the ZipFileEntry object + $entry = new ZipFileEntry( + 2, // Version needed to extract (minimum) + 0, // General purpose bit flag + 0, // Compression method (0 = none) + filemtime($sourcePathOnDisk) >> 16, // File last modification time + filemtime($sourcePathOnDisk) & 0xFFFF, // File last modification date + hexdec(hash_file('crc32b', $sourcePathOnDisk)), // CRC-32 + $uncompressedSize, // Uncompressed size + $uncompressedSize, // Compressed size + $targetPathInZip, // File name + '', // Extra field + '' // Not buffering bytes into memory + ); + + // Write the file entry header + static::writeFileEntry($this->fp, $entry); + $fileResource = fopen($sourcePathOnDisk, 'rb'); + stream_copy_to_stream($fileResource, $this->fp, $uncompressedSize); + fclose($fileResource); + $this->recordFileForCentralDirectory($entry); + 
$this->bytes_written += $entry->size(); + return $entry->size(); + } + + // Open the source file for reading + $fileResource = fopen($sourcePathOnDisk, 'rb'); + if (!$fileResource) { + error_log("Could not open file: $sourcePathOnDisk"); + return -1; + } + + // Initialize variables for first pass + $hashContext = hash_init('crc32b'); + if( false === $hashContext ) { + error_log("Failed to initialize hash context"); + fclose($fileResource); + return -1; + } + + $deflateContext = deflate_init(ZLIB_ENCODING_RAW); + if(false === $deflateContext) { + error_log("Failed to initialize deflate context"); + fclose($fileResource); + return -1; + } + + // First pass: Calculate the CRC32, uncompressed size, and compressed size + while (!feof($fileResource)) { + $buffer = fread($fileResource, self::BUFFER_SIZE); + if( false === $buffer ) { + error_log("Failed to read file"); + fclose($fileResource); + return -1; + } + $uncompressedSize += strlen($buffer); + hash_update($hashContext, $buffer); + $compressedSize += strlen(deflate_add($deflateContext, $buffer, ZLIB_SYNC_FLUSH)); + } + + $compressedSize += strlen(deflate_add($deflateContext, '', ZLIB_FINISH)); + $crc = hexdec(hash_final($hashContext)); + + // Create the ZipFileEntry object + $entry = new ZipFileEntry( + 2, // Version needed to extract (minimum) + 0, // General purpose bit flag + 8, // Compression method (8 = deflate) + filemtime($sourcePathOnDisk) >> 16, // File last modification time + filemtime($sourcePathOnDisk) & 0xFFFF, // File last modification date + $crc, // CRC-32 + $compressedSize, // Compressed size + $uncompressedSize, // Uncompressed size + $targetPathInZip, // File name + '', // Extra field + '' // Not buffering bytes into memory + ); + + // Write the file entry header + static::writeFileEntry($this->fp, $entry); + + // Second pass: Stream write the compressed data + if(false === rewind($fileResource)) { + error_log("Failed to rewind file"); + fclose($fileResource); + return -1; + } + + $deflateContext 
= deflate_init(ZLIB_ENCODING_RAW); + if(false === $deflateContext) { + error_log("Failed to initialize deflate context"); + fclose($fileResource); + return -1; + } + while (!feof($fileResource)) { + $buffer = fread($fileResource, self::BUFFER_SIZE); + if(false === $buffer) { + error_log("Failed to read file"); + fclose($fileResource); + return -1; + } + + $compressedData = deflate_add($deflateContext, $buffer, ZLIB_SYNC_FLUSH); + if( false === $compressedData ) { + error_log("Failed to compress data"); + fclose($fileResource); + return -1; + } + + if(false === fwrite($this->fp, $compressedData)) { + error_log("Failed to write compressed data"); + fclose($fileResource); + return -1; + } + } + + $compressedData = deflate_add($deflateContext, '', ZLIB_FINISH); + if( false === $compressedData ) { + error_log("Failed to compress data"); + fclose($fileResource); + return -1; + } + + if(false === fwrite($this->fp, $compressedData)) { + error_log("Failed to write compressed data"); + fclose($fileResource); + return -1; + } + + // Close the source file + if(false === fclose($fileResource)) { + error_log("Failed to close file"); + return -1; + } + + $this->recordFileForCentralDirectory($entry); + $this->bytes_written += $entry->size(); + return true; + } + + public function writeFileFromString($targetPathInZip, $data, $should_deflate = true) + { + if ($should_deflate) { + $compressed_data = gzdeflate($data); + } else { + $compressed_data = $data; + } + + // Create the ZipFileEntry object + $entry = new ZipFileEntry( + 2, // Version needed to extract (minimum) + 0, // General purpose bit flag + $should_deflate ? 
8 : 0, // Compression method (8 = deflate, 0 = none) + time() >> 16, // File last modification time + time() & 0xFFFF, // File last modification date + hexdec(hash('crc32b', $data)), // CRC-32 + strlen($compressed_data), // Uncompressed size + strlen($data), // Uncompressed size + $targetPathInZip, // File name + '', // Extra field + $compressed_data // Buffering bytes into memory + ); + + // Write the file entry header + static::writeFileEntry($this->fp, $entry); + $this->recordFileForCentralDirectory($entry); + $this->bytes_written += $entry->size(); + return $entry->size(); + } + + private function recordFileForCentralDirectory(ZipFileEntry $file_entry) { + $this->centralDirectory[] = new ZipCentralDirectoryEntry( + 2, // Version made by + 2, // Version needed to extract + $file_entry->generalPurpose, // General purpose bit flag + $file_entry->compressionMethod, // Compression method (none) + $file_entry->lastModifiedTime, // File last modification time + $file_entry->lastModifiedDate, // File last modification date + $file_entry->crc, // CRC-32 + $file_entry->compressedSize, // Compressed size + $file_entry->uncompressedSize, // Uncompressed size + 0, // Disk number where file starts + 0, // Internal file attributes + 0, // External file attributes + $this->bytes_written, // First byte at + $file_entry->path, // Path + '', // Extra field + '' // File comment + ); + } + + public function finish() + { + $this->flushCentralDirectory(); + } + + /** + * Writes the central directory and its end record to the ZIP archive stream. + * + * This method writes all the central directory entries stored and then writes + * the end of central directory record, finalizing the ZIP archive structure. 
+ */ + private function flushCentralDirectory() { + $fp = $this->fp; + $centralDirectoryOffset = $this->bytes_written; + + // Write all central directory entries + foreach ($this->centralDirectory as $entry) { + static::writeCentralDirectoryEntry($fp, $entry); + $this->bytes_written += $entry->size(); + } + + // Create and write the end of central directory record + $endEntry = new ZipEndCentralDirectoryEntry( + 0, // $diskNumber + 0, // $centralDirectoryStartDisk + count($this->centralDirectory), // $numberCentralDirectoryRecordsOnThisDisk + count($this->centralDirectory), // $numberCentralDirectoryRecords + $this->bytes_written - $centralDirectoryOffset, // $centralDirectorySize + $centralDirectoryOffset, // $centralDirectoryOffset + '' // $comment + ); + + static::writeEndCentralDirectoryEntry($fp, $endEntry); + } + + + /** + * Writes the next zip entry from a stream of zip file bytes. + * + * @param resource $fp A stream of zip file bytes. + */ + public static function writeEntry( $fp, $entry ) { + if ( $entry instanceof ZipFileEntry ) { + return static::writeFileEntry( $fp, $entry ); + } else if ( $entry instanceof ZipCentralDirectoryEntry ) { + return static::writeCentralDirectoryEntry( $fp, $entry ); + } elseif ( $entry instanceof ZipEndCentralDirectoryEntry ) { + return static::writeEndCentralDirectoryEntry( $fp, $entry ); + } + + return null; + } + + /** + * Writes a file entry to a zip file. + * The API consumer may leave $entry->bytes empty to write the bytes + * to the stream separately. + * + * The file entry is structured as follows: + * + * ``` + * Offset Bytes Description + * 0 4 Local file header signature = 0x04034b50 (PK♥♦ or "PK\3\4") + * 4 2 Version needed to extract (minimum) + * 6 2 General purpose bit flag + * 8 2 Compression method; e.g. 
none = 0, DEFLATE = 8 (or "\0x08\0x00") + * 10 2 File last modification time + * 12 2 File last modification date + * 14 4 CRC-32 of uncompressed data + * 18 4 Compressed size (or 0xffffffff for ZIP64) + * 22 4 Uncompressed size (or 0xffffffff for ZIP64) + * 26 2 File name length (n) + * 28 2 Extra field length (m) + * 30 n File name + * 30+n m Extra field + * ``` + * + * @param resource $stream + */ + protected static function writeFileEntry( $stream, ZipFileEntry $entry ) { + $data = pack( + 'VvvvvvVVVvv', + self::SIGNATURE_FILE, // Local file header signature + $entry->version, // Version needed to extract + $entry->generalPurpose, // General purpose bit flag + $entry->compressionMethod, // Compression method + $entry->lastModifiedTime, // File last modification time + $entry->lastModifiedDate, // File last modification date + $entry->crc, // CRC-32 + $entry->compressedSize, // Compressed size + $entry->uncompressedSize, // Uncompressed size + strlen($entry->path), // File name length + strlen($entry->extra) // Extra field length + ) . $entry->path . $entry->extra . $entry->bytes; + + return fwrite($stream, $data); + } + + /** + * Writes a central directory entry to a zip file. 
+ * + * The central directory entry is structured as follows: + * + * ``` + * Offset Bytes Description + * 0 4 Central directory file header signature = 0x02014b50 + * 4 2 Version made by + * 6 2 Version needed to extract (minimum) + * 8 2 General purpose bit flag + * 10 2 Compression method + * 12 2 File last modification time + * 14 2 File last modification date + * 16 4 CRC-32 of uncompressed data + * 20 4 Compressed size (or 0xffffffff for ZIP64) + * 24 4 Uncompressed size (or 0xffffffff for ZIP64) + * 28 2 File name length (n) + * 30 2 Extra field length (m) + * 32 2 File comment length (k) + * 34 2 Disk number where file starts (or 0xffff for ZIP64) + * 36 2 Internal file attributes + * 38 4 External file attributes + * 42 4 Relative offset of local file header (or 0xffffffff for ZIP64). This is the number of bytes between the start of the first disk on which the file occurs, and the start of the local file header. This allows software reading the central directory to locate the position of the file inside the ZIP file. 
+ * 46 n File name + * 46+n m Extra field + * 46+n+m k File comment + * ``` + * + * @param resource stream + */ + protected static function writeCentralDirectoryEntry( $stream, ZipCentralDirectoryEntry $entry ) { + $data = pack( + 'VvvvvvvVVVvvvvvVV', + self::SIGNATURE_CENTRAL_DIRECTORY, // Central directory file header signature + $entry->versionCreated, // Version made by + $entry->versionNeeded, // Version needed to extract + $entry->generalPurpose, // General purpose bit flag + $entry->compressionMethod, // Compression method + $entry->lastModifiedTime, // File last modification time + $entry->lastModifiedDate, // File last modification date + $entry->crc, // CRC-32 + $entry->compressedSize, // Compressed size + $entry->uncompressedSize, // Uncompressed size + strlen($entry->path), // File name length + strlen($entry->extra), // Extra field length + strlen($entry->fileComment), // File comment length + $entry->diskNumber, // Disk number where file starts + $entry->internalAttributes, // Internal file attributes + $entry->externalAttributes, // External file attributes + $entry->firstByteAt // Relative offset of local file header + ); + + return fwrite($stream, $data . $entry->path . $entry->extra . $entry->fileComment); + } + + /** + * Writes the end of central directory entry to a zip file. 
+ * + * The end of central directory entry is structured as follows: + * + * ``` + * Offset Bytes Description[33] + * 0 4 End of central directory signature = 0x06054b50 + * 4 2 Number of this disk (or 0xffff for ZIP64) + * 6 2 Disk where central directory starts (or 0xffff for ZIP64) + * 8 2 Number of central directory records on this disk (or 0xffff for ZIP64) + * 10 2 Total number of central directory records (or 0xffff for ZIP64) + * 12 4 Size of central directory (bytes) (or 0xffffffff for ZIP64) + * 16 4 Offset of start of central directory, relative to start of archive (or 0xffffffff for ZIP64) + * 20 2 Comment length (n) + * 22 n Comment + * ``` + * + * @param resource $stream + */ + protected static function writeEndCentralDirectoryEntry( $stream, ZipEndCentralDirectoryEntry $entry ) { + $data = pack( + 'VvvvvVVv', + self::SIGNATURE_CENTRAL_DIRECTORY_END, // End of central directory signature + $entry->diskNumber, // Number of this disk + $entry->centralDirectoryStartDisk, // Disk where central directory starts + $entry->numberCentralDirectoryRecordsOnThisDisk, // Number of central directory records on this disk + $entry->numberCentralDirectoryRecords, // Total number of central directory records + $entry->centralDirectorySize, // Size of central directory (bytes) + $entry->centralDirectoryOffset, // Offset of start of central directory + strlen($entry->comment) // Comment length + ); + + return fwrite($stream, $data . $entry->comment); + } +} diff --git a/tests/unit/zip/ZipStreamWriterTest.php b/tests/unit/zip/ZipStreamWriterTest.php new file mode 100644 index 00000000..bb73f73c --- /dev/null +++ b/tests/unit/zip/ZipStreamWriterTest.php @@ -0,0 +1,110 @@ +tempDir = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 
'zip_test'; + if (!file_exists($this->tempDir)) { + mkdir($this->tempDir); + } + $this->tempSourceFile = tempnam($this->tempDir, 'testfile'); + file_put_contents($this->tempSourceFile, 'Hello'); // Create a file with some content + } + + /** + * @after + */ + public function after() { + // Cleanup temporary files and directory + if (file_exists($this->tempSourceFile)) { + unlink($this->tempSourceFile); + } + if (file_exists($this->tempZipPath)) { + unlink($this->tempZipPath); + } + if (is_dir($this->tempDir)) { + rmdir($this->tempDir); + } + } + + /** + * @dataProvider shouldDeflateProvider + */ + public function testWriteFileFromPath($should_deflate) { + $this->tempZipPath = tempnam($this->tempDir, 'testzip'); + $fp = fopen($this->tempZipPath, 'wb'); + + $zipWriter = new ZipStreamWriter($fp); + $sourcePathOnDisk = $this->tempSourceFile; + $targetPathInZip = 'file'; + + // Test the function + $zipWriter->writeFileFromPath($targetPathInZip, $sourcePathOnDisk, $should_deflate); + $zipWriter->finish(); + + fclose($fp); + + // Check that the ZIP file was created and is not empty + $this->assertFileExists($this->tempZipPath); + $this->assertGreaterThan(0, filesize($this->tempZipPath)); + + // Open the ZIP file and verify its contents + $zip = new \ZipArchive(); + $zip->open($this->tempZipPath); + $this->assertTrue($zip->locateName($targetPathInZip) !== false, "The file was not found in the ZIP"); + $fileContent = $zip->getFromName($targetPathInZip); + $this->assertEquals(file_get_contents($sourcePathOnDisk), $fileContent, "The file content does not match"); + $zip->close(); + } + + /** + * @dataProvider shouldDeflateProvider + */ + public function testWriteFileFromString($should_deflate) + { + $this->tempZipPath = tempnam($this->tempDir, 'testzip'); + $fp = fopen($this->tempZipPath, 'wb'); + + $zipWriter = new ZipStreamWriter($fp); + $sourceContent = 'Hello'; + $targetPathInZip = 'file'; + + // Test the function + $zipWriter->writeFileFromString($targetPathInZip, 
$sourceContent, $should_deflate); + $zipWriter->finish(); + + fclose($fp); + + // Check that the ZIP file was created and is not empty + $this->assertFileExists($this->tempZipPath); + $this->assertGreaterThan(0, filesize($this->tempZipPath)); + + // Open the ZIP file and verify its contents + $zip = new \ZipArchive(); + $zip->open($this->tempZipPath); + $this->assertTrue($zip->locateName($targetPathInZip) !== false, "The file was not found in the ZIP"); + $fileContent = $zip->getFromName($targetPathInZip); + $this->assertEquals($sourceContent, $fileContent, "The file content does not match"); + $zip->close(); + } + + static public function shouldDeflateProvider() { + return [ + [true], + [false], + ]; + } + +} +