From 24a7afcdbb680a7e9f387667e646853825049c76 Mon Sep 17 00:00:00 2001 From: Aina Sitraka <35221835+aynsix@users.noreply.github.com> Date: Thu, 28 Nov 2024 14:29:58 +0300 Subject: [PATCH] PS-707 Databox basket - Multi Part upload files to Expose (#489) --- .../Phrasea/Expose/ExposeClient.php | 121 +++++++++++++++--- .../MultipartUploadCancelAction.php | 29 +++++ .../storage-bundle/Entity/MultipartUpload.php | 23 +++- .../Resources/config/services.yaml | 4 + .../storage-bundle/Upload/UploadManager.php | 12 +- 5 files changed, 157 insertions(+), 32 deletions(-) create mode 100644 lib/php/storage-bundle/Controller/MultipartUploadCancelAction.php diff --git a/databox/api/src/Integration/Phrasea/Expose/ExposeClient.php b/databox/api/src/Integration/Phrasea/Expose/ExposeClient.php index 3e5787b44..eb67ff6ba 100644 --- a/databox/api/src/Integration/Phrasea/Expose/ExposeClient.php +++ b/databox/api/src/Integration/Phrasea/Expose/ExposeClient.php @@ -2,16 +2,17 @@ namespace App\Integration\Phrasea\Expose; -use App\Asset\Attribute\AssetTitleResolver; -use App\Asset\Attribute\AttributesResolver; use App\Asset\FileFetcher; -use App\Attribute\AttributeInterface; use App\Entity\Core\Asset; use App\Entity\Core\Attribute; -use App\Entity\Integration\IntegrationToken; +use App\Storage\RenditionManager; +use App\Attribute\AttributeInterface; use App\Integration\IntegrationConfig; +use App\Asset\Attribute\AssetTitleResolver; +use App\Asset\Attribute\AttributesResolver; +use App\Entity\Integration\IntegrationToken; +use Alchemy\StorageBundle\Upload\UploadManager; use App\Integration\Phrasea\PhraseaClientFactory; -use App\Storage\RenditionManager; use Symfony\Contracts\HttpClient\HttpClientInterface; final readonly class ExposeClient @@ -23,6 +24,7 @@ public function __construct( private AssetTitleResolver $assetTitleResolver, private AttributesResolver $attributesResolver, private RenditionManager $renditionManager, + private UploadManager $uploadManager ) { } @@ -127,17 +129,78 @@ public function postAsset(IntegrationConfig $config, IntegrationToken $integrati $source = $asset->getSource(); $fetchedFilePath = $this->fileFetcher->getFile($source); + $fileSize = filesize($fetchedFilePath); + + // @see https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html + $partSize = 100 * 1024 * 1024; // 100Mb + try { + $uploadsData = [ + 'filename' => $source->getOriginalName(), + 'type' => $source->getType(), + 'size' => (int)$source->getSize(), + ]; + + $resUploads = $this->create($config, $integrationToken) + ->request('POST', '/uploads', [ + 'json' => $uploadsData, + ]) + ->toArray() + ; + + $mUploadId = $resUploads['id']; + + $parts['Parts'] = []; + + try { + $fd = fopen($fetchedFilePath, 'r'); + $alreadyUploaded = 0; + + $partNumber = 1; + + $retryCount = 3; + + while ( ($fileSize - $alreadyUploaded) > 0) { + $resUploadPart = $this->create($config, $integrationToken) + ->request('POST', '/uploads/'. $mUploadId .'/part', [ + 'json' => ['part' => $partNumber], + ]) + ->toArray() + ; + + if (($fileSize - $alreadyUploaded) < $partSize) { + $partSize = $fileSize - $alreadyUploaded; + } + + $headerPutPart = $this->putPart($resUploadPart['url'], $fd, $partSize, $retryCount); + + $alreadyUploaded += $partSize; + + $parts['Parts'][$partNumber] = [ + 'PartNumber' => $partNumber, + 'ETag' => current($headerPutPart['etag']), + ]; + + $partNumber++; + } + + fclose($fd); + } catch (\Throwable $e) { + $this->create($config, $integrationToken) + ->request('DELETE', '/uploads/'. $mUploadId); + + throw $e; + } + $data = array_merge([ 'publication_id' => $publicationId, 'asset_id' => $asset->getId(), 'title' => $resolvedTitle, 'description' => $description, 'translations' => $translations, - 'upload' => [ - 'type' => $source->getType(), - 'size' => $source->getSize(), - 'name' => $source->getOriginalName(), + 'multipart' => [ + 'uploadId' => $mUploadId, + 'parts' => $parts['Parts'], ], ], $extraData); @@ -147,15 +210,6 @@ public function postAsset(IntegrationConfig $config, IntegrationToken $integrati ]) ->toArray() ; - $exposeAssetId = $pubAsset['id']; - - $this->uploadClient->request('PUT', $pubAsset['uploadURL'], [ - 'headers' => [ - 'Content-Type' => $source->getType(), - 'Content-Length' => filesize($fetchedFilePath), - ], - 'body' => fopen($fetchedFilePath, 'r'), - ]); foreach ([ 'preview', @@ -168,7 +222,7 @@ public function postAsset(IntegrationConfig $config, IntegrationToken $integrati $subDefResponse = $this->create($config, $integrationToken) ->request('POST', '/sub-definitions', [ 'json' => [ - 'asset_id' => $exposeAssetId, + 'asset_id' => $pubAsset['id'], 'name' => $renditionName, 'use_as_preview' => 'preview' === $renditionName, 'use_as_thumbnail' => 'thumbnail' === $renditionName, @@ -207,4 +261,33 @@ public function deleteAsset(IntegrationConfig $config, IntegrationToken $integra ->request('DELETE', '/assets/'.$assetId) ; } + + private function putPart(string $url, mixed &$handleFile, int $partSize, int $retryCount): array + { + if ($retryCount > 0) { + $retryCount--; + try { + $maxToRead = $partSize; + $alreadyRead = 0; + return $this->uploadClient->request('PUT', $url, [ + 'headers' => [ + 'Content-Length' => $partSize, + ], + 'body' => function ($size) use (&$handleFile, $maxToRead, &$alreadyRead): mixed { + $toRead = min($size, $maxToRead - $alreadyRead); + $alreadyRead += $toRead; + + return fread($handleFile, $toRead); + }, + ])->getHeaders(); + } catch (\Throwable $e) { + if ($retryCount == 0) { + throw $e; + } + return $this->putPart($url, $handleFile, $partSize, $retryCount); + } + } else { + return []; + } + } } diff --git a/lib/php/storage-bundle/Controller/MultipartUploadCancelAction.php b/lib/php/storage-bundle/Controller/MultipartUploadCancelAction.php new file mode 100644 index 000000000..f8749bd12 --- /dev/null +++ b/lib/php/storage-bundle/Controller/MultipartUploadCancelAction.php @@ -0,0 +1,29 @@ +uploadManager->cancelMultipartUpload($data->getPath(), $data->getUploadId()); + } catch (\Throwable $e) { + // S3 storage will clean up its uncomplete uploads automatically + } + + $this->em->remove($data); + } +} diff --git a/lib/php/storage-bundle/Entity/MultipartUpload.php b/lib/php/storage-bundle/Entity/MultipartUpload.php index 8933a32ff..fae5288d4 100644 --- a/lib/php/storage-bundle/Entity/MultipartUpload.php +++ b/lib/php/storage-bundle/Entity/MultipartUpload.php @@ -4,18 +4,20 @@ namespace Alchemy\StorageBundle\Entity; -use Alchemy\StorageBundle\Controller\MultipartUploadPartAction; -use ApiPlatform\Metadata\ApiProperty; -use ApiPlatform\Metadata\ApiResource; -use ApiPlatform\Metadata\Delete; +use Ramsey\Uuid\Uuid; use ApiPlatform\Metadata\Get; -use ApiPlatform\Metadata\GetCollection; use ApiPlatform\Metadata\Post; use Doctrine\DBAL\Types\Types; +use ApiPlatform\Metadata\Delete; use Doctrine\ORM\Mapping as ORM; use Ramsey\Uuid\Doctrine\UuidType; -use Ramsey\Uuid\Uuid; +use ApiPlatform\Metadata\ApiProperty; +use ApiPlatform\Metadata\ApiResource; +use ApiPlatform\Metadata\GetCollection; use Symfony\Component\Serializer\Annotation\Groups; +use Alchemy\StorageBundle\Controller\MultipartUploadPartAction; +use Alchemy\StorageBundle\Controller\MultipartUploadCancelAction; +use Alchemy\StorageBundle\Controller\MultipartUploadCompleteAction; #[ApiResource( shortName: 'Upload', @@ -67,7 +69,14 @@ ]], ], ]), - new Delete(openapiContext: ['summary' => 'Cancel an upload', 'description' => 'Cancel an upload.']), + new Delete( + controller: MultipartUploadCancelAction::class, + openapiContext: [ + 'summary' => 'Cancel an upload', + 'description' => 'Cancel an upload.' + ] + ), + new GetCollection(security: 'is_granted(\'ROLE_ADMIN\')'), ], normalizationContext: ['groups' => ['upload:read']], diff --git a/lib/php/storage-bundle/Resources/config/services.yaml b/lib/php/storage-bundle/Resources/config/services.yaml index 2e1adf8c3..d8e90a187 100644 --- a/lib/php/storage-bundle/Resources/config/services.yaml +++ b/lib/php/storage-bundle/Resources/config/services.yaml @@ -42,6 +42,10 @@ services: tags: - { name: controller.service_arguments } + Alchemy\StorageBundle\Controller\MultipartUploadCancelAction: + tags: + - { name: controller.service_arguments } + Alchemy\StorageBundle\Doctrine\MultipartUploadListener: ~ Alchemy\StorageBundle\Storage\PathGenerator: ~ diff --git a/lib/php/storage-bundle/Upload/UploadManager.php b/lib/php/storage-bundle/Upload/UploadManager.php index 7d66e74f2..23ad2361e 100644 --- a/lib/php/storage-bundle/Upload/UploadManager.php +++ b/lib/php/storage-bundle/Upload/UploadManager.php @@ -4,14 +4,14 @@ namespace Alchemy\StorageBundle\Upload; -use Alchemy\StorageBundle\Entity\MultipartUpload; -use Aws\Api\DateTimeResult; use Aws\S3\S3Client; -use Doctrine\ORM\EntityManagerInterface; +use Aws\Api\DateTimeResult; use Psr\Log\LoggerInterface; +use Doctrine\ORM\EntityManagerInterface; use Symfony\Component\HttpFoundation\Request; -use Symfony\Component\HttpKernel\Exception\BadRequestHttpException; +use Alchemy\StorageBundle\Entity\MultipartUpload; use Symfony\Component\HttpKernel\Exception\NotFoundHttpException; +use Symfony\Component\HttpKernel\Exception\BadRequestHttpException; final readonly class UploadManager { @@ -62,7 +62,7 @@ public function getSignedUrl(string $uploadId, string $path, int $partNumber): s return (string) $request->getUri(); } - public function markComplete(string $uploadId, string $filename, array $parts): void + public function markComplete(string $uploadId, string $filename, array $parts) { $params = [ 'Bucket' => $this->uploadBucket, @@ -73,7 +73,7 @@ public function markComplete(string $uploadId, string $filename, array $parts): 'UploadId' => $uploadId, ]; - $this->client->completeMultipartUpload($params); + return $this->client->completeMultipartUpload($params); } public function createPutObjectSignedURL(string $path, string $contentType): string