From dfd58f22f8431806b38d8d4061579495e452f239 Mon Sep 17 00:00:00 2001 From: dantb Date: Thu, 21 Sep 2023 10:55:43 +0200 Subject: [PATCH] Remove Tar archiving (#4286) Remove Tar archiving --- .../plugins/archive/ArchiveDownload.scala | 91 ++++++++---------- .../delta/plugins/archive/Archives.scala | 3 +- .../plugins/archive/model/ArchiveFormat.scala | 94 ------------------- .../archive/model/ArchiveRejection.scala | 6 -- .../delta/plugins/archive/model/Zip.scala | 29 ++++++ .../archive/routes/ArchiveRoutes.scala | 46 ++++----- .../plugins/archive/ArchiveDownloadSpec.scala | 68 +++++--------- .../plugins/archive/ArchiveRoutesSpec.scala | 32 +------ .../delta/plugins/archive/ArchivesSpec.scala | 10 +- .../plugins/archive/TarDownloadSpec.scala | 15 --- .../plugins/archive/ZipDownloadSpec.scala | 12 --- .../testkit/archive/ArchiveHelpers.scala | 36 ++----- .../paradox/docs/delta/api/archives-api.md | 19 +--- .../docs/delta/api/assets/archives/fetch.sh | 4 +- .../delta/api/assets/archives/fetched.json | 2 +- docs/src/main/paradox/docs/delta/api/index.md | 2 +- .../docs/releases/v1.9-release-notes.md | 6 ++ .../nexus/tests/kg/ArchiveSpec.scala | 24 ----- 18 files changed, 142 insertions(+), 357 deletions(-) delete mode 100644 delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/model/ArchiveFormat.scala create mode 100644 delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/model/Zip.scala delete mode 100644 delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/TarDownloadSpec.scala delete mode 100644 delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ZipDownloadSpec.scala diff --git a/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchiveDownload.scala b/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchiveDownload.scala index 8ea91c610d..43a7115c8c 100644 --- 
a/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchiveDownload.scala +++ b/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchiveDownload.scala @@ -35,6 +35,7 @@ import monix.execution.Scheduler import java.nio.ByteBuffer import java.nio.charset.StandardCharsets +import akka.stream.alpakka.file.ArchiveMetadata /** * Archive download functionality. @@ -55,10 +56,9 @@ trait ArchiveDownload { * @param caller * the caller to be used for checking for access */ - def apply[M]( + def apply( value: ArchiveValue, project: ProjectRef, - format: ArchiveFormat[M], ignoreNotFound: Boolean )(implicit caller: Caller, scheduler: Scheduler): IO[ArchiveRejection, AkkaSource] @@ -96,18 +96,17 @@ object ArchiveDownload { private val printer = Printer.spaces2.copy(dropNullValues = true) private val sourcePrinter = Printer.spaces2.copy(dropNullValues = false) - override def apply[M]( + override def apply( value: ArchiveValue, project: ProjectRef, - format: ArchiveFormat[M], ignoreNotFound: Boolean )(implicit caller: Caller, scheduler: Scheduler): IO[ArchiveRejection, AkkaSource] = { for { references <- value.resources.toList.traverse(toFullReference) _ <- checkResourcePermissions(references, project) - contentStream <- resolveReferencesAsStream(references, project, ignoreNotFound, format) + contentStream <- resolveReferencesAsStream(references, project, ignoreNotFound) } yield { - Source.fromGraph(StreamConverter(contentStream)).via(format.writeFlow) + Source.fromGraph(StreamConverter(contentStream)).via(Zip.writeFlow) } } @@ -124,34 +123,29 @@ object ArchiveDownload { } } - private def resolveReferencesAsStream[M]( + private def resolveReferencesAsStream( references: List[FullArchiveReference], project: ProjectRef, - ignoreNotFound: Boolean, - format: ArchiveFormat[M] - )(implicit caller: Caller): IO[ArchiveRejection, Stream[Task, (M, AkkaSource)]] = { + ignoreNotFound: Boolean + )(implicit caller: Caller): 
IO[ArchiveRejection, Stream[Task, (ArchiveMetadata, AkkaSource)]] = { references .traverseFilter { - case ref: FileReference => fileEntry(ref, project, format, ignoreNotFound) - case ref: ResourceReference => resourceEntry(ref, project, format, ignoreNotFound) + case ref: FileReference => fileEntry(ref, project, ignoreNotFound) + case ref: ResourceReference => resourceEntry(ref, project, ignoreNotFound) } - .map(sortWith(format)) + .map(sortWith) .map(asStream) } - private def sortWith[M]( - format: ArchiveFormat[M] - )(list: List[(M, Task[AkkaSource])]): List[(M, Task[AkkaSource])] = { - list.sortBy { case (entry, _) => - entry - }(format.ordering) - } + private def sortWith(list: List[(ArchiveMetadata, Task[AkkaSource])]): List[(ArchiveMetadata, Task[AkkaSource])] = + list.sortBy { case (entry, _) => entry }(Zip.ordering) - private def asStream[M](list: List[(M, Task[AkkaSource])]) = { - Stream.iterable(list).evalMap[Task, (M, AkkaSource)] { case (metadata, source) => + private def asStream( + list: List[(ArchiveMetadata, Task[AkkaSource])] + ): Stream[Task, (ArchiveMetadata, AkkaSource)] = + Stream.iterable(list).evalMap { case (metadata, source) => source.map(metadata -> _) } - } private def checkResourcePermissions( refs: List[FullArchiveReference], @@ -166,14 +160,13 @@ object ArchiveDownload { ) .void - private def fileEntry[Metadata]( + private def fileEntry( ref: FileReference, project: ProjectRef, - format: ArchiveFormat[Metadata], ignoreNotFound: Boolean )(implicit caller: Caller - ): IO[ArchiveRejection, Option[(Metadata, Task[AkkaSource])]] = { + ): IO[ArchiveRejection, Option[(ArchiveMetadata, Task[AkkaSource])]] = { val refProject = ref.project.getOrElse(project) // the required permissions are checked for each file content fetch val entry = fetchFileContent(ref.ref, refProject, caller) @@ -184,21 +177,19 @@ object ArchiveDownload { case FileRejection.AuthorizationFailed(addr, perm) => AuthorizationFailed(addr, perm) case other => 
WrappedFileRejection(other) } - .flatMap { case FileResponse(fileMetadata, content) => - IO.fromEither( - pathOf(ref, project, format, fileMetadata.filename).map { path => - val archiveMetadata = format.metadata(path, fileMetadata.bytes) - val contentTask: Task[AkkaSource] = content - .tapError(response => - UIO.delay( - logger - .error(s"Error streaming file '${fileMetadata.filename}' for archive: ${response.value.value}") - ) - ) - .mapError(response => ArchiveDownloadError(fileMetadata.filename, response)) - Some((archiveMetadata, contentTask)) - } - ) + .map { case FileResponse(fileMetadata, content) => + val path = pathOf(ref, project, fileMetadata.filename) + val archiveMetadata = Zip.metadata(path) + val contentTask: Task[AkkaSource] = content + .tapError(response => + UIO.delay( + logger + .error(s"Error streaming file '${fileMetadata.filename}' for archive: ${response.value.value}") + ) + ) + .mapError(response => ArchiveDownloadError(fileMetadata.filename, response)) + Some((archiveMetadata, contentTask)) + } if (ignoreNotFound) entry.onErrorRecover { case _: ResourceNotFound => None } else entry @@ -207,27 +198,21 @@ object ArchiveDownload { private def pathOf( ref: FileReference, project: ProjectRef, - format: ArchiveFormat[_], filename: String - ): Either[FilenameTooLong, String] = - ref.path.map { p => Right(p.value.toString) }.getOrElse { + ): String = + ref.path.map(_.value.toString).getOrElse { val p = ref.project.getOrElse(project) - Either.cond( - format != ArchiveFormat.Tar || filename.length < 100, - s"$p/file/$filename", - FilenameTooLong(ref.ref.original, p, filename) - ) + s"$p/file/$filename" } - private def resourceEntry[Metadata]( + private def resourceEntry( ref: ResourceReference, project: ProjectRef, - format: ArchiveFormat[Metadata], ignoreNotFound: Boolean - ): IO[ArchiveRejection, Option[(Metadata, Task[AkkaSource])]] = { + ): IO[ArchiveRejection, Option[(ArchiveMetadata, Task[AkkaSource])]] = { val archiveEntry = 
resourceRefToByteString(ref, project).map { content => val path = pathOf(ref, project) - val metadata = format.metadata(path, content.length.toLong) + val metadata = Zip.metadata(path) Some((metadata, Task.pure(Source.single(content)))) } if (ignoreNotFound) archiveEntry.onErrorHandle { _: ResourceNotFound => None } diff --git a/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/Archives.scala b/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/Archives.scala index e451e6fdde..c99b3d5a04 100644 --- a/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/Archives.scala +++ b/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/Archives.scala @@ -166,13 +166,12 @@ class Archives( def download( id: IdSegment, project: ProjectRef, - format: ArchiveFormat[_], ignoreNotFound: Boolean )(implicit caller: Caller, scheduler: Scheduler): IO[ArchiveRejection, AkkaSource] = (for { resource <- fetch(id, project) value = resource.value - source <- archiveDownload(value.value, project, format, ignoreNotFound) + source <- archiveDownload(value.value, project, ignoreNotFound) } yield source).span("downloadArchive") private def eval(cmd: CreateArchive): IO[ArchiveRejection, ArchiveResource] = diff --git a/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/model/ArchiveFormat.scala b/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/model/ArchiveFormat.scala deleted file mode 100644 index 378e39942f..0000000000 --- a/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/model/ArchiveFormat.scala +++ /dev/null @@ -1,94 +0,0 @@ -package ch.epfl.bluebrain.nexus.delta.plugins.archive.model - -import akka.NotUsed -import akka.http.scaladsl.model.{ContentType, HttpRequest, MediaTypes} -import akka.stream.alpakka.file.scaladsl.Archive -import 
akka.stream.alpakka.file.{ArchiveMetadata, TarArchiveMetadata} -import akka.stream.scaladsl.{Flow, Source} -import akka.util.ByteString -import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.ArchiveFormat.WriteFlow -import ch.epfl.bluebrain.nexus.delta.sdk.utils.HeadersUtils - -/** - * Available format to download the archive - */ -sealed trait ArchiveFormat[Metadata] extends Product with Serializable { - - /** - * Content type - */ - def contentType: ContentType - - /** - * File extension - */ - def fileExtension: String - - /** - * How to build the metadata for the archive entry - */ - def metadata(filename: String, size: Long): Metadata - - /** - * Ordering for the archive entries - */ - def ordering: Ordering[Metadata] = Ordering.by(filePath) - - /** - * How to extract the file path from the archive metadata - */ - def filePath(metadata: Metadata): String - - /** - * Flow to create an archive - */ - def writeFlow: WriteFlow[Metadata] -} - -object ArchiveFormat { - - type WriteFlow[Metadata] = Flow[(Metadata, Source[ByteString, _]), ByteString, NotUsed] - - /** - * Tar format - * @see - * https://en.wikipedia.org/wiki/Tar_(computing)#Limitations for the limitations - */ - final case object Tar extends ArchiveFormat[TarArchiveMetadata] { - override def contentType: ContentType = MediaTypes.`application/x-tar` - - override def fileExtension: String = "tar" - - override def metadata(filename: String, size: Long): TarArchiveMetadata = - TarArchiveMetadata.create(filename, size) - - override def filePath(metadata: TarArchiveMetadata): String = metadata.filePath - - override def writeFlow: WriteFlow[TarArchiveMetadata] = Archive.tar() - } - - /** - * Zip format - * - * @see - * https://en.wikipedia.org/wiki/ZIP_(file_format)#Limits for the limitations - */ - final case object Zip extends ArchiveFormat[ArchiveMetadata] { - override def contentType: ContentType = MediaTypes.`application/zip` - - override def fileExtension: String = "zip" - - override def 
metadata(filename: String, size: Long): ArchiveMetadata = - ArchiveMetadata.create(filename) - - override def filePath(metadata: ArchiveMetadata): String = metadata.filePath - - override def writeFlow: WriteFlow[ArchiveMetadata] = Archive.zip() - } - - private val availableFormats = List(Tar, Zip) - - def apply(req: HttpRequest): Option[ArchiveFormat[_]] = availableFormats.find { format => - HeadersUtils.matches(req.headers, format.contentType.mediaType) - } -} diff --git a/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/model/ArchiveRejection.scala b/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/model/ArchiveRejection.scala index f278888d2d..49637d95b6 100644 --- a/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/model/ArchiveRejection.scala +++ b/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/model/ArchiveRejection.scala @@ -69,11 +69,6 @@ object ArchiveRejection { )).mkString("\n") ) - final case class FilenameTooLong(id: Iri, project: ProjectRef, fileName: String) - extends ArchiveRejection( - s"File '$id' in project '$project' has a file name '$fileName' exceeding the 100 character limit for a tar file." - ) - /** * Rejection returned when an archive doesn't exist. 
* @@ -201,7 +196,6 @@ object ArchiveRejection { HttpResponseFields { case ResourceAlreadyExists(_, _) => StatusCodes.Conflict case InvalidResourceCollection(_, _, _) => StatusCodes.BadRequest - case FilenameTooLong(_, _, _) => StatusCodes.BadRequest case ArchiveNotFound(_, _) => StatusCodes.NotFound case InvalidArchiveId(_) => StatusCodes.BadRequest case ProjectContextRejection(rejection) => rejection.status diff --git a/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/model/Zip.scala b/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/model/Zip.scala new file mode 100644 index 0000000000..7477eb6a19 --- /dev/null +++ b/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/model/Zip.scala @@ -0,0 +1,29 @@ +package ch.epfl.bluebrain.nexus.delta.plugins.archive.model + +import akka.NotUsed +import akka.http.scaladsl.model.{ContentType, HttpRequest, MediaTypes} +import akka.stream.alpakka.file.scaladsl.Archive +import akka.stream.alpakka.file.ArchiveMetadata +import akka.stream.scaladsl.{Flow, Source} +import akka.util.ByteString +import ch.epfl.bluebrain.nexus.delta.sdk.utils.HeadersUtils + +/** + * Zip archive format + * + * @see + * https://en.wikipedia.org/wiki/ZIP_(file_format)#Limits for the limitations + */ +object Zip { + type WriteFlow[Metadata] = Flow[(Metadata, Source[ByteString, _]), ByteString, NotUsed] + + lazy val contentType: ContentType = MediaTypes.`application/zip` + + lazy val writeFlow: WriteFlow[ArchiveMetadata] = Archive.zip() + + lazy val ordering: Ordering[ArchiveMetadata] = Ordering.by(md => md.filePath) + + def metadata(filename: String): ArchiveMetadata = ArchiveMetadata.create(filename) + + def checkHeader(req: HttpRequest): Boolean = HeadersUtils.matches(req.headers, Zip.contentType.mediaType) +} diff --git a/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/routes/ArchiveRoutes.scala 
b/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/routes/ArchiveRoutes.scala index 7153dcc6ce..f4cba81633 100644 --- a/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/routes/ArchiveRoutes.scala +++ b/delta/plugins/archive/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/routes/ArchiveRoutes.scala @@ -2,9 +2,10 @@ package ch.epfl.bluebrain.nexus.delta.plugins.archive.routes import akka.http.scaladsl.model.StatusCodes.{Created, SeeOther} import akka.http.scaladsl.server.Directives._ -import akka.http.scaladsl.server.{Directive1, Route} +import akka.http.scaladsl.server.Route import ch.epfl.bluebrain.nexus.delta.plugins.archive.Archives -import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.{permissions, ArchiveFormat} +import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.permissions +import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.Zip import ch.epfl.bluebrain.nexus.delta.rdf.jsonld.context.RemoteContextResolution import ch.epfl.bluebrain.nexus.delta.rdf.utils.JsonKeyOrdering import ch.epfl.bluebrain.nexus.delta.sdk.AkkaSource @@ -53,10 +54,10 @@ class ArchiveRoutes( (post & entity(as[Json]) & pathEndOrSingleSlash) { json => operationName(s"$prefix/archives/{org}/{project}") { authorizeFor(ref, permissions.write).apply { - archiveResponse { - case Some(_) => emitRedirect(SeeOther, archives.create(ref, json).map(_.uris.accessUri)) - case None => emit(Created, archives.create(ref, json).mapValue(_.metadata)) - } + archiveResponse( + emitRedirect(SeeOther, archives.create(ref, json).map(_.uris.accessUri)), + emit(Created, archives.create(ref, json).mapValue(_.metadata)) + ) } } }, @@ -66,25 +67,24 @@ class ArchiveRoutes( // create an archive with an id (put & entity(as[Json]) & pathEndOrSingleSlash) { json => authorizeFor(ref, permissions.write).apply { - archiveResponse { - case Some(_) => emitRedirect(SeeOther, archives.create(id, ref, json).map(_.uris.accessUri)) - 
case None => emit(Created, archives.create(id, ref, json).mapValue(_.metadata)) - } + archiveResponse( + emitRedirect(SeeOther, archives.create(id, ref, json).map(_.uris.accessUri)), + emit(Created, archives.create(id, ref, json).mapValue(_.metadata)) + ) } }, // fetch or download an archive (get & pathEndOrSingleSlash) { authorizeFor(ref, permissions.read).apply { - archiveResponse { - case Some(format) => - parameter("ignoreNotFound".as[Boolean] ? false) { ignoreNotFound => - val response = archives.download(id, ref, format, ignoreNotFound).map { source => - sourceToFileResponse(source, format) - } - emit(response) + archiveResponse( + parameter("ignoreNotFound".as[Boolean] ? false) { ignoreNotFound => + val response = archives.download(id, ref, ignoreNotFound).map { source => + sourceToFileResponse(source) } - case None => emit(archives.fetch(id, ref)) - } + emit(response) + }, + emit(archives.fetch(id, ref)) + ) } } ) @@ -96,9 +96,9 @@ class ArchiveRoutes( } } - private def sourceToFileResponse(source: AkkaSource, format: ArchiveFormat[_]): FileResponse = - FileResponse(s"archive.${format.fileExtension}", format.contentType, 0L, source) + private def archiveResponse(validResp: Route, invalidResp: Route): Route = + extractRequest.map(Zip.checkHeader(_)).apply(valid => if (valid) validResp else invalidResp) - private def archiveResponse: Directive1[Option[ArchiveFormat[_]]] = - extractRequest.map(ArchiveFormat(_)) + private def sourceToFileResponse(source: AkkaSource): FileResponse = + FileResponse(s"archive.zip", Zip.contentType, 0L, source) } diff --git a/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchiveDownloadSpec.scala b/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchiveDownloadSpec.scala index a549e747de..3d75c62a63 100644 --- a/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchiveDownloadSpec.scala +++ 
b/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchiveDownloadSpec.scala @@ -10,9 +10,9 @@ import cats.data.NonEmptySet import ch.epfl.bluebrain.nexus.delta.kernel.utils.UrlUtils.encode import ch.epfl.bluebrain.nexus.delta.plugins.archive.FileSelf.ParsingError import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.ArchiveReference.{FileReference, FileSelfReference, ResourceReference} -import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.ArchiveRejection.{AuthorizationFailed, FilenameTooLong, InvalidFileSelf, ResourceNotFound} +import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.ArchiveRejection.{AuthorizationFailed, InvalidFileSelf, ResourceNotFound} import ch.epfl.bluebrain.nexus.delta.sdk.model.ResourceRepresentation.{CompactedJsonLd, Dot, ExpandedJsonLd, NQuads, NTriples, SourceJson} -import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.{ArchiveFormat, ArchiveRejection, ArchiveValue} +import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.{ArchiveRejection, ArchiveValue} import ch.epfl.bluebrain.nexus.delta.plugins.storage.RemoteContextResolutionFixture import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.FileAttributes.FileAttributesOrigin.Client import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.FileRejection.FileNotFound @@ -50,7 +50,7 @@ import java.util.UUID import scala.concurrent.ExecutionContext import scala.reflect.ClassTag -abstract class ArchiveDownloadSpec +class ArchiveDownloadSpec extends TestKit(ActorSystem()) with AnyWordSpecLike with Inspectors @@ -81,9 +81,7 @@ abstract class ArchiveDownloadSpec private val permissions = Set(Permissions.resources.read) private val aclCheck = AclSimpleCheck((subject, AclAddress.Root, permissions)).accepted - def format: ArchiveFormat[_] - - def sourceToMap(source: AkkaSource): Map[String, String] + def sourceToMap(source: AkkaSource): Map[String, String] = fromZip(source).map { case (k, v) => k -> v.utf8String } "An 
ArchiveDownload" should { val storageRef = ResourceRef.Revision(iri"http://localhost/${genString()}", 5) @@ -147,20 +145,20 @@ abstract class ArchiveDownloadSpec ) def downloadAndExtract(value: ArchiveValue, ignoreNotFound: Boolean) = { - archiveDownload(value, project.ref, format, ignoreNotFound).map(sourceToMap).accepted + archiveDownload(value, project.ref, ignoreNotFound).map(sourceToMap).accepted } def failToDownload[R <: ArchiveRejection: ClassTag](value: ArchiveValue, ignoreNotFound: Boolean) = { - archiveDownload(value, project.ref, format, ignoreNotFound).rejectedWith[R] + archiveDownload(value, project.ref, ignoreNotFound).rejectedWith[R] } def rejectedAccess(value: ArchiveValue) = { archiveDownload - .apply(value, project.ref, format, ignoreNotFound = true)(Caller.Anonymous, global) + .apply(value, project.ref, ignoreNotFound = true)(Caller.Anonymous, global) .rejectedWith[AuthorizationFailed] } - s"provide a ${format.fileExtension} for both resources and files" in { + s"provide a zip for both resources and files" in { val value = ArchiveValue.unsafe( NonEmptySet.of( ResourceReference(Latest(id1), None, None, None), @@ -175,7 +173,7 @@ abstract class ArchiveDownloadSpec result shouldEqual expected } - s"provide a ${format.fileExtension} for file selfs" in { + s"provide a zip for file selfs" in { val value = ArchiveValue.unsafe( NonEmptySet.of( FileSelfReference(file1Self, None) @@ -188,12 +186,12 @@ abstract class ArchiveDownloadSpec result shouldEqual expected } - s"fail to provide a ${format.fileExtension} for file selfs which do not resolve" in { + s"fail to provide a zip for file selfs which do not resolve" in { val value = ArchiveValue.unsafe(NonEmptySet.of(FileSelfReference("http://wrong.file/self", None))) failToDownload[InvalidFileSelf](value, ignoreNotFound = false) } - s"provide a ${format.fileExtension} for both resources and files with different paths and formats" in { + s"provide a zip for both resources and files with different paths and 
formats" in { val list = List( SourceJson -> file1.value.asJson.sort.spaces2, CompactedJsonLd -> file1.toCompactedJsonLd.accepted.json.sort.spaces2, @@ -226,39 +224,17 @@ abstract class ArchiveDownloadSpec } } - if (format == ArchiveFormat.Tar) { - "fail to provide a tar if the file name is too long and no path is provided" in { - val value = ArchiveValue.unsafe( - NonEmptySet.of( - FileReference(Latest(id2), None, None) - ) - ) - failToDownload[FilenameTooLong](value, ignoreNotFound = false) - } - - "provide a tar if the file name is too long but a path is provided" in { - val filePath = AbsolutePath.apply(s"/${genString()}/file.txt").rightValue - val value = ArchiveValue.unsafe( - NonEmptySet.of( - FileReference(Latest(id2), None, Some(filePath)) - ) - ) - - downloadAndExtract(value, ignoreNotFound = false) should contain key filePath.value.toString - } - } else { - "provide a zip if the file name is long" in { - val value = ArchiveValue.unsafe( - NonEmptySet.of( - FileReference(Latest(id2), None, None) - ) + "provide a zip if the file name is long" in { + val value = ArchiveValue.unsafe( + NonEmptySet.of( + FileReference(Latest(id2), None, None) ) - val file2Path = s"${project.ref.toString}/file/${file2.value.attributes.filename}" - downloadAndExtract(value, ignoreNotFound = false) should contain key file2Path - } + ) + val file2Path = s"${project.ref.toString}/file/${file2.value.attributes.filename}" + downloadAndExtract(value, ignoreNotFound = false) should contain key file2Path } - s"fail to provide a ${format.fileExtension} when a resource is not found" in { + s"fail to provide a zip when a resource is not found" in { val value = ArchiveValue.unsafe( NonEmptySet.of( ResourceReference(Latest(iri"http://localhost/${genString()}"), None, None, None), @@ -268,7 +244,7 @@ abstract class ArchiveDownloadSpec failToDownload[ResourceNotFound](value, ignoreNotFound = false) } - s"fail to provide a ${format.fileExtension} when a file is not found" in { + s"fail to 
provide a zip when a file is not found" in { val value = ArchiveValue.unsafe( NonEmptySet.of( ResourceReference(Latest(id1), None, None, None), @@ -306,14 +282,14 @@ abstract class ArchiveDownloadSpec result shouldEqual expected } - s"fail to provide a ${format.fileExtension} when access to a resource is not found" in { + s"fail to provide a zip when access to a resource is not found" in { val value = ArchiveValue.unsafe( NonEmptySet.of(ResourceReference(Latest(id1), None, None, None)) ) rejectedAccess(value) } - s"fail to provide a ${format.fileExtension} when access to a file is not found" in { + s"fail to provide a zip when access to a file is not found" in { val value = ArchiveValue.unsafe( NonEmptySet.of(FileReference(Latest(id1), None, None)) ) diff --git a/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchiveRoutesSpec.scala b/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchiveRoutesSpec.scala index 1755b9d56a..0e061ee440 100644 --- a/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchiveRoutesSpec.scala +++ b/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchiveRoutesSpec.scala @@ -2,7 +2,7 @@ package ch.epfl.bluebrain.nexus.delta.plugins.archive import akka.http.scaladsl.model.ContentTypes.`text/plain(UTF-8)` import akka.http.scaladsl.model.MediaRanges.`*/*` -import akka.http.scaladsl.model.MediaTypes.{`application/x-tar`, `application/zip`} +import akka.http.scaladsl.model.MediaTypes.`application/zip` import akka.http.scaladsl.model.headers.{`Content-Type`, Accept, Location, OAuth2BearerToken} import akka.http.scaladsl.model.{ContentTypes, StatusCodes, Uri} import akka.http.scaladsl.server.Route @@ -284,36 +284,6 @@ class ArchiveRoutesSpec extends BaseRouteSpec with StorageFixtures with TryValue } } - "fetch a tar archive ignoring not found" in { - forAll(List(Accept(`application/x-tar`), acceptAll)) { accept 
=> - Get(s"/v1/archives/$projectRef/$uuid?ignoreNotFound=true") ~> asSubject ~> accept ~> routes ~> check { - status shouldEqual StatusCodes.OK - header[`Content-Type`].value.value() shouldEqual `application/x-tar`.value - val result = fromTar(responseEntity.dataBytes) - - result.keySet shouldEqual Set( - s"${project.ref}/file/file.txt", - s"${project.ref}/compacted/${encode(fileId.toString)}.json" - ) - - val expectedContent = fileContent - val actualContent = result.entryAsString(s"${project.ref}/file/file.txt") - actualContent shouldEqual expectedContent - - val expectedMetadata = FilesRoutesSpec.fileMetadata( - projectRef, - fileId, - file.value.attributes, - storageRef, - createdBy = subject, - updatedBy = subject - ) - val actualMetadata = result.entryAsJson(s"${project.ref}/compacted/${encode(fileId.toString)}.json") - actualMetadata shouldEqual expectedMetadata - } - } - } - "fetch a zip archive ignoring not found" in { Get(s"/v1/archives/$projectRef/$uuid?ignoreNotFound=true") ~> asSubject ~> Accept( `application/zip` diff --git a/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchivesSpec.scala b/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchivesSpec.scala index 5d1c49abc5..273d3ee9dd 100644 --- a/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchivesSpec.scala +++ b/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ArchivesSpec.scala @@ -5,7 +5,7 @@ import cats.data.NonEmptySet import ch.epfl.bluebrain.nexus.delta.kernel.utils.UUIDF import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.ArchiveReference.{FileReference, ResourceReference} import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.ArchiveRejection.{ArchiveNotFound, ProjectContextRejection} -import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.{Archive, ArchiveFormat, ArchiveRejection, ArchiveValue} +import 
ch.epfl.bluebrain.nexus.delta.plugins.archive.model.{Archive, ArchiveRejection, ArchiveValue} import ch.epfl.bluebrain.nexus.delta.rdf.Vocabulary.{nxv, schema} import ch.epfl.bluebrain.nexus.delta.rdf.jsonld.api.{JsonLdApi, JsonLdJavaApi} import ch.epfl.bluebrain.nexus.delta.sdk.AkkaSource @@ -63,8 +63,7 @@ class ArchivesSpec private val cfg = ArchivePluginConfig(1, EphemeralLogConfig(5.seconds, 5.hours)) private val download = new ArchiveDownload { - override def apply[M](value: ArchiveValue, project: ProjectRef, format: ArchiveFormat[M], ignoreNotFound: Boolean)( - implicit + override def apply(value: ArchiveValue, project: ProjectRef, ignoreNotFound: Boolean)(implicit caller: Caller, scheduler: Scheduler ): IO[ArchiveRejection, AkkaSource] = @@ -250,7 +249,7 @@ class ArchivesSpec resource.value shouldEqual Archive(id, project.ref, value.resources, 5.hours.toSeconds) } - "download an existing archive as zip and tar" in { + "download an existing archive as zip" in { val id = iri"http://localhost/base/${genString()}" val resourceId = iri"http://localhost/${genString()}" val fileId = iri"http://localhost/${genString()}" @@ -261,8 +260,7 @@ class ArchivesSpec ) ) archives.create(id, project.ref, value).accepted - archives.download(id, project.ref, ArchiveFormat.Tar, ignoreNotFound = true).accepted - archives.download(id, project.ref, ArchiveFormat.Zip, ignoreNotFound = true).accepted + archives.download(id, project.ref, ignoreNotFound = true).accepted } "return not found for unknown archives" in { diff --git a/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/TarDownloadSpec.scala b/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/TarDownloadSpec.scala deleted file mode 100644 index 456b4bc266..0000000000 --- a/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/TarDownloadSpec.scala +++ /dev/null @@ -1,15 +0,0 @@ -package ch.epfl.bluebrain.nexus.delta.plugins.archive 
-import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.ArchiveFormat -import ch.epfl.bluebrain.nexus.delta.sdk.AkkaSource - -import scala.concurrent.duration.DurationInt - -class TarDownloadSpec extends ArchiveDownloadSpec { - - implicit override def patienceConfig: PatienceConfig = PatienceConfig(3.seconds, 10.millis) - override def format: ArchiveFormat[_] = ArchiveFormat.Tar - - override def sourceToMap(source: AkkaSource): Map[String, String] = - fromTar(source).map { case (k, v) => k -> v.utf8String } - -} diff --git a/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ZipDownloadSpec.scala b/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ZipDownloadSpec.scala deleted file mode 100644 index dafffbb2c9..0000000000 --- a/delta/plugins/archive/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/archive/ZipDownloadSpec.scala +++ /dev/null @@ -1,12 +0,0 @@ -package ch.epfl.bluebrain.nexus.delta.plugins.archive - -import ch.epfl.bluebrain.nexus.delta.plugins.archive.model.ArchiveFormat -import ch.epfl.bluebrain.nexus.delta.sdk.AkkaSource - -class ZipDownloadSpec extends ArchiveDownloadSpec { - override def format: ArchiveFormat[_] = ArchiveFormat.Zip - - override def sourceToMap(source: AkkaSource): Map[String, String] = - fromZip(source).map { case (k, v) => k -> v.utf8String } - -} diff --git a/delta/testkit/src/main/scala/ch/epfl/bluebrain/nexus/testkit/archive/ArchiveHelpers.scala b/delta/testkit/src/main/scala/ch/epfl/bluebrain/nexus/testkit/archive/ArchiveHelpers.scala index 7f281024d6..e65d69497a 100644 --- a/delta/testkit/src/main/scala/ch/epfl/bluebrain/nexus/testkit/archive/ArchiveHelpers.scala +++ b/delta/testkit/src/main/scala/ch/epfl/bluebrain/nexus/testkit/archive/ArchiveHelpers.scala @@ -13,8 +13,12 @@ import org.scalatest.concurrent.ScalaFutures import java.nio.file.{Files => JFiles} import scala.concurrent.ExecutionContext +import scala.concurrent.duration._ import 
java.security.MessageDigest +import org.scalatest.concurrent.PatienceConfiguration +import org.scalatest.time.Span +import org.scalatest.time.Seconds trait ArchiveHelpers extends ScalaFutures with EitherValuable with OptionValues { @@ -30,37 +34,15 @@ trait ArchiveHelpers extends ScalaFutures with EitherValuable with OptionValues } } - def fromTar(byteString: ByteString)(implicit m: Materializer, e: ExecutionContext): ArchiveContent = - fromTar(Source.single(byteString)) - - def fromTar(source: Source[ByteString, Any])(implicit m: Materializer, e: ExecutionContext): ArchiveContent = { - val path = JFiles.createTempFile("test", ".tar") - source.runWith(FileIO.toPath(path)).futureValue - val result = FileIO - .fromPath(path) - .via(Archive.tarReader()) - .mapAsync(1) { case (metadata, source) => - source - .runFold(ByteString.empty) { case (bytes, elem) => - bytes ++ elem - } - .map { bytes => - (metadata.filePath, bytes) - } - } - .runFold(Map.empty[String, ByteString]) { case (map, elem) => - map + elem - } - .futureValue - result - } - def fromZip(byteString: ByteString)(implicit m: Materializer, e: ExecutionContext): ArchiveContent = fromZip(Source.single(byteString)) def fromZip(source: Source[ByteString, Any])(implicit m: Materializer, e: ExecutionContext): ArchiveContent = { - val path = JFiles.createTempFile("test", ".tar") - source.runWith(FileIO.toPath(path)).futureValue + val path = JFiles.createTempFile("test", ".zip") + source + .completionTimeout(10.seconds) + .runWith(FileIO.toPath(path)) + .futureValue(PatienceConfiguration.Timeout(Span(10, Seconds))) val result = Archive .zipReader(path.toFile) .mapAsync(1) { case (metadata, source) => diff --git a/docs/src/main/paradox/docs/delta/api/archives-api.md b/docs/src/main/paradox/docs/delta/api/archives-api.md index a0b71f2f39..8e19a7f150 100644 --- a/docs/src/main/paradox/docs/delta/api/archives-api.md +++ b/docs/src/main/paradox/docs/delta/api/archives-api.md @@ -1,7 +1,7 @@ # Archives An archive is a 
collection of resources stored inside an archive file. The archiving format chosen for this purpose is -tar (or tarball). Archive resources are rooted in the `/v1/archives/{org_label}/{project_label}/` collection. +ZIP. Archive resources are rooted in the `/v1/archives/{org_label}/{project_label}/` collection. Each archive... @@ -101,7 +101,7 @@ The json payload: - If the `@id` value is not found on the payload, an @id will be generated as follows: `base:{UUID}`. The `base` is the `prefix` defined on the resource's project (`{project_label}`). -The response will be an HTTP 303 Location redirect, which will point to the url where to consume the archive (tarball). +The response will be an HTTP 303 Location redirect, which will point to the url where to consume the archive (ZIP). The following diagram can help to understand the HTTP exchange ![post-redirect-get](assets/archives/post-redirect-get.png "Post/Redirect/Get archive") @@ -109,7 +109,7 @@ The following diagram can help to understand the HTTP exchange **Example** The following example shows how to create an archive containing 3 files. 2 of them are resources and the other is a file. -As a response, the tarball will be offered. +As a response, the ZIP file will be offered. Request : @@snip [archive.sh](assets/archives/create.sh) @@ -147,16 +147,7 @@ Note that if the payload contains an @id different from the `{archive_id}`, the When fetching an archive, the response format can be chosen through HTTP content negotiation. In order to fetch the archive metadata, the client can use any of the @ref:[following MIME types](content-negotiation.md#supported-mime-types). 
-However, in order to fetch the archive content, the HTTP `Accept` header should be provided: - -* `*/*` or `application/x-tar` will return a tar archive (or tarball) -* `application/zip` will return a zip archive - -@@@ note { .warning } - -@link:[The limitations of the tar format](https://en.wikipedia.org/wiki/Tar_(computing)) -makes the usage of archives difficult (among other things, the maximum file name is limited to 100 characters), -so its support will be removed in a future release. +However, in order to fetch the archive content, the HTTP `Accept` header should be provided as `application/zip`. @@@ @@ -170,7 +161,7 @@ GET /v1/archives/{org_label}/{project_label}/{archive_id}?ignoreNotFound=true **Example** -Request (tarball) +Request (ZIP) : @@snip [fetch.sh](assets/archives/fetch.sh) Request (metadata) diff --git a/docs/src/main/paradox/docs/delta/api/assets/archives/fetch.sh b/docs/src/main/paradox/docs/delta/api/assets/archives/fetch.sh index 46fc4031d9..6961af6e50 100644 --- a/docs/src/main/paradox/docs/delta/api/assets/archives/fetch.sh +++ b/docs/src/main/paradox/docs/delta/api/assets/archives/fetch.sh @@ -1,3 +1,3 @@ curl "http://localhost:8080/v1/archives/myorg/myproject/myarchive" \ - -H "Accept: application/x-tar" \ - -o output.tar \ No newline at end of file + -H "Accept: application/zip" \ + -o output.zip \ No newline at end of file diff --git a/docs/src/main/paradox/docs/delta/api/assets/archives/fetched.json b/docs/src/main/paradox/docs/delta/api/assets/archives/fetched.json index de3ab9aab3..5d742add13 100644 --- a/docs/src/main/paradox/docs/delta/api/assets/archives/fetched.json +++ b/docs/src/main/paradox/docs/delta/api/assets/archives/fetched.json @@ -32,6 +32,6 @@ "_createdAt": "2021-05-17T14:54:42.939Z", "_createdBy": "http://localhost:8080/v1/realms/myrealm/users/john", "_updatedAt": "2021-05-17T14:54:42.939Z", - "_updatedBy": "http://localhost:8080/v1/realms/myrealm/users/john" + "_updatedBy": 
"http://localhost:8080/v1/realms/myrealm/users/john", "_expiresInSeconds": 17530 } \ No newline at end of file diff --git a/docs/src/main/paradox/docs/delta/api/index.md b/docs/src/main/paradox/docs/delta/api/index.md index 57ab90a186..6f988d6f1d 100644 --- a/docs/src/main/paradox/docs/delta/api/index.md +++ b/docs/src/main/paradox/docs/delta/api/index.md @@ -121,7 +121,7 @@ A file is a binary attachment resource. ## Archives -An archive is a collection of resources stored inside an archive file. The archiving format chosen for this purpose is tar (or tarball). +An archive is a collection of resources stored inside an archive file. The archiving format chosen for this purpose is ZIP. @ref:[Operations on archives](archives-api.md) diff --git a/docs/src/main/paradox/docs/releases/v1.9-release-notes.md b/docs/src/main/paradox/docs/releases/v1.9-release-notes.md index d6366685a2..d5d71099e5 100644 --- a/docs/src/main/paradox/docs/releases/v1.9-release-notes.md +++ b/docs/src/main/paradox/docs/releases/v1.9-release-notes.md @@ -119,6 +119,12 @@ Annotated source is now available as an output format when creating an archive. Creating an archive now requires only the `resources/read` permission instead of `archives/write`. +#### Remove support for Tarball archives + +Tarball archives are no longer supported due to the limitations of the tar format (among other things, file names are limited to 100 characters). ZIP is now the only allowed format and clients should send `application/zip` in the `Accept` header when fetching archive content. 
+ +### Storages + ### Remote Storages Storages can no longer be created with credentials that would get stored: diff --git a/tests/src/test/scala/ch/epfl/bluebrain/nexus/tests/kg/ArchiveSpec.scala b/tests/src/test/scala/ch/epfl/bluebrain/nexus/tests/kg/ArchiveSpec.scala index 6c5b5df548..90a41cec9a 100644 --- a/tests/src/test/scala/ch/epfl/bluebrain/nexus/tests/kg/ArchiveSpec.scala +++ b/tests/src/test/scala/ch/epfl/bluebrain/nexus/tests/kg/ArchiveSpec.scala @@ -231,25 +231,6 @@ class ArchiveSpec extends BaseSpec with ArchiveHelpers with CirceEq { } } - "succeed returning tar" in { - val prefix = "https%3A%2F%2Fdev.nexus.test.com%2Fsimplified-resource%2F" - deltaClient.get[ByteString](s"/archives/$fullId/test-resource:archive", Tweety, acceptAll) { - (byteString, response) => - contentType(response) shouldEqual MediaTypes.`application/x-tar`.toContentType - response.status shouldEqual StatusCodes.OK - - val result = fromTar(byteString) - - val actualContent1 = result.entryAsJson(s"$fullId/compacted/${prefix}1%3Frev%3D1.json") - val actualContent2 = result.entryAsJson(s"$fullId2/compacted/${prefix}2.json") - val actualDigest3 = result.entryDigest("/some/other/nexus-logo.png") - - filterMetadataKeys(actualContent1) should equalIgnoreArrayOrder(payloadResponse1) - filterMetadataKeys(actualContent2) should equalIgnoreArrayOrder(payloadResponse2) - actualDigest3 shouldEqual nexusLogoDigest - } - } - "succeed returning zip" in { val prefix = "https%3A%2F%2Fdev.nexus.test.com%2Fsimplified-resource%2F" deltaClient.get[ByteString](s"/archives/$fullId/test-resource:archive", Tweety, acceptZip) { @@ -300,11 +281,6 @@ class ArchiveSpec extends BaseSpec with ArchiveHelpers with CirceEq { response.status shouldEqual StatusCodes.Created } downloadLink = s"/archives/$fullId/test-resource:archive-not-found?ignoreNotFound=true" - _ <- deltaClient.get[ByteString](downloadLink, Tweety, acceptAll) { (byteString, response) => - contentType(response) shouldEqual 
MediaTypes.`application/x-tar`.toContentType - response.status shouldEqual StatusCodes.OK - assertContent(fromTar(byteString)) - } _ <- deltaClient.get[ByteString](downloadLink, Tweety, acceptZip) { (byteString, response) => contentType(response) shouldEqual MediaTypes.`application/zip`.toContentType response.status shouldEqual StatusCodes.OK