From 11ff7161d234b8793a52f10b9bd998d43b5a9a70 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 15 Nov 2024 11:29:33 +0100 Subject: [PATCH 1/6] Update to Postgres 16.5 (#5234) Co-authored-by: Simon Dumas --- .../nexus/delta/dependency/PostgresServiceDependencySpec.scala | 2 +- .../bluebrain/nexus/testkit/postgres/PostgresContainer.scala | 2 +- docs/src/main/paradox/docs/delta/api/assets/version.json | 2 +- .../main/paradox/docs/running-nexus/docker/docker-compose.yaml | 2 +- docs/src/main/paradox/docs/running-nexus/index.md | 2 +- tests/docker/docker-compose.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/delta/app/src/test/scala/ch/epfl/bluebrain/nexus/delta/dependency/PostgresServiceDependencySpec.scala b/delta/app/src/test/scala/ch/epfl/bluebrain/nexus/delta/dependency/PostgresServiceDependencySpec.scala index 62fb05f468..4e04001c07 100644 --- a/delta/app/src/test/scala/ch/epfl/bluebrain/nexus/delta/dependency/PostgresServiceDependencySpec.scala +++ b/delta/app/src/test/scala/ch/epfl/bluebrain/nexus/delta/dependency/PostgresServiceDependencySpec.scala @@ -9,7 +9,7 @@ class PostgresServiceDependencySpec extends CatsEffectSpec with DoobieScalaTestF "PostgresServiceDependency" should { "fetch its service name and version" in { - new PostgresServiceDependency(xas).serviceDescription.accepted shouldEqual ServiceDescription("postgres", "16.4") + new PostgresServiceDependency(xas).serviceDescription.accepted shouldEqual ServiceDescription("postgres", "16.5") } } diff --git a/delta/testkit/src/main/scala/ch/epfl/bluebrain/nexus/testkit/postgres/PostgresContainer.scala b/delta/testkit/src/main/scala/ch/epfl/bluebrain/nexus/testkit/postgres/PostgresContainer.scala index d0067d84f7..3e2f61ebd0 100644 --- a/delta/testkit/src/main/scala/ch/epfl/bluebrain/nexus/testkit/postgres/PostgresContainer.scala +++ b/delta/testkit/src/main/scala/ch/epfl/bluebrain/nexus/testkit/postgres/PostgresContainer.scala @@ -9,7 +9,7 @@ import scala.concurrent.duration.DurationInt import scala.jdk.DurationConverters.ScalaDurationOps class PostgresContainer(user: String, password: String, database: String) - extends GenericContainer[PostgresContainer](DockerImageName.parse("library/postgres:16.4")) { + extends GenericContainer[PostgresContainer](DockerImageName.parse("library/postgres:16.5")) { addEnv("POSTGRES_USER", user) addEnv("POSTGRES_PASSWORD", password) addEnv("POSTGRES_DB222", database) diff --git a/docs/src/main/paradox/docs/delta/api/assets/version.json b/docs/src/main/paradox/docs/delta/api/assets/version.json index ed1df9bdff..135302ae3b 100644 --- a/docs/src/main/paradox/docs/delta/api/assets/version.json +++ b/docs/src/main/paradox/docs/delta/api/assets/version.json @@ -3,7 +3,7 @@ "delta": "1.10.0", "dependencies": { "blazegraph": "2.1.6-RC-21-jre", - "postgresql": "16.4", + "postgresql": "16.5", "elasticsearch": "8.15.1" }, "plugins": { diff --git a/docs/src/main/paradox/docs/running-nexus/docker/docker-compose.yaml b/docs/src/main/paradox/docs/running-nexus/docker/docker-compose.yaml index ce189a1c8b..c548fbecab 100644 --- a/docs/src/main/paradox/docs/running-nexus/docker/docker-compose.yaml +++ b/docs/src/main/paradox/docs/running-nexus/docker/docker-compose.yaml @@ -44,7 +44,7 @@ services: memory: 512M postgres: - image: library/postgres:16.4 + image: library/postgres:16.5 environment: POSTGRES_USER: "postgres" POSTGRES_PASSWORD: "postgres" diff --git a/docs/src/main/paradox/docs/running-nexus/index.md b/docs/src/main/paradox/docs/running-nexus/index.md index 5c8941dab3..25b31cad6b 100644 --- a/docs/src/main/paradox/docs/running-nexus/index.md +++ b/docs/src/main/paradox/docs/running-nexus/index.md @@ -106,7 +106,7 @@ $ curl http://localhost/v1/version | jq "dependencies": { "blazegraph": "2.1.6-SNAPSHOT", "elasticsearch": "8.15.1", - "postgres": "16.4" + "postgres": "16.5" }, "environment": "dev", "plugins": { diff --git a/tests/docker/docker-compose.yml b/tests/docker/docker-compose.yml index 30175d50ad..836dcc9202 100644 --- a/tests/docker/docker-compose.yml +++ b/tests/docker/docker-compose.yml @@ -142,7 +142,7 @@ services: - ./config:/config postgres: - image: library/postgres:16.4 + image: library/postgres:16.5 ports: - 5432:5432 environment: From e6dbb51168236a1f59fa16c52d5160d8264527c4 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 15 Nov 2024 18:01:44 +0100 Subject: [PATCH 2/6] Fix origin location decoding for local files with special chars (#5235) Co-authored-by: Simon Dumas --- .../storages/operations/s3/S3Helpers.scala | 6 ++- .../s3/client/S3StorageClientSuite.scala | 19 +++++++++ .../src/main/resources/logback-test.xml | 3 ++ .../nexus/ship/files/FileCopier.scala | 19 ++++++--- .../nexus/ship/files/FileProcessor.scala | 5 ++- .../import/file-import/000000001.json | 3 +- .../bluebrain/nexus/ship/RunShipSuite.scala | 41 +++++++++++++++---- .../nexus/ship/files/FileProcessorSuite.scala | 10 ++++- 8 files changed, 86 insertions(+), 20 deletions(-) diff --git a/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/S3Helpers.scala b/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/S3Helpers.scala index 4fc532ab61..4f79a9eb37 100644 --- a/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/S3Helpers.scala +++ b/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/S3Helpers.scala @@ -39,9 +39,13 @@ trait S3Helpers { self: Generators => def givenAFileInABucket(bucket: String, contents: String)( test: String => IO[Unit] + )(implicit client: S3StorageClient): IO[Unit] = + givenAFileInABucket(bucket, genString(), contents)(test) + + def givenAFileInABucket(bucket: String, key: String, contents: String)( + test: String => IO[Unit] )(implicit client: S3StorageClient): IO[Unit] = { val bytes = contents.getBytes(StandardCharsets.UTF_8) - val key = genString() val put = PutObjectRequest(bucket, key, Some(ContentTypes.`text/plain(UTF-8)`), bytes.length.toLong) client.uploadFile(put, Stream.emit(ByteBuffer.wrap(bytes))) >> test(key) } diff --git a/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/client/S3StorageClientSuite.scala b/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/client/S3StorageClientSuite.scala index d46153ee9c..548b5055e9 100644 --- a/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/client/S3StorageClientSuite.scala +++ b/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/client/S3StorageClientSuite.scala @@ -24,6 +24,25 @@ class S3StorageClientSuite extends NexusSuite with LocalStackS3StorageClient.Fix override def munitFixtures: Seq[AnyFixture[_]] = List(localStackS3Client) + test("Copy a file containing special characters between buckets") { + givenAnS3Bucket { bucket => + givenAnS3Bucket { targetBucket => + val options = CopyOptions(overwriteTarget = false, None) + val key = "/org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13 cell 1+2 (superficial).asc" + givenAFileInABucket(bucket, key, fileContents) { _ => + for { + result <- s3StorageClient.copyObject(bucket, key, targetBucket, key, options) + head <- s3StorageClient.headObject(targetBucket, key) + } yield { + assertEquals(result, S3OperationResult.Success) + assertEquals(head.fileSize, contentLength) + assertEquals(head.contentType, Some(expectedContentType)) + } + } + } + } + } + test("Copy the file to its new location if none is already there without a content type") { givenAnS3Bucket { bucket => val options = CopyOptions(overwriteTarget = false, None) diff --git a/delta/testkit/src/main/resources/logback-test.xml b/delta/testkit/src/main/resources/logback-test.xml index acf0b16f18..7b20ecefea 100644 --- a/delta/testkit/src/main/resources/logback-test.xml +++ b/delta/testkit/src/main/resources/logback-test.xml @@ -15,4 +15,7 @@ + + + \ No newline at end of file diff --git a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala index d2cb280ebb..ae4efb580f 100644 --- a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala +++ b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala @@ -16,11 +16,13 @@ import ch.epfl.bluebrain.nexus.ship.files.FileCopier.FileCopyResult import ch.epfl.bluebrain.nexus.ship.files.FileCopier.FileCopyResult.{FileCopySkipped, FileCopySuccess} import software.amazon.awssdk.services.s3.model.S3Exception +import java.net.URI +import java.nio.file.Paths import scala.concurrent.duration.DurationInt trait FileCopier { - def copyFile(project: ProjectRef, attributes: FileAttributes): IO[FileCopyResult] + def copyFile(project: ProjectRef, attributes: FileAttributes, localOrigin: Boolean): IO[FileCopyResult] } @@ -37,6 +39,8 @@ object FileCopier { logError(logger, "s3Copy") ) + def localDiskPath(relative: Path): String = Paths.get(URI.create(s"file:/$relative")).toString + sealed trait FileCopyResult extends Product with Serializable object FileCopyResult { @@ -54,14 +58,14 @@ object FileCopier { val importBucket = config.importBucket val targetBucket = config.targetBucket val locationGenerator = new S3LocationGenerator(config.prefix.getOrElse(Path.Empty)) - (project: ProjectRef, attributes: FileAttributes) => + (project: ProjectRef, attributes: FileAttributes, localOrigin: Boolean) => { val origin = attributes.path val patchedFileName = if (attributes.filename.isEmpty) "file" else attributes.filename val target = locationGenerator.file(project, attributes.uuid, patchedFileName).path val FIVE_GB = 5_000_000_000L - val originKey = UrlUtils.decode(origin) + val originKey = if (localOrigin) localDiskPath(origin) else UrlUtils.decode(origin) val targetKey = UrlUtils.decode(target) val copyOptions = CopyOptions(overwriteTarget = false, attributes.mediaType) @@ -86,14 +90,17 @@ object FileCopier { isFolder <- if (isObject) IO.pure(false) else s3StorageClient.listObjectsV2(importBucket, originKey).map(_.hasContents) _ <- IO.whenA(isObject) { copy } - _ <- IO.whenA(isFolder) { logger.info(s"$target has been found to be a folder, skipping the file copy...") } + _ <- IO.whenA(isFolder) { + logger.info(s"'$originKey' has been found to be a folder, skipping the file copy...") + } _ <- IO.whenA(!isFolder && !isObject) { - logger.error(s"$target is neither an object or folder, something is wrong.") + logger.error(s"'$originKey' is neither an object or folder, something is wrong.") } } yield if (isObject) FileCopySuccess(target) else FileCopySkipped }.retry(copyRetryStrategy) } - def apply(): FileCopier = (_: ProjectRef, attributes: FileAttributes) => IO.pure(FileCopySuccess(attributes.path)) + def apply(): FileCopier = (_: ProjectRef, attributes: FileAttributes, _: Boolean) => + IO.pure(FileCopySuccess(attributes.path)) } diff --git a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessor.scala b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessor.scala index abc1510edc..6845a4eb7f 100644 --- a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessor.scala +++ b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessor.scala @@ -13,6 +13,7 @@ import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model._ import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.{Files, MediaTypeDetector} import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.FetchStorage import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model.Storage +import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model.StorageType.DiskStorage import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.LinkFileAction import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.s3.client.S3StorageClient import ch.epfl.bluebrain.nexus.delta.rdf.IriOrBNode @@ -74,7 +75,7 @@ class FileProcessor private ( val newMediaType = patchMediaType(attrs.filename, attrs.mediaType) val newAttrs = e.attributes.copy(mediaType = newMediaType) val customMetadata = Some(getCustomMetadata(newAttrs)) - fileCopier.copyFile(e.project, newAttrs).flatMap { + fileCopier.copyFile(e.project, newAttrs, e.storageType == DiskStorage).flatMap { case FileCopySuccess(newPath) => val linkRequest = FileLinkRequest(newPath, newMediaType, customMetadata) files @@ -87,7 +88,7 @@ class FileProcessor private ( val newMediaType = patchMediaType(attrs.filename, attrs.mediaType) val newAttrs = e.attributes.copy(mediaType = newMediaType) val customMetadata = Some(getCustomMetadata(newAttrs)) - fileCopier.copyFile(e.project, newAttrs).flatMap { + fileCopier.copyFile(e.project, newAttrs, e.storageType == DiskStorage).flatMap { case FileCopySuccess(newPath) => val linkRequest = FileLinkRequest(newPath, newMediaType, customMetadata) files diff --git a/ship/src/test/resources/import/file-import/000000001.json b/ship/src/test/resources/import/file-import/000000001.json index 9f39d01b1e..de498b7402 100644 --- a/ship/src/test/resources/import/file-import/000000001.json +++ b/ship/src/test/resources/import/file-import/000000001.json @@ -4,4 +4,5 @@ {"ordering":4,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/old-path","rev":3,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/old-path", "rev": 3, "@type": "FileUpdated", "instant": "2020-09-24T09:58:43.479Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "bob"}, "attributes": {"path": "75b85666-b66f-4d90-8fd2-c6fb04beb5c6/8/9/5/4/c/3/e/c/002_160120B3_OH_updated.nwb", "uuid": "8954c3ec-da81-47b9-bcec-b72a1706a6a3", "bytes": 10701815, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "002_160120B3_OH_updated.nwb", "location": "file:///path/nexus/75b85666-b66f-4d90-8fd2-c6fb04beb5c6/8/9/5/4/c/3/e/c/002_160120B3_updated.nwb", "mediaType": "application/object-stream"}, "storageType": "RemoteDiskStorage"},"instant":"2020-09-24T11:58:43.479+02:00"} {"ordering":5,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/empty-filename","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/empty-filename", "rev": 1, "@type": "FileCreated", "instant": "2023-03-02T10:46:36.969Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "alice"}, "attributes": {"path": "public/sscx/2/b/3/9/7/9/3/0/", "uuid": "2b397930-0f69-4dad-bf6a-51825e940e12", "bytes": 538, "digest": {"@type": "ComputedDigest", "value": "b39a754a0988ca1f62e04a34d70479e9610b87beab91c58766f80c6ef6f93f3d", "algorithm": "SHA-256"}, "origin": "Client", "filename": "", "location": "file:///path/public/sscx/2/b/3/9/7/9/3/0", "mediaType": "text/plain"}, "storageType": "RemoteDiskStorage"},"instant":"2023-03-02T11:46:36.969+01:00"} {"ordering":6,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/special-chars-filename","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/special-chars-filename", "rev": 1, "@type": "FileCreated", "instant": "2023-03-02T10:46:36.969Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "alice"}, "attributes": {"path": "public/sscx/1/2/3/4/5/6/7/8/special%20%5Bfile%5D.json", "uuid": "12345678-0f69-4dad-bf6a-51825e940e12", "bytes": 538, "digest": {"@type": "ComputedDigest", "value": "b39a754a0988ca1f62e04a34d70479e9610b87beab91c58766f80c6ef6f93f3d", "algorithm": "SHA-256"}, "origin": "Client", "filename": "special [file].json", "location": "file:///path/public/sscx/1/2/3/4/5/6/7/8/special%20%5Bfile%5D.json", "mediaType": "text/plain"}, "storageType": "RemoteDiskStorage"},"instant":"2023-03-02T11:46:36.969+01:00"} -{"ordering":7,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/directory","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/directory", "rev": 1, "@type": "FileCreated", "instant": "2022-09-28T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "public/sscx/0/d/8/b/7/b/3/7/test_linking", "uuid": "0d8b7b37-bf62-4576-a5b4-ba2398731b8f", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "test_linking", "location": "file:///path/nexus/bbp/atlas/0/d/8/b/7/b/3/7/test_linking", "mediaType": "application/x-directory"}, "storageType": "RemoteDiskStorage"},"instant":"2022-09-28T14:59:11.8+02:00"} \ No newline at end of file +{"ordering":7,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/directory","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/directory", "rev": 1, "@type": "FileCreated", "instant": "2022-09-28T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "public/sscx/0/d/8/b/7/b/3/7/test_linking", "uuid": "0d8b7b37-bf62-4576-a5b4-ba2398731b8f", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "test_linking", "location": "file:///path/nexus/bbp/atlas/0/d/8/b/7/b/3/7/test_linking", "mediaType": "application/x-directory"}, "storageType": "RemoteDiskStorage"},"instant":"2022-09-28T14:59:11.8+02:00"} +{"ordering":8,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/local-plus","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/local-plus", "rev": 1, "@type": "FileCreated", "instant": "2022-10-14T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "95b0ee1e-a6a5-43e9-85fb-938b3c38dfc0/9/f/0/3/2/4/f/e/0925_Rhi13.3.13%20cell%201+2%20(superficial).asc", "uuid": "9f0324fe-9aac-4d34-84f0-5a44cd278197", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "0925_Rhi13.3.13 cell 1+2 (superficial).asc", "location": "file:///path/nexus/public/sscx/9/f/0/3/2/4/f/e", "mediaType": "application/octet-stream"}, "storageType": "DiskStorage"},"instant":"2022-10-14T14:59:11.8+02:00"} \ No newline at end of file diff --git a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala index 4f84257767..995d91c0ca 100644 --- a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala +++ b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala @@ -1,6 +1,6 @@ package ch.epfl.bluebrain.nexus.ship -import akka.http.scaladsl.model.{ContentType, ContentTypes, MediaTypes} +import akka.http.scaladsl.model.{ContentType, ContentTypes, MediaTypes, Uri} import cats.effect.IO import ch.epfl.bluebrain.nexus.delta.kernel.Hex import ch.epfl.bluebrain.nexus.delta.kernel.utils.UrlUtils @@ -8,17 +8,17 @@ import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.Files import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.Digest.ComputedDigest import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.FileRejection.FileNotFound import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.{FileAttributes, FileState} -import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model.DigestAlgorithm +import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model.StorageType.DiskStorage +import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model.{DigestAlgorithm, StorageType} import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.s3.{LocalStackS3StorageClient, PutObjectRequest} import ch.epfl.bluebrain.nexus.delta.rdf.IriOrBNode.Iri import ch.epfl.bluebrain.nexus.delta.rdf.Vocabulary.nxv -import ch.epfl.bluebrain.nexus.delta.rdf.syntax.iriStringContextSyntax +import ch.epfl.bluebrain.nexus.delta.sdk.implicits._ import ch.epfl.bluebrain.nexus.delta.sdk.projects.Projects import ch.epfl.bluebrain.nexus.delta.sdk.resolvers.Resolvers import ch.epfl.bluebrain.nexus.delta.sdk.resources.Resources import ch.epfl.bluebrain.nexus.delta.sourcing.Transactors import ch.epfl.bluebrain.nexus.delta.sourcing.exporter.RowEvent -import ch.epfl.bluebrain.nexus.delta.sourcing.implicits._ import ch.epfl.bluebrain.nexus.delta.sourcing.model.{EntityType, Label, ProjectRef} import ch.epfl.bluebrain.nexus.delta.sourcing.offset.Offset import ch.epfl.bluebrain.nexus.delta.sourcing.postgres.Doobie @@ -26,6 +26,7 @@ import ch.epfl.bluebrain.nexus.delta.sourcing.state.ScopedStateGet import ch.epfl.bluebrain.nexus.ship.ImportReport.Statistics import ch.epfl.bluebrain.nexus.ship.RunShipSuite.{checkFor, expectedImportReport, fetchFileAttributes, getDistinctOrgProjects} import ch.epfl.bluebrain.nexus.ship.config.ShipConfigFixtures +import ch.epfl.bluebrain.nexus.ship.files.FileCopier.localDiskPath import ch.epfl.bluebrain.nexus.testkit.mu.NexusSuite import doobie.Get import doobie.syntax.all._ @@ -65,8 +66,19 @@ class RunShipSuite s3Client.uploadFile(put, Stream.emit(contentAsBuffer)) } - private def decodedFilePath(json: Json) = root.attributes.path.string.getOption(json).map(UrlUtils.decode) - private def fileContentType = root.attributes.mediaType.string.getOption(_) + private def decodedFilePath(json: Json) = { + root.storageType.as[StorageType].getOption(json).flatMap { + case DiskStorage => + root.attributes.path.as[Uri.Path].getOption(json).map { path => + localDiskPath(path).toString + } + case _ => + root.attributes.path.string.getOption(json).map(UrlUtils.decode) + } + + } + + private def fileContentType = root.attributes.mediaType.string.getOption(_) private def generatePhysicalFile(row: RowEvent) = IO.whenA(row.`type` == Files.entityType) { @@ -136,7 +148,8 @@ class RunShipSuite report.progress == Map(Projects.entityType -> Statistics(1L, 0L)) test("Import files in S3 and in the primary store") { - val textPlain = MediaTypes.`text/plain`.withMissingCharset + val textPlain = MediaTypes.`text/plain`.withMissingCharset + val applicationOctetStream = MediaTypes.`application/octet-stream` for { events <- eventsStream("import/file-import/000000001.json") report <- RunShip(events, s3Client, inputConfig, xas) @@ -166,13 +179,23 @@ class RunShipSuite _ <- checkFor("file", specialCharsId, xas).assertEquals(1) _ <- assertS3Object(specialCharsLocation, Some(textPlain)) _ <- assertFileAttributes(project, specialCharsId)(specialCharsLocation, "special [file].json", Some(textPlain)) + // Local file containing a plus + localPlusId = iri"https://bbp.epfl.ch/neurosciencegraph/data/local-plus" + localPlusLocation = "/prefix/public/sscx/files/9/f/0/3/2/4/f/e/0925_Rhi13.3.13 cell 1 2 (superficial).asc" + _ <- checkFor("file", localPlusId, xas).assertEquals(1) + _ <- assertS3Object(localPlusLocation, Some(applicationOctetStream)) + _ <- assertFileAttributes(project, localPlusId)( + localPlusLocation, + "0925_Rhi13.3.13 cell 1+2 (superficial).asc", + Some(applicationOctetStream) + ) // Directory, should be skipped directoryId = iri"https://bbp.epfl.ch/neurosciencegraph/data/directory" _ <- checkFor("file", directoryId, xas).assertEquals(0) // Summary S3 check, 4 objects should have been imported in total - _ <- s3Client.listObjectsV2(targetBucket).map(_.keyCount().intValue()).assertEquals(4) + _ <- s3Client.listObjectsV2(targetBucket).map(_.keyCount().intValue()).assertEquals(5) // Summary report check, only the directory event should have been skipped - _ = assertEquals(report.progress(Files.entityType).success, 5L) + _ = assertEquals(report.progress(Files.entityType).success, 6L) _ = assertEquals(report.progress(Files.entityType).dropped, 1L) } yield () } diff --git a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessorSuite.scala b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessorSuite.scala index 769ab4ceec..b48d075fba 100644 --- a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessorSuite.scala +++ b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessorSuite.scala @@ -1,7 +1,8 @@ package ch.epfl.bluebrain.nexus.ship.files -import akka.http.scaladsl.model.{ContentTypes, MediaTypes} +import akka.http.scaladsl.model.{ContentTypes, MediaTypes, Uri} import ch.epfl.bluebrain.nexus.delta.kernel.http.MediaTypeDetectorConfig +import ch.epfl.bluebrain.nexus.ship.files.FileCopier.localDiskPath import ch.epfl.bluebrain.nexus.testkit.mu.NexusSuite class FileProcessorSuite extends NexusSuite { @@ -11,6 +12,13 @@ class FileProcessorSuite extends NexusSuite { "pdf" -> MediaTypes.`application/pdf` ) + test("Correctly decode a local path") { + val encoded = Uri.Path("org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13%20cell%201+2%20(superficial).asc") + val obtained = localDiskPath(encoded) + val expected = "/org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13 cell 1+2 (superficial).asc" + assertEquals(obtained, expected) + } + test("Return a new content type matching the config") { assertEquals( FileProcessor.patchMediaType("file.json", None), From 80b78da7b1a1af248a0a00c37e5c97db185ed43e Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 18 Nov 2024 11:12:36 +0100 Subject: [PATCH 3/6] Fixing error for local files (#5236) Co-authored-by: Simon Dumas --- .../scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala | 2 +- .../test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala | 2 +- .../ch/epfl/bluebrain/nexus/ship/files/FileProcessorSuite.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala index ae4efb580f..7a2ecb0528 100644 --- a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala +++ b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala @@ -39,7 +39,7 @@ object FileCopier { logError(logger, "s3Copy") ) - def localDiskPath(relative: Path): String = Paths.get(URI.create(s"file:/$relative")).toString + def localDiskPath(relative: Path): String = Paths.get(URI.create(s"file:/$relative")).toString.drop(1) sealed trait FileCopyResult extends Product with Serializable diff --git a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala index 995d91c0ca..93e6a2fa1e 100644 --- a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala +++ b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala @@ -192,7 +192,7 @@ class RunShipSuite // Directory, should be skipped directoryId = iri"https://bbp.epfl.ch/neurosciencegraph/data/directory" _ <- checkFor("file", directoryId, xas).assertEquals(0) - // Summary S3 check, 4 objects should have been imported in total + // Summary S3 check, 5 objects should have been imported in total _ <- s3Client.listObjectsV2(targetBucket).map(_.keyCount().intValue()).assertEquals(5) // Summary report check, only the directory event should have been skipped _ = assertEquals(report.progress(Files.entityType).success, 6L) diff --git a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessorSuite.scala b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessorSuite.scala index b48d075fba..e1bee3e7d6 100644 --- a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessorSuite.scala +++ b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessorSuite.scala @@ -15,7 +15,7 @@ class FileProcessorSuite extends NexusSuite { test("Correctly decode a local path") { val encoded = Uri.Path("org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13%20cell%201+2%20(superficial).asc") val obtained = localDiskPath(encoded) - val expected = "/org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13 cell 1+2 (superficial).asc" + val expected = "org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13 cell 1+2 (superficial).asc" assertEquals(obtained, expected) } From cb4fa1c3cc6fa11c45433fa35d4104204f127ac7 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 19 Nov 2024 16:53:04 +0100 Subject: [PATCH 4/6] Handle other filename cases related to link operation (#5238) Co-authored-by: Simon Dumas --- .../nexus/ship/files/FileCopier.scala | 53 +++++--- .../import/file-import/000000001.json | 5 +- .../bluebrain/nexus/ship/RunShipSuite.scala | 6 +- .../nexus/ship/files/FileCopierSuite.scala | 125 ++++++++++++++++++ .../nexus/ship/files/FileProcessorSuite.scala | 10 +- 5 files changed, 170 insertions(+), 29 deletions(-) create mode 100644 ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopierSuite.scala diff --git a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala index 7a2ecb0528..dc48927a32 100644 --- a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala +++ b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala @@ -3,6 +3,7 @@ package ch.epfl.bluebrain.nexus.ship.files import akka.http.scaladsl.model.Uri import akka.http.scaladsl.model.Uri.Path import cats.effect.IO +import cats.syntax.all._ import ch.epfl.bluebrain.nexus.delta.kernel.RetryStrategy.logError import ch.epfl.bluebrain.nexus.delta.kernel.utils.UrlUtils import ch.epfl.bluebrain.nexus.delta.kernel.{Logger, RetryStrategy} @@ -51,6 +52,33 @@ object FileCopier { } + def computeOriginKey( + s3StorageClient: S3StorageClient, + importBucket: String, + path: Path, + localOrigin: Boolean + ): IO[Option[String]] = { + def exists(key: String) = s3StorageClient.objectExists(importBucket, key).flatMap { + case true => IO.some(key) + case false => + s3StorageClient + .listObjectsV2(importBucket, key) + .map(_.hasContents) + .flatMap { isFolder => + IO.whenA(isFolder) { + logger.info(s"'$key' has been found to be a folder, skipping the file copy...") + } + } + .as(None) + } + + val decodedKey = if (localOrigin) localDiskPath(path) else UrlUtils.decode(path) + exists(decodedKey).flatMap { + case Some(key) => IO.some(key) + case None => exists(path.toString()) + } + } + def apply( s3StorageClient: S3StorageClient, config: FileProcessingConfig @@ -60,17 +88,15 @@ object FileCopier { val locationGenerator = new S3LocationGenerator(config.prefix.getOrElse(Path.Empty)) (project: ProjectRef, attributes: FileAttributes, localOrigin: Boolean) => { - val origin = attributes.path + val path = attributes.path val patchedFileName = if (attributes.filename.isEmpty) "file" else attributes.filename val target = locationGenerator.file(project, attributes.uuid, patchedFileName).path val FIVE_GB = 5_000_000_000L - val originKey = if (localOrigin) localDiskPath(origin) else UrlUtils.decode(origin) - val targetKey = UrlUtils.decode(target) - + val targetKey = UrlUtils.decode(target) val copyOptions = CopyOptions(overwriteTarget = false, attributes.mediaType) - def copy = { + def copy(originKey: String) = { if (attributes.bytes >= FIVE_GB) { logger.info(s"Attempting to copy a large file from $importBucket/$originKey to $targetBucket/$targetKey") >> s3StorageClient.copyObjectMultiPart(importBucket, originKey, targetBucket, targetKey, copyOptions) @@ -86,17 +112,12 @@ object FileCopier { } for { - isObject <- s3StorageClient.objectExists(importBucket, originKey) - isFolder <- - if (isObject) IO.pure(false) else s3StorageClient.listObjectsV2(importBucket, originKey).map(_.hasContents) - _ <- IO.whenA(isObject) { copy } - _ <- IO.whenA(isFolder) { - logger.info(s"'$originKey' has been found to be a folder, skipping the file copy...") - } - _ <- IO.whenA(!isFolder && !isObject) { - logger.error(s"'$originKey' is neither an object or folder, something is wrong.") - } - } yield if (isObject) FileCopySuccess(target) else FileCopySkipped + originKey <- computeOriginKey(s3StorageClient, importBucket, path, localOrigin) + _ <- originKey.traverse(copy) + _ <- IO.whenA(originKey.isEmpty) { + logger.error(s"$path is neither an object or folder, something is wrong.") + } + } yield if (originKey.isDefined) FileCopySuccess(target) else FileCopySkipped }.retry(copyRetryStrategy) } diff --git a/ship/src/test/resources/import/file-import/000000001.json b/ship/src/test/resources/import/file-import/000000001.json index de498b7402..fa42a74a93 100644 --- a/ship/src/test/resources/import/file-import/000000001.json +++ b/ship/src/test/resources/import/file-import/000000001.json @@ -5,4 +5,7 @@ {"ordering":5,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/empty-filename","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/empty-filename", "rev": 1, "@type": "FileCreated", "instant": "2023-03-02T10:46:36.969Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "alice"}, "attributes": {"path": "public/sscx/2/b/3/9/7/9/3/0/", "uuid": "2b397930-0f69-4dad-bf6a-51825e940e12", "bytes": 538, "digest": {"@type": "ComputedDigest", "value": "b39a754a0988ca1f62e04a34d70479e9610b87beab91c58766f80c6ef6f93f3d", "algorithm": "SHA-256"}, "origin": "Client", "filename": "", "location": "file:///path/public/sscx/2/b/3/9/7/9/3/0", "mediaType": "text/plain"}, "storageType": "RemoteDiskStorage"},"instant":"2023-03-02T11:46:36.969+01:00"} {"ordering":6,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/special-chars-filename","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/special-chars-filename", "rev": 1, "@type": "FileCreated", "instant": "2023-03-02T10:46:36.969Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "alice"}, "attributes": {"path": "public/sscx/1/2/3/4/5/6/7/8/special%20%5Bfile%5D.json", "uuid": "12345678-0f69-4dad-bf6a-51825e940e12", "bytes": 538, "digest": {"@type": "ComputedDigest", "value": "b39a754a0988ca1f62e04a34d70479e9610b87beab91c58766f80c6ef6f93f3d", "algorithm": "SHA-256"}, "origin": "Client", "filename": "special [file].json", "location": "file:///path/public/sscx/1/2/3/4/5/6/7/8/special%20%5Bfile%5D.json", "mediaType": "text/plain"}, "storageType": "RemoteDiskStorage"},"instant":"2023-03-02T11:46:36.969+01:00"} {"ordering":7,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/directory","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/directory", "rev": 1, "@type": "FileCreated", "instant": "2022-09-28T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "public/sscx/0/d/8/b/7/b/3/7/test_linking", "uuid": "0d8b7b37-bf62-4576-a5b4-ba2398731b8f", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "test_linking", "location": "file:///path/nexus/bbp/atlas/0/d/8/b/7/b/3/7/test_linking", "mediaType": "application/x-directory"}, "storageType": "RemoteDiskStorage"},"instant":"2022-09-28T14:59:11.8+02:00"} -{"ordering":8,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/local-plus","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/local-plus", "rev": 1, "@type": "FileCreated", "instant": "2022-10-14T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "95b0ee1e-a6a5-43e9-85fb-938b3c38dfc0/9/f/0/3/2/4/f/e/0925_Rhi13.3.13%20cell%201+2%20(superficial).asc", "uuid": "9f0324fe-9aac-4d34-84f0-5a44cd278197", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "0925_Rhi13.3.13 cell 1+2 (superficial).asc", "location": "file:///path/nexus/public/sscx/9/f/0/3/2/4/f/e", "mediaType": "application/octet-stream"}, "storageType": "DiskStorage"},"instant":"2022-10-14T14:59:11.8+02:00"} \ No newline at end of file +{"ordering":8,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/local-plus","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/local-plus", "rev": 1, "@type": "FileCreated", "instant": "2022-10-14T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "95b0ee1e-a6a5-43e9-85fb-938b3c38dfc0/9/f/0/3/2/4/f/e/0925_Rhi13.3.13%20cell%201+2%20(superficial).asc", "uuid": "9f0324fe-9aac-4d34-84f0-5a44cd278197", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "0925_Rhi13.3.13 cell 1+2 (superficial).asc", "location": "file:///path/nexus/public/sscx/9/f/0/3/2/4/f/e", "mediaType": "application/octet-stream"}, "storageType": "DiskStorage"},"instant":"2022-10-14T14:59:11.8+02:00"} +{"ordering":9,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/local-space","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/local-space", "rev": 1, "@type": "FileCreated", "instant": "2022-10-15T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "public/sscx/9/2/f/f/f/2/1/6/566647353__Square%20-%200.5ms%20Subthreshold__stimulus__1.png", "uuid": "92fff216-73a1-48a6-acc8-3dadf17c26bb", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "566647353__Square - 0.5ms Subthreshold__stimulus__1.png", "location": "file:///path/nexus/public/sscx/9/2/f/f/f/2/1/6", "mediaType": "application/octet-stream"}, "storageType": "DiskStorage"},"instant":"2022-10-14T14:59:11.8+02:00"} +{"ordering":10,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/local-equals","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/local-equals", "rev": 1, "@type": "FileCreated", "instant": "2022-10-15T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "public/sscx/9/b/1/9/8/5/f/1/emodel=dSTUT_L5BP__etype=dSTUT__mtype=L5BP__species=mouse__brain_region=Primary%20somatosensory%20area__iteration=final_emodel__seed=3__traces.pdf", "uuid": "9b1985f1-e75d-42d2-b101-e527241e2797", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "emodel=dSTUT_L5BP__etype=dSTUT__mtype=L5BP__species=mouse__brain_region=Primary somatosensory area__iteration=final_emodel__seed=3__traces.pdf", "location": "file:///path/nexus/public/sscx/9/b/1/9/8/5/f/1", "mediaType": "application/octet-stream"}, "storageType": "DiskStorage"},"instant":"2022-10-14T14:59:11.8+02:00"} +{"ordering":11,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/special-chars-2","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/special-chars-2", "rev": 1, "@type": "FileCreated", "instant": "2023-03-02T10:46:36.969Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "alice"}, "attributes": {"path": "15849bfc-f2ef-4ddd-89cb-b4658eb1f4ab/5/4/6/7/8/a/0/9/%20P(marker_cre)_overlapping.csv", "uuid": "54678a09-571b-465d-93e7-12137d6ad434", "bytes": 4267, "digest": {"@type": "ComputedDigest", "value": "b39a754a0988ca1f62e04a34d70479e9610b87beab91c58766f80c6ef6f93f3d", "algorithm": "SHA-256"}, "origin": "Client", "filename": " P(marker_cre)_overlapping.csv", "location": "file:///path/public/sscx/15849bfc-f2ef-4ddd-89cb-b4658eb1f4ab/5/4/6/7/8/a/0/9/%20P(marker_cre)_overlapping.csv", "mediaType": "text/plain"}, "storageType": "RemoteDiskStorage"},"instant":"2023-03-02T11:46:36.969+01:00"} diff --git a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala index 93e6a2fa1e..5517a7cb9a 100644 --- a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala +++ b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala @@ -192,10 +192,10 @@ class RunShipSuite // Directory, should be skipped directoryId = iri"https://bbp.epfl.ch/neurosciencegraph/data/directory" _ <- checkFor("file", directoryId, xas).assertEquals(0) - // Summary S3 check, 5 objects should have been imported in total - _ <- s3Client.listObjectsV2(targetBucket).map(_.keyCount().intValue()).assertEquals(5) + // Summary S3 check, 8 objects should have been imported in total + _ <- s3Client.listObjectsV2(targetBucket).map(_.keyCount().intValue()).assertEquals(8) // Summary report check, only the directory event should have been skipped - _ = assertEquals(report.progress(Files.entityType).success, 6L) + _ = assertEquals(report.progress(Files.entityType).success, 9L) _ = assertEquals(report.progress(Files.entityType).dropped, 1L) } yield () } diff --git a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopierSuite.scala b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopierSuite.scala new file mode 100644 index 0000000000..2a228c1a1f --- /dev/null +++ b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopierSuite.scala @@ -0,0 +1,125 @@ +package ch.epfl.bluebrain.nexus.ship.files + +import akka.http.scaladsl.model.Uri +import cats.effect.IO +import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.StoragesConfig.S3StorageConfig +import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.s3.client.S3StorageClient +import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.s3.{LocalStackS3StorageClient, S3Helpers} +import ch.epfl.bluebrain.nexus.ship.files.FileCopier.localDiskPath +import ch.epfl.bluebrain.nexus.testkit.mu.NexusSuite +import io.laserdisc.pure.s3.tagless.S3AsyncClientOp +import munit.AnyFixture + +class FileCopierSuite extends NexusSuite with LocalStackS3StorageClient.Fixture with S3Helpers { + + override def munitFixtures: Seq[AnyFixture[_]] = List(localStackS3Client) + implicit private lazy val (s3Client: S3StorageClient, underlying: S3AsyncClientOp[IO], _: S3StorageConfig) = + localStackS3Client() + + private val fileContents = "file content" + + test("Correctly decode a local path") { + val encoded = Uri.Path("org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13%20cell%201+2%20(superficial).asc") + val obtained = localDiskPath(encoded) + val expected = "org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13 cell 1+2 (superficial).asc" + assertEquals(obtained, expected) + } + + test("Correctly decode another local path with a space") { + val encoded = Uri.Path("org/proj/9/2/f/f/f/2/1/6/566647353__Square%20-%200.5ms%20Subthreshold__stimulus__1.png") + val obtained = localDiskPath(encoded) + val expected = "org/proj/9/2/f/f/f/2/1/6/566647353__Square - 0.5ms Subthreshold__stimulus__1.png" + assertEquals(obtained, expected) + } + + test("Should find a file from a decoded path") { + givenAnS3Bucket { bucket => + val path = Uri.Path( + "bbp/ncmv3/1/a/e/4/1/a/b/6/EMC__emodel=L5_TPC:B_cAC__ttype=182_L45%20IT%20CTX__species=mouse__brain_region=SS__iteration=mettypesv12_1.json" + ) + val key = + "bbp/ncmv3/1/a/e/4/1/a/b/6/EMC__emodel=L5_TPC:B_cAC__ttype=182_L45 IT CTX__species=mouse__brain_region=SS__iteration=mettypesv12_1.json" + givenAFileInABucket(bucket, key, fileContents) { _ => + FileCopier + .computeOriginKey(s3Client, bucket, path, localOrigin = false) + .assertEquals( + Some(key) + ) + } + } + } + + test("Should handle correctly path from local origin") { + givenAnS3Bucket { bucket => + val path = Uri.Path("27554ab5-20f4-4973-91f6-0b2d990cea69/b/e/4/9/b/5/5/b/Log2(CPM(Exon+intron)+1)_Visp_Pyr.csv") + val key = "27554ab5-20f4-4973-91f6-0b2d990cea69/b/e/4/9/b/5/5/b/Log2(CPM(Exon+intron)+1)_Visp_Pyr.csv" + givenAFileInABucket(bucket, key, fileContents) { _ => + FileCopier + .computeOriginKey(s3Client, bucket, path, localOrigin = true) + .assertEquals( + Some(key) + ) + } + } + } + + test("Should handle correctly another path from local origin") { + givenAnS3Bucket { bucket => + val path = Uri.Path( + "95b0ee1e-a6a5-43e9-85fb-938b3c38dfc0/9/f/0/3/2/4/f/e/0925_Rhi13.3.13%20cell%201+2%20(superficial).asc" + ) + val key = "95b0ee1e-a6a5-43e9-85fb-938b3c38dfc0/9/f/0/3/2/4/f/e/0925_Rhi13.3.13 cell 1+2 (superficial).asc" + givenAFileInABucket(bucket, key, fileContents) { _ => + FileCopier + .computeOriginKey(s3Client, bucket, path, localOrigin = true) + .assertEquals( + Some(key) + ) + } + } + } + + test("Should handle correctly yet another path from local origin") { + givenAnS3Bucket { bucket => + val path = Uri.Path( + "c1220611-7415-4476-baee-36e75f87bdeb/6/7/5/f/f/c/8/f/AIBS_morpho+ephys_data(for_Rat_coclustering).csv" + ) + val key = "c1220611-7415-4476-baee-36e75f87bdeb/6/7/5/f/f/c/8/f/AIBS_morpho+ephys_data(for_Rat_coclustering).csv" + givenAFileInABucket(bucket, key, fileContents) { _ => + FileCopier + .computeOriginKey(s3Client, bucket, path, localOrigin = true) + .assertEquals( + Some(key) + ) + } + } + } + + test("Should fallback to the encoded path if the decoded is not found") { + givenAnS3Bucket { bucket => + val path = Uri.Path("15849bfc-f2ef-4ddd-89cb-b4658eb1f4ab/5/4/6/7/8/a/0/9/%20P(marker_cre)_overlapping.csv") + val key = "15849bfc-f2ef-4ddd-89cb-b4658eb1f4ab/5/4/6/7/8/a/0/9/%20P(marker_cre)_overlapping.csv" + givenAFileInABucket(bucket, key, fileContents) { _ => + FileCopier + .computeOriginKey(s3Client, bucket, path, localOrigin = false) + .assertEquals( + Some(key) + ) + } + } + } + + test("Should skip directories") { + givenAnS3Bucket { bucket => + val path = Uri.Path( + "bbp/ncmv3/1/a/e/4/1/a/b/6/EMC__emodel=L5_TPC:B_cAC__ttype=182_L45%20IT%20CTX__species=mouse__brain_region=SS__iteration=mettypesv12_1.json" + ) + val key = + "bbp/ncmv3/1/a/e/4/1/a/b/6/EMC__emodel=L5_TPC:B_cAC__ttype=182_L45 IT CTX__species=mouse__brain_region=SS__iteration=mettypesv12_1.json/pouet.json" + givenAFileInABucket(bucket, key, fileContents) { _ => + FileCopier.computeOriginKey(s3Client, bucket, path, localOrigin = false).assertEquals(None) + } + } + } + +} diff --git a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessorSuite.scala b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessorSuite.scala index e1bee3e7d6..769ab4ceec 100644 --- a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessorSuite.scala +++ b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessorSuite.scala @@ -1,8 +1,7 @@ package ch.epfl.bluebrain.nexus.ship.files -import akka.http.scaladsl.model.{ContentTypes, MediaTypes, Uri} +import akka.http.scaladsl.model.{ContentTypes, MediaTypes} import ch.epfl.bluebrain.nexus.delta.kernel.http.MediaTypeDetectorConfig -import ch.epfl.bluebrain.nexus.ship.files.FileCopier.localDiskPath import ch.epfl.bluebrain.nexus.testkit.mu.NexusSuite class FileProcessorSuite extends NexusSuite { @@ -12,13 +11,6 @@ class FileProcessorSuite extends NexusSuite { "pdf" -> MediaTypes.`application/pdf` ) - test("Correctly decode a local path") { - val encoded = Uri.Path("org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13%20cell%201+2%20(superficial).asc") - val obtained = localDiskPath(encoded) - val expected = "org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13 cell 1+2 (superficial).asc" - assertEquals(obtained, expected) - } - test("Return a new content type matching the config") { assertEquals( FileProcessor.patchMediaType("file.json", None), From 566e436e3cbd9b62fa8b710e3a52effcbf106b8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cristina=20E=2E=20Gonz=C3=A1lez-Espinoza?= Date: Tue, 19 Nov 2024 17:20:28 +0100 Subject: [PATCH 5/6] Added simulation properties isDraft and status to search index (#5237) * Added simulation properties isDraft and status to SynaptomeSimulation and SingleNeuronSimulation search index * remove quotes * typo --- tests/docker/config/search/construct-query.sparql | 6 ++++++ .../kg/search/data/single-neuron-simulation.json | 4 +++- .../resources/kg/search/data/synaptome-simulation.json | 4 +++- .../nexus/tests/kg/SearchConfigIndexingSpec.scala | 8 ++++++-- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/docker/config/search/construct-query.sparql b/tests/docker/config/search/construct-query.sparql index 5330085a2d..7149917c5a 100644 --- a/tests/docker/config/search/construct-query.sparql +++ b/tests/docker/config/search/construct-query.sparql @@ -242,6 +242,8 @@ CONSTRUCT { ?alias :singleNeuronSimulation ?singleNeuronSimulation . ?singleNeuronSimulation :injectionLocation ?injectionLocation . ?singleNeuronSimulation :recordingLocation ?recordingLocation . + ?singleNeuronSimulation :isDraft ?isDraft . + ?singleNeuronSimulation :status ?status . ?singleNeuronSimulation :emodel ?singleNeuronSimulationEmodelId . ?singleNeuronSimulationEmodelId :name ?singleNeuronSimulationEmodelName . ## SingleNeuronSynaptome @@ -254,6 +256,8 @@ CONSTRUCT { ?alias :synaptomeSimulation ?synaptomeSimulation . ?synaptomeSimulation :injectionLocation ?injectionLocation . ?synaptomeSimulation :recordingLocation ?recordingLocation . + ?synaptomeSimulation :isDraft ?isDraft . + ?synaptomeSimulation :status ?status . ?synaptomeSimulation :synaptome ?synaptomeId . ?synaptomeId :name ?synaptomeName . @@ -548,6 +552,8 @@ CONSTRUCT { # Simulations OPTIONAL { ?id bmo:injectionLocation ?injectionLocation . } . OPTIONAL { ?id nsg:recordingLocation ?recordingLocation . } . + OPTIONAL { ?id nsg:status ?status . } . + OPTIONAL { ?id bmo:isDraft ?isDraft . } . # Bouton density OPTIONAL { diff --git a/tests/src/test/resources/kg/search/data/single-neuron-simulation.json b/tests/src/test/resources/kg/search/data/single-neuron-simulation.json index 7df1467dd7..1a58f0e53b 100644 --- a/tests/src/test/resources/kg/search/data/single-neuron-simulation.json +++ b/tests/src/test/resources/kg/search/data/single-neuron-simulation.json @@ -26,5 +26,7 @@ "@id": "https://bbp.epfl.ch/data/emodel", "@type": "EModel" }, - "recordingLocation": ["dendrite.01A", "dendrite.01B"] + "recordingLocation": ["dendrite.01A", "dendrite.01B"], + "isDraft": true, + "status": "validated" } \ No newline at end of file diff --git a/tests/src/test/resources/kg/search/data/synaptome-simulation.json b/tests/src/test/resources/kg/search/data/synaptome-simulation.json index 69371e2c1f..c9f8fe2d35 100644 --- a/tests/src/test/resources/kg/search/data/synaptome-simulation.json +++ b/tests/src/test/resources/kg/search/data/synaptome-simulation.json @@ -44,5 +44,7 @@ "used": { "@id": "https://bbp.epfl.ch/data/synapse/single-neuron-synaptome", "@type": "SingleNeuronSynaptome" - } + }, + "isDraft": false, + "status": "Done" } \ No newline at end of file diff --git a/tests/src/test/scala/ch/epfl/bluebrain/nexus/tests/kg/SearchConfigIndexingSpec.scala b/tests/src/test/scala/ch/epfl/bluebrain/nexus/tests/kg/SearchConfigIndexingSpec.scala index e1902b4085..3c5a2e36c5 100644 --- a/tests/src/test/scala/ch/epfl/bluebrain/nexus/tests/kg/SearchConfigIndexingSpec.scala +++ b/tests/src/test/scala/ch/epfl/bluebrain/nexus/tests/kg/SearchConfigIndexingSpec.scala @@ -1117,7 +1117,9 @@ class SearchConfigIndexingSpec extends BaseIntegrationSpec { "singleNeuronSimulation": { "injectionLocation": "dendrite.01B", "recordingLocation": ["dendrite.01A", "dendrite.01B"], - "emodel": { "@id" : "https://bbp.epfl.ch/data/emodel", "name" : "EM__fa285b7__dSTUT__15"} + "emodel": { "@id" : "https://bbp.epfl.ch/data/emodel", "name" : "EM__fa285b7__dSTUT__15"}, + "isDraft": true, + "status": "validated" } }""" @@ -1146,7 +1148,9 @@ class SearchConfigIndexingSpec extends BaseIntegrationSpec { "synaptomeSimulation": { "injectionLocation": "soma[0]", "recordingLocation": ["soma[0]_0.5", "dend[38]_0.5", "dend[1]_0.4", "apic[53]_0.3"], - "synaptome": { "@id" : "https://bbp.epfl.ch/data/synapse/single-neuron-synaptome", "name" : "synaptome-model-04"} + "synaptome": { "@id" : "https://bbp.epfl.ch/data/synapse/single-neuron-synaptome", "name" : "synaptome-model-04"}, + "isDraft": false, + "status": "Done" } }""" From 84f2cbffba05c3c30532c26e3de0b6b6df8379e9 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 19 Nov 2024 18:13:07 +0100 Subject: [PATCH 6/6] Decrement revision on update for tagged resolvers (#5239) Co-authored-by: Simon Dumas --- .../nexus/ship/resolvers/ResolverProcessor.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resolvers/ResolverProcessor.scala b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resolvers/ResolverProcessor.scala index de6c4d0092..c30589b549 100644 --- a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resolvers/ResolverProcessor.scala +++ b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resolvers/ResolverProcessor.scala @@ -49,12 +49,16 @@ class ResolverProcessor private ( case ResolverUpdated(_, _, value, _, _, _, _) => implicit val caller: Caller = Caller(s, identities(value)) val patched = patchValue(value, projectMapper, iriPatcher) - resolvers.update(id, projectRef, cRev, patched) + resolvers.update(id, projectRef, cRev, patched).recoverWith { case IncorrectRev(_, expectedRev) => + resolvers.update(id, projectRef, expectedRev, patched) + } case _: ResolverTagAdded => // Tags have been removed IO.unit case _: ResolverDeprecated => - resolvers.deprecate(id, projectRef, cRev) + resolvers.deprecate(id, projectRef, cRev).recoverWith { case IncorrectRev(_, expectedRev) => + resolvers.deprecate(id, projectRef, expectedRev) + } } }.redeemWith( {