diff --git a/delta/app/src/test/scala/ch/epfl/bluebrain/nexus/delta/dependency/PostgresServiceDependencySpec.scala b/delta/app/src/test/scala/ch/epfl/bluebrain/nexus/delta/dependency/PostgresServiceDependencySpec.scala index 62fb05f468..4e04001c07 100644 --- a/delta/app/src/test/scala/ch/epfl/bluebrain/nexus/delta/dependency/PostgresServiceDependencySpec.scala +++ b/delta/app/src/test/scala/ch/epfl/bluebrain/nexus/delta/dependency/PostgresServiceDependencySpec.scala @@ -9,7 +9,7 @@ class PostgresServiceDependencySpec extends CatsEffectSpec with DoobieScalaTestF "PostgresServiceDependency" should { "fetch its service name and version" in { - new PostgresServiceDependency(xas).serviceDescription.accepted shouldEqual ServiceDescription("postgres", "16.4") + new PostgresServiceDependency(xas).serviceDescription.accepted shouldEqual ServiceDescription("postgres", "16.5") } } diff --git a/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/S3Helpers.scala b/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/S3Helpers.scala index 4fc532ab61..4f79a9eb37 100644 --- a/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/S3Helpers.scala +++ b/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/S3Helpers.scala @@ -39,9 +39,13 @@ trait S3Helpers { self: Generators => def givenAFileInABucket(bucket: String, contents: String)( test: String => IO[Unit] + )(implicit client: S3StorageClient): IO[Unit] = + givenAFileInABucket(bucket, genString(), contents)(test) + + def givenAFileInABucket(bucket: String, key: String, contents: String)( + test: String => IO[Unit] )(implicit client: S3StorageClient): IO[Unit] = { val bytes = contents.getBytes(StandardCharsets.UTF_8) - val key = genString() val put = PutObjectRequest(bucket, key, Some(ContentTypes.`text/plain(UTF-8)`), bytes.length.toLong) client.uploadFile(put, Stream.emit(ByteBuffer.wrap(bytes))) >> test(key) } diff --git a/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/client/S3StorageClientSuite.scala b/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/client/S3StorageClientSuite.scala index d46153ee9c..548b5055e9 100644 --- a/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/client/S3StorageClientSuite.scala +++ b/delta/plugins/storage/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/storage/storages/operations/s3/client/S3StorageClientSuite.scala @@ -24,6 +24,25 @@ class S3StorageClientSuite extends NexusSuite with LocalStackS3StorageClient.Fix override def munitFixtures: Seq[AnyFixture[_]] = List(localStackS3Client) + test("Copy a file containing special characters between buckets") { + givenAnS3Bucket { bucket => + givenAnS3Bucket { targetBucket => + val options = CopyOptions(overwriteTarget = false, None) + val key = "/org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13 cell 1+2 (superficial).asc" + givenAFileInABucket(bucket, key, fileContents) { _ => + for { + result <- s3StorageClient.copyObject(bucket, key, targetBucket, key, options) + head <- s3StorageClient.headObject(targetBucket, key) + } yield { + assertEquals(result, S3OperationResult.Success) + assertEquals(head.fileSize, contentLength) + assertEquals(head.contentType, Some(expectedContentType)) + } + } + } + } + } + test("Copy the file to its new location if none is already there without a content type") { givenAnS3Bucket { bucket => val options = CopyOptions(overwriteTarget = false, None) diff --git a/delta/testkit/src/main/resources/logback-test.xml b/delta/testkit/src/main/resources/logback-test.xml index acf0b16f18..7b20ecefea 100644 --- a/delta/testkit/src/main/resources/logback-test.xml +++ b/delta/testkit/src/main/resources/logback-test.xml @@ -15,4 +15,7 @@ + + + \ No newline at end of file diff --git a/delta/testkit/src/main/scala/ch/epfl/bluebrain/nexus/testkit/postgres/PostgresContainer.scala b/delta/testkit/src/main/scala/ch/epfl/bluebrain/nexus/testkit/postgres/PostgresContainer.scala index d0067d84f7..3e2f61ebd0 100644 --- a/delta/testkit/src/main/scala/ch/epfl/bluebrain/nexus/testkit/postgres/PostgresContainer.scala +++ b/delta/testkit/src/main/scala/ch/epfl/bluebrain/nexus/testkit/postgres/PostgresContainer.scala @@ -9,7 +9,7 @@ import scala.concurrent.duration.DurationInt import scala.jdk.DurationConverters.ScalaDurationOps class PostgresContainer(user: String, password: String, database: String) - extends GenericContainer[PostgresContainer](DockerImageName.parse("library/postgres:16.4")) { + extends GenericContainer[PostgresContainer](DockerImageName.parse("library/postgres:16.5")) { addEnv("POSTGRES_USER", user) addEnv("POSTGRES_PASSWORD", password) addEnv("POSTGRES_DB222", database) diff --git a/docs/src/main/paradox/docs/delta/api/assets/version.json b/docs/src/main/paradox/docs/delta/api/assets/version.json index ed1df9bdff..135302ae3b 100644 --- a/docs/src/main/paradox/docs/delta/api/assets/version.json +++ b/docs/src/main/paradox/docs/delta/api/assets/version.json @@ -3,7 +3,7 @@ "delta": "1.10.0", "dependencies": { "blazegraph": "2.1.6-RC-21-jre", - "postgresql": "16.4", + "postgresql": "16.5", "elasticsearch": "8.15.1" }, "plugins": { diff --git a/docs/src/main/paradox/docs/running-nexus/docker/docker-compose.yaml b/docs/src/main/paradox/docs/running-nexus/docker/docker-compose.yaml index ce189a1c8b..c548fbecab 100644 --- a/docs/src/main/paradox/docs/running-nexus/docker/docker-compose.yaml +++ b/docs/src/main/paradox/docs/running-nexus/docker/docker-compose.yaml @@ -44,7 +44,7 @@ services: memory: 512M postgres: - image: library/postgres:16.4 + image: library/postgres:16.5 environment: POSTGRES_USER: "postgres" POSTGRES_PASSWORD: "postgres" diff --git a/docs/src/main/paradox/docs/running-nexus/index.md b/docs/src/main/paradox/docs/running-nexus/index.md index 5c8941dab3..25b31cad6b 100644 --- a/docs/src/main/paradox/docs/running-nexus/index.md +++ b/docs/src/main/paradox/docs/running-nexus/index.md @@ -106,7 +106,7 @@ $ curl http://localhost/v1/version | jq "dependencies": { "blazegraph": "2.1.6-SNAPSHOT", "elasticsearch": "8.15.1", - "postgres": "16.4" + "postgres": "16.5" }, "environment": "dev", "plugins": { diff --git a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala index d2cb280ebb..dc48927a32 100644 --- a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala +++ b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopier.scala @@ -3,6 +3,7 @@ package ch.epfl.bluebrain.nexus.ship.files import akka.http.scaladsl.model.Uri import akka.http.scaladsl.model.Uri.Path import cats.effect.IO +import cats.syntax.all._ import ch.epfl.bluebrain.nexus.delta.kernel.RetryStrategy.logError import ch.epfl.bluebrain.nexus.delta.kernel.utils.UrlUtils import ch.epfl.bluebrain.nexus.delta.kernel.{Logger, RetryStrategy} @@ -16,11 +17,13 @@ import ch.epfl.bluebrain.nexus.ship.files.FileCopier.FileCopyResult import ch.epfl.bluebrain.nexus.ship.files.FileCopier.FileCopyResult.{FileCopySkipped, FileCopySuccess} import software.amazon.awssdk.services.s3.model.S3Exception +import java.net.URI +import java.nio.file.Paths import scala.concurrent.duration.DurationInt trait FileCopier { - def copyFile(project: ProjectRef, attributes: FileAttributes): IO[FileCopyResult] + def copyFile(project: ProjectRef, attributes: FileAttributes, localOrigin: Boolean): IO[FileCopyResult] } @@ -37,6 +40,8 @@ object FileCopier { logError(logger, "s3Copy") ) + def localDiskPath(relative: Path): String = Paths.get(URI.create(s"file:/$relative")).toString.drop(1) + sealed trait FileCopyResult extends Product with Serializable object FileCopyResult { @@ -47,6 +52,33 @@ object FileCopier { } + def computeOriginKey( + s3StorageClient: S3StorageClient, + importBucket: String, + path: Path, + localOrigin: Boolean + ): IO[Option[String]] = { + def exists(key: String) = s3StorageClient.objectExists(importBucket, key).flatMap { + case true => IO.some(key) + case false => + s3StorageClient + .listObjectsV2(importBucket, key) + .map(_.hasContents) + .flatMap { isFolder => + IO.whenA(isFolder) { + logger.info(s"'$key' has been found to be a folder, skipping the file copy...") + } + } + .as(None) + } + + val decodedKey = if (localOrigin) localDiskPath(path) else UrlUtils.decode(path) + exists(decodedKey).flatMap { + case Some(key) => IO.some(key) + case None => exists(path.toString()) + } + } + def apply( s3StorageClient: S3StorageClient, config: FileProcessingConfig @@ -54,19 +86,17 @@ object FileCopier { val importBucket = config.importBucket val targetBucket = config.targetBucket val locationGenerator = new S3LocationGenerator(config.prefix.getOrElse(Path.Empty)) - (project: ProjectRef, attributes: FileAttributes) => + (project: ProjectRef, attributes: FileAttributes, localOrigin: Boolean) => { - val origin = attributes.path + val path = attributes.path val patchedFileName = if (attributes.filename.isEmpty) "file" else attributes.filename val target = locationGenerator.file(project, attributes.uuid, patchedFileName).path val FIVE_GB = 5_000_000_000L - val originKey = UrlUtils.decode(origin) - val targetKey = UrlUtils.decode(target) - + val targetKey = UrlUtils.decode(target) val copyOptions = CopyOptions(overwriteTarget = false, attributes.mediaType) - def copy = { + def copy(originKey: String) = { if (attributes.bytes >= FIVE_GB) { logger.info(s"Attempting to copy a large file from $importBucket/$originKey to $targetBucket/$targetKey") >> s3StorageClient.copyObjectMultiPart(importBucket, originKey, targetBucket, targetKey, copyOptions) @@ -82,18 +112,16 @@ object FileCopier { } for { - isObject <- s3StorageClient.objectExists(importBucket, originKey) - isFolder <- - if (isObject) IO.pure(false) else s3StorageClient.listObjectsV2(importBucket, originKey).map(_.hasContents) - _ <- IO.whenA(isObject) { copy } - _ <- IO.whenA(isFolder) { logger.info(s"$target has been found to be a folder, skipping the file copy...") } - _ <- IO.whenA(!isFolder && !isObject) { - logger.error(s"$target is neither an object or folder, something is wrong.") - } - } yield if (isObject) FileCopySuccess(target) else FileCopySkipped + originKey <- computeOriginKey(s3StorageClient, importBucket, path, localOrigin) + _ <- originKey.traverse(copy) + _ <- IO.whenA(originKey.isEmpty) { + logger.error(s"$path is neither an object or folder, something is wrong.") + } + } yield if (originKey.isDefined) FileCopySuccess(target) else FileCopySkipped }.retry(copyRetryStrategy) } - def apply(): FileCopier = (_: ProjectRef, attributes: FileAttributes) => IO.pure(FileCopySuccess(attributes.path)) + def apply(): FileCopier = (_: ProjectRef, attributes: FileAttributes, _: Boolean) => + IO.pure(FileCopySuccess(attributes.path)) } diff --git a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessor.scala b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessor.scala index abc1510edc..6845a4eb7f 100644 --- a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessor.scala +++ b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/files/FileProcessor.scala @@ -13,6 +13,7 @@ import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model._ import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.{Files, MediaTypeDetector} import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.FetchStorage import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model.Storage +import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model.StorageType.DiskStorage import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.LinkFileAction import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.s3.client.S3StorageClient import ch.epfl.bluebrain.nexus.delta.rdf.IriOrBNode @@ -74,7 +75,7 @@ class FileProcessor private ( val newMediaType = patchMediaType(attrs.filename, attrs.mediaType) val newAttrs = e.attributes.copy(mediaType = newMediaType) val customMetadata = Some(getCustomMetadata(newAttrs)) - fileCopier.copyFile(e.project, newAttrs).flatMap { + fileCopier.copyFile(e.project, newAttrs, e.storageType == DiskStorage).flatMap { case FileCopySuccess(newPath) => val linkRequest = FileLinkRequest(newPath, newMediaType, customMetadata) files @@ -87,7 +88,7 @@ class FileProcessor private ( val newMediaType = patchMediaType(attrs.filename, attrs.mediaType) val newAttrs = e.attributes.copy(mediaType = newMediaType) val customMetadata = Some(getCustomMetadata(newAttrs)) - fileCopier.copyFile(e.project, newAttrs).flatMap { + fileCopier.copyFile(e.project, newAttrs, e.storageType == DiskStorage).flatMap { case FileCopySuccess(newPath) => val linkRequest = FileLinkRequest(newPath, newMediaType, customMetadata) files diff --git a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resolvers/ResolverProcessor.scala b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resolvers/ResolverProcessor.scala index de6c4d0092..c30589b549 100644 --- a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resolvers/ResolverProcessor.scala +++ b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resolvers/ResolverProcessor.scala @@ -49,12 +49,16 @@ class ResolverProcessor private ( case ResolverUpdated(_, _, value, _, _, _, _) => implicit val caller: Caller = Caller(s, identities(value)) val patched = patchValue(value, projectMapper, iriPatcher) - resolvers.update(id, projectRef, cRev, patched) + resolvers.update(id, projectRef, cRev, patched).recoverWith { case IncorrectRev(_, expectedRev) => + resolvers.update(id, projectRef, expectedRev, patched) + } case _: ResolverTagAdded => // Tags have been removed IO.unit case _: ResolverDeprecated => - resolvers.deprecate(id, projectRef, cRev) + resolvers.deprecate(id, projectRef, cRev).recoverWith { case IncorrectRev(_, expectedRev) => + resolvers.deprecate(id, projectRef, expectedRev) + } } }.redeemWith( { diff --git a/ship/src/test/resources/import/file-import/000000001.json b/ship/src/test/resources/import/file-import/000000001.json index 9f39d01b1e..fa42a74a93 100644 --- a/ship/src/test/resources/import/file-import/000000001.json +++ b/ship/src/test/resources/import/file-import/000000001.json @@ -4,4 +4,8 @@ {"ordering":4,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/old-path","rev":3,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/old-path", "rev": 3, "@type": "FileUpdated", "instant": "2020-09-24T09:58:43.479Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "bob"}, "attributes": {"path": "75b85666-b66f-4d90-8fd2-c6fb04beb5c6/8/9/5/4/c/3/e/c/002_160120B3_OH_updated.nwb", "uuid": "8954c3ec-da81-47b9-bcec-b72a1706a6a3", "bytes": 10701815, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "002_160120B3_OH_updated.nwb", "location": "file:///path/nexus/75b85666-b66f-4d90-8fd2-c6fb04beb5c6/8/9/5/4/c/3/e/c/002_160120B3_updated.nwb", "mediaType": "application/object-stream"}, "storageType": "RemoteDiskStorage"},"instant":"2020-09-24T11:58:43.479+02:00"} {"ordering":5,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/empty-filename","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/empty-filename", "rev": 1, "@type": "FileCreated", "instant": "2023-03-02T10:46:36.969Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "alice"}, "attributes": {"path": "public/sscx/2/b/3/9/7/9/3/0/", "uuid": "2b397930-0f69-4dad-bf6a-51825e940e12", "bytes": 538, "digest": {"@type": "ComputedDigest", "value": "b39a754a0988ca1f62e04a34d70479e9610b87beab91c58766f80c6ef6f93f3d", "algorithm": "SHA-256"}, "origin": "Client", "filename": "", "location": "file:///path/public/sscx/2/b/3/9/7/9/3/0", "mediaType": "text/plain"}, "storageType": "RemoteDiskStorage"},"instant":"2023-03-02T11:46:36.969+01:00"} {"ordering":6,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/special-chars-filename","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/special-chars-filename", "rev": 1, "@type": "FileCreated", "instant": "2023-03-02T10:46:36.969Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "alice"}, "attributes": {"path": "public/sscx/1/2/3/4/5/6/7/8/special%20%5Bfile%5D.json", "uuid": "12345678-0f69-4dad-bf6a-51825e940e12", "bytes": 538, "digest": {"@type": "ComputedDigest", "value": "b39a754a0988ca1f62e04a34d70479e9610b87beab91c58766f80c6ef6f93f3d", "algorithm": "SHA-256"}, "origin": "Client", "filename": "special [file].json", "location": "file:///path/public/sscx/1/2/3/4/5/6/7/8/special%20%5Bfile%5D.json", "mediaType": "text/plain"}, "storageType": "RemoteDiskStorage"},"instant":"2023-03-02T11:46:36.969+01:00"} -{"ordering":7,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/directory","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/directory", "rev": 1, "@type": "FileCreated", "instant": "2022-09-28T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "public/sscx/0/d/8/b/7/b/3/7/test_linking", "uuid": "0d8b7b37-bf62-4576-a5b4-ba2398731b8f", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "test_linking", "location": "file:///path/nexus/bbp/atlas/0/d/8/b/7/b/3/7/test_linking", "mediaType": "application/x-directory"}, "storageType": "RemoteDiskStorage"},"instant":"2022-09-28T14:59:11.8+02:00"} \ No newline at end of file +{"ordering":7,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/directory","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/directory", "rev": 1, "@type": "FileCreated", "instant": "2022-09-28T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "public/sscx/0/d/8/b/7/b/3/7/test_linking", "uuid": "0d8b7b37-bf62-4576-a5b4-ba2398731b8f", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "test_linking", "location": "file:///path/nexus/bbp/atlas/0/d/8/b/7/b/3/7/test_linking", "mediaType": "application/x-directory"}, "storageType": "RemoteDiskStorage"},"instant":"2022-09-28T14:59:11.8+02:00"} +{"ordering":8,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/local-plus","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/local-plus", "rev": 1, "@type": "FileCreated", "instant": "2022-10-14T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "95b0ee1e-a6a5-43e9-85fb-938b3c38dfc0/9/f/0/3/2/4/f/e/0925_Rhi13.3.13%20cell%201+2%20(superficial).asc", "uuid": "9f0324fe-9aac-4d34-84f0-5a44cd278197", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "0925_Rhi13.3.13 cell 1+2 (superficial).asc", "location": "file:///path/nexus/public/sscx/9/f/0/3/2/4/f/e", "mediaType": "application/octet-stream"}, "storageType": "DiskStorage"},"instant":"2022-10-14T14:59:11.8+02:00"} +{"ordering":9,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/local-space","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/local-space", "rev": 1, "@type": "FileCreated", "instant": "2022-10-15T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "public/sscx/9/2/f/f/f/2/1/6/566647353__Square%20-%200.5ms%20Subthreshold__stimulus__1.png", "uuid": "92fff216-73a1-48a6-acc8-3dadf17c26bb", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "566647353__Square - 0.5ms Subthreshold__stimulus__1.png", "location": "file:///path/nexus/public/sscx/9/2/f/f/f/2/1/6", "mediaType": "application/octet-stream"}, "storageType": "DiskStorage"},"instant":"2022-10-14T14:59:11.8+02:00"} +{"ordering":10,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/local-equals","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/local-equals", "rev": 1, "@type": "FileCreated", "instant": "2022-10-15T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "public/sscx/9/b/1/9/8/5/f/1/emodel=dSTUT_L5BP__etype=dSTUT__mtype=L5BP__species=mouse__brain_region=Primary%20somatosensory%20area__iteration=final_emodel__seed=3__traces.pdf", "uuid": "9b1985f1-e75d-42d2-b101-e527241e2797", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "emodel=dSTUT_L5BP__etype=dSTUT__mtype=L5BP__species=mouse__brain_region=Primary somatosensory area__iteration=final_emodel__seed=3__traces.pdf", "location": "file:///path/nexus/public/sscx/9/b/1/9/8/5/f/1", "mediaType": "application/octet-stream"}, "storageType": "DiskStorage"},"instant":"2022-10-14T14:59:11.8+02:00"} +{"ordering":11,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/special-chars-2","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/special-chars-2", "rev": 1, "@type": "FileCreated", "instant": "2023-03-02T10:46:36.969Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "alice"}, "attributes": {"path": "15849bfc-f2ef-4ddd-89cb-b4658eb1f4ab/5/4/6/7/8/a/0/9/%20P(marker_cre)_overlapping.csv", "uuid": "54678a09-571b-465d-93e7-12137d6ad434", "bytes": 4267, "digest": {"@type": "ComputedDigest", "value": "b39a754a0988ca1f62e04a34d70479e9610b87beab91c58766f80c6ef6f93f3d", "algorithm": "SHA-256"}, "origin": "Client", "filename": " P(marker_cre)_overlapping.csv", "location": "file:///path/public/sscx/15849bfc-f2ef-4ddd-89cb-b4658eb1f4ab/5/4/6/7/8/a/0/9/%20P(marker_cre)_overlapping.csv", "mediaType": "text/plain"}, "storageType": "RemoteDiskStorage"},"instant":"2023-03-02T11:46:36.969+01:00"} diff --git a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala index 4f84257767..5517a7cb9a 100644 --- a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala +++ b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/RunShipSuite.scala @@ -1,6 +1,6 @@ package ch.epfl.bluebrain.nexus.ship -import akka.http.scaladsl.model.{ContentType, ContentTypes, MediaTypes} +import akka.http.scaladsl.model.{ContentType, ContentTypes, MediaTypes, Uri} import cats.effect.IO import ch.epfl.bluebrain.nexus.delta.kernel.Hex import ch.epfl.bluebrain.nexus.delta.kernel.utils.UrlUtils @@ -8,17 +8,17 @@ import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.Files import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.Digest.ComputedDigest import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.FileRejection.FileNotFound import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.{FileAttributes, FileState} -import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model.DigestAlgorithm +import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model.StorageType.DiskStorage +import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model.{DigestAlgorithm, StorageType} import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.s3.{LocalStackS3StorageClient, PutObjectRequest} import ch.epfl.bluebrain.nexus.delta.rdf.IriOrBNode.Iri import ch.epfl.bluebrain.nexus.delta.rdf.Vocabulary.nxv -import ch.epfl.bluebrain.nexus.delta.rdf.syntax.iriStringContextSyntax +import ch.epfl.bluebrain.nexus.delta.sdk.implicits._ import ch.epfl.bluebrain.nexus.delta.sdk.projects.Projects import ch.epfl.bluebrain.nexus.delta.sdk.resolvers.Resolvers import ch.epfl.bluebrain.nexus.delta.sdk.resources.Resources import ch.epfl.bluebrain.nexus.delta.sourcing.Transactors import ch.epfl.bluebrain.nexus.delta.sourcing.exporter.RowEvent -import ch.epfl.bluebrain.nexus.delta.sourcing.implicits._ import ch.epfl.bluebrain.nexus.delta.sourcing.model.{EntityType, Label, ProjectRef} import ch.epfl.bluebrain.nexus.delta.sourcing.offset.Offset import ch.epfl.bluebrain.nexus.delta.sourcing.postgres.Doobie @@ -26,6 +26,7 @@ import ch.epfl.bluebrain.nexus.delta.sourcing.state.ScopedStateGet import ch.epfl.bluebrain.nexus.ship.ImportReport.Statistics import ch.epfl.bluebrain.nexus.ship.RunShipSuite.{checkFor, expectedImportReport, fetchFileAttributes, getDistinctOrgProjects} import ch.epfl.bluebrain.nexus.ship.config.ShipConfigFixtures +import ch.epfl.bluebrain.nexus.ship.files.FileCopier.localDiskPath import ch.epfl.bluebrain.nexus.testkit.mu.NexusSuite import doobie.Get import doobie.syntax.all._ @@ -65,8 +66,19 @@ class RunShipSuite s3Client.uploadFile(put, Stream.emit(contentAsBuffer)) } - private def decodedFilePath(json: Json) = root.attributes.path.string.getOption(json).map(UrlUtils.decode) - private def fileContentType = root.attributes.mediaType.string.getOption(_) + private def decodedFilePath(json: Json) = { + root.storageType.as[StorageType].getOption(json).flatMap { + case DiskStorage => + root.attributes.path.as[Uri.Path].getOption(json).map { path => + localDiskPath(path).toString + } + case _ => + root.attributes.path.string.getOption(json).map(UrlUtils.decode) + } + + } + + private def fileContentType = root.attributes.mediaType.string.getOption(_) private def generatePhysicalFile(row: RowEvent) = IO.whenA(row.`type` == Files.entityType) { @@ -136,7 +148,8 @@ class RunShipSuite report.progress == Map(Projects.entityType -> Statistics(1L, 0L)) test("Import files in S3 and in the primary store") { - val textPlain = MediaTypes.`text/plain`.withMissingCharset + val textPlain = MediaTypes.`text/plain`.withMissingCharset + val applicationOctetStream = MediaTypes.`application/octet-stream` for { events <- eventsStream("import/file-import/000000001.json") report <- RunShip(events, s3Client, inputConfig, xas) @@ -166,13 +179,23 @@ class RunShipSuite _ <- checkFor("file", specialCharsId, xas).assertEquals(1) _ <- assertS3Object(specialCharsLocation, Some(textPlain)) _ <- assertFileAttributes(project, specialCharsId)(specialCharsLocation, "special [file].json", Some(textPlain)) + // Local file containing a plus + localPlusId = iri"https://bbp.epfl.ch/neurosciencegraph/data/local-plus" + localPlusLocation = "/prefix/public/sscx/files/9/f/0/3/2/4/f/e/0925_Rhi13.3.13 cell 1 2 (superficial).asc" + _ <- checkFor("file", localPlusId, xas).assertEquals(1) + _ <- assertS3Object(localPlusLocation, Some(applicationOctetStream)) + _ <- assertFileAttributes(project, localPlusId)( + localPlusLocation, + "0925_Rhi13.3.13 cell 1+2 (superficial).asc", + Some(applicationOctetStream) + ) // Directory, should be skipped directoryId = iri"https://bbp.epfl.ch/neurosciencegraph/data/directory" _ <- checkFor("file", directoryId, xas).assertEquals(0) - // Summary S3 check, 4 objects should have been imported in total - _ <- s3Client.listObjectsV2(targetBucket).map(_.keyCount().intValue()).assertEquals(4) + // Summary S3 check, 8 objects should have been imported in total + _ <- s3Client.listObjectsV2(targetBucket).map(_.keyCount().intValue()).assertEquals(8) // Summary report check, only the directory event should have been skipped - _ = assertEquals(report.progress(Files.entityType).success, 5L) + _ = assertEquals(report.progress(Files.entityType).success, 9L) _ = assertEquals(report.progress(Files.entityType).dropped, 1L) } yield () } diff --git a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopierSuite.scala b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopierSuite.scala new file mode 100644 index 0000000000..2a228c1a1f --- /dev/null +++ b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/files/FileCopierSuite.scala @@ -0,0 +1,125 @@ +package ch.epfl.bluebrain.nexus.ship.files + +import akka.http.scaladsl.model.Uri +import cats.effect.IO +import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.StoragesConfig.S3StorageConfig +import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.s3.client.S3StorageClient +import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.s3.{LocalStackS3StorageClient, S3Helpers} +import ch.epfl.bluebrain.nexus.ship.files.FileCopier.localDiskPath +import ch.epfl.bluebrain.nexus.testkit.mu.NexusSuite +import io.laserdisc.pure.s3.tagless.S3AsyncClientOp +import munit.AnyFixture + +class FileCopierSuite extends NexusSuite with LocalStackS3StorageClient.Fixture with S3Helpers { + + override def munitFixtures: Seq[AnyFixture[_]] = List(localStackS3Client) + implicit private lazy val (s3Client: S3StorageClient, underlying: S3AsyncClientOp[IO], _: S3StorageConfig) = + localStackS3Client() + + private val fileContents = "file content" + + test("Correctly decode a local path") { + val encoded = Uri.Path("org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13%20cell%201+2%20(superficial).asc") + val obtained = localDiskPath(encoded) + val expected = "org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13 cell 1+2 (superficial).asc" + assertEquals(obtained, expected) + } + + test("Correctly decode another local path with a space") { + val encoded = Uri.Path("org/proj/9/2/f/f/f/2/1/6/566647353__Square%20-%200.5ms%20Subthreshold__stimulus__1.png") + val obtained = localDiskPath(encoded) + val expected = "org/proj/9/2/f/f/f/2/1/6/566647353__Square - 0.5ms Subthreshold__stimulus__1.png" + assertEquals(obtained, expected) + } + + test("Should find a file from a decoded path") { + givenAnS3Bucket { bucket => + val path = Uri.Path( + "bbp/ncmv3/1/a/e/4/1/a/b/6/EMC__emodel=L5_TPC:B_cAC__ttype=182_L45%20IT%20CTX__species=mouse__brain_region=SS__iteration=mettypesv12_1.json" + ) + val key = + "bbp/ncmv3/1/a/e/4/1/a/b/6/EMC__emodel=L5_TPC:B_cAC__ttype=182_L45 IT CTX__species=mouse__brain_region=SS__iteration=mettypesv12_1.json" + givenAFileInABucket(bucket, key, fileContents) { _ => + FileCopier + .computeOriginKey(s3Client, bucket, path, localOrigin = false) + .assertEquals( + Some(key) + ) + } + } + } + + test("Should handle correctly path from local origin") { + givenAnS3Bucket { bucket => + val path = Uri.Path("27554ab5-20f4-4973-91f6-0b2d990cea69/b/e/4/9/b/5/5/b/Log2(CPM(Exon+intron)+1)_Visp_Pyr.csv") + val key = "27554ab5-20f4-4973-91f6-0b2d990cea69/b/e/4/9/b/5/5/b/Log2(CPM(Exon+intron)+1)_Visp_Pyr.csv" + givenAFileInABucket(bucket, key, fileContents) { _ => + FileCopier + .computeOriginKey(s3Client, bucket, path, localOrigin = true) + .assertEquals( + Some(key) + ) + } + } + } + + test("Should handle correctly another path from local origin") { + givenAnS3Bucket { bucket => + val path = Uri.Path( + "95b0ee1e-a6a5-43e9-85fb-938b3c38dfc0/9/f/0/3/2/4/f/e/0925_Rhi13.3.13%20cell%201+2%20(superficial).asc" + ) + val key = "95b0ee1e-a6a5-43e9-85fb-938b3c38dfc0/9/f/0/3/2/4/f/e/0925_Rhi13.3.13 cell 1+2 (superficial).asc" + givenAFileInABucket(bucket, key, fileContents) { _ => + FileCopier + .computeOriginKey(s3Client, bucket, path, localOrigin = true) + .assertEquals( + Some(key) + ) + } + } + } + + test("Should handle correctly yet another path from local origin") { + givenAnS3Bucket { bucket => + val path = Uri.Path( + "c1220611-7415-4476-baee-36e75f87bdeb/6/7/5/f/f/c/8/f/AIBS_morpho+ephys_data(for_Rat_coclustering).csv" + ) + val key = "c1220611-7415-4476-baee-36e75f87bdeb/6/7/5/f/f/c/8/f/AIBS_morpho+ephys_data(for_Rat_coclustering).csv" + givenAFileInABucket(bucket, key, fileContents) { _ => + FileCopier + .computeOriginKey(s3Client, bucket, path, localOrigin = true) + .assertEquals( + Some(key) + ) + } + } + } + + test("Should fallback to the encoded path if the decoded is not found") { + givenAnS3Bucket { bucket => + val path = Uri.Path("15849bfc-f2ef-4ddd-89cb-b4658eb1f4ab/5/4/6/7/8/a/0/9/%20P(marker_cre)_overlapping.csv") + val key = "15849bfc-f2ef-4ddd-89cb-b4658eb1f4ab/5/4/6/7/8/a/0/9/%20P(marker_cre)_overlapping.csv" + givenAFileInABucket(bucket, key, fileContents) { _ => + FileCopier + .computeOriginKey(s3Client, bucket, path, localOrigin = false) + .assertEquals( + Some(key) + ) + } + } + } + + test("Should skip directories") { + givenAnS3Bucket { bucket => + val path = Uri.Path( + "bbp/ncmv3/1/a/e/4/1/a/b/6/EMC__emodel=L5_TPC:B_cAC__ttype=182_L45%20IT%20CTX__species=mouse__brain_region=SS__iteration=mettypesv12_1.json" + ) + val key = + "bbp/ncmv3/1/a/e/4/1/a/b/6/EMC__emodel=L5_TPC:B_cAC__ttype=182_L45 IT CTX__species=mouse__brain_region=SS__iteration=mettypesv12_1.json/pouet.json" + givenAFileInABucket(bucket, key, fileContents) { _ => + FileCopier.computeOriginKey(s3Client, bucket, path, localOrigin = false).assertEquals(None) + } + } + } + +} diff --git a/tests/docker/config/search/construct-query.sparql b/tests/docker/config/search/construct-query.sparql index 5330085a2d..7149917c5a 100644 --- a/tests/docker/config/search/construct-query.sparql +++ b/tests/docker/config/search/construct-query.sparql @@ -242,6 +242,8 @@ CONSTRUCT { ?alias :singleNeuronSimulation ?singleNeuronSimulation . ?singleNeuronSimulation :injectionLocation ?injectionLocation . ?singleNeuronSimulation :recordingLocation ?recordingLocation . + ?singleNeuronSimulation :isDraft ?isDraft . + ?singleNeuronSimulation :status ?status . ?singleNeuronSimulation :emodel ?singleNeuronSimulationEmodelId . ?singleNeuronSimulationEmodelId :name ?singleNeuronSimulationEmodelName . ## SingleNeuronSynaptome @@ -254,6 +256,8 @@ CONSTRUCT { ?alias :synaptomeSimulation ?synaptomeSimulation . ?synaptomeSimulation :injectionLocation ?injectionLocation . ?synaptomeSimulation :recordingLocation ?recordingLocation . + ?synaptomeSimulation :isDraft ?isDraft . + ?synaptomeSimulation :status ?status . ?synaptomeSimulation :synaptome ?synaptomeId . ?synaptomeId :name ?synaptomeName . @@ -548,6 +552,8 @@ CONSTRUCT { # Simulations OPTIONAL { ?id bmo:injectionLocation ?injectionLocation . } . OPTIONAL { ?id nsg:recordingLocation ?recordingLocation . } . + OPTIONAL { ?id nsg:status ?status . } . + OPTIONAL { ?id bmo:isDraft ?isDraft . } . # Bouton density OPTIONAL { diff --git a/tests/docker/docker-compose.yml b/tests/docker/docker-compose.yml index 30175d50ad..836dcc9202 100644 --- a/tests/docker/docker-compose.yml +++ b/tests/docker/docker-compose.yml @@ -142,7 +142,7 @@ services: - ./config:/config postgres: - image: library/postgres:16.4 + image: library/postgres:16.5 ports: - 5432:5432 environment: diff --git a/tests/src/test/resources/kg/search/data/single-neuron-simulation.json b/tests/src/test/resources/kg/search/data/single-neuron-simulation.json index 7df1467dd7..1a58f0e53b 100644 --- a/tests/src/test/resources/kg/search/data/single-neuron-simulation.json +++ b/tests/src/test/resources/kg/search/data/single-neuron-simulation.json @@ -26,5 +26,7 @@ "@id": "https://bbp.epfl.ch/data/emodel", "@type": "EModel" }, - "recordingLocation": ["dendrite.01A", "dendrite.01B"] + "recordingLocation": ["dendrite.01A", "dendrite.01B"], + "isDraft": true, + "status": "validated" } \ No newline at end of file diff --git a/tests/src/test/resources/kg/search/data/synaptome-simulation.json b/tests/src/test/resources/kg/search/data/synaptome-simulation.json index 69371e2c1f..c9f8fe2d35 100644 --- a/tests/src/test/resources/kg/search/data/synaptome-simulation.json +++ b/tests/src/test/resources/kg/search/data/synaptome-simulation.json @@ -44,5 +44,7 @@ "used": { "@id": "https://bbp.epfl.ch/data/synapse/single-neuron-synaptome", "@type": "SingleNeuronSynaptome" - } + }, + "isDraft": false, + "status": "Done" } \ No newline at end of file diff --git a/tests/src/test/scala/ch/epfl/bluebrain/nexus/tests/kg/SearchConfigIndexingSpec.scala b/tests/src/test/scala/ch/epfl/bluebrain/nexus/tests/kg/SearchConfigIndexingSpec.scala index e1902b4085..3c5a2e36c5 100644 --- a/tests/src/test/scala/ch/epfl/bluebrain/nexus/tests/kg/SearchConfigIndexingSpec.scala +++ b/tests/src/test/scala/ch/epfl/bluebrain/nexus/tests/kg/SearchConfigIndexingSpec.scala @@ -1117,7 +1117,9 @@ class SearchConfigIndexingSpec extends BaseIntegrationSpec { "singleNeuronSimulation": { "injectionLocation": "dendrite.01B", "recordingLocation": ["dendrite.01A", "dendrite.01B"], - "emodel": { "@id" : "https://bbp.epfl.ch/data/emodel", "name" : "EM__fa285b7__dSTUT__15"} + "emodel": { "@id" : "https://bbp.epfl.ch/data/emodel", "name" : "EM__fa285b7__dSTUT__15"}, + "isDraft": true, + "status": "validated" } }""" @@ -1146,7 +1148,9 @@ class SearchConfigIndexingSpec extends BaseIntegrationSpec { "synaptomeSimulation": { "injectionLocation": "soma[0]", "recordingLocation": ["soma[0]_0.5", "dend[38]_0.5", "dend[1]_0.4", "apic[53]_0.3"], - "synaptome": { "@id" : "https://bbp.epfl.ch/data/synapse/single-neuron-synaptome", "name" : "synaptome-model-04"} + "synaptome": { "@id" : "https://bbp.epfl.ch/data/synapse/single-neuron-synaptome", "name" : "synaptome-model-04"}, + "isDraft": false, + "status": "Done" } }"""