Skip to content

Commit

Permalink
Merge branch 'master' into 3284-allow-to-provision-acls-startup
Browse files Browse the repository at this point in the history
  • Loading branch information
imsdu authored Nov 20, 2024
2 parents 9a4b310 + 84f2cbf commit fb93580
Show file tree
Hide file tree
Showing 19 changed files with 268 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class PostgresServiceDependencySpec extends CatsEffectSpec with DoobieScalaTestF
"PostgresServiceDependency" should {

"fetch its service name and version" in {
new PostgresServiceDependency(xas).serviceDescription.accepted shouldEqual ServiceDescription("postgres", "16.4")
new PostgresServiceDependency(xas).serviceDescription.accepted shouldEqual ServiceDescription("postgres", "16.5")
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,13 @@ trait S3Helpers { self: Generators =>

/**
  * Uploads `contents` to `bucket` under a key obtained from `genString()` and then runs `test` with that key.
  *
  * @param bucket   the bucket receiving the object
  * @param contents the text to upload, encoded as UTF-8
  * @param test     the effect to run once the upload has completed; it receives the object key
  */
def givenAFileInABucket(bucket: String, contents: String)(
    test: String => IO[Unit]
)(implicit client: S3StorageClient): IO[Unit] =
  givenAFileInABucket(bucket, genString(), contents)(test)

/**
  * Uploads `contents` to `bucket` under the caller-provided `key` and then runs `test` with that same key.
  *
  * @param bucket   the bucket receiving the object
  * @param key      the exact object key to write; it is used as given so that callers can exercise specific keys
  * @param contents the text to upload, encoded as UTF-8 and declared as `text/plain(UTF-8)`
  * @param test     the effect to run once the upload has completed; it receives `key`
  */
def givenAFileInABucket(bucket: String, key: String, contents: String)(
    test: String => IO[Unit]
)(implicit client: S3StorageClient): IO[Unit] = {
  val bytes = contents.getBytes(StandardCharsets.UTF_8)
  // `key` must stay the parameter: declaring a local `key` here would shadow it and upload under another name
  val put   = PutObjectRequest(bucket, key, Some(ContentTypes.`text/plain(UTF-8)`), bytes.length.toLong)
  client.uploadFile(put, Stream.emit(ByteBuffer.wrap(bytes))) >> test(key)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,25 @@ class S3StorageClientSuite extends NexusSuite with LocalStackS3StorageClient.Fix

override def munitFixtures: Seq[AnyFixture[_]] = List(localStackS3Client)

test("Copy a file containing special characters between buckets") {
  // Object key mixing dots, spaces, a '+' sign and parentheses; the same key is used on both sides of the copy
  val specialKey  = "/org/proj/9/f/0/3/2/4/f/e/0925_Rhi13.3.13 cell 1+2 (superficial).asc"
  val copyOptions = CopyOptions(overwriteTarget = false, None)

  givenAnS3Bucket { sourceBucket =>
    givenAnS3Bucket { destinationBucket =>
      givenAFileInABucket(sourceBucket, specialKey, fileContents) { _ =>
        s3StorageClient
          .copyObject(sourceBucket, specialKey, destinationBucket, specialKey, copyOptions)
          .flatMap { copyResult =>
            // Read the metadata back from the destination to check what was actually written there
            s3StorageClient.headObject(destinationBucket, specialKey).map { copiedHead =>
              assertEquals(copyResult, S3OperationResult.Success)
              assertEquals(copiedHead.fileSize, contentLength)
              assertEquals(copiedHead.contentType, Some(expectedContentType))
            }
          }
      }
    }
  }
}

test("Copy the file to its new location if none is already there without a content type") {
givenAnS3Bucket { bucket =>
val options = CopyOptions(overwriteTarget = false, None)
Expand Down
3 changes: 3 additions & 0 deletions delta/testkit/src/main/resources/logback-test.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,7 @@

<logger name="ch.epfl.bluebrain.nexus" level="ERROR" />

<!-- Uncomment to log the underlying calls made by the S3 SDK -->
<!-- <logger name="software.amazon.awssdk.request" level="DEBUG" />-->

</configuration>
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import scala.concurrent.duration.DurationInt
import scala.jdk.DurationConverters.ScalaDurationOps

class PostgresContainer(user: String, password: String, database: String)
extends GenericContainer[PostgresContainer](DockerImageName.parse("library/postgres:16.4")) {
extends GenericContainer[PostgresContainer](DockerImageName.parse("library/postgres:16.5")) {
// Environment variables read by the postgres image on first start-up to create the initial user and database
addEnv("POSTGRES_USER", user)
addEnv("POSTGRES_PASSWORD", password)
// The image only recognises `POSTGRES_DB`; under any other name the requested database is never created
addEnv("POSTGRES_DB", database)
Expand Down
2 changes: 1 addition & 1 deletion docs/src/main/paradox/docs/delta/api/assets/version.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"delta": "1.10.0",
"dependencies": {
"blazegraph": "2.1.6-RC-21-jre",
"postgresql": "16.4",
"postgresql": "16.5",
"elasticsearch": "8.15.1"
},
"plugins": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ services:
memory: 512M

postgres:
image: library/postgres:16.4
image: library/postgres:16.5
environment:
POSTGRES_USER: "postgres"
POSTGRES_PASSWORD: "postgres"
Expand Down
2 changes: 1 addition & 1 deletion docs/src/main/paradox/docs/running-nexus/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ $ curl http://localhost/v1/version | jq
"dependencies": {
"blazegraph": "2.1.6-SNAPSHOT",
"elasticsearch": "8.15.1",
"postgres": "16.4"
"postgres": "16.5"
},
"environment": "dev",
"plugins": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package ch.epfl.bluebrain.nexus.ship.files
import akka.http.scaladsl.model.Uri
import akka.http.scaladsl.model.Uri.Path
import cats.effect.IO
import cats.syntax.all._
import ch.epfl.bluebrain.nexus.delta.kernel.RetryStrategy.logError
import ch.epfl.bluebrain.nexus.delta.kernel.utils.UrlUtils
import ch.epfl.bluebrain.nexus.delta.kernel.{Logger, RetryStrategy}
Expand All @@ -16,11 +17,13 @@ import ch.epfl.bluebrain.nexus.ship.files.FileCopier.FileCopyResult
import ch.epfl.bluebrain.nexus.ship.files.FileCopier.FileCopyResult.{FileCopySkipped, FileCopySuccess}
import software.amazon.awssdk.services.s3.model.S3Exception

import java.net.URI
import java.nio.file.Paths
import scala.concurrent.duration.DurationInt

trait FileCopier {

def copyFile(project: ProjectRef, attributes: FileAttributes): IO[FileCopyResult]
def copyFile(project: ProjectRef, attributes: FileAttributes, localOrigin: Boolean): IO[FileCopyResult]

}

Expand All @@ -37,6 +40,8 @@ object FileCopier {
logError(logger, "s3Copy")
)

/**
  * Turns a relative path into the string form it has once read as a local `file:` URI.
  *
  * The path is prefixed with `file:/`, parsed as a URI and converted to a `java.nio.file.Path`; the first
  * character of the resulting string is then dropped.
  * NOTE(review): presumably this resolves percent-encoded characters the way a disk storage wrote them and
  * strips the leading separator introduced by `file:/` - confirm against the disk storage implementation.
  *
  * @param relative the relative path to convert
  * @return the path as a string, without its first character
  */
def localDiskPath(relative: Path): String = {
  val fileUri  = URI.create(s"file:/$relative")
  val absolute = Paths.get(fileUri).toString
  absolute.drop(1)
}

sealed trait FileCopyResult extends Product with Serializable

object FileCopyResult {
Expand All @@ -47,26 +52,51 @@ object FileCopier {

}

/**
  * Looks up the key under which a file is stored in the import bucket.
  *
  * Two candidates are tried in order: first the decoded path (`localDiskPath` when `localOrigin` is set,
  * `UrlUtils.decode` otherwise), then the path exactly as provided. For a candidate that is not an
  * object, the bucket is listed under that key and an info message is logged when the listing has contents.
  *
  * @param s3StorageClient the client used to query the bucket
  * @param importBucket    the bucket holding the files to import
  * @param path            the path of the file as recorded in its attributes
  * @param localOrigin     whether the decoded candidate is computed with `localDiskPath`
  * @return the first candidate that exists as an object, or `None` when neither does
  */
def computeOriginKey(
    s3StorageClient: S3StorageClient,
    importBucket: String,
    path: Path,
    localOrigin: Boolean
): IO[Option[String]] = {

  // Logs when the candidate turns out to be a folder, i.e. listing the bucket under it yields contents
  def reportIfFolder(candidate: String): IO[Unit] =
    s3StorageClient.listObjectsV2(importBucket, candidate).flatMap { listing =>
      if (listing.hasContents)
        logger.info(s"'$candidate' has been found to be a folder, skipping the file copy...")
      else
        IO.unit
    }

  // Yields the candidate when it is an object in the import bucket, nothing otherwise
  def lookup(candidate: String): IO[Option[String]] =
    s3StorageClient.objectExists(importBucket, candidate).flatMap { isObject =>
      if (isObject) IO.some(candidate)
      else reportIfFolder(candidate).as(Option.empty[String])
    }

  val decodedCandidate =
    if (localOrigin) localDiskPath(path)
    else UrlUtils.decode(path)

  // Fall back to the raw path only when the decoded candidate is not an object
  lookup(decodedCandidate).flatMap { maybeKey =>
    maybeKey.fold(lookup(path.toString()))(found => IO.some(found))
  }
}

def apply(
s3StorageClient: S3StorageClient,
config: FileProcessingConfig
): FileCopier = {
val importBucket = config.importBucket
val targetBucket = config.targetBucket
val locationGenerator = new S3LocationGenerator(config.prefix.getOrElse(Path.Empty))
(project: ProjectRef, attributes: FileAttributes) =>
(project: ProjectRef, attributes: FileAttributes, localOrigin: Boolean) =>
{
val origin = attributes.path
val path = attributes.path
val patchedFileName = if (attributes.filename.isEmpty) "file" else attributes.filename
val target = locationGenerator.file(project, attributes.uuid, patchedFileName).path
val FIVE_GB = 5_000_000_000L

val originKey = UrlUtils.decode(origin)
val targetKey = UrlUtils.decode(target)

val targetKey = UrlUtils.decode(target)
val copyOptions = CopyOptions(overwriteTarget = false, attributes.mediaType)

def copy = {
def copy(originKey: String) = {
if (attributes.bytes >= FIVE_GB) {
logger.info(s"Attempting to copy a large file from $importBucket/$originKey to $targetBucket/$targetKey") >>
s3StorageClient.copyObjectMultiPart(importBucket, originKey, targetBucket, targetKey, copyOptions)
Expand All @@ -82,18 +112,16 @@ object FileCopier {
}

for {
isObject <- s3StorageClient.objectExists(importBucket, originKey)
isFolder <-
if (isObject) IO.pure(false) else s3StorageClient.listObjectsV2(importBucket, originKey).map(_.hasContents)
_ <- IO.whenA(isObject) { copy }
_ <- IO.whenA(isFolder) { logger.info(s"$target has been found to be a folder, skipping the file copy...") }
_ <- IO.whenA(!isFolder && !isObject) {
logger.error(s"$target is neither an object or folder, something is wrong.")
}
} yield if (isObject) FileCopySuccess(target) else FileCopySkipped
originKey <- computeOriginKey(s3StorageClient, importBucket, path, localOrigin)
_ <- originKey.traverse(copy)
_ <- IO.whenA(originKey.isEmpty) {
logger.error(s"$path is neither an object or folder, something is wrong.")
}
} yield if (originKey.isDefined) FileCopySuccess(target) else FileCopySkipped
}.retry(copyRetryStrategy)
}

def apply(): FileCopier = (_: ProjectRef, attributes: FileAttributes) => IO.pure(FileCopySuccess(attributes.path))
def apply(): FileCopier = (_: ProjectRef, attributes: FileAttributes, _: Boolean) =>
IO.pure(FileCopySuccess(attributes.path))

}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model._
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.{Files, MediaTypeDetector}
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.FetchStorage
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model.Storage
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model.StorageType.DiskStorage
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.LinkFileAction
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.s3.client.S3StorageClient
import ch.epfl.bluebrain.nexus.delta.rdf.IriOrBNode
Expand Down Expand Up @@ -74,7 +75,7 @@ class FileProcessor private (
val newMediaType = patchMediaType(attrs.filename, attrs.mediaType)
val newAttrs = e.attributes.copy(mediaType = newMediaType)
val customMetadata = Some(getCustomMetadata(newAttrs))
fileCopier.copyFile(e.project, newAttrs).flatMap {
fileCopier.copyFile(e.project, newAttrs, e.storageType == DiskStorage).flatMap {
case FileCopySuccess(newPath) =>
val linkRequest = FileLinkRequest(newPath, newMediaType, customMetadata)
files
Expand All @@ -87,7 +88,7 @@ class FileProcessor private (
val newMediaType = patchMediaType(attrs.filename, attrs.mediaType)
val newAttrs = e.attributes.copy(mediaType = newMediaType)
val customMetadata = Some(getCustomMetadata(newAttrs))
fileCopier.copyFile(e.project, newAttrs).flatMap {
fileCopier.copyFile(e.project, newAttrs, e.storageType == DiskStorage).flatMap {
case FileCopySuccess(newPath) =>
val linkRequest = FileLinkRequest(newPath, newMediaType, customMetadata)
files
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,16 @@ class ResolverProcessor private (
case ResolverUpdated(_, _, value, _, _, _, _) =>
implicit val caller: Caller = Caller(s, identities(value))
val patched = patchValue(value, projectMapper, iriPatcher)
resolvers.update(id, projectRef, cRev, patched)
resolvers.update(id, projectRef, cRev, patched).recoverWith { case IncorrectRev(_, expectedRev) =>
resolvers.update(id, projectRef, expectedRev, patched)
}
case _: ResolverTagAdded =>
// Tags have been removed
IO.unit
case _: ResolverDeprecated =>
resolvers.deprecate(id, projectRef, cRev)
resolvers.deprecate(id, projectRef, cRev).recoverWith { case IncorrectRev(_, expectedRev) =>
resolvers.deprecate(id, projectRef, expectedRev)
}
}
}.redeemWith(
{
Expand Down
6 changes: 5 additions & 1 deletion ship/src/test/resources/import/file-import/000000001.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@
{"ordering":4,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/old-path","rev":3,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/old-path", "rev": 3, "@type": "FileUpdated", "instant": "2020-09-24T09:58:43.479Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "bob"}, "attributes": {"path": "75b85666-b66f-4d90-8fd2-c6fb04beb5c6/8/9/5/4/c/3/e/c/002_160120B3_OH_updated.nwb", "uuid": "8954c3ec-da81-47b9-bcec-b72a1706a6a3", "bytes": 10701815, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "002_160120B3_OH_updated.nwb", "location": "file:///path/nexus/75b85666-b66f-4d90-8fd2-c6fb04beb5c6/8/9/5/4/c/3/e/c/002_160120B3_updated.nwb", "mediaType": "application/object-stream"}, "storageType": "RemoteDiskStorage"},"instant":"2020-09-24T11:58:43.479+02:00"}
{"ordering":5,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/empty-filename","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/empty-filename", "rev": 1, "@type": "FileCreated", "instant": "2023-03-02T10:46:36.969Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "alice"}, "attributes": {"path": "public/sscx/2/b/3/9/7/9/3/0/", "uuid": "2b397930-0f69-4dad-bf6a-51825e940e12", "bytes": 538, "digest": {"@type": "ComputedDigest", "value": "b39a754a0988ca1f62e04a34d70479e9610b87beab91c58766f80c6ef6f93f3d", "algorithm": "SHA-256"}, "origin": "Client", "filename": "", "location": "file:///path/public/sscx/2/b/3/9/7/9/3/0", "mediaType": "text/plain"}, "storageType": "RemoteDiskStorage"},"instant":"2023-03-02T11:46:36.969+01:00"}
{"ordering":6,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/special-chars-filename","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/special-chars-filename", "rev": 1, "@type": "FileCreated", "instant": "2023-03-02T10:46:36.969Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "alice"}, "attributes": {"path": "public/sscx/1/2/3/4/5/6/7/8/special%20%5Bfile%5D.json", "uuid": "12345678-0f69-4dad-bf6a-51825e940e12", "bytes": 538, "digest": {"@type": "ComputedDigest", "value": "b39a754a0988ca1f62e04a34d70479e9610b87beab91c58766f80c6ef6f93f3d", "algorithm": "SHA-256"}, "origin": "Client", "filename": "special [file].json", "location": "file:///path/public/sscx/1/2/3/4/5/6/7/8/special%20%5Bfile%5D.json", "mediaType": "text/plain"}, "storageType": "RemoteDiskStorage"},"instant":"2023-03-02T11:46:36.969+01:00"}
{"ordering":7,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/directory","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/directory", "rev": 1, "@type": "FileCreated", "instant": "2022-09-28T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "public/sscx/0/d/8/b/7/b/3/7/test_linking", "uuid": "0d8b7b37-bf62-4576-a5b4-ba2398731b8f", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "test_linking", "location": "file:///path/nexus/bbp/atlas/0/d/8/b/7/b/3/7/test_linking", "mediaType": "application/x-directory"}, "storageType": "RemoteDiskStorage"},"instant":"2022-09-28T14:59:11.8+02:00"}
{"ordering":7,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/directory","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/directory", "rev": 1, "@type": "FileCreated", "instant": "2022-09-28T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "public/sscx/0/d/8/b/7/b/3/7/test_linking", "uuid": "0d8b7b37-bf62-4576-a5b4-ba2398731b8f", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "test_linking", "location": "file:///path/nexus/bbp/atlas/0/d/8/b/7/b/3/7/test_linking", "mediaType": "application/x-directory"}, "storageType": "RemoteDiskStorage"},"instant":"2022-09-28T14:59:11.8+02:00"}
{"ordering":8,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/local-plus","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/local-plus", "rev": 1, "@type": "FileCreated", "instant": "2022-10-14T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "95b0ee1e-a6a5-43e9-85fb-938b3c38dfc0/9/f/0/3/2/4/f/e/0925_Rhi13.3.13%20cell%201+2%20(superficial).asc", "uuid": "9f0324fe-9aac-4d34-84f0-5a44cd278197", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "0925_Rhi13.3.13 cell 1+2 (superficial).asc", "location": "file:///path/nexus/public/sscx/9/f/0/3/2/4/f/e", "mediaType": "application/octet-stream"}, "storageType": "DiskStorage"},"instant":"2022-10-14T14:59:11.8+02:00"}
{"ordering":9,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/local-space","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/local-space", "rev": 1, "@type": "FileCreated", "instant": "2022-10-15T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "public/sscx/9/2/f/f/f/2/1/6/566647353__Square%20-%200.5ms%20Subthreshold__stimulus__1.png", "uuid": "92fff216-73a1-48a6-acc8-3dadf17c26bb", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "566647353__Square - 0.5ms Subthreshold__stimulus__1.png", "location": "file:///path/nexus/public/sscx/9/2/f/f/f/2/1/6", "mediaType": "application/octet-stream"}, "storageType": "DiskStorage"},"instant":"2022-10-14T14:59:11.8+02:00"}
{"ordering":10,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/local-equals","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/local-equals", "rev": 1, "@type": "FileCreated", "instant": "2022-10-15T12:59:11.800Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "akkaufma"}, "attributes": {"path": "public/sscx/9/b/1/9/8/5/f/1/emodel=dSTUT_L5BP__etype=dSTUT__mtype=L5BP__species=mouse__brain_region=Primary%20somatosensory%20area__iteration=final_emodel__seed=3__traces.pdf", "uuid": "9b1985f1-e75d-42d2-b101-e527241e2797", "bytes": 0, "digest": {"@type": "NotComputedDigest"}, "origin": "Storage", "filename": "emodel=dSTUT_L5BP__etype=dSTUT__mtype=L5BP__species=mouse__brain_region=Primary somatosensory area__iteration=final_emodel__seed=3__traces.pdf", "location": "file:///path/nexus/public/sscx/9/b/1/9/8/5/f/1", "mediaType": "application/octet-stream"}, "storageType": "DiskStorage"},"instant":"2022-10-14T14:59:11.8+02:00"}
{"ordering":11,"type":"file" ,"org":"public","project":"sscx","id":"https://bbp.epfl.ch/neurosciencegraph/data/special-chars-2","rev":1,"value":{"id": "https://bbp.epfl.ch/neurosciencegraph/data/special-chars-2", "rev": 1, "@type": "FileCreated", "instant": "2023-03-02T10:46:36.969Z", "project": "public/sscx", "storage": "https://bbp.epfl.ch/neurosciencegraph/data/storage?rev=1", "subject": {"@type": "User", "realm": "bbp", "subject": "alice"}, "attributes": {"path": "15849bfc-f2ef-4ddd-89cb-b4658eb1f4ab/5/4/6/7/8/a/0/9/%20P(marker_cre)_overlapping.csv", "uuid": "54678a09-571b-465d-93e7-12137d6ad434", "bytes": 4267, "digest": {"@type": "ComputedDigest", "value": "b39a754a0988ca1f62e04a34d70479e9610b87beab91c58766f80c6ef6f93f3d", "algorithm": "SHA-256"}, "origin": "Client", "filename": " P(marker_cre)_overlapping.csv", "location": "file:///path/public/sscx/15849bfc-f2ef-4ddd-89cb-b4658eb1f4ab/5/4/6/7/8/a/0/9/%20P(marker_cre)_overlapping.csv", "mediaType": "text/plain"}, "storageType": "RemoteDiskStorage"},"instant":"2023-03-02T11:46:36.969+01:00"}
Loading

0 comments on commit fb93580

Please sign in to comment.