From 7ca3da0689c3954af13c7184219953597854b62e Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 7 Jun 2024 16:54:35 +0200 Subject: [PATCH] Deduplicating shapes before Shacl validation (#5021) * Deduplicating shapes before Shacl validation --------- Co-authored-by: Simon Dumas --- .../nexus/delta/rdf/graph/Graph.scala | 2 ++ .../delta/sdk/schemas/model/Schema.scala | 15 +++++++-- .../delta/sdk/schemas/model/SchemaSuite.scala | 32 +++++++++++++++++++ 3 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 delta/sdk/src/test/scala/ch/epfl/bluebrain/nexus/delta/sdk/schemas/model/SchemaSuite.scala diff --git a/delta/rdf/src/main/scala/ch/epfl/bluebrain/nexus/delta/rdf/graph/Graph.scala b/delta/rdf/src/main/scala/ch/epfl/bluebrain/nexus/delta/rdf/graph/Graph.scala index 6ac6f16a31..c0a71f5db2 100644 --- a/delta/rdf/src/main/scala/ch/epfl/bluebrain/nexus/delta/rdf/graph/Graph.scala +++ b/delta/rdf/src/main/scala/ch/epfl/bluebrain/nexus/delta/rdf/graph/Graph.scala @@ -60,6 +60,8 @@ final case class Graph private (rootNode: IriOrBNode, value: DatasetGraph) { sel */ def isEmpty: Boolean = value.isEmpty + /** Returns the size of the default graph of the underlying dataset, as reported by `value.getDefaultGraph.size()` (presumably its triple count - confirm against the DatasetGraph API). */ def getDefaultGraphSize: Int = value.getDefaultGraph.size() + /** * Returns a subgraph retaining all the triples that satisfy the provided predicate. 
*/ diff --git a/delta/sdk/src/main/scala/ch/epfl/bluebrain/nexus/delta/sdk/schemas/model/Schema.scala b/delta/sdk/src/main/scala/ch/epfl/bluebrain/nexus/delta/sdk/schemas/model/Schema.scala index 91d2b0ff90..3ba1511b07 100644 --- a/delta/sdk/src/main/scala/ch/epfl/bluebrain/nexus/delta/sdk/schemas/model/Schema.scala +++ b/delta/sdk/src/main/scala/ch/epfl/bluebrain/nexus/delta/sdk/schemas/model/Schema.scala @@ -2,6 +2,7 @@ package ch.epfl.bluebrain.nexus.delta.sdk.schemas.model import cats.data.NonEmptyList import cats.effect.IO +import ch.epfl.bluebrain.nexus.delta.rdf.IriOrBNode import ch.epfl.bluebrain.nexus.delta.rdf.IriOrBNode.Iri import ch.epfl.bluebrain.nexus.delta.rdf.Triple.Triple import ch.epfl.bluebrain.nexus.delta.rdf.Vocabulary.{contexts, nxv, owl} @@ -61,9 +62,17 @@ final case class Schema( lazy val ontologies: Graph = graph(types => types.contains(owl.Ontology) && !types.contains(nxv.Schema)) private def graph(filteredTypes: Set[Iri] => Boolean): Graph = { - implicit val api: JsonLdApi = JsonLdJavaApi.lenient - val filtered = expanded.filter(expanded => expanded.cursor.getTypes.exists(filteredTypes)) - val triples = filtered.map(_.toGraph.toOption.get).foldLeft(Set.empty[Triple])((acc, g) => acc ++ g.triples) + implicit val api: JsonLdApi = JsonLdJavaApi.lenient + /* Fold state: the root ids already kept, and the kept expanded documents in encounter order. */ val init: (Set[IriOrBNode], Vector[ExpandedJsonLd]) = (Set.empty[IriOrBNode], Vector.empty[ExpandedJsonLd]) + /* Keeps the first document per rootId whose types satisfy `filteredTypes`; the pattern variable `expanded` shadows the field of the same name. */ val (_, filtered) = expanded.foldLeft(init) { + case ((seen, acc), expanded) + if !seen.contains(expanded.rootId) && expanded.cursor.getTypes.exists(filteredTypes) => + val updatedSeen = seen + expanded.rootId + val updatedAcc = acc :+ expanded + updatedSeen -> updatedAcc + /* Already-seen root id or non-matching types: state is left unchanged. */ case ((seen, acc), _) => seen -> acc + } + /* NOTE(review): `.toOption.get` throws if `toGraph` yields no value - same as the removed line; the triples of all kept documents are merged into one Set. */ val triples = filtered.map(_.toGraph.toOption.get).foldLeft(Set.empty[Triple])((acc, g) => acc ++ g.triples) Graph.empty(id).add(triples) } diff --git a/delta/sdk/src/test/scala/ch/epfl/bluebrain/nexus/delta/sdk/schemas/model/SchemaSuite.scala 
b/delta/sdk/src/test/scala/ch/epfl/bluebrain/nexus/delta/sdk/schemas/model/SchemaSuite.scala new file mode 100644 index 0000000000..da76badf19 --- /dev/null +++ b/delta/sdk/src/test/scala/ch/epfl/bluebrain/nexus/delta/sdk/schemas/model/SchemaSuite.scala @@ -0,0 +1,32 @@ +package ch.epfl.bluebrain.nexus.delta.sdk.schemas.model + +import cats.data.NonEmptyList +import cats.effect.IO +import ch.epfl.bluebrain.nexus.delta.rdf.jsonld.ExpandedJsonLd +import ch.epfl.bluebrain.nexus.delta.rdf.syntax._ +import ch.epfl.bluebrain.nexus.delta.sdk.model.Tags +import ch.epfl.bluebrain.nexus.delta.sdk.utils.Fixtures +import ch.epfl.bluebrain.nexus.delta.sourcing.model.ProjectRef +import ch.epfl.bluebrain.nexus.testkit.mu.NexusSuite + +/** Checks that `Schema.shapes` counts an expanded document only once when it appears several times in the schema. */ class SchemaSuite extends NexusSuite with Fixtures { + + private val project = ProjectRef.unsafe("org", "proj") + + test("Extract as a graph the content of the schema, removing the duplicates") { + for { + entitySource <- loader.jsonContentOf("schemas/entity.json") + entityExpanded <- ExpandedJsonLd(jsonContentOf("schemas/entity-expanded.json")) + entityExpandedGraphSize <- IO.fromEither(entityExpanded.toGraph.map(_.getDefaultGraphSize)) + entityCompacted <- entityExpanded.toCompacted(entitySource.topContextValueOrEmpty) + licenseExpanded <- ExpandedJsonLd(jsonContentOf("schemas/license-expanded.json")) + licenseExpandedGraphSize <- IO.fromEither(licenseExpanded.toGraph.map(_.getDefaultGraphSize)) + } yield { + val id = iri"https://neuroshapes.org/commons/entity" + /* `entityExpanded` is listed twice on purpose: the expected size below counts it once. */ val expandeds = NonEmptyList.of(entityExpanded, licenseExpanded, entityExpanded) + val schema = Schema(id, project, Tags.empty, entitySource, entityCompacted, expandeds) + assertEquals(schema.shapes.getDefaultGraphSize, entityExpandedGraphSize + licenseExpandedGraphSize) + } + } + +}