Skip to content

Commit

Permalink
Deduplicating shapes before Shacl validation (#5021)
Browse files Browse the repository at this point in the history
* Deduplicating shapes before Shacl validation

---------

Co-authored-by: Simon Dumas <[email protected]>
  • Loading branch information
imsdu and Simon Dumas authored Jun 7, 2024
1 parent 9ad8819 commit 7ca3da0
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ final case class Graph private (rootNode: IriOrBNode, value: DatasetGraph) { sel
*/
def isEmpty: Boolean = value.isEmpty

/**
  * Returns the size reported by the default graph of the underlying dataset.
  */
def getDefaultGraphSize: Int = {
  val defaultGraph = value.getDefaultGraph
  defaultGraph.size()
}

/**
* Returns a subgraph retaining all the triples that satisfy the provided predicate.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ch.epfl.bluebrain.nexus.delta.sdk.schemas.model

import cats.data.NonEmptyList
import cats.effect.IO
import ch.epfl.bluebrain.nexus.delta.rdf.IriOrBNode
import ch.epfl.bluebrain.nexus.delta.rdf.IriOrBNode.Iri
import ch.epfl.bluebrain.nexus.delta.rdf.Triple.Triple
import ch.epfl.bluebrain.nexus.delta.rdf.Vocabulary.{contexts, nxv, owl}
Expand Down Expand Up @@ -61,9 +62,17 @@ final case class Schema(
lazy val ontologies: Graph = graph { types =>
  // Keep documents typed as an ontology ...
  val isOntology = types.contains(owl.Ontology)
  // ... unless they are also typed as a schema.
  val isSchema   = types.contains(nxv.Schema)
  isOntology && !isSchema
}

/**
  * Merges into a single graph, rooted at the schema `id`, the triples of the expanded documents selected by
  * the provided predicate.
  *
  * Documents are deduplicated on their root id: for a given root id only the first selected document (in the
  * order of `expanded`) contributes its triples, later ones are ignored.
  *
  * @param filteredTypes
  *   predicate evaluated against the set of types read from a document through `cursor.getTypes`; a document
  *   is selected only when that lookup yields a set of types satisfying the predicate
  * @return
  *   a graph rooted at `id` holding the union of the triples of the selected documents
  */
private def graph(filteredTypes: Set[Iri] => Boolean): Graph = {
  implicit val api: JsonLdApi = JsonLdJavaApi.lenient

  // Accumulator: the root ids already retained, and the retained documents in encounter order.
  val init: (Set[IriOrBNode], Vector[ExpandedJsonLd]) = (Set.empty[IriOrBNode], Vector.empty[ExpandedJsonLd])

  val (_, filtered) = expanded.foldLeft(init) {
    // A root id is only recorded as seen once a document carrying it has actually been selected, so a
    // non-matching document does not prevent a later matching one with the same root id from being kept.
    case ((seen, acc), doc) if !seen.contains(doc.rootId) && doc.cursor.getTypes.exists(filteredTypes) =>
      (seen + doc.rootId, acc :+ doc)
    case (unchanged, _)                                                                                 =>
      unchanged
  }

  // NOTE(review): `.toOption.get` throws if a document cannot be converted to a graph; presumably the
  // conversion has already been validated upstream - confirm against the callers.
  val triples = filtered.map(_.toGraph.toOption.get).foldLeft(Set.empty[Triple])((acc, g) => acc ++ g.triples)
  Graph.empty(id).add(triples)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package ch.epfl.bluebrain.nexus.delta.sdk.schemas.model

import cats.data.NonEmptyList
import cats.effect.IO
import ch.epfl.bluebrain.nexus.delta.rdf.jsonld.ExpandedJsonLd
import ch.epfl.bluebrain.nexus.delta.rdf.syntax._
import ch.epfl.bluebrain.nexus.delta.sdk.model.Tags
import ch.epfl.bluebrain.nexus.delta.sdk.utils.Fixtures
import ch.epfl.bluebrain.nexus.delta.sourcing.model.ProjectRef
import ch.epfl.bluebrain.nexus.testkit.mu.NexusSuite

/**
  * Checks the graph that a [[Schema]] exposes through `shapes` when the same expanded document is provided
  * more than once.
  */
class SchemaSuite extends NexusSuite with Fixtures {

  private val project = ProjectRef.unsafe("org", "proj")

  /** Converts the expanded document to a graph and returns the size of its default graph, failing the IO on a conversion error. */
  private def defaultGraphSizeOf(expandedDoc: ExpandedJsonLd): IO[Int] =
    IO.fromEither(expandedDoc.toGraph.map(_.getDefaultGraphSize))

  test("Extract as a graph the content of the schema, removing the duplicates") {
    for {
      source      <- loader.jsonContentOf("schemas/entity.json")
      entityDoc   <- ExpandedJsonLd(jsonContentOf("schemas/entity-expanded.json"))
      entitySize  <- defaultGraphSizeOf(entityDoc)
      compacted   <- entityDoc.toCompacted(source.topContextValueOrEmpty)
      licenseDoc  <- ExpandedJsonLd(jsonContentOf("schemas/license-expanded.json"))
      licenseSize <- defaultGraphSizeOf(licenseDoc)
    } yield {
      val schemaId     = iri"https://neuroshapes.org/commons/entity"
      // The entity document is deliberately listed twice: it must be counted only once in the shapes graph.
      val documents    = NonEmptyList.of(entityDoc, licenseDoc, entityDoc)
      val schema       = Schema(schemaId, project, Tags.empty, source, compacted, documents)
      // NOTE(review): summing the two sizes assumes the entity and license fixtures share no triple - confirm.
      val expectedSize = entitySize + licenseSize
      assertEquals(schema.shapes.getDefaultGraphSize, expectedSize)
    }
  }

}

0 comments on commit 7ca3da0

Please sign in to comment.