Skip to content

Commit

Permalink
fix: allow for sameAs links
Browse files Browse the repository at this point in the history
resolves #819
  • Loading branch information
sennierer committed Nov 27, 2024
1 parent 624d74a commit bfb72c0
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 7 deletions.
16 changes: 16 additions & 0 deletions apis_core/apis_metainfo/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ class RootObject(GenericModel, models.Model):
By having one overarching super class we gain the advantage of unique identifiers.
"""

_sameas = None # Internal storage for sameas property

# self_contenttype: a foreign key to the respective contenttype comes in handy when querying for
# triples where the subject's or object's contenttype must be respected (e.g. get all triples
# where the subject is a Person)
Expand All @@ -35,10 +37,24 @@ class RootObject(GenericModel, models.Model):
objects = models.Manager()
objects_inheritance = InheritanceManager()

@property
def sameas(self):
    """
    Return the URIs stored for this object, or ``None`` if there are none.

    The values are read from the ``Uri`` rows whose ``root_object`` is this
    instance. Values assigned through the setter but not yet saved are not
    included.
    """
    # Only the ``uri`` column is needed, so skip building full model instances.
    uris = list(Uri.objects.filter(root_object=self).values_list("uri", flat=True))
    return uris or None

@sameas.setter
def sameas(self, value):
    """
    Remember ``value`` (an iterable of URI strings, or ``None``) in
    ``_sameas``; ``save()`` reads that attribute to create the ``Uri`` rows.
    """
    self._sameas = value

def save(self, *args, **kwargs):
    """
    Save the object and persist any pending sameAs URIs.

    ``self_contenttype`` is refreshed from the concrete model before the
    regular save. Afterwards, every URI previously assigned through the
    ``sameas`` setter is stored as a ``Uri`` row pointing at this object and
    the pending list is cleared.
    """
    self.self_contenttype = ContentType.objects.get_for_model(self)
    super().save(*args, **kwargs)

    pending = self._sameas
    if pending is not None:
        # dict.fromkeys drops repeated entries while keeping their order.
        for uri in dict.fromkeys(pending):
            # Assigning the same URIs again (e.g. on a re-import) must not
            # pile up duplicate Uri rows for this object.
            if not Uri.objects.filter(root_object=self, uri=uri).exists():
                Uri.objects.create(root_object=self, uri=uri)
        self._sameas = None

def duplicate(self):
origin = self.__class__
signals.pre_duplicate.send(sender=origin, instance=self)
Expand Down
27 changes: 20 additions & 7 deletions apis_core/generic/importers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from functools import cache

from django.core.exceptions import ImproperlyConfigured
from django.db.utils import IntegrityError

from apis_core.utils.normalize import clean_uri
from apis_core.utils.rdf import get_definition_and_attributes_from_uri
Expand Down Expand Up @@ -42,16 +43,16 @@ def request(self, uri):
# we first try to use the RDF parser
try:
defn, data = get_definition_and_attributes_from_uri(uri, self.model)
return data
return data, defn
except Exception as e:
logger.debug(e)
# if everything else fails, try parsing json
# if even that does not help, return an empty dict
try:
return json.loads(urllib.request.urlopen(uri).read())
return json.loads(urllib.request.urlopen(uri).read()), {}
except Exception as e:
logger.debug(e)
return {}
return {}, {}

def mangle_data(self, data):
    """
    Return ``data`` unchanged.

    ``get_data`` passes the fetched data through this method before it
    filters the fields.
    """
    return data
Expand All @@ -64,11 +65,12 @@ def get_data(self, drop_unknown_fields=True):
remove all fields from the data dict that do not
have an equivalent field in the model.
"""
data = self.request(self.import_uri)
data, defn = self.request(self.import_uri)
self.definition = defn
data = self.mangle_data(data)
if drop_unknown_fields:
# we are dropping all fields that are not part of the model
modelfields = [field.name for field in self.model._meta.fields]
modelfields = [field.name for field in self.model._meta.fields] + ["sameas"]
data = {key: data[key] for key in data if key in modelfields}
if not data:
raise ImproperlyConfigured(
Expand All @@ -85,5 +87,16 @@ def import_into_instance(self, instance, fields="__all__"):
setattr(instance, field, data[field])
instance.save()

def create_instance(self):
return self.model.objects.create(**self.get_data(drop_unknown_fields=True))
def create_instance(self, follow_sameas=True):
    """
    Create a model instance from the imported data, or reuse an existing one.

    When ``follow_sameas`` is true, the sameAs URIs of the definition are
    looked up among the existing objects first:

    * exactly one match: that object is returned and nothing is created
    * more than one match: ``IntegrityError`` is raised
    * no match (or no sameAs URIs at all): a new object is created

    Raises:
        IntegrityError: if several existing objects share the sameAs URIs.
    """
    data = self.get_data(drop_unknown_fields=True)
    if follow_sameas:
        # The definition is an empty dict when the importer fell back to
        # plain JSON, so a missing "sameas" key must not raise.
        # NOTE(review): the RDF helper also stores the collected URIs under
        # data["sameas"]; confirm that the definition is the intended source.
        sameas_uris = (self.definition or {}).get("sameas") or []
        if sameas_uris:
            matches = self.model.objects.filter(uri__uri__in=sameas_uris)
            # Count once instead of issuing the same query twice.
            match_count = matches.count()
            if match_count == 1:
                return matches.first()
            if match_count > 1:
                raise IntegrityError(
                    f"Multiple objects found for sameAs URIs {sameas_uris}. "
                    "This indicates a data integrity problem as these URIs should be unique."
                )
    return self.model.objects.create(**data)
14 changes: 14 additions & 0 deletions apis_core/utils/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,20 @@ def get_definition_and_attributes_from_uri(
break
model_attributes = dict()
if matching_definition:
sameas = matching_definition.get(
"sameas",
"""
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT ?sameas WHERE {
?subject owl:sameAs ?sameas .
}
""",
)
model_attributes["sameas"] = []
result = graph.query(sameas)
for binding in result.bindings:
for value in binding.values():
model_attributes["sameas"].append(str(value))
attributes = matching_definition.get("attributes", [])
sparql_attributes = list(filter(lambda d: d.get("sparql"), attributes))
for attribute in sparql_attributes:
Expand Down

0 comments on commit bfb72c0

Please sign in to comment.