diff --git a/apis_core/apis_metainfo/models.py b/apis_core/apis_metainfo/models.py
index bc0d32361..b0120a24e 100644
--- a/apis_core/apis_metainfo/models.py
+++ b/apis_core/apis_metainfo/models.py
@@ -22,6 +22,8 @@ class RootObject(GenericModel, models.Model):
     By having one overarching super class we gain the advantage of unique identifiers.
     """
 
+    _sameas = None  # Internal storage for sameas property
+
     # self_contenttype: a foreign key to the respective contenttype comes in handy when querying for
     # triples where the subject's or object's contenttype must be respected (e.g. get all triples
     # where the subject is a Person)
@@ -35,10 +37,28 @@ class RootObject(GenericModel, models.Model):
     objects = models.Manager()
     objects_inheritance = InheritanceManager()
 
+    @property
+    def sameas(self):
+        """Return the URIs stored for this object, or None if there are none."""
+        uris = [u.uri for u in Uri.objects.filter(root_object=self)]
+        return uris or None
+
+    @sameas.setter
+    def sameas(self, value):
+        """Remember URIs to attach; they are written by the next save()."""
+        self._sameas = value
+
     def save(self, *args, **kwargs):
         self.self_contenttype = ContentType.objects.get_for_model(self)
         super().save(*args, **kwargs)
 
+        # Uri rows reference this object, so they are created only after it
+        # has been saved; the pending list is then cleared.
+        if self._sameas is not None:
+            for uri in self._sameas:
+                Uri.objects.create(root_object=self, uri=uri)
+            self._sameas = None
+
     def duplicate(self):
         origin = self.__class__
         signals.pre_duplicate.send(sender=origin, instance=self)
diff --git a/apis_core/generic/importers.py b/apis_core/generic/importers.py
index 8b8dcc407..d7be1e1cb 100644
--- a/apis_core/generic/importers.py
+++ b/apis_core/generic/importers.py
@@ -4,6 +4,7 @@
 from functools import cache
 
 from django.core.exceptions import ImproperlyConfigured
+from django.db.utils import IntegrityError
 
 from apis_core.utils.normalize import clean_uri
 from apis_core.utils.rdf import get_definition_and_attributes_from_uri
@@ -42,16 +43,16 @@ def request(self, uri):
         # we first try to use the RDF parser
         try:
             defn, data = get_definition_and_attributes_from_uri(uri, self.model)
-            return data
+            return data, defn
         except Exception as e:
             logger.debug(e)
         # if everything else fails, try parsing json
         # if even that does not help, return an empty dict
         try:
-            return json.loads(urllib.request.urlopen(uri).read())
+            return json.loads(urllib.request.urlopen(uri).read()), {}
         except Exception as e:
             logger.debug(e)
-        return {}
+        return {}, {}
 
     def mangle_data(self, data):
         return data
@@ -64,11 +65,12 @@ def get_data(self, drop_unknown_fields=True):
         remove all fields from the data dict that do not have an equivalent
         field in the model.
         """
-        data = self.request(self.import_uri)
+        data, defn = self.request(self.import_uri)
+        self.definition = defn
         data = self.mangle_data(data)
         if drop_unknown_fields:
             # we are dropping all fields that are not part of the model
-            modelfields = [field.name for field in self.model._meta.fields]
+            modelfields = [field.name for field in self.model._meta.fields] + ["sameas"]
             data = {key: data[key] for key in data if key in modelfields}
         if not data:
             raise ImproperlyConfigured(
@@ -85,5 +87,27 @@ def import_into_instance(self, instance, fields="__all__"):
                 setattr(instance, field, data[field])
         instance.save()
 
-    def create_instance(self):
-        return self.model.objects.create(**self.get_data(drop_unknown_fields=True))
+    def create_instance(self, follow_sameas=True):
+        """
+        Create a model instance from the imported data.
+
+        If `follow_sameas` is set and exactly one existing object is already
+        linked to one of the imported sameAs URIs, return that object instead
+        of creating a new one; raise IntegrityError if several objects match.
+        """
+        data = self.get_data(drop_unknown_fields=True)
+        if follow_sameas:
+            # The resolved URIs are part of the imported data; the definition
+            # may lack a "sameas" entry or be empty (JSON fallback).
+            sameas = data.get("sameas") or []
+            sa = self.model.objects.filter(uri__uri__in=sameas).distinct()
+            matches = sa.count()
+            if matches == 1:
+                return sa.first()
+            elif matches > 1:
+                raise IntegrityError(
+                    f"Multiple objects found for sameAs URIs {sameas}. "
+                    f"This indicates a data integrity problem as these URIs should be unique."
+                )
+        obj = self.model.objects.create(**data)
+        return obj
diff --git a/apis_core/utils/rdf.py b/apis_core/utils/rdf.py
index eda6b6796..d6b91d611 100644
--- a/apis_core/utils/rdf.py
+++ b/apis_core/utils/rdf.py
@@ -68,6 +68,22 @@ def get_definition_and_attributes_from_uri(
                 break
     model_attributes = dict()
     if matching_definition:
+        # Collect owl:sameAs targets from the graph; a definition can
+        # override the query through its own "sameas" entry.
+        sameas = matching_definition.get(
+            "sameas",
+            """
+        PREFIX owl: <http://www.w3.org/2002/07/owl#>
+        SELECT ?sameas WHERE {
+            ?subject owl:sameAs ?sameas .
+        }
+        """,
+        )
+        model_attributes["sameas"] = []
+        result = graph.query(sameas)
+        for binding in result.bindings:
+            for value in binding.values():
+                model_attributes["sameas"].append(str(value))
         attributes = matching_definition.get("attributes", [])
         sparql_attributes = list(filter(lambda d: d.get("sparql"), attributes))
         for attribute in sparql_attributes: