Skip to content

Commit

Permalink
feat(utils): let the rdf parser return lists instead of strings
Browse files (browse the repository at this point in the history)
... and adapt rdfimport definitions as well as tests accordingly
  • Loading branch information
b1rger committed Dec 17, 2024
1 parent fd689b2 commit 5c97b4e
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 21 deletions.
26 changes: 21 additions & 5 deletions apis_core/apis_entities/rdfimport/E21_PersonFromDNB.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,43 @@ sparql = """
PREFIX gndo: <https://d-nb.info/standards/elementset/gnd#>
SELECT ?forename
WHERE {
?subject gndo:forename ?default_forename .
OPTIONAL {
?subject2 gndo:preferredNameEntityForThePerson ?med .
OPTIONAL {
?subject gndo:preferredNameEntityForThePerson ?med .
?med gndo:forename ?preferred_forename .
}
OPTIONAL {
?subject2 gndo:forename ?default_forename .
}
BIND(COALESCE(?preferred_forename, ?default_forename) AS ?forename)
}
GROUP BY ?subject
"""
[[attributes]]
# alternative_names
sparql = """
PREFIX gndo: <https://d-nb.info/standards/elementset/gnd#>
SELECT ?alternative_names
WHERE {
?subject gndo:variantNameForThePerson ?alternative_names
}
"""

[[attributes]]
# surname
sparql = """
PREFIX gndo: <https://d-nb.info/standards/elementset/gnd#>
SELECT ?surname
WHERE {
?subject gndo:surname ?default_surname .
OPTIONAL {
?subject2 gndo:preferredNameEntityForThePerson ?med .
?subject gndo:preferredNameEntityForThePerson ?med .
?med gndo:surname ?preferred_surname .
}
OPTIONAL {
?subject2 gndo:surname ?default_surname .
}
BIND(COALESCE(?preferred_surname, ?default_surname) AS ?surname)
}
GROUP BY ?subject
"""
[[attributes]]
# date_of_birth
Expand Down
5 changes: 4 additions & 1 deletion apis_core/apis_entities/rdfimport/E74_GroupFromDNB.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@ sparql = """
PREFIX gndo: <https://d-nb.info/standards/elementset/gnd#>
SELECT ?label
WHERE {
?subject gndo:preferredNameForTheCorporateBody ?name
OPTIONAL {
?subject gndo:preferredNameForTheCorporateBody ?name
}
OPTIONAL {
?subject gndo:variantNameForTheCorporateBody ?altName
}
BIND(COALESCE(?name, ?altName) AS ?label)
}
GROUP BY ?subject
"""
5 changes: 3 additions & 2 deletions apis_core/utils/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import importlib
import logging
import re
from collections import defaultdict
from typing import Tuple

from rdflib import Graph
Expand Down Expand Up @@ -66,7 +67,7 @@ def get_definition_and_attributes_from_uri(
matching_definition = definition
matching_definition["filename"] = str(key)
break
model_attributes = dict()
model_attributes = defaultdict(list)
if matching_definition:
attributes = matching_definition.get("attributes", [])
sparql_attributes = list(filter(lambda d: d.get("sparql"), attributes))
Expand All @@ -75,7 +76,7 @@ def get_definition_and_attributes_from_uri(
for binding in result.bindings:
# each binding maps query variables to terms, e.g. {rdflib.term.Variable('somekey'): rdflib.term.Literal('some value')}
for key, value in binding.items():
model_attributes[str(key)] = str(value)
model_attributes[str(key)].append(str(value))
else:
raise AttributeError(f"No matching definition found for {uri}")
return matching_definition, model_attributes
27 changes: 14 additions & 13 deletions apis_core/utils/test_rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ class Meta:
class RdfTest(TestCase):
def test_get_definition_from_dict_place_from_geonames(self):
achensee = {
"latitude": "47.5",
"longitude": "11.7",
"label": "Achensee",
"latitude": ["47.5"],
"longitude": ["11.7"],
"label": ["Achensee"],
}
# https://www.geonames.org/2783029/achensee.html
uri = str(testdata / "achensee.rdf")
Expand All @@ -43,7 +43,7 @@ def test_get_definition_from_dict_place_from_geonames(self):
self.assertEqual(achensee, attributes)

def test_get_definition_from_dict_place_from_dnb(self):
wien = {"label": "Wien", "latitude": "048.208199", "longitude": "016.371690"}
wien = {"label": ["Wien"], "latitude": ["048.208199"], "longitude": ["016.371690"]}
# https://d-nb.info/gnd/4066009-6
uri = str(testdata / "wien.rdf")

Expand All @@ -53,21 +53,22 @@ def test_get_definition_from_dict_place_from_dnb(self):

def test_get_definition_from_dict_person_from_dnb(self):
pierre = {
"forename": "Pierre",
"surname": "Ramus",
"date_of_birth": "1882-04-15",
"date_of_death": "1942",
"forename": ["Pierre"],
"surname": ["Ramus"],
"alternative_names": ['Ramus, Pʹer','Großmann, Rudolf','Grossmann, Rudolf','Grossman, Rudolf','Grossman, Rodolphe','Grossmann, Rodolphe','Libertarian, ...'],
"date_of_birth": ["1882-04-15"],
"date_of_death": ["1942"],
}
# https://d-nb.info/gnd/118833197
uri = str(testdata / "ramus.rdf")

person = Person()
defintion, attributes = rdf.get_definition_and_attributes_from_uri(uri, person)
self.assertEqual(pierre, attributes)
self.assertEqual(pierre, dict(attributes))

def test_get_definition_from_dict_institution_from_dnb(self):
pierre_ges = {
"label": "Pierre-Ramus-Gesellschaft",
"label": ["Pierre-Ramus-Gesellschaft"],
}
# https://d-nb.info/gnd/415006-5
uri = str(testdata / "ramus_gesellschaft.rdf")
Expand All @@ -76,11 +77,11 @@ def test_get_definition_from_dict_institution_from_dnb(self):
defintion, attributes = rdf.get_definition_and_attributes_from_uri(
uri, institution
)
self.assertEqual(pierre_ges, attributes)
self.assertEqual(pierre_ges, dict(attributes))

def test_get_definition_from_dict_institution_from_dnb2(self):
pierre_ges = {
"label": "Akademie der Wissenschaften in Wien",
"label": ["Akademie der Wissenschaften in Wien"],
}
# https://d-nb.info/gnd/35077-1
uri = str(testdata / "oeaw.rdf")
Expand All @@ -89,4 +90,4 @@ def test_get_definition_from_dict_institution_from_dnb2(self):
defintion, attributes = rdf.get_definition_and_attributes_from_uri(
uri, institution
)
self.assertEqual(pierre_ges, attributes)
self.assertEqual(pierre_ges, dict(attributes))

0 comments on commit 5c97b4e

Please sign in to comment.