Skip to content

Commit

Permalink
feat: reject import object if sigle doesn't exist
Browse files Browse the repository at this point in the history
Entities that refer to a work (identified by its siglum) that does not yet exist in the database should not be imported or created.
  • Loading branch information
babslgam authored and koeaw committed Apr 3, 2024
1 parent 180e4d3 commit 2f63cb3
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 130 deletions.
4 changes: 4 additions & 0 deletions apis_ontology/scripts/import_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,3 +315,7 @@ def get_expressions_by_work(work_id: int):
i.obj for i in TempTriple.objects.filter(subj__id=work_id, prop=prop)
]
return related_expressions


def work_with_siglum_exists(siglum):
    """Report whether a Work with the given siglum is already stored.

    Args:
        siglum: Value compared against the ``siglum`` field of ``Work``.

    Returns:
        The result of ``exists()`` on the filtered ``Work`` queryset, i.e.
        whether at least one matching record was found.
    """
    matching_works = Work.objects.filter(siglum=siglum)
    return matching_works.exists()
289 changes: 159 additions & 130 deletions apis_ontology/scripts/import_nonbibl_entities_from_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pandas as pd
import logging
from apis_ontology.scripts.access_sharepoint import import_and_parse_data
from .import_helpers import create_triple, create_source
from .import_helpers import create_triple, create_source, work_with_siglum_exists
from apis_ontology.models import (
Place,
Work,
Expand Down Expand Up @@ -71,53 +71,59 @@ def parse_entities_dataframe(sheet_name, df, file):
place_uris = (row["URL_Geonames"], row["URL_Wikipedia"], row["URL_extern"])
place_uri_objects = []

for place_uri in place_uris:
if place_uri:
place_uri = secure_urls(place_uri)
uri, created = Uri.objects.get_or_create(uri=place_uri)
place_uri_objects.append(uri)
if work_with_siglum_exists(related_work_siglum):
for place_uri in place_uris:
if place_uri:
place_uri = secure_urls(place_uri)
uri, created = Uri.objects.get_or_create(uri=place_uri)
place_uri_objects.append(uri)

place_qs = None
place = None
place_qs = None
place = None

if len(place_uri_objects) > 0:
place_qs = Place.objects.filter(
uri__in=[uri.id for uri in place_uri_objects],
)
else:
place_qs = Place.objects.filter(name=place_name)
if len(place_uri_objects) > 0:
place_qs = Place.objects.filter(
uri__in=[uri.id for uri in place_uri_objects],
)
else:
place_qs = Place.objects.filter(name=place_name)

if place_qs.count() == 0:
place, created = Place.objects.get_or_create(
name=place_name, defaults={"data_source": data_source}
)
else:
place = place_qs.first()
alternative_names = list(
filter(None, place.alternative_name.split(";"))
)
if (
place_name
and place_name != place.name
and place_name not in alternative_names
):
alternative_names.append(place_name)
place.alternative_name = ";".join(alternative_names)
place.save()

for uri in place_uri_objects:
if not uri.root_object:
uri.root_object = place
uri.save()

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)
triple, created = create_triple(
entity_subj=work_object,
entity_obj=place,
prop=Property.objects.get(
name_forward=WORK_PLACE_RELATIONTYPES[place_type]
),
)

if place_qs.count() == 0:
place, created = Place.objects.get_or_create(
name=place_name, defaults={"data_source": data_source}
)
else:
place = place_qs.first()
alternative_names = list(
filter(None, place.alternative_name.split(";"))
)
if (
place_name
and place_name != place.name
and place_name not in alternative_names
):
alternative_names.append(place_name)
place.alternative_name = ";".join(alternative_names)
place.save()

for uri in place_uri_objects:
if not uri.root_object:
uri.root_object = place
uri.save()

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)
triple, created = create_triple(
entity_subj=work_object,
entity_obj=place,
prop=Property.objects.get(
name_forward=WORK_PLACE_RELATIONTYPES[place_type]
),
print(
f"work with sigle {related_work_siglum} doesn't exist. entity import rejected"
)

if sheet_name == "Namen":
Expand All @@ -132,112 +138,135 @@ def parse_entities_dataframe(sheet_name, df, file):
related_work_siglum = row["Sigle"]
person_uris = (row["URL_Wikipedia"], row["URL_DNB"], row["URL_extern"])

character = Character.objects.create(
fallback_name=character_name,
description=character_description,
relevancy=character_relevancy,
fictionality=character_fictionality_degree,
data_source=data_source,
)

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)

create_triple(
entity_subj=work_object,
entity_obj=character,
prop=Property.objects.get(name_forward="features"),
if work_with_siglum_exists(related_work_siglum):
character = Character.objects.create(
fallback_name=character_name,
description=character_description,
relevancy=character_relevancy,
fictionality=character_fictionality_degree,
data_source=data_source,
)

if character_fictionality in ("R", "M", "M/R"):
person_uri_objects = []
for person_uri in person_uris:
if person_uri:
uri = secure_urls(person_uri)
uri_obj, uri_created = Uri.objects.get_or_create(uri=uri)
person_uri_objects.append(uri_obj)

person = None
person_qs = None

person_fallback_name = (
character_name if not (person_forename or person_surname) else ""
)
if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)

if len(person_uri_objects) > 0:
person_qs = Person.objects.filter(
uri__in=[uri.id for uri in person_uri_objects],
)
else:
person_qs = Person.objects.filter(
fallback_name=person_fallback_name,
forename=person_forename,
surname=person_surname,
create_triple(
entity_subj=work_object,
entity_obj=character,
prop=Property.objects.get(name_forward="features"),
)

if person_qs.count() == 0:
person, created = Person.objects.get_or_create(
fallback_name=person_fallback_name,
forename=person_forename,
surname=person_surname,
alternative_name=person_alternative_name,
defaults={"data_source": data_source},
if character_fictionality in ("R", "M", "M/R"):
person_uri_objects = []
for person_uri in person_uris:
if person_uri:
uri = secure_urls(person_uri)
uri_obj, uri_created = Uri.objects.get_or_create(uri=uri)
person_uri_objects.append(uri_obj)

person = None
person_qs = None

person_fallback_name = (
character_name
if not (person_forename or person_surname)
else ""
)
else:
person = person_qs.first()

for uri in person_uri_objects:
if not uri.root_object:
uri.root_object = person
uri.save()
if len(person_uri_objects) > 0:
person_qs = Person.objects.filter(
uri__in=[uri.id for uri in person_uri_objects],
)
else:
person_qs = Person.objects.filter(
fallback_name=person_fallback_name,
forename=person_forename,
surname=person_surname,
)

if person_qs.count() == 0:
person, created = Person.objects.get_or_create(
fallback_name=person_fallback_name,
forename=person_forename,
surname=person_surname,
alternative_name=person_alternative_name,
defaults={"data_source": data_source},
)
else:
person = person_qs.first()

for uri in person_uri_objects:
if not uri.root_object:
uri.root_object = person
uri.save()

create_triple(
entity_subj=character,
entity_obj=person,
prop=Property.objects.get(name_forward="is based on"),
)

create_triple(
entity_subj=character,
entity_obj=person,
prop=Property.objects.get(name_forward="is based on"),
else:
print(
f"work with sigle {related_work_siglum} doesn't exist. entity import rejected"
)

if sheet_name == "Themen":
topic_name = row["Thema"]
related_work_siglum = row["Sigle"]
topic_alt_name = row["Synonyme"]
topic_description = row["Anmerkungen"]
topic, created = Topic.objects.get_or_create(
name=topic_name,
defaults={"data_source": data_source},
)
topic.alternative_name = topic_alt_name
topic.description = topic_description
topic.save()

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)

create_triple(
entity_subj=work_object,
entity_obj=topic,
prop=Property.objects.get(name_forward="is about topic"),

if work_with_siglum_exists(related_work_siglum):
topic, created = Topic.objects.get_or_create(
name=topic_name,
defaults={"data_source": data_source},
)
topic.alternative_name = topic_alt_name
topic.description = topic_description
topic.save()

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)

create_triple(
entity_subj=work_object,
entity_obj=topic,
prop=Property.objects.get(name_forward="is about topic"),
)

else:
print(
f"work with sigle {related_work_siglum} doesn't exist. entity import rejected"
)

if sheet_name == "Forschungshinsichten":
research_perspective_name = row["Thema"]
related_work_siglum = row["Sigle"]
research_perspective_description = row["Anmerkungen"]

research_perspective, created = ResearchPerspective.objects.get_or_create(
name=research_perspective_name,
defaults={"data_source": data_source},
)
research_perspective.description = research_perspective_description
research_perspective.save()

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)

create_triple(
entity_subj=work_object,
entity_obj=research_perspective,
prop=Property.objects.get(
name_forward="applies research perspective"
),
if work_with_siglum_exists(related_work_siglum):
(
research_perspective,
created,
) = ResearchPerspective.objects.get_or_create(
name=research_perspective_name,
defaults={"data_source": data_source},
)
research_perspective.description = research_perspective_description
research_perspective.save()

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)

create_triple(
entity_subj=work_object,
entity_obj=research_perspective,
prop=Property.objects.get(
name_forward="applies research perspective"
),
)
else:
print(
f"work with sigle {related_work_siglum} doesn't exist. entity import rejected"
)

0 comments on commit 2f63cb3

Please sign in to comment.