Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: reject import object if sigle doesn't exist #60

Merged
merged 1 commit into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions apis_ontology/scripts/import_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,3 +315,7 @@ def get_expressions_by_work(work_id: int):
i.obj for i in TempTriple.objects.filter(subj__id=work_id, prop=prop)
]
return related_expressions


def work_with_siglum_exists(siglum):
    """Return True if at least one Work has the given siglum.

    :param siglum: value compared against the ``siglum`` field of Work
    :return: result of ``exists()`` on the filtered Work queryset
    """
    matching_works = Work.objects.filter(siglum=siglum)
    return matching_works.exists()
289 changes: 159 additions & 130 deletions apis_ontology/scripts/import_nonbibl_entities_from_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pandas as pd
import logging
from apis_ontology.scripts.access_sharepoint import import_and_parse_data
from .import_helpers import create_triple, create_source
from .import_helpers import create_triple, create_source, work_with_siglum_exists
from apis_ontology.models import (
Place,
Work,
Expand Down Expand Up @@ -71,53 +71,59 @@ def parse_entities_dataframe(sheet_name, df, file):
place_uris = (row["URL_Geonames"], row["URL_Wikipedia"], row["URL_extern"])
place_uri_objects = []

for place_uri in place_uris:
if place_uri:
place_uri = secure_urls(place_uri)
uri, created = Uri.objects.get_or_create(uri=place_uri)
place_uri_objects.append(uri)
if work_with_siglum_exists(related_work_siglum):
for place_uri in place_uris:
if place_uri:
place_uri = secure_urls(place_uri)
uri, created = Uri.objects.get_or_create(uri=place_uri)
place_uri_objects.append(uri)

place_qs = None
place = None
place_qs = None
place = None

if len(place_uri_objects) > 0:
place_qs = Place.objects.filter(
uri__in=[uri.id for uri in place_uri_objects],
)
else:
place_qs = Place.objects.filter(name=place_name)
if len(place_uri_objects) > 0:
place_qs = Place.objects.filter(
uri__in=[uri.id for uri in place_uri_objects],
)
else:
place_qs = Place.objects.filter(name=place_name)

if place_qs.count() == 0:
place, created = Place.objects.get_or_create(
name=place_name, defaults={"data_source": data_source}
)
else:
place = place_qs.first()
alternative_names = list(
filter(None, place.alternative_name.split(";"))
)
if (
place_name
and place_name != place.name
and place_name not in alternative_names
):
alternative_names.append(place_name)
place.alternative_name = ";".join(alternative_names)
place.save()

for uri in place_uri_objects:
if not uri.root_object:
uri.root_object = place
uri.save()

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)
triple, created = create_triple(
entity_subj=work_object,
entity_obj=place,
prop=Property.objects.get(
name_forward=WORK_PLACE_RELATIONTYPES[place_type]
),
)

if place_qs.count() == 0:
place, created = Place.objects.get_or_create(
name=place_name, defaults={"data_source": data_source}
)
else:
place = place_qs.first()
alternative_names = list(
filter(None, place.alternative_name.split(";"))
)
if (
place_name
and place_name != place.name
and place_name not in alternative_names
):
alternative_names.append(place_name)
place.alternative_name = ";".join(alternative_names)
place.save()

for uri in place_uri_objects:
if not uri.root_object:
uri.root_object = place
uri.save()

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)
triple, created = create_triple(
entity_subj=work_object,
entity_obj=place,
prop=Property.objects.get(
name_forward=WORK_PLACE_RELATIONTYPES[place_type]
),
print(
f"work with sigle {related_work_siglum} doesn't exist. entity import rejected"
)

if sheet_name == "Namen":
Expand All @@ -132,112 +138,135 @@ def parse_entities_dataframe(sheet_name, df, file):
related_work_siglum = row["Sigle"]
person_uris = (row["URL_Wikipedia"], row["URL_DNB"], row["URL_extern"])

character = Character.objects.create(
fallback_name=character_name,
description=character_description,
relevancy=character_relevancy,
fictionality=character_fictionality_degree,
data_source=data_source,
)

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)

create_triple(
entity_subj=work_object,
entity_obj=character,
prop=Property.objects.get(name_forward="features"),
if work_with_siglum_exists(related_work_siglum):
character = Character.objects.create(
fallback_name=character_name,
description=character_description,
relevancy=character_relevancy,
fictionality=character_fictionality_degree,
data_source=data_source,
)

if character_fictionality in ("R", "M", "M/R"):
person_uri_objects = []
for person_uri in person_uris:
if person_uri:
uri = secure_urls(person_uri)
uri_obj, uri_created = Uri.objects.get_or_create(uri=uri)
person_uri_objects.append(uri_obj)

person = None
person_qs = None

person_fallback_name = (
character_name if not (person_forename or person_surname) else ""
)
if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)

if len(person_uri_objects) > 0:
person_qs = Person.objects.filter(
uri__in=[uri.id for uri in person_uri_objects],
)
else:
person_qs = Person.objects.filter(
fallback_name=person_fallback_name,
forename=person_forename,
surname=person_surname,
create_triple(
entity_subj=work_object,
entity_obj=character,
prop=Property.objects.get(name_forward="features"),
)

if person_qs.count() == 0:
person, created = Person.objects.get_or_create(
fallback_name=person_fallback_name,
forename=person_forename,
surname=person_surname,
alternative_name=person_alternative_name,
defaults={"data_source": data_source},
if character_fictionality in ("R", "M", "M/R"):
person_uri_objects = []
for person_uri in person_uris:
if person_uri:
uri = secure_urls(person_uri)
uri_obj, uri_created = Uri.objects.get_or_create(uri=uri)
person_uri_objects.append(uri_obj)

person = None
person_qs = None

person_fallback_name = (
character_name
if not (person_forename or person_surname)
else ""
)
else:
person = person_qs.first()

for uri in person_uri_objects:
if not uri.root_object:
uri.root_object = person
uri.save()
if len(person_uri_objects) > 0:
person_qs = Person.objects.filter(
uri__in=[uri.id for uri in person_uri_objects],
)
else:
person_qs = Person.objects.filter(
fallback_name=person_fallback_name,
forename=person_forename,
surname=person_surname,
)

if person_qs.count() == 0:
person, created = Person.objects.get_or_create(
fallback_name=person_fallback_name,
forename=person_forename,
surname=person_surname,
alternative_name=person_alternative_name,
defaults={"data_source": data_source},
)
else:
person = person_qs.first()

for uri in person_uri_objects:
if not uri.root_object:
uri.root_object = person
uri.save()

create_triple(
entity_subj=character,
entity_obj=person,
prop=Property.objects.get(name_forward="is based on"),
)

create_triple(
entity_subj=character,
entity_obj=person,
prop=Property.objects.get(name_forward="is based on"),
else:
print(
f"work with sigle {related_work_siglum} doesn't exist. entity import rejected"
)

if sheet_name == "Themen":
topic_name = row["Thema"]
related_work_siglum = row["Sigle"]
topic_alt_name = row["Synonyme"]
topic_description = row["Anmerkungen"]
topic, created = Topic.objects.get_or_create(
name=topic_name,
defaults={"data_source": data_source},
)
topic.alternative_name = topic_alt_name
topic.description = topic_description
topic.save()

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)

create_triple(
entity_subj=work_object,
entity_obj=topic,
prop=Property.objects.get(name_forward="is about topic"),

if work_with_siglum_exists(related_work_siglum):
topic, created = Topic.objects.get_or_create(
name=topic_name,
defaults={"data_source": data_source},
)
topic.alternative_name = topic_alt_name
topic.description = topic_description
topic.save()

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)

create_triple(
entity_subj=work_object,
entity_obj=topic,
prop=Property.objects.get(name_forward="is about topic"),
)

else:
print(
f"work with sigle {related_work_siglum} doesn't exist. entity import rejected"
)

if sheet_name == "Forschungshinsichten":
research_perspective_name = row["Thema"]
related_work_siglum = row["Sigle"]
research_perspective_description = row["Anmerkungen"]

research_perspective, created = ResearchPerspective.objects.get_or_create(
name=research_perspective_name,
defaults={"data_source": data_source},
)
research_perspective.description = research_perspective_description
research_perspective.save()

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)

create_triple(
entity_subj=work_object,
entity_obj=research_perspective,
prop=Property.objects.get(
name_forward="applies research perspective"
),
if work_with_siglum_exists(related_work_siglum):
(
research_perspective,
created,
) = ResearchPerspective.objects.get_or_create(
name=research_perspective_name,
defaults={"data_source": data_source},
)
research_perspective.description = research_perspective_description
research_perspective.save()

if Work.objects.filter(siglum=related_work_siglum).exists():
work_object = Work.objects.get(siglum=related_work_siglum)

create_triple(
entity_subj=work_object,
entity_obj=research_perspective,
prop=Property.objects.get(
name_forward="applies research perspective"
),
)
else:
print(
f"work with sigle {related_work_siglum} doesn't exist. entity import rejected"
)
Loading