Skip to content

Commit

Permalink
refactor: adapt use of datasource for vorlass import
Browse files (browse the repository at this point in the history)
use correct fields for data source
  • Loading branch information
babslgam authored and koeaw committed Mar 19, 2024
1 parent 3402d26 commit 0e63037
Showing 1 changed file with 15 additions and 10 deletions.
25 changes: 15 additions & 10 deletions apis_ontology/scripts/import_vorlass_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
)
from .additional_infos import WORK_TYPES, WORKTYPE_MAPPINGS
from .import_helpers import create_triple, create_source
from .create_base_entities import create_archives, create_persons, create_types
from apis_ontology.scripts.access_sharepoint import import_and_parse_data

fname = os.path.basename(__file__)
Expand All @@ -31,7 +30,10 @@ def parse_sigle_excel(file):
failure = []

df = pd.read_excel(file)
source, created = create_source(name=file, author="")

vorlass_excel_source, created = create_source(
name="VorlassSourceExcel", file_name=os.path.basename(file), data_type="xslx"
)

df_filtered = df[(df["Werktyp"].notnull()) | (df["status"].notnull())].replace(
{np.nan: None}
Expand All @@ -41,7 +43,7 @@ def parse_sigle_excel(file):

for index, row in df_cleaned.iterrows():
title_siglum_dict[row["Name"] + row["abgeleitet von"]] = row.to_dict()
parse_vorlass_xml(title_siglum_dict)
parse_vorlass_xml(title_siglum_dict, vorlass_excel_source)
return success, failure


Expand All @@ -52,17 +54,14 @@ def get_status(status):
return status_choices.get(status)


def parse_vorlass_xml(title_siglum_dict):
import_name = "Vorlass_Import"
def parse_vorlass_xml(title_siglum_dict, vorlass_excel_source):
b_fr = Person.objects.filter(forename="Barbara", surname="Frischmuth").exclude(
data_source=None
)[0]
archive = Archive.objects.filter(name="Franz-Nabl-Institut für Literaturforschung")[
0
]

source, created = create_source(import_name)

with open(
f"./vorlass_data_frischmuth/04_derived_custom/Frischmuth_Vorlass_FNI-FRISCHMUTH_import-data.xml",
"r",
Expand All @@ -71,6 +70,12 @@ def parse_vorlass_xml(title_siglum_dict):
element = ET.parse(file_obj)
items = element.findall("item")

vorlass_xml_source, created = create_source(
name="VorlassSourceXML",
file_name=os.path.basename(file_obj.name),
data_type="xslx",
)

for workelem in items:
title = workelem.attrib.get("title")
notes = "docx pointer: " + workelem.attrib.get("category")
Expand Down Expand Up @@ -107,7 +112,7 @@ def parse_vorlass_xml(title_siglum_dict):
siglum=siglum,
progress_status=status,
subtitle=subtitle,
defaults={"data_source": source},
defaults={"data_source": vorlass_excel_source},
)
create_triple(
entity_subj=b_fr,
Expand Down Expand Up @@ -138,7 +143,7 @@ def parse_vorlass_xml(title_siglum_dict):
description=description,
vorlass_doc_reference=notes.replace("docx pointer: ", "")[:255],
# notes=notes,
data_source=source,
data_source=vorlass_xml_source,
)
create_triple(
entity_subj=pho,
Expand All @@ -159,7 +164,7 @@ def parse_vorlass_xml(title_siglum_dict):
description=description,
vorlass_doc_reference=notes.replace("docx pointer: ", "")[:255],
# notes=notes,
data_source=source,
data_source=vorlass_xml_source,
)
create_triple(
entity_subj=archive,
Expand Down

0 comments on commit 0e63037

Please sign in to comment.