From 0e630375148df7bdd13f06bd434508a6f563862b Mon Sep 17 00:00:00 2001 From: Barbara Krautgartner Date: Tue, 19 Mar 2024 14:24:07 +0000 Subject: [PATCH] refactor: adapt use of datasource for vorlass import use correct fields for data source --- apis_ontology/scripts/import_vorlass_data.py | 25 ++++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/apis_ontology/scripts/import_vorlass_data.py b/apis_ontology/scripts/import_vorlass_data.py index fdbdf21..e55896b 100644 --- a/apis_ontology/scripts/import_vorlass_data.py +++ b/apis_ontology/scripts/import_vorlass_data.py @@ -15,7 +15,6 @@ ) from .additional_infos import WORK_TYPES, WORKTYPE_MAPPINGS from .import_helpers import create_triple, create_source -from .create_base_entities import create_archives, create_persons, create_types from apis_ontology.scripts.access_sharepoint import import_and_parse_data fname = os.path.basename(__file__) @@ -31,7 +30,10 @@ def parse_sigle_excel(file): failure = [] df = pd.read_excel(file) - source, created = create_source(name=file, author="") + + vorlass_excel_source, created = create_source( + name="VorlassSourceExcel", file_name=os.path.basename(file), data_type="xslx" + ) df_filtered = df[(df["Werktyp"].notnull()) | (df["status"].notnull())].replace( {np.nan: None} @@ -41,7 +43,7 @@ def parse_sigle_excel(file): for index, row in df_cleaned.iterrows(): title_siglum_dict[row["Name"] + row["abgeleitet von"]] = row.to_dict() - parse_vorlass_xml(title_siglum_dict) + parse_vorlass_xml(title_siglum_dict, vorlass_excel_source) return success, failure @@ -52,8 +54,7 @@ def get_status(status): return status_choices.get(status) -def parse_vorlass_xml(title_siglum_dict): - import_name = "Vorlass_Import" +def parse_vorlass_xml(title_siglum_dict, vorlass_excel_source): b_fr = Person.objects.filter(forename="Barbara", surname="Frischmuth").exclude( data_source=None )[0] @@ -61,8 +62,6 @@ def parse_vorlass_xml(title_siglum_dict): 0 ] - source, created = create_source(import_name) - with open( f"./vorlass_data_frischmuth/04_derived_custom/Frischmuth_Vorlass_FNI-FRISCHMUTH_import-data.xml", "r", @@ -71,6 +70,12 @@ def parse_vorlass_xml(title_siglum_dict): element = ET.parse(file_obj) items = element.findall("item") + vorlass_xml_source, created = create_source( + name="VorlassSourceXML", + file_name=os.path.basename(file_obj.name), + data_type="xslx", + ) + for workelem in items: title = workelem.attrib.get("title") notes = "docx pointer: " + workelem.attrib.get("category") @@ -107,7 +112,7 @@ def parse_vorlass_xml(title_siglum_dict): siglum=siglum, progress_status=status, subtitle=subtitle, - defaults={"data_source": source}, + defaults={"data_source": vorlass_excel_source}, ) create_triple( entity_subj=b_fr, @@ -138,7 +143,7 @@ def parse_vorlass_xml(title_siglum_dict): description=description, vorlass_doc_reference=notes.replace("docx pointer: ", "")[:255], # notes=notes, - data_source=source, + data_source=vorlass_xml_source, ) create_triple( entity_subj=pho, @@ -159,7 +164,7 @@ def parse_vorlass_xml(title_siglum_dict): description=description, vorlass_doc_reference=notes.replace("docx pointer: ", "")[:255], # notes=notes, - data_source=source, + data_source=vorlass_xml_source, ) create_triple( entity_subj=archive,