From d9d785419f52f0beebb8509f0da660b3c9623006 Mon Sep 17 00:00:00 2001 From: Barbara Krautgartner Date: Tue, 19 Mar 2024 14:24:07 +0000 Subject: [PATCH 1/2] refactor: adapt use of datasource for vorlass import use correct fields for data source --- apis_ontology/scripts/import_vorlass_data.py | 25 ++++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/apis_ontology/scripts/import_vorlass_data.py b/apis_ontology/scripts/import_vorlass_data.py index fdbdf21..e55896b 100644 --- a/apis_ontology/scripts/import_vorlass_data.py +++ b/apis_ontology/scripts/import_vorlass_data.py @@ -15,7 +15,6 @@ ) from .additional_infos import WORK_TYPES, WORKTYPE_MAPPINGS from .import_helpers import create_triple, create_source -from .create_base_entities import create_archives, create_persons, create_types from apis_ontology.scripts.access_sharepoint import import_and_parse_data fname = os.path.basename(__file__) @@ -31,7 +30,10 @@ def parse_sigle_excel(file): failure = [] df = pd.read_excel(file) - source, created = create_source(name=file, author="") + + vorlass_excel_source, created = create_source( + name="VorlassSourceExcel", file_name=os.path.basename(file), data_type="xslx" + ) df_filtered = df[(df["Werktyp"].notnull()) | (df["status"].notnull())].replace( {np.nan: None} @@ -41,7 +43,7 @@ def parse_sigle_excel(file): for index, row in df_cleaned.iterrows(): title_siglum_dict[row["Name"] + row["abgeleitet von"]] = row.to_dict() - parse_vorlass_xml(title_siglum_dict) + parse_vorlass_xml(title_siglum_dict, vorlass_excel_source) return success, failure @@ -52,8 +54,7 @@ def get_status(status): return status_choices.get(status) -def parse_vorlass_xml(title_siglum_dict): - import_name = "Vorlass_Import" +def parse_vorlass_xml(title_siglum_dict, vorlass_excel_source): b_fr = Person.objects.filter(forename="Barbara", surname="Frischmuth").exclude( data_source=None )[0] @@ -61,8 +62,6 @@ def parse_vorlass_xml(title_siglum_dict): 0 ] - source, created = create_source(import_name) - with open( f"./vorlass_data_frischmuth/04_derived_custom/Frischmuth_Vorlass_FNI-FRISCHMUTH_import-data.xml", "r", @@ -71,6 +70,12 @@ def parse_vorlass_xml(title_siglum_dict): element = ET.parse(file_obj) items = element.findall("item") + vorlass_xml_source, created = create_source( + name="VorlassSourceXML", + file_name=os.path.basename(file_obj.name), + data_type="xslx", + ) + for workelem in items: title = workelem.attrib.get("title") notes = "docx pointer: " + workelem.attrib.get("category") @@ -107,7 +112,7 @@ def parse_vorlass_xml(title_siglum_dict): siglum=siglum, progress_status=status, subtitle=subtitle, - defaults={"data_source": source}, + defaults={"data_source": vorlass_excel_source}, ) create_triple( entity_subj=b_fr, @@ -138,7 +143,7 @@ def parse_vorlass_xml(title_siglum_dict): description=description, vorlass_doc_reference=notes.replace("docx pointer: ", "")[:255], # notes=notes, - data_source=source, + data_source=vorlass_xml_source, ) create_triple( entity_subj=pho, @@ -159,7 +164,7 @@ def parse_vorlass_xml(title_siglum_dict): description=description, vorlass_doc_reference=notes.replace("docx pointer: ", "")[:255], # notes=notes, - data_source=source, + data_source=vorlass_xml_source, ) create_triple( entity_subj=archive, From 6115fa6b0041c1c26e9e41794506da27bf76613f Mon Sep 17 00:00:00 2001 From: Barbara Krautgartner Date: Tue, 19 Mar 2024 14:40:25 +0000 Subject: [PATCH 2/2] refactor: use data source correctly provide correct field names and values --- .../scripts/import_zotero_collections.py | 24 +++---------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/apis_ontology/scripts/import_zotero_collections.py b/apis_ontology/scripts/import_zotero_collections.py index 07b3ffe..591f921 100644 --- a/apis_ontology/scripts/import_zotero_collections.py +++ b/apis_ontology/scripts/import_zotero_collections.py @@ -7,7 +7,7 @@ from apis_core.apis_relations.models import Property from apis_ontology.models import Expression, Work from .additional_infos import WORK_TYPES, ZOTERO_CREATORS_MAPPING -from .utils import create_import_name, create_import_date_string, convert_year_only_date +from .utils import create_import_date_string, convert_year_only_date from .import_helpers import ( create_triple, create_source, @@ -90,18 +90,10 @@ def import_work_collections(zot, coll_id, include_subs=True): sub_ids.append(d["key"]) # check all required sub collections are present - dt_string = create_import_date_string() - import_name = create_import_name( - [ - collection_data["name"], - dt_string, - ], - import_source="Zotero", - ) for coll_id in sub_ids: imported, failed = import_items_from_collection( - zot, coll_id, include_subs=True, import_name=import_name + zot, coll_id, include_subs=True, import_name=collection_data["name"] ) success.append(imported) failure.append(failed) @@ -300,16 +292,6 @@ def import_items_from_collection(zot, coll_key, include_subs=True, import_name=N """ collection_data = get_collection_data(zot, coll_key, include_subs=include_subs) - if not import_name: - dt_string = create_import_date_string() - import_name = create_import_name( - [ - collection_data["name"], - dt_string, - ], - import_source="Zotero", - ) - success, failure = import_items(collection_data["items"], import_name) return success, failure @@ -325,7 +307,7 @@ def import_items(collection_items, import_name): success = [] failure = [] - source, created = create_source(import_name) + source, created = create_source(import_name, "", "", "", "Zotero") importable, non_importable = get_valid_collection_items(collection_items) if importable: