From d9d785419f52f0beebb8509f0da660b3c9623006 Mon Sep 17 00:00:00 2001
From: Barbara Krautgartner <barbara.krautgartner@oeaw.ac.at>
Date: Tue, 19 Mar 2024 14:24:07 +0000
Subject: [PATCH 1/2] refactor: adapt use of datasource for vorlass import

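Pass the correct fields (name, file_name, data_type) to create_source and
use separate data sources for the Excel and the XML input files.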
---
 apis_ontology/scripts/import_vorlass_data.py | 25 ++++++++++++--------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/apis_ontology/scripts/import_vorlass_data.py b/apis_ontology/scripts/import_vorlass_data.py
index fdbdf21..e55896b 100644
--- a/apis_ontology/scripts/import_vorlass_data.py
+++ b/apis_ontology/scripts/import_vorlass_data.py
@@ -15,7 +15,6 @@
 )
 from .additional_infos import WORK_TYPES, WORKTYPE_MAPPINGS
 from .import_helpers import create_triple, create_source
-from .create_base_entities import create_archives, create_persons, create_types
 from apis_ontology.scripts.access_sharepoint import import_and_parse_data
 
 fname = os.path.basename(__file__)
@@ -31,7 +30,10 @@ def parse_sigle_excel(file):
     failure = []
 
     df = pd.read_excel(file)
-    source, created = create_source(name=file, author="")
+
+    vorlass_excel_source, created = create_source(
+        name="VorlassSourceExcel", file_name=os.path.basename(file), data_type="xslx"
+    )
 
     df_filtered = df[(df["Werktyp"].notnull()) | (df["status"].notnull())].replace(
         {np.nan: None}
@@ -41,7 +43,7 @@ def parse_sigle_excel(file):
 
     for index, row in df_cleaned.iterrows():
         title_siglum_dict[row["Name"] + row["abgeleitet von"]] = row.to_dict()
-    parse_vorlass_xml(title_siglum_dict)
+    parse_vorlass_xml(title_siglum_dict, vorlass_excel_source)
     return success, failure
 
 
@@ -52,8 +54,7 @@ def get_status(status):
     return status_choices.get(status)
 
 
-def parse_vorlass_xml(title_siglum_dict):
-    import_name = "Vorlass_Import"
+def parse_vorlass_xml(title_siglum_dict, vorlass_excel_source):
     b_fr = Person.objects.filter(forename="Barbara", surname="Frischmuth").exclude(
         data_source=None
     )[0]
@@ -61,8 +62,6 @@ def parse_vorlass_xml(title_siglum_dict):
         0
     ]
 
-    source, created = create_source(import_name)
-
     with open(
         f"./vorlass_data_frischmuth/04_derived_custom/Frischmuth_Vorlass_FNI-FRISCHMUTH_import-data.xml",
         "r",
@@ -71,6 +70,12 @@ def parse_vorlass_xml(title_siglum_dict):
         element = ET.parse(file_obj)
         items = element.findall("item")
 
+        vorlass_xml_source, created = create_source(
+            name="VorlassSourceXML",
+            file_name=os.path.basename(file_obj.name),
+            data_type="xslx",
+        )
+
         for workelem in items:
             title = workelem.attrib.get("title")
             notes = "docx pointer: " + workelem.attrib.get("category")
@@ -107,7 +112,7 @@ def parse_vorlass_xml(title_siglum_dict):
                     siglum=siglum,
                     progress_status=status,
                     subtitle=subtitle,
-                    defaults={"data_source": source},
+                    defaults={"data_source": vorlass_excel_source},
                 )
                 create_triple(
                     entity_subj=b_fr,
@@ -138,7 +143,7 @@ def parse_vorlass_xml(title_siglum_dict):
                         description=description,
                         vorlass_doc_reference=notes.replace("docx pointer: ", "")[:255],
                         # notes=notes,
-                        data_source=source,
+                        data_source=vorlass_xml_source,
                     )
                     create_triple(
                         entity_subj=pho,
@@ -159,7 +164,7 @@ def parse_vorlass_xml(title_siglum_dict):
                         description=description,
                         vorlass_doc_reference=notes.replace("docx pointer: ", "")[:255],
                         # notes=notes,
-                        data_source=source,
+                        data_source=vorlass_xml_source,
                     )
                     create_triple(
                         entity_subj=archive,

From 6115fa6b0041c1c26e9e41794506da27bf76613f Mon Sep 17 00:00:00 2001
From: Barbara Krautgartner <barbara.krautgartner@oeaw.ac.at>
Date: Tue, 19 Mar 2024 14:40:25 +0000
Subject: [PATCH 2/2] refactor: use data source correctly

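Use the plain collection name as the import name instead of a generated
name (collection name plus date string), and pass the remaining
create_source arguments explicitly, ending with "Zotero".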
---
 .../scripts/import_zotero_collections.py      | 24 +++----------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/apis_ontology/scripts/import_zotero_collections.py b/apis_ontology/scripts/import_zotero_collections.py
index 07b3ffe..591f921 100644
--- a/apis_ontology/scripts/import_zotero_collections.py
+++ b/apis_ontology/scripts/import_zotero_collections.py
@@ -7,7 +7,7 @@
 from apis_core.apis_relations.models import Property
 from apis_ontology.models import Expression, Work
 from .additional_infos import WORK_TYPES, ZOTERO_CREATORS_MAPPING
-from .utils import create_import_name, create_import_date_string, convert_year_only_date
+from .utils import create_import_date_string, convert_year_only_date
 from .import_helpers import (
     create_triple,
     create_source,
@@ -90,18 +90,10 @@ def import_work_collections(zot, coll_id, include_subs=True):
                     sub_ids.append(d["key"])
 
             # check all required sub collections are present
-            dt_string = create_import_date_string()
-            import_name = create_import_name(
-                [
-                    collection_data["name"],
-                    dt_string,
-                ],
-                import_source="Zotero",
-            )
 
             for coll_id in sub_ids:
                 imported, failed = import_items_from_collection(
-                    zot, coll_id, include_subs=True, import_name=import_name
+                    zot, coll_id, include_subs=True, import_name=collection_data["name"]
                 )
                 success.append(imported)
                 failure.append(failed)
@@ -300,16 +292,6 @@ def import_items_from_collection(zot, coll_key, include_subs=True, import_name=N
     """
     collection_data = get_collection_data(zot, coll_key, include_subs=include_subs)
 
-    if not import_name:
-        dt_string = create_import_date_string()
-        import_name = create_import_name(
-            [
-                collection_data["name"],
-                dt_string,
-            ],
-            import_source="Zotero",
-        )
-
     success, failure = import_items(collection_data["items"], import_name)
 
     return success, failure
@@ -325,7 +307,7 @@ def import_items(collection_items, import_name):
     success = []
     failure = []
 
-    source, created = create_source(import_name)
+    source, created = create_source(import_name, "", "", "", "Zotero")
     importable, non_importable = get_valid_collection_items(collection_items)
 
     if importable: