Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bk/refactor/use of datasource #39

Merged
merged 2 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions apis_ontology/scripts/import_vorlass_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
)
from .additional_infos import WORK_TYPES, WORKTYPE_MAPPINGS
from .import_helpers import create_triple, create_source
from .create_base_entities import create_archives, create_persons, create_types
from apis_ontology.scripts.access_sharepoint import import_and_parse_data

fname = os.path.basename(__file__)
Expand All @@ -31,7 +30,10 @@ def parse_sigle_excel(file):
failure = []

df = pd.read_excel(file)
source, created = create_source(name=file, author="")

vorlass_excel_source, created = create_source(
name="VorlassSourceExcel", file_name=os.path.basename(file), data_type="xslx"
)

df_filtered = df[(df["Werktyp"].notnull()) | (df["status"].notnull())].replace(
{np.nan: None}
Expand All @@ -41,7 +43,7 @@ def parse_sigle_excel(file):

for index, row in df_cleaned.iterrows():
title_siglum_dict[row["Name"] + row["abgeleitet von"]] = row.to_dict()
parse_vorlass_xml(title_siglum_dict)
parse_vorlass_xml(title_siglum_dict, vorlass_excel_source)
return success, failure


Expand All @@ -52,17 +54,14 @@ def get_status(status):
return status_choices.get(status)


def parse_vorlass_xml(title_siglum_dict):
import_name = "Vorlass_Import"
def parse_vorlass_xml(title_siglum_dict, vorlass_excel_source):
b_fr = Person.objects.filter(forename="Barbara", surname="Frischmuth").exclude(
data_source=None
)[0]
archive = Archive.objects.filter(name="Franz-Nabl-Institut für Literaturforschung")[
0
]

source, created = create_source(import_name)

with open(
f"./vorlass_data_frischmuth/04_derived_custom/Frischmuth_Vorlass_FNI-FRISCHMUTH_import-data.xml",
"r",
Expand All @@ -71,6 +70,12 @@ def parse_vorlass_xml(title_siglum_dict):
element = ET.parse(file_obj)
items = element.findall("item")

vorlass_xml_source, created = create_source(
name="VorlassSourceXML",
file_name=os.path.basename(file_obj.name),
data_type="xslx",
)

for workelem in items:
title = workelem.attrib.get("title")
notes = "docx pointer: " + workelem.attrib.get("category")
Expand Down Expand Up @@ -107,7 +112,7 @@ def parse_vorlass_xml(title_siglum_dict):
siglum=siglum,
progress_status=status,
subtitle=subtitle,
defaults={"data_source": source},
defaults={"data_source": vorlass_excel_source},
)
create_triple(
entity_subj=b_fr,
Expand Down Expand Up @@ -138,7 +143,7 @@ def parse_vorlass_xml(title_siglum_dict):
description=description,
vorlass_doc_reference=notes.replace("docx pointer: ", "")[:255],
# notes=notes,
data_source=source,
data_source=vorlass_xml_source,
)
create_triple(
entity_subj=pho,
Expand All @@ -159,7 +164,7 @@ def parse_vorlass_xml(title_siglum_dict):
description=description,
vorlass_doc_reference=notes.replace("docx pointer: ", "")[:255],
# notes=notes,
data_source=source,
data_source=vorlass_xml_source,
)
create_triple(
entity_subj=archive,
Expand Down
24 changes: 3 additions & 21 deletions apis_ontology/scripts/import_zotero_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from apis_core.apis_relations.models import Property
from apis_ontology.models import Expression, Work
from .additional_infos import WORK_TYPES, ZOTERO_CREATORS_MAPPING
from .utils import create_import_name, create_import_date_string, convert_year_only_date
from .utils import create_import_date_string, convert_year_only_date
from .import_helpers import (
create_triple,
create_source,
Expand Down Expand Up @@ -90,18 +90,10 @@ def import_work_collections(zot, coll_id, include_subs=True):
sub_ids.append(d["key"])

# check all required sub collections are present
dt_string = create_import_date_string()
import_name = create_import_name(
[
collection_data["name"],
dt_string,
],
import_source="Zotero",
)

for coll_id in sub_ids:
imported, failed = import_items_from_collection(
zot, coll_id, include_subs=True, import_name=import_name
zot, coll_id, include_subs=True, import_name=collection_data["name"]
)
success.append(imported)
failure.append(failed)
Expand Down Expand Up @@ -300,16 +292,6 @@ def import_items_from_collection(zot, coll_key, include_subs=True, import_name=N
"""
collection_data = get_collection_data(zot, coll_key, include_subs=include_subs)

if not import_name:
dt_string = create_import_date_string()
import_name = create_import_name(
[
collection_data["name"],
dt_string,
],
import_source="Zotero",
)

success, failure = import_items(collection_data["items"], import_name)

return success, failure
Expand All @@ -325,7 +307,7 @@ def import_items(collection_items, import_name):
success = []
failure = []

source, created = create_source(import_name)
source, created = create_source(import_name, "", "", "", "Zotero")
importable, non_importable = get_valid_collection_items(collection_items)

if importable:
Expand Down
Loading