"""Import works from a TEI work list into the database.

Script form of the notebook ``issue__17_dataimports_part3_works.ipynb``.

For every ``tei:bibl`` element that carries an ``xml:id`` the script makes
sure a ``Uri`` row exists for it and, when that URI is not linked to an
entity yet, creates a ``Work`` named after the element's first ``tei:title``,
adds it to the "Bahr Textverzeichnis" collection and links the URI to it.

``Collection``, ``Uri`` and ``Work`` are not imported here. The notebook
declares the "Django Shell-Plus" kernel (``django_extensions``), which is
expected to put the project's models into scope -- run the script in that
environment.
"""

# %% Imports
from acdh_tei_pyutils.tei import TeiReader
from acdh_tei_pyutils.utils import get_xmlid
from tqdm import tqdm
from icecream import ic
from normdata.utils import import_from_normdata

# %% Location of the TEI work list
source_file = "https://raw.githubusercontent.com/hermann-bahr/bahr-index/main/tsn/listwork_TSN_1.xml"

# %% Load the TEI document
doc = TeiReader(source_file)

# %% Shared settings for the import
nsmap = doc.nsmap
# Prefix and domain used to build and tag the URI of every imported work.
bahr_url = "https://hermanbahrtextverzeichnis/"
bahr_domain = "hermanbahrtextverzeichnis"
# Collection that every newly created work is added to.
bahr_col, _ = Collection.objects.get_or_create(name="Bahr Textverzeichnis")

# %% Create one Work per <bibl> that has no linked entity yet
for x in tqdm(doc.any_xpath(".//tei:bibl[@xml:id]")):
    try:
        hbtv_uri = get_xmlid(x)
    except KeyError:
        # No usable xml:id on this element: nothing to build a URI from.
        continue

    hbtv_url = f"{bahr_url}{hbtv_uri}"
    # NOTE(review): the Uri row is created before the title check below, so a
    # <bibl> without a title leaves a Uri that has no entity -- confirm that
    # this is intended.
    uri, _ = Uri.objects.get_or_create(uri=hbtv_url, domain=bahr_domain)

    try:
        name = x.xpath("./tei:title[1]/text()", namespaces=nsmap)[0]
    except IndexError:
        # No title text: report the URI and skip the entry.
        print(hbtv_url)
        continue

    try:
        # Fails with AttributeError when no entity is linked to the URI
        # (e.g. ``uri.entity`` is None); succeeds when one already exists,
        # in which case the entry is left untouched.
        uri.entity.get_child_entity()
    except AttributeError:
        # NOTE(review): this used to be a bare ``except:``, which also
        # swallowed KeyboardInterrupt and database errors and answered them
        # by creating a new Work. Confirm no other exception type was being
        # relied on here.
        entity = Work.objects.create(name=name)
        entity.collection.add(bahr_col)
        uri.entity = entity
        uri.save()