Skip to content

Commit

Permalink
wip #17 [skip ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
csae8092 committed Jan 30, 2024
1 parent e909d71 commit 6db295b
Showing 1 changed file with 109 additions and 0 deletions.
109 changes: 109 additions & 0 deletions issue__17_dataimports_part3_works.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "d7335a42",
"metadata": {},
"outputs": [],
"source": [
"from acdh_tei_pyutils.tei import TeiReader\n",
"from acdh_tei_pyutils.utils import get_xmlid\n",
"from tqdm import tqdm\n",
"from icecream import ic\n",
"from normdata.utils import import_from_normdata"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dd68245d",
"metadata": {},
"outputs": [],
"source": [
"source_file = \"https://raw.githubusercontent.com/hermann-bahr/bahr-index/main/tsn/listwork_TSN_1.xml\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "570aec42",
"metadata": {},
"outputs": [],
"source": [
"# Load the TEI document referenced by source_file; the cells below query it via XPath\n",
"doc = TeiReader(source_file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "070e76d4",
"metadata": {},
"outputs": [],
"source": [
"# Namespace map of the parsed document, passed to the element-level XPath lookups\n",
"nsmap = doc.nsmap\n",
"\n",
"# Domain label stored on every Uri, and the base URL the xml:ids are appended to\n",
"bahr_domain = \"hermanbahrtextverzeichnis\"\n",
"bahr_url = f\"https://{bahr_domain}/\"\n",
"\n",
"# Collection every imported work is added to; fetched if it exists, created otherwise.\n",
"# NOTE(review): Collection is not imported in this notebook - presumably supplied by the\n",
"# Django Shell-Plus kernel's automatic model imports; confirm.\n",
"bahr_col, _ = Collection.objects.get_or_create(name=\"Bahr Textverzeichnis\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b6d54fa4",
"metadata": {},
"outputs": [],
"source": [
"# For every tei:bibl that carries an xml:id: get or create its Uri, reuse the entity\n",
"# already linked to that Uri or create a new Work named after the first tei:title,\n",
"# then add the entity to the Bahr collection and store the link on the Uri.\n",
"for x in tqdm(doc.any_xpath(\".//tei:bibl[@xml:id]\")):\n",
"    try:\n",
"        hbtv_uri = get_xmlid(x)\n",
"    except KeyError:\n",
"        # no xml:id available, so there is nothing to build the URL from\n",
"        continue\n",
"    hbtv_url = f\"{bahr_url}{hbtv_uri}\"\n",
"    uri, _ = Uri.objects.get_or_create(uri=hbtv_url, domain=bahr_domain)\n",
"    try:\n",
"        name = x.xpath(\"./tei:title[1]/text()\", namespaces=nsmap)[0]\n",
"    except IndexError:\n",
"        # the first tei:title yielded no text: report the URL and skip this entry\n",
"        print(hbtv_url)\n",
"        continue\n",
"    try:\n",
"        entity = uri.entity.get_child_entity()\n",
"    except Exception:\n",
"        # Lookup failed (presumably because the Uri has no entity yet): create a Work.\n",
"        # Catch Exception instead of using a bare except so that interrupting the\n",
"        # kernel stops the import rather than silently creating a stray Work.\n",
"        entity = Work.objects.create(name=name)\n",
"    entity.collection.add(bahr_col)\n",
"    uri.entity = entity\n",
"    uri.save()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "77e897d5",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 6db295b

Please sign in to comment.