From 45c3c4cb81289892b1790a0e8b65ab55fdb3dafc Mon Sep 17 00:00:00 2001 From: csae8092 Date: Sat, 13 Jan 2024 14:36:22 +0100 Subject: [PATCH] added mgm-cmd to add gn_feature codes --- .coveragerc | 5 +- .../detail_views/place_detail.html | 2 +- crontab | 1 + .../commands/add_gn_feature_codes.py | 56 +++++++++ fetch_gn_feature_codes__issue__79.ipynb | 110 ++++++++++++++++++ 5 files changed, 171 insertions(+), 3 deletions(-) create mode 100644 dumper/management/commands/add_gn_feature_codes.py create mode 100644 fetch_gn_feature_codes__issue__79.ipynb diff --git a/.coveragerc b/.coveragerc index 4ef16d0..2d15d2e 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,4 +1,5 @@ [run] branch = True -omit = dumper/management/commands/labels_to_uris.py - \ No newline at end of file +omit = + dumper/management/commands/labels_to_uris.py + dumper/management/commands/add_gn_feature_codes.py diff --git a/apis_core/apis_entities/templates/apis_entities/detail_views/place_detail.html b/apis_core/apis_entities/templates/apis_entities/detail_views/place_detail.html index d012151..632702c 100644 --- a/apis_core/apis_entities/templates/apis_entities/detail_views/place_detail.html +++ b/apis_core/apis_entities/templates/apis_entities/detail_views/place_detail.html @@ -50,7 +50,7 @@ Ortstype - {{ object.kind }} + {% if object.kind.description %} {{ object.kind }} {{ object.kind.description }} {% else %} {{ object.kind }} {% endif %} diff --git a/crontab b/crontab index 0da0e67..9d6bcb6 100644 --- a/crontab +++ b/crontab @@ -5,4 +5,5 @@ 1 3 * * * root cd /opt/app && /usr/local/bin/python3 manage.py wikipedia_minter >> /var/log/cron.log 2>&1 30 3 * * * root cd /opt/app && /usr/local/bin/python3 manage.py wikidata_minter >> /var/log/cron.log 2>&1 1 4 * * * root cd /opt/app && /usr/local/bin/python3 manage.py dump_entities >> /var/log/cron.log 2>&1 +1 6 * * * root cd /opt/app && /usr/local/bin/python3 manage.py add_gn_feature_codes >> /var/log/cron.log 2>&1 # diff --git 
a/dumper/management/commands/add_gn_feature_codes.py b/dumper/management/commands/add_gn_feature_codes.py
new file mode 100644
index 0000000..39b0ae8
--- /dev/null
+++ b/dumper/management/commands/add_gn_feature_codes.py
@@ -0,0 +1,65 @@
+import os
+from tqdm import tqdm
+
+from datetime import datetime
+
+from acdh_geonames_utils.gn_client import gn_as_object
+from django.conf import settings
+from django.core.management.base import BaseCommand
+
+from apis_core.apis_entities.models import Place
+from apis_core.apis_vocabularies.models import PlaceType
+from dumper.utils import write_report
+
+
+class Command(BaseCommand):
+    """Set the place type of GeoNames-linked places to their feature code.
+
+    Each run handles at most 250 places that have a URI whose domain contains
+    "geonames" and whose current type description does not contain "geonames".
+    """
+
+    help = "adds geonames feature codes to places with geoname uris"
+
+    def handle(self, *args, **kwargs):
+        """Fetch feature codes, update the places and write a run report."""
+        start_time = datetime.now().strftime(settings.PMB_TIME_PATTERN)
+        print("start adding geoname feature codes")
+        cols = ["id", "uri__domain", "uri__uri", "kind__description"]
+        places = (
+            Place.objects.filter(uri__domain__icontains="geonames")
+            .exclude(kind__description__icontains="geonames")
+            .values_list(*cols)
+        )
+        place = None
+        for x in tqdm(places[:250]):
+            place = Place.objects.get(id=x[0])
+            gn_uri = x[2]
+            try:
+                code = gn_as_object(gn_uri)["feature code"]
+            except Exception as e:
+                # Best effort: a failed lookup or missing key must not abort
+                # the batch; `Exception` keeps Ctrl-C able to stop the run.
+                print(f"no feature code for {gn_uri} due to {e}")
+                code = "kein passender Code gefunden"
+            try:
+                place_type, _ = PlaceType.objects.get_or_create(name=code)
+            except Exception:
+                # e.g. several PlaceTypes share the name: take a described one
+                place_type = (
+                    PlaceType.objects.filter(name=code)
+                    .exclude(description=None)
+                    .first()
+                )
+            if place_type is None:
+                # do not overwrite the current type with NULL
+                print(f"no place type for {code}, skipping {place.id}")
+                continue
+            place.kind = place_type
+            place.save()
+        if place is not None:
+            # with no matching places the loop never bound `place`
+            print(place.id)
+        end_time = datetime.now().strftime(settings.PMB_TIME_PATTERN)
+        report = [os.path.basename(__file__), start_time, end_time]
+        write_report(report)
diff --git a/fetch_gn_feature_codes__issue__79.ipynb b/fetch_gn_feature_codes__issue__79.ipynb
new file mode 100644
index 0000000..eef6a95 --- /dev/null +++ b/fetch_gn_feature_codes__issue__79.ipynb @@ -0,0 +1,110 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "080b69c6", + "metadata": {}, + "source": [ + "# fetching gnd feature code descriptions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85431463", + "metadata": {}, + "outputs": [], + "source": [ + "from acdh_tei_pyutils.tei import TeiReader\n", + "from acdh_tei_pyutils.utils import extract_fulltext\n", + "from tqdm import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3ac05c4", + "metadata": {}, + "outputs": [], + "source": [ + "nsmap = {\n", + " \"gn\": \"https://www.geonames.org/ontology#\",\n", + " \"skos\": \"http://www.w3.org/2004/02/skos/core#\",\n", + " \"rdf\": \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b393e39d", + "metadata": {}, + "outputs": [], + "source": [ + "doc = TeiReader(\"https://www.geonames.org/ontology/ontology_v3.3.rdf\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00e1887c", + "metadata": {}, + "outputs": [], + "source": [ + "place_types = PlaceType.objects.filter(name__icontains=\"http\")\n", + "for x in tqdm(place_types):\n", + " name = x.name.split('#')[-1]\n", + " x.name = name\n", + " x.save() " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04d1cd5f", + "metadata": {}, + "outputs": [], + "source": [ + "place_types = PlaceType.objects.exclude(name__icontains=\"(\").filter(name__icontains=\".\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab9370d4", + "metadata": {}, + "outputs": [], + "source": [ + "for x in tqdm(place_types):\n", + " xpath_expr = f'.//gn:Code[@rdf:about=\"#{x.name}\"]/skos:definition[@xml:lang=\"en\"]'\n", + " try:\n", + " description = doc.tree.xpath(xpath_expr, namespaces=nsmap)[0]\n", + " except IndexError:\n", + " continue\n", + " 
x.description = f\"{extract_fulltext(description).title()}. Quelle: https://www.geonames.org/ontology/ontology_v3.3.rdf\"\n", + " x.save()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Django Shell-Plus", + "language": "python", + "name": "django_extensions" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}