diff --git a/.flake8 b/.flake8 index f830282..7c90cdd 100644 --- a/.flake8 +++ b/.flake8 @@ -1,5 +1,5 @@ [flake8] -ignore = D203 W504 +ignore = D203 W504 W503 max-line-length = 120 exclude = */migrations, diff --git a/apis_core/apis_entities/models.py b/apis_core/apis_entities/models.py index 6e4870d..855f9d7 100644 --- a/apis_core/apis_entities/models.py +++ b/apis_core/apis_entities/models.py @@ -103,13 +103,16 @@ def create_relation_methods_from_manytomany_fields(cls): or person.get_related_person_instances() - Note that with these methods it is not necessary to differentiate between A and B entities when self-relations exist. + Note that with these methods it is not necessary to differentiate between + A and B entities when self-relations exist. The result of any such method call is the queryset of the related entities. - (And not a ManyToManyManager as is the case when calling ._set where in the case of self-relation + (And not a ManyToManyManager as is the case when calling ._set where + in the case of self-relation it must be differentiated between A and B entities, e.g. person.personA_set ) - It was not possible to my understanding to change managers in such a way that two (the A and the B) could be combined + It was not possible to my understanding to change managers in such a way that two + (the A and the B) could be combined into one manager. Hence these additional shortcut methods. :return: None @@ -121,7 +124,8 @@ def create_function_from_manytomany_field_to_other_entity( """ creates the individual method from a ManyToMany field by calling the manager's objects.all() - This method creation has to be done here in a separate method, so that it can be called once before assignment + This method creation has to be done here in a separate method, so that it can be called once + before assignment as otherwise the variable 'entity_name' in the loop below changes with each iteration and with that also the method references (due to python's "late binding"). 
A method call in between thus forces the content of 'entity_name' to be assigned for good to the @@ -220,7 +224,7 @@ def get_all_entity_classes(cls): :return: list of all python classes of the entities defined within this models' module """ - if cls._all_entity_classes == None: + if cls._all_entity_classes is None: entity_classes = [] entity_names = [] @@ -515,7 +519,7 @@ class Meta: verbose_name = "Person" verbose_name_plural = "Personen" ordering = [ - "id", + "-id", ] def get_tei_url(self): @@ -550,7 +554,7 @@ class Meta: verbose_name = "Ort" verbose_name_plural = "Orte" ordering = [ - "id", + "-id", ] def get_tei_url(self): @@ -577,7 +581,7 @@ class Meta: verbose_name = "Institution" verbose_name_plural = "Institutionen" ordering = [ - "id", + "-id", ] def get_tei_url(self): @@ -604,7 +608,7 @@ class Meta: verbose_name = "Ereignis" verbose_name_plural = "Ereignisse" ordering = [ - "id", + "-id", ] def get_tei_url(self): @@ -629,7 +633,7 @@ class Meta: verbose_name = "Werk" verbose_name_plural = "Werke" ordering = [ - "id", + "-id", ] def get_tei_url(self): diff --git a/apis_core/apis_relations/utils.py b/apis_core/apis_relations/utils.py index fe7e126..ddf48c5 100644 --- a/apis_core/apis_relations/utils.py +++ b/apis_core/apis_relations/utils.py @@ -143,6 +143,7 @@ class Meta: "relation_type", "start_date__year", "end_date__year", + "collection", ] return MyRelationsListFilter @@ -163,6 +164,7 @@ def __init__(self, *args, **kwargs): "relation_type", "start_date__year", "end_date__year", + "collection", ) return MyRelationsFilterFormHelper diff --git a/dumper/templates/dumper/about.html b/dumper/templates/dumper/about.html index 4cb202e..8e90ceb 100644 --- a/dumper/templates/dumper/about.html +++ b/dumper/templates/dumper/about.html @@ -35,6 +35,16 @@

DatenherkunftHermann Bahr: Tagebücher, Skizzenbücher, Notizhefte. Hg. Moritz Csáky, Mitarbeit von Lottelis Moser, Helene Zand, Lukas Mayerhofer und Kurt Ifkovits. Wien, Köln, Weimar: Böhlau 1994-2003, Link (Januar 2024) +
  • Alexander Wilfing, Christoph Landerer: Hanslick-Online, https://hanslick.acdh.oeaw.ac.at/.
    + 2.385 Entitäten (Oktober 2024)
  • +
  • Vasiliki Papadopoulou und Monika Jaroš: Brahms online, https://brahms-online.oeaw.ac.at/.
    + 687 Entitäten (Oktober 2024)
  • +
  • Schubert digital, https://schubert-digital.at/.
    + 358 Entitäten (Oktober 2024)
  • +
  • Stephan Kurz und Daniel Schopper: Kinobesuche von Clara Katharina Pollaczek und Arthur Schnitzler
    + 820 Entitäten (November 2024)
  • + +

    Die Entitäten werden kontinuierlich erweitert, durchgesehen und auch miteinander in Beziehung gesetzt.

    @@ -456,13 +466,11 @@

    URIs

    Eine Konkordanz zwischen PMB-URIs und GND-URIs kann über folgende BEACON Datei bezogen werden.

    Eine Konkordanz zwischen PMB-URIs und Wikidata-URIs kann über folgende BEACON Datei bezogen werden.

    -

    Wikipedia-Einträge werden nicht direkt verlinkt. Auf der linken Seite im Menü eines Wikipedia-Eintrags - steht ein Link auf das entsprechende Wikidata-Objekt.

    + href="{% url 'apis_core:wikidata_beacon' %}">BEACON Datei
    bezogen werden. (Über Wikidata ist auch die Verlinkung zur Wikipedia sichergestellt.)

    Konkordanz-Listen zwischen Projekt-URIS und PMB-URIs können nach diesem Schema heruntergeladen werden: https://pmb.acdh.oeaw.ac.at/apis/domain-uris/{projekt-uri-domain}, also z.B:

    Zur Personensuche

    @@ -487,8 +495,10 @@

    Berlin

  • Adressbuch Berlin
  • +
  • Deutsche Digitale Bibliothek
  • The European Library - (Internationale Zeitungen, auch Berlin.)
  • + (Internationale Zeitungen) + {% endblock %} \ No newline at end of file
diff --git a/notebooks/issue__228-huge-relation-import.ipynb b/notebooks/issue__228-huge-relation-import.ipynb
new file mode 100644
index 0000000..813b28e
--- /dev/null
+++ b/notebooks/issue__228-huge-relation-import.ipynb
@@ -0,0 +1,209 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "abdba1e8-027e-4931-a35b-03aaf8aa9464",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# run against production 2024-11-06\n",
+    "from django.core.exceptions import ObjectDoesNotExist\n",
+    "from tqdm import tqdm\n",
+    "from dumper.utils import gsheet_to_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cdb36196-46aa-4b7b-9871-0a8a3dd279ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = gsheet_to_df(\"1-_WXbdmpLzz_9vBiZ1y9v2tle_26m4tAhPTGzkkin5E\")\n",
+    "col, _ = Collection.objects.get_or_create(name=\"Schnitzler-Veranstaltungen\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "682d2d62-1e30-4db5-8323-c07a3f62222e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.head(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "342c4e3d-39c5-4f8b-b0b0-950cde457983",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Lookup keyed by the relation-type id (as a string) from the sheet's relation-id column;\n",
+    "# each entry holds the relation model, the relation-type instance and both entity classes.\n",
+    "relation_type_lookup = {\n",
+    "    \"1049\": {\n",
+    "        \"relation_type\": PersonWork,\n",
+    "        \"relation\": PersonWorkRelation.objects.get(id=1049),\n",
+    "        \"source_class\": Person,\n",
+    "        \"target_class\": Work,\n",
+    "    },\n",
+    "    \"1181\": {\n",
+    "        \"relation_type\": PersonPlace,\n",
+    "        \"relation\": PersonPlaceRelation.objects.get(id=1181),\n",
+    "        \"source_class\": Person,\n",
+    "        \"target_class\": Place,\n",
+    "    },\n",
+    "    \"1201\": {\n",
+    "        \"relation_type\": EventWork,\n",
+    "        \"relation\": EventWorkRelation.objects.get(id=1201),\n",
+    "        \"source_class\": Event,\n",
+    "        \"target_class\": Work,\n",
+    "    },\n",
+    "    \"1204\": {\n",
+    "        \"relation_type\": EventWork,\n",
+    "        \"relation\": EventWorkRelation.objects.get(id=1204),\n",
+    "        \"source_class\": Event,\n",
+    "        \"target_class\": Work,\n",
+    "    },\n",
+    "    \"1206\": {\n",
+    "        \"relation_type\": EventWork,\n",
+    "        \"relation\": EventWorkRelation.objects.get(id=1206),\n",
+    "        \"source_class\": Event,\n",
+    "        \"target_class\": Work,\n",
+    "    },\n",
+    "    \"1224\": {\n",
+    "        \"relation_type\": PersonWork,\n",
+    "        \"relation\": PersonWorkRelation.objects.get(id=1224),\n",
+    "        \"source_class\": Person,\n",
+    "        \"target_class\": Work,\n",
+    "    },\n",
+    "    \"1252\": {\n",
+    "        \"relation_type\": EventWork,\n",
+    "        \"relation\": EventWorkRelation.objects.get(id=1252),\n",
+    "        \"source_class\": Event,\n",
+    "        \"target_class\": Work,\n",
+    "    },\n",
+    "    \"1256\": {\n",
+    "        \"relation_type\": PersonEvent,\n",
+    "        \"relation\": PersonEventRelation.objects.get(id=1256),\n",
+    "        \"source_class\": Person,\n",
+    "        \"target_class\": Event,\n",
+    "    },\n",
+    "    \"1351\": {\n",
+    "        \"relation_type\": InstitutionEvent,\n",
+    "        \"relation\": InstitutionEventRelation.objects.get(id=1351),\n",
+    "        \"source_class\": Institution,\n",
+    "        \"target_class\": Event,\n",
+    "    },\n",
+    "    \"1369\": {\n",
+    "        \"relation_type\": PlaceEvent,\n",
+    "        \"relation\": PlaceEventRelation.objects.get(id=1369),\n",
+    "        \"source_class\": Place,\n",
+    "        \"target_class\": Event,\n",
+    "    },\n",
+    "    \"1370\": {\n",
+    "        \"relation_type\": PersonEvent,\n",
+    "        \"relation\": PersonEventRelation.objects.get(id=1370),\n",
+    "        \"source_class\": Person,\n",
+    "        \"target_class\": Event,\n",
+    "    },\n",
+    "    \"1527\": {\n",
+    "        \"relation_type\": EventWork,\n",
+    "        \"relation\": EventWorkRelation.objects.get(id=1527),\n",
+    "        \"source_class\": Event,\n",
+    "        \"target_class\": Work,\n",
+    "    },\n",
+    "    \"1528\": {\n",
+    "        \"relation_type\": EventWork,\n",
+    "        \"relation\": EventWorkRelation.objects.get(id=1528),\n",
+    "        \"source_class\": Event,\n",
+    "        \"target_class\": Work,\n",
+    "    },\n",
+    "    \"4362\": {\n",
+    "        \"relation_type\": EventWork,\n",
+    "        \"relation\": EventWorkRelation.objects.get(id=4362),\n",
+    "        \"source_class\": Event,\n",
+    "        \"target_class\": Work,\n",
+    "    },\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6748b522-2617-4436-ab9b-0c46c1867e6a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ids from the sheet that could not be resolved to an entity\n",
+    "does_not_exist = set()\n",
+    "for g, ndf in df.groupby(\"relation-id\"):\n",
+    "    lookup = relation_type_lookup[str(g)]\n",
+    "    relation = lookup[\"relation\"]\n",
+    "    relation_class = lookup[\"relation_type\"]\n",
+    "    source_class = lookup[\"source_class\"]\n",
+    "    target_class = lookup[\"target_class\"]\n",
+    "    print(relation, relation_class, target_class)\n",
+    "    for i, row in tqdm(ndf.iterrows(), total=len(ndf)):\n",
+    "        try:\n",
+    "            source = source_class.objects.get(pk=int(row[\"source_id\"]))\n",
+    "        except (ObjectDoesNotExist, ValueError):\n",
+    "            does_not_exist.add(row[\"source_id\"])\n",
+    "            continue\n",
+    "        try:\n",
+    "            target = target_class.objects.get(pk=int(row[\"target_id\"]))\n",
+    "        except (ObjectDoesNotExist, ValueError):\n",
+    "            # skip the row: without this, `target` is unbound or left over from a previous row\n",
+    "            does_not_exist.add(row[\"target_id\"])\n",
+    "            continue\n",
+    "        if isinstance(row[\"relation_start_date_written\"], str):\n",
+    "            start_date_written = row[\"relation_start_date_written\"]\n",
+    "            end_date_written = row[\"relation_end_date_written\"]\n",
+    "        else:\n",
+    "            start_date_written = None\n",
+    "            end_date_written = None\n",
+    "        relation_object = {\n",
+    "            relation_class.get_related_entity_field_namea(): source,\n",
+    "            relation_class.get_related_entity_field_nameb(): target,\n",
+    "            \"relation_type\": relation,\n",
+    "            \"start_date_written\": start_date_written,\n",
+    "            \"end_date_written\": end_date_written\n",
+    "        }\n",
+    "        created_relation, _ = relation_class.objects.get_or_create(**relation_object)\n",
+    "        created_relation.collection.add(col)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3193d0cb-c5b9-45cc-ad16-ba18843f3a4a",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Django Shell-Plus",
+   "language": "python",
+   "name": "django_extensions"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}