From 95d21f84f6cb9816fb9fb0baba3efca4c673c465 Mon Sep 17 00:00:00 2001 From: Birger Schacht Date: Wed, 13 Dec 2023 10:25:12 +0100 Subject: [PATCH] feat: add professioncategory and refactor professionimport to split --- apis_ontology/admin.py | 4 +- apis_ontology/management/commands/import.py | 92 ++++++++++++------- ...egory_alter_profession_options_and_more.py | 44 +++++++++ apis_ontology/models.py | 17 +++- 4 files changed, 120 insertions(+), 37 deletions(-) create mode 100644 apis_ontology/migrations/0012_professioncategory_alter_profession_options_and_more.py diff --git a/apis_ontology/admin.py b/apis_ontology/admin.py index 0d18f78..6578e96 100644 --- a/apis_ontology/admin.py +++ b/apis_ontology/admin.py @@ -1,9 +1,9 @@ from django.contrib import admin from reversion.admin import VersionAdmin -from .models import Title, Event, Institution, Person, Place, Work, Source, Text, Profession +from .models import Title, Event, Institution, Person, Place, Work, Source, Text, Profession, ProfessionCategory -@admin.register(Title, Event, Institution, Person, Place, Work, Source, Text, Profession) +@admin.register(Title, Event, Institution, Person, Place, Work, Source, Text, Profession, ProfessionCategory) class OeblAdmin(VersionAdmin): pass diff --git a/apis_ontology/management/commands/import.py b/apis_ontology/management/commands/import.py index 1faa3fe..dcb7101 100644 --- a/apis_ontology/management/commands/import.py +++ b/apis_ontology/management/commands/import.py @@ -1,9 +1,10 @@ +import re import requests import os from django.core.management.base import BaseCommand -from apis_ontology.models import Event, Institution, Person, Place, Work, Title, Profession, Source, Text +from apis_ontology.models import Event, Institution, Person, Place, Work, Title, Profession, Source, Text, ProfessionCategory from apis_core.apis_metainfo.models import Uri, RootObject from apis_core.apis_relations.models import Property, TempTriple @@ -148,36 +149,38 @@ def import_professions(): data = page.json() nextpage = data['next'] for result in data["results"]: - print(result["url"]) - newprofession, created = Profession.objects.get_or_create(id=result["id"]) + tokens = re.split(r" und |,", result["name"]) + for pos, token in enumerate(tokens): + if token.startswith("-"): + token = tokens[pos-1] + token + profession, created = Profession.objects.get_or_create(name=token.strip()) + if profession.oldids: + existing = set(profession.oldids.splitlines()) + existing.add(str(result["id"])) + profession.oldids = '\n'.join(list(existing)) + else: + profession.oldids = result["id"] + if profession.oldnames: + existing = set(profession.oldnames.splitlines()) + existing.add(str(result["name"])) + profession.oldnames = '\n'.join(list(existing)) + else: + profession.oldnames = result["name"] + for attribute in result: + if hasattr(profession, attribute) and attribute not in ["name", "id"]: + setattr(profession, attribute, result[attribute]) + profession.save() if result["parent_class"]: - newprofession.parent = Profession.objects.get(pk=result["parent_class"]["id"]) - for attribute in result: - if hasattr(newprofession, attribute): - setattr(newprofession, attribute, result[attribute]) - newprofession.save() + professioncat, created = ProfessionCategory.objects.get_or_create(id=result["parent_class"]["id"]) + professioncat.name = result["parent_class"]["label"] + professioncat.save() -def import_entities(): - entities = { - "event": { - "dst": Event - }, - "institution": { - "dst": Institution, - }, - "person": { - "dst": Person, - }, - "place": { - "dst": Place, - }, - "work": { - "dst": Work, - } - } +def import_entities(entities=[]): + entities = entities or [Event, Institution, Person, Place, Work] - for entity, entitysettings in entities.items(): + for entitymodel in entities: + entity = entitymodel.__name__.lower() nextpage = f"{SRC}/entities/{entity}/?format=json&limit=500" while nextpage: print(nextpage) @@ -190,12 +193,15 @@ def import_entities(): if "kind" in result and result["kind"] is not None: result["kind"] = result["kind"]["label"] professionlist = [] + professioncategory = None if "profession" in result: for profession in result["profession"]: - try: - professionlist.append(Profession.objects.get(pk=profession["id"])) - except Profession.DoesNotExist: - pass + if int(profession["id"]) in list(ProfessionCategory.objects.all().values_list('id', flat=True)): + professioncategory = ProfessionCategory.objects.get(id=profession["id"]) + else: + for dbprofession in Profession.objects.all(): + if profession["id"] in list(map(int, dbprofession.oldids.splitlines())): + professionlist.append(dbprofession) del result["profession"] titlelist = [] if "title" in result: @@ -203,7 +209,7 @@ def import_entities(): newtitle, created = Title.objects.get_or_create(name=title) titlelist.append(newtitle) del result["title"] - newentity, created = entitysettings["dst"].objects.get_or_create(pk=result_id) + newentity, created = entitymodel.objects.get_or_create(pk=result_id) for attribute in result: if hasattr(newentity, attribute): setattr(newentity, attribute, result[attribute]) @@ -211,6 +217,8 @@ def import_entities(): newentity.title.add(title) for profession in professionlist: newentity.profession.add(profession) + if professioncategory: + newentity.professioncategory = professioncategory newentity.save() if result["source"] is not None: if "id" in result["source"]: @@ -306,6 +314,11 @@ def add_arguments(self, parser): parser.add_argument("--relations", action="store_true") parser.add_argument("--sources", action="store_true") parser.add_argument("--texts", action="store_true") + parser.add_argument("--event", action="store_true") + parser.add_argument("--institution", action="store_true") + parser.add_argument("--person", action="store_true") + parser.add_argument("--place", action="store_true") + parser.add_argument("--work", action="store_true") def handle(self, *args, **options): if options["all"]: @@ -328,8 +341,21 @@ def handle(self, *args, **options): if options["uris"]: import_uris() + entities = [] + + if options["event"]: + entities.append(Event) + if options["institution"]: + entities.append(Institution) + if options["person"]: + entities.append(Person) + if options["place"]: + entities.append(Place) + if options["work"]: + entities.append(Work) + if options["entities"]: - import_entities() + import_entities(entities) if options["relations"]: import_relations() diff --git a/apis_ontology/migrations/0012_professioncategory_alter_profession_options_and_more.py b/apis_ontology/migrations/0012_professioncategory_alter_profession_options_and_more.py new file mode 100644 index 0000000..6f0cd79 --- /dev/null +++ b/apis_ontology/migrations/0012_professioncategory_alter_profession_options_and_more.py @@ -0,0 +1,44 @@ +# Generated by Django 4.2.8 on 2023-12-18 11:54 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('apis_ontology', '0011_unaccent_20231214_0830'), + ] + + operations = [ + migrations.CreateModel( + name='ProfessionCategory', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=255)), + ], + ), + migrations.AlterModelOptions( + name='profession', + options={'ordering': ('name',)}, + ), + migrations.RemoveField( + model_name='profession', + name='parent', + ), + migrations.AddField( + model_name='profession', + name='oldids', + field=models.TextField(null=True), + ), + migrations.AddField( + model_name='profession', + name='oldnames', + field=models.TextField(null=True), + ), + migrations.AddField( + model_name='person', + name='professioncategory', + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='apis_ontology.professioncategory'), + ), + ] diff --git a/apis_ontology/models.py b/apis_ontology/models.py index b257526..54c0ce9 100644 --- a/apis_ontology/models.py +++ b/apis_ontology/models.py @@ -48,13 +48,25 @@ def __str__(self): return self.name +@reversion.register +class ProfessionCategory(models.Model): + name = models.CharField(max_length=255, blank=False) + + def __str__(self): + return self.name + + @reversion.register class Profession(models.Model): + class Meta: + ordering = ("name",) + name = models.CharField(max_length=255, blank=True) - parent = models.ForeignKey('self', blank=True, null=True, on_delete=models.CASCADE) + oldids = models.TextField(null=True) + oldnames = models.TextField(null=True) def __str__(self): - return self.name + return self.name or f"No name ({self.id})" @reversion.register(follow=["rootobject_ptr"]) @@ -76,6 +88,7 @@ class Person(LegacyStuffMixin, LegacyDateMixin, AbstractEntity): ) first_name = models.CharField(max_length=255, help_text="The personsĀ“s forename. In case of more then one name...", blank=True, null=True) profession = models.ManyToManyField(Profession, blank=True) + professioncategory = models.ForeignKey(ProfessionCategory, on_delete=models.CASCADE, null=True) title = models.ManyToManyField(Title, blank=True) gender = models.CharField(max_length=15, choices=GENDER_CHOICES, blank=True, null=True)