From 293abf729dfcd7057c0d74a03050fd4152745392 Mon Sep 17 00:00:00 2001 From: Nick Jackson Date: Tue, 30 May 2023 11:44:08 +0100 Subject: [PATCH 1/4] Add court dates to admin interface We should never actually touch these via admin, but it might be handy to see for debugging purposes so I'm including anyway. --- judgments/admin.py | 9 +++++++-- judgments/models.py | 4 ++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/judgments/admin.py b/judgments/admin.py index 4185d360e..51e277a35 100644 --- a/judgments/admin.py +++ b/judgments/admin.py @@ -1,3 +1,8 @@ -# from django.contrib import admin +from django.contrib import admin -# Register your models here. +from judgments.models import CourtDates + + +@admin.register(CourtDates) +class CourtDatesAdmin(admin.ModelAdmin): + pass diff --git a/judgments/models.py b/judgments/models.py index 0b71719e9..f219c486f 100644 --- a/judgments/models.py +++ b/judgments/models.py @@ -142,6 +142,10 @@ class Meta: class CourtDates(models.Model): + class Meta: + verbose_name = "court date range" + verbose_name_plural = "court date ranges" + param = models.CharField(max_length=64, primary_key=True) start_year = models.IntegerField(blank=False) end_year = models.IntegerField(blank=False) From 197bb23675ba0391a451bc5adfcb6ed4758ab0c4 Mon Sep 17 00:00:00 2001 From: Nick Jackson Date: Wed, 31 May 2023 12:03:58 +0100 Subject: [PATCH 2/4] Add management command to recalculate court date ranges --- judgments/management/__init__.py | 0 judgments/management/commands/__init__.py | 0 .../commands/recalculate_court_dates.py | 106 ++++++++++++++++++ 3 files changed, 106 insertions(+) create mode 100644 judgments/management/__init__.py create mode 100644 judgments/management/commands/__init__.py create mode 100644 judgments/management/commands/recalculate_court_dates.py diff --git a/judgments/management/__init__.py b/judgments/management/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/judgments/management/commands/__init__.py b/judgments/management/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/judgments/management/commands/recalculate_court_dates.py b/judgments/management/commands/recalculate_court_dates.py new file mode 100644 index 000000000..75b9bf5dd --- /dev/null +++ b/judgments/management/commands/recalculate_court_dates.py @@ -0,0 +1,106 @@ +import datetime + +from django.core.management.base import BaseCommand +from ds_caselaw_utils import courts + +from judgments.models import CourtDates, SearchResult +from judgments.utils import perform_advanced_search + + +class Command(BaseCommand): + help = "Recalculates the date ranges for known courts" + + def add_arguments(self, parser): + # Named (optional) arguments + parser.add_argument( + "--write", + action="store_true", + help="Write changes to the database", + ) + + def handle(self, *args, **options): + CourtDates.objects.all().delete() + for court in courts.get_all(): + self.stdout.write(self.style.NOTICE(f"{court.name}")) + if court.canonical_param: + search_results = perform_advanced_search( + court=court.canonical_param, + order="date", + per_page=1, + ) + + oldest_document = SearchResult.create_from_node( + search_results.results[0] + ) + + if oldest_document.date: + start_year = oldest_document.date.year + self.stdout.write( + self.style.NOTICE( + f"Oldest document: {oldest_document.uri} @ {oldest_document.date.year}" + ) + ) + else: + start_year = court.start_year + self.stdout.write( + self.style.WARNING( + f"Couldn't find date of oldest document {oldest_document.uri},\ + falling back to config value of {court.start_year}" + ) + ) + + search_results = perform_advanced_search( + court=court.canonical_param, + order="-date", + per_page=1, + ) + + newest_document = SearchResult.create_from_node( + search_results.results[0] + ) + + if newest_document.date: + end_year = newest_document.date.year + self.stdout.write( + self.style.NOTICE( + f"Newest document: {newest_document.uri} @ {newest_document.date.year}" + ) + ) + else: + end_year = court.end_year + self.stdout.write( + self.style.WARNING( + f"Couldn't find date of newest document {newest_document.uri},\ + falling back to config value of {court.end_year}" + ) + ) + + if start_year < 2000: + self.stdout.write( + self.style.WARNING( + f"Calculated start year of {start_year} seems improbable,\ + falling back to config value of {court.start_year}" + ) + ) + start_year = court.start_year + + if end_year > datetime.date.today().year: + self.stdout.write( + self.style.WARNING( + f"Calculated end year of {end_year} is impossible,\ + falling back to config value of {court.end_year}" + ) + ) + end_year = court.end_year + + if options["write"]: + CourtDates.objects.update_or_create( + param=court.canonical_param, + defaults={"start_year": start_year, "end_year": end_year}, + ) + else: + self.stdout.write(self.style.NOTICE("Skipping write…")) + else: + self.stdout.write( + self.style.ERROR(f"{court.name} has no canonical_param! Skipping.") + ) From 08b28879d75847733cf8d8600100fa7c9356a162 Mon Sep 17 00:00:00 2001 From: Nick Jackson Date: Wed, 31 May 2023 14:46:07 +0100 Subject: [PATCH 3/4] Refactor into smaller methods to improve clarity --- .../commands/recalculate_court_dates.py | 144 +++++++++--------- 1 file changed, 70 insertions(+), 74 deletions(-) diff --git a/judgments/management/commands/recalculate_court_dates.py b/judgments/management/commands/recalculate_court_dates.py index 75b9bf5dd..02769a05f 100644 --- a/judgments/management/commands/recalculate_court_dates.py +++ b/judgments/management/commands/recalculate_court_dates.py @@ -22,85 +22,81 @@ def handle(self, *args, **options): CourtDates.objects.all().delete() for court in courts.get_all(): self.stdout.write(self.style.NOTICE(f"{court.name}")) - if court.canonical_param: - search_results = perform_advanced_search( - court=court.canonical_param, - order="date", - per_page=1, - ) - oldest_document = SearchResult.create_from_node( - search_results.results[0] + if not court.canonical_param: + self.stdout.write( + self.style.ERROR(f"{court.name} has no canonical_param! Skipping.") ) + continue + + start_year = self.get_start_year(court) + end_year = self.get_end_year(court) + + if not options["write"]: + self.stdout.write(self.style.NOTICE("Skipping write…")) + continue - if oldest_document.date: - start_year = oldest_document.date.year - self.stdout.write( - self.style.NOTICE( - f"Oldest document: {oldest_document.uri} @ {oldest_document.date.year}" - ) - ) - else: - start_year = court.start_year - self.stdout.write( - self.style.WARNING( - f"Couldn't find date of oldest document {oldest_document.uri},\ - falling back to config value of {court.start_year}" - ) - ) - - search_results = perform_advanced_search( - court=court.canonical_param, - order="-date", - per_page=1, + CourtDates.objects.update_or_create( + param=court.canonical_param, + defaults={"start_year": start_year, "end_year": end_year}, + ) + + def get_start_year(self, court): + start_year = self._get_year_of_first_document_in_order( + court.canonical_param, "date", "oldest", court.start_year + ) + + if start_year < 2000: + self.stdout.write( + self.style.WARNING( + f"Calculated start year of {start_year} seems improbable, \ +falling back to config value of {court.start_year}" ) + ) + start_year = court.start_year + + return start_year - newest_document = SearchResult.create_from_node( - search_results.results[0] + def get_end_year(self, court): + end_year = self._get_year_of_first_document_in_order( + court.canonical_param, "-date", "newest", court.end_year + ) + + if end_year > datetime.date.today().year: + self.stdout.write( + self.style.WARNING( + f"Calculated end year of {end_year} is impossible, \ +falling back to config value of {court.end_year}" ) + ) + end_year = court.end_year - if newest_document.date: - end_year = newest_document.date.year - self.stdout.write( - self.style.NOTICE( - f"Newest document: {newest_document.uri} @ {newest_document.date.year}" - ) - ) - else: - end_year = court.end_year - self.stdout.write( - self.style.WARNING( - f"Couldn't find date of newest document {newest_document.uri},\ - falling back to config value of {court.end_year}" - ) - ) - - if start_year < 2000: - self.stdout.write( - self.style.WARNING( - f"Calculated start year of {start_year} seems improbable,\ - falling back to config value of {court.start_year}" - ) - ) - start_year = court.start_year - - if end_year > datetime.date.today().year: - self.stdout.write( - self.style.WARNING( - f"Calculated end year of {end_year} is impossible,\ - falling back to config value of {court.end_year}" - ) - ) - end_year = court.end_year - - if options["write"]: - CourtDates.objects.update_or_create( - param=court.canonical_param, - defaults={"start_year": start_year, "end_year": end_year}, - ) - else: - self.stdout.write(self.style.NOTICE("Skipping write…")) - else: - self.stdout.write( - self.style.ERROR(f"{court.name} has no canonical_param! Skipping.") + return end_year + + def _get_year_of_first_document_in_order( + self, canonical_param, order, document_reference, fallback + ): + search_results = perform_advanced_search( + court=canonical_param, + order=order, + per_page=1, + ) + + first_document = SearchResult.create_from_node(search_results.results[0]) + + if first_document.date: + year = first_document.date.year + self.stdout.write( + self.style.NOTICE( + f"{document_reference.capitalize()} document: {first_document.uri} @ {first_document.date.year}" + ) + ) + else: + year = fallback + self.stdout.write( + self.style.WARNING( + f"Couldn't find date of {document_reference} document {first_document.uri}, \ +falling back to config value of {fallback}" ) + ) + return year From 8d9b39e36692cccceaf06cb0416b5905dd8f40e6 Mon Sep 17 00:00:00 2001 From: Nick Jackson Date: Wed, 31 May 2023 14:49:57 +0100 Subject: [PATCH 4/4] No longer delete all CourtDates during recalculation Since we use update_or_create and key off a court's canonical_param we're safe to leave the database populated and run updates. This is probably the right behaviour, as we might want to stash additional court metadata in the database in future. --- judgments/management/commands/recalculate_court_dates.py | 1 - 1 file changed, 1 deletion(-) diff --git a/judgments/management/commands/recalculate_court_dates.py b/judgments/management/commands/recalculate_court_dates.py index 02769a05f..b949e8008 100644 --- a/judgments/management/commands/recalculate_court_dates.py +++ b/judgments/management/commands/recalculate_court_dates.py @@ -19,7 +19,6 @@ def add_arguments(self, parser): ) def handle(self, *args, **options): - CourtDates.objects.all().delete() for court in courts.get_all(): self.stdout.write(self.style.NOTICE(f"{court.name}"))