From 38d4cadf6504e3d0c35a1dfee44b424d5a189046 Mon Sep 17 00:00:00 2001 From: David McKee Date: Wed, 20 Dec 2023 13:23:55 +0000 Subject: [PATCH] Generate regex for courts/subcourts --- src/ds_caselaw_utils/generate_regex.py | 32 +++++++++++++++----------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/ds_caselaw_utils/generate_regex.py b/src/ds_caselaw_utils/generate_regex.py index 33b0962..da9b3c6 100644 --- a/src/ds_caselaw_utils/generate_regex.py +++ b/src/ds_caselaw_utils/generate_regex.py @@ -63,16 +63,22 @@ def url_order_numbers(self) -> list[int]: return [2, 1, 3] -courtlist = courts.get_all() -url_patterns = sorted(list(set(court.ncn for court in courtlist if court.ncn))) - -court_strings = set() -subcourt_strings = set() -for url_pattern in url_patterns: - pattern = ParsedURLPattern(url_pattern) - print(pattern.regex, pattern.url_order_numbers, pattern.url_order) - court_strings.add(pattern.court) - if pattern.subcourt: - subcourt_strings.add(pattern.subcourt) - -print(court_strings, subcourt_strings) +def all_patterns() -> list[ParsedURLPattern]: + url_patterns = [court.ncn for court in courts.get_all() if court.ncn] + for url_pattern in url_patterns: + pattern = ParsedURLPattern(url_pattern) + print(pattern.regex, pattern.url_order_numbers, pattern.url_order) + yield pattern + + +def all_court_regex(): + return "|".join({pattern.court for pattern in all_patterns()}) + + +def all_subcourt_regex(): + return "|".join( + {pattern.subcourt for pattern in all_patterns() if pattern.subcourt} + ) + + +print(all_court_regex(), all_subcourt_regex())