From 8661fee51f8913993dc54224474a4a0d9a8c2f4b Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Tue, 3 Sep 2024 21:37:55 -0500 Subject: [PATCH] feat(vt): extract neutral citations from text Implements #1150 - implements extract_from_text to collect neutral citations - updates test_ScraperExtractFromText - updates scraper files, makes `vtsuperct_*` not inherit extract_from_text from `vt` --- .../opinions/united_states/state/vt.py | 12 ++++++++ .../united_states/state/vtsuperct_civil.py | 13 +++++---- .../state/vtsuperct_environmental.py | 8 ++---- .../united_states/state/vtsuperct_family.py | 8 ++---- .../united_states/state/vtsuperct_probate.py | 8 ++---- .../local/test_ScraperExtractFromTextTest.py | 28 +++++++++++++++++++ 6 files changed, 54 insertions(+), 23 deletions(-) diff --git a/juriscraper/opinions/united_states/state/vt.py b/juriscraper/opinions/united_states/state/vt.py index 3d26e93b0..30e46aaf9 100644 --- a/juriscraper/opinions/united_states/state/vt.py +++ b/juriscraper/opinions/united_states/state/vt.py @@ -11,6 +11,7 @@ She's very responsive. """ +import re from datetime import date, datetime from typing import Optional, Tuple from urllib.parse import urlencode @@ -112,3 +113,14 @@ def set_url( params["facet_to_date"] = end.strftime("%m/%d/%Y") self.url = f"{self.base_url}?{urlencode(params)}" + + def extract_from_text(self, scraped_text: str): + match = re.search( + r"(?P<volume>\d{4}) VT (?P<page>\d+)", scraped_text[:1000] + ) + if match: + return { + "Citation": {"reporter": "VT", "type": 8, **match.groupdict()} + } + + return {} diff --git a/juriscraper/opinions/united_states/state/vtsuperct_civil.py b/juriscraper/opinions/united_states/state/vtsuperct_civil.py index 0a57e542e..c53ee29dc 100644 --- a/juriscraper/opinions/united_states/state/vtsuperct_civil.py +++ b/juriscraper/opinions/united_states/state/vtsuperct_civil.py @@ -4,13 +4,16 @@ Court Contact: submit form here https://www.vermontjudiciary.org/website-feedback-form """ -from . 
import vt +from juriscraper.opinions.united_states.state import vt +from juriscraper.OpinionSite import OpinionSite class Site(vt.Site): division = 1 days_interval = 100 - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.court_id = self.__module__ + # Deactivate extract_from_text from parent class + # and avoid triggering the example requirement from + # tests.local.test_ScraperExtractFromTextTest + # Other vtsuperct_* scrapers will inherit from this one + # to inherit the same behaviour + extract_from_text = OpinionSite.extract_from_text diff --git a/juriscraper/opinions/united_states/state/vtsuperct_environmental.py b/juriscraper/opinions/united_states/state/vtsuperct_environmental.py index 06288fee4..eb85e501a 100644 --- a/juriscraper/opinions/united_states/state/vtsuperct_environmental.py +++ b/juriscraper/opinions/united_states/state/vtsuperct_environmental.py @@ -4,13 +4,9 @@ Court Contact: submit form here https://www.vermontjudiciary.org/website-feedback-form """ -from . import vt +from juriscraper.opinions.united_states.state import vtsuperct_civil -class Site(vt.Site): +class Site(vtsuperct_civil.Site): division = 3 days_interval = 90 - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.court_id = self.__module__ diff --git a/juriscraper/opinions/united_states/state/vtsuperct_family.py b/juriscraper/opinions/united_states/state/vtsuperct_family.py index cfb1685b8..79b00ff96 100644 --- a/juriscraper/opinions/united_states/state/vtsuperct_family.py +++ b/juriscraper/opinions/united_states/state/vtsuperct_family.py @@ -4,13 +4,9 @@ Court Contact: submit form here https://www.vermontjudiciary.org/website-feedback-form """ -from . 
import vt +from juriscraper.opinions.united_states.state import vtsuperct_civil -class Site(vt.Site): +class Site(vtsuperct_civil.Site): division = 4 days_interval = 360 - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.court_id = self.__module__ diff --git a/juriscraper/opinions/united_states/state/vtsuperct_probate.py b/juriscraper/opinions/united_states/state/vtsuperct_probate.py index 31457b7c0..188721d39 100644 --- a/juriscraper/opinions/united_states/state/vtsuperct_probate.py +++ b/juriscraper/opinions/united_states/state/vtsuperct_probate.py @@ -4,13 +4,9 @@ Court Contact: submit form here https://www.vermontjudiciary.org/website-feedback-form """ -from . import vt +from juriscraper.opinions.united_states.state import vtsuperct_civil -class Site(vt.Site): +class Site(vtsuperct_civil.Site): division = 6 days_interval = 200 - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.court_id = self.__module__ diff --git a/tests/local/test_ScraperExtractFromTextTest.py b/tests/local/test_ScraperExtractFromTextTest.py index 54d5f79f0..8abc53fcc 100644 --- a/tests/local/test_ScraperExtractFromTextTest.py +++ b/tests/local/test_ScraperExtractFromTextTest.py @@ -566,6 +566,34 @@ class ScraperExtractFromText(unittest.TestCase): }, ), ], + "juriscraper.opinions.united_states.state.vt": [ + ( + # https://www.courtlistener.com/api/rest/v3/opinions/10566596/ + """NOTICE: This opinion is subject to motions for reargument under V.R.A.P. 40 as well as formal\nrevision before publication in the Vermont Reports. Readers are requested to notify the Reporter\nof Decisions by email at: JUD.Reporter@vtcourts.gov or by mail at: Vermont Supreme Court, 109\nState Street, Montpelier, Vermont 05609-0801, of any errors in order that corrections may be made\nbefore this opinion goes to press.\n\n\n 2024 VT 52\n\n No. 
23-AP-226\n\nState of Vermont """, + { + "Citation": { + "volume": "2024", + "reporter": "VT", + "page": "52", + "type": 8, + } + }, + ) + ], + "juriscraper.opinions.united_states.state.vt_criminal": [ + ( + # https://www.courtlistener.com/api/rest/v3/clusters/7854285/ + """NOTICE: This opinion is subject to motions for reargument under V.R.A.P. 40 as well as formal\nrevision before publication in the Vermont Reports. Readers are requested to notify the Reporter\nof Decisions by email at: JUD.Reporter@vermont.gov or by mail at: Vermont Supreme Court, 109\nState Street, Montpelier, Vermont 05609-0801, of any errors in order that corrections may be made\nbefore this opinion goes to press.\n\n\n 2022 VT 35\n\n No. 2021-059\n\nState of Vermont Supreme Court\n\n On Appeal from\n v. Superior Court, Chittenden Unit,\n Criminal Division\n\nRandy F. Therrien """, + { + "Citation": { + "volume": "2022", + "reporter": "VT", + "page": "35", + "type": 8, + } + }, + ) + ], } def test_extract_from_text(self):