diff --git a/CHANGELOG.md b/CHANGELOG.md
index c856b43c..080e928d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ The format is based on [Keep a Changelog 1.0.0].
 - Add a `validates_against_schema` property
 - Add a `can_enrich` property
 - Only enrich if not recently enriched and valid against current schema
+- Add function to check if the docx exists for a judgment
 
 ## [Release 22.0.2]
 
diff --git a/src/caselawclient/models/utilities/aws.py b/src/caselawclient/models/utilities/aws.py
index 5f7a06ad..aee339e3 100644
--- a/src/caselawclient/models/utilities/aws.py
+++ b/src/caselawclient/models/utilities/aws.py
@@ -78,6 +78,25 @@ def generate_signed_asset_url(key: str) -> str:
     )
 
 
+def check_docx_exists(uri: str) -> bool:
+    """Does the docx for a document URI actually exist?
+
+    Issues a HEAD request for the docx's S3 key in the private asset bucket.
+    Returns True if the request succeeds and False if S3 reports a 404;
+    any other `ClientError` is re-raised.
+    """
+    bucket = env("PRIVATE_ASSET_BUCKET", None)
+    s3_key = generate_docx_key(uri)
+    client = create_s3_client()
+    try:
+        client.head_object(Bucket=bucket, Key=s3_key)
+    except botocore.exceptions.ClientError as e:
+        if e.response["Error"]["Code"] == "404":
+            return False
+        raise
+    return True
+
+
 def generate_docx_key(uri: str) -> str:
     """from a canonical caselaw URI (eat/2022/1) return the S3 key of the associated docx"""
     return f'{uri}/{uri.replace("/", "_")}.docx'
diff --git a/tests/models/utilities/test_utilities.py b/tests/models/utilities/test_utilities.py
index 49aaff86..da41bdd1 100644
--- a/tests/models/utilities/test_utilities.py
+++ b/tests/models/utilities/test_utilities.py
@@ -1,11 +1,18 @@
+import io
 import os
 from unittest.mock import ANY, MagicMock, Mock, patch
 
+import boto3
 import ds_caselaw_utils
 import pytest
+from moto import mock_aws
 
 from caselawclient.models.utilities import extract_version, move, render_versions
-from caselawclient.models.utilities.aws import build_new_key, copy_assets
+from caselawclient.models.utilities.aws import (
+    build_new_key,
+    check_docx_exists,
+    copy_assets,
+)
 
 from ...factories import JudgmentFactory
 
@@ -136,3 +143,18 @@ def test_move_judgment_success(
     fake_copy.assert_called_with("old/uri", "new/uri")
     fake_api_client.set_judgment_this_uri.assert_called_with("new/uri")
     fake_api_client.delete_judgment.assert_called_with("old/uri")
+
+
+class TestCheckDocx:
+    @patch.dict(os.environ, {"PRIVATE_ASSET_BUCKET": "bucket"})
+    @mock_aws
+    def test_check_docx(self):
+        """Make a fake docx, then check if it exists, and for one that doesn't"""
+        uri = "ewhc/2023/1"
+        docx = "ewhc/2023/1/ewhc_2023_1.docx"
+        s3 = boto3.resource("s3", region_name="us-east-1")
+        bucket = s3.create_bucket(Bucket="bucket")
+        fobj = io.BytesIO(b"placeholder docx")
+        bucket.upload_fileobj(Key=docx, Fileobj=fobj)
+        assert check_docx_exists(uri)
+        assert not check_docx_exists("not/the/url")