Revamp AWS file support to add support for multipart uploads in glacier testing #257

Open · wants to merge 16 commits into base: master
53 changes: 53 additions & 0 deletions CHANGELOG.rst
@@ -6,6 +6,59 @@ dcicutils
Change Log
----------

7.5.0
=====

* In new module ``bucket_utils.py``:

  * ``parse_s3_object_name``

* In ``common.py``:

  * New glacier-related constants:

    * ``STANDARD``
    * ``REDUCED_REDUNDANCY``
    * ``STANDARD_IA``
    * ``ONEZONE_IA``
    * ``INTELLIGENT_TIERING``
    * ``GLACIER``
    * ``DEEP_ARCHIVE``
    * ``OUTPOSTS``
    * ``GLACIER_IR``

  * New type hint ``S3ObjectNameSpec``

* In ``glacier_utils.py``:

  * Allow a ``version_id=`` argument to ``GlacierUtils.is_restore_finished``.

  * Some improved error messages.

  * Some small code refactors.

* In ``misc_utils.py``:

  * Make ``make_counter`` thread-safe so that threaded code can call it safely.

* In ``qa_utils.py``:

  * Support for mock glacier testing in ``MockBotoS3Client`` for these methods
    (the multipart copy sequence they emulate is sketched just after this diff):

    * ``create_multipart_upload``
    * ``upload_part_copy``
    * ``complete_multipart_upload``

  * Revamp the abstractions for managing ``MockFileSystem`` so that changes needed
    to handle new file content types can be made centrally, such as:

    * ``MockAbstractContent``

    * ``MockBigContent`` for mocking large files quickly and space-efficiently.

    * ``MockPartableBytes`` for mocking small content whose tests still need to
      exercise piecewise copying in support of the multipart upload protocol.


7.4.1
=====
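The three mock methods listed for ``qa_utils.py`` mirror boto3's real multipart-upload API, which is how S3 objects get copied piecewise (for example, when moving a large object between storage classes in glacier testing). A minimal sketch of the call sequence the mocks emulate; bucket and key names are illustrative, and in tests a `MockBotoS3Client` would stand in for the real client:

```python
import boto3

s3 = boto3.client('s3')  # in tests, a MockBotoS3Client stands in here

# Start a multipart upload at the destination.
mpu = s3.create_multipart_upload(Bucket='dst-bucket', Key='dst-key',
                                 StorageClass='STANDARD')

# Copy the source object over in one or more parts (a single part here for brevity).
part = s3.upload_part_copy(Bucket='dst-bucket', Key='dst-key',
                           UploadId=mpu['UploadId'], PartNumber=1,
                           CopySource={'Bucket': 'src-bucket', 'Key': 'src-key'})

# Stitch the uploaded parts together into the final object.
s3.complete_multipart_upload(
    Bucket='dst-bucket', Key='dst-key', UploadId=mpu['UploadId'],
    MultipartUpload={'Parts': [
        {'PartNumber': 1, 'ETag': part['CopyPartResult']['ETag']},
    ]})
```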
32 changes: 32 additions & 0 deletions dcicutils/bucket_utils.py
@@ -0,0 +1,32 @@
import re

from dcicutils.common import S3ObjectNameDict
from typing import Optional


# NOTE: This could be done with urllib's parsing tech, but it accepts a variety of things we don't want,
# so the error-checking would be more complicated. The documentation says particular string formats
# are accepted, so that's what we're using for now. -kmp 16-May-2023
LOCATION_STRING_PATTERN = re.compile("^([^/?]+)/([^?]+)(?:[?]versionId=([^&]*))?$")


def parse_s3_object_name(object_name, ignore_errors=False) -> Optional[S3ObjectNameDict]:
    """
    Parses a string of the form bucket/key or bucket/key?versionId=version, returning a dictionary
    of the form {"Bucket": bucket, "Key": key} or {"Bucket": bucket, "Key": key, "VersionId": version_id}.

    :param object_name: a string specifying a bucket, key, and optionally a version
    :param ignore_errors: if True, return None for a malformed object name instead of raising an error
    :return: a dictionary, or None if the object name is malformed and ignore_errors is True
    """
    location_data = LOCATION_STRING_PATTERN.match(object_name)
    if not location_data:
        if ignore_errors:
            return None
        else:
            raise ValueError(f"Not a valid S3 object name: {object_name!r}."
                             f" Format must be bucket/key or bucket/key?versionId=version.")
    bucket, key, version_id = location_data.groups()
    result: S3ObjectNameDict = {'Bucket': bucket, 'Key': key}
    if version_id:
        result['VersionId'] = version_id
    return result
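For reference, a brief usage sketch of the new parser (values are illustrative; the behavior follows directly from the docstring and regex above):

```python
from dcicutils.bucket_utils import parse_s3_object_name

parse_s3_object_name("my-bucket/path/to/file.txt")
# => {'Bucket': 'my-bucket', 'Key': 'path/to/file.txt'}

parse_s3_object_name("my-bucket/path/to/file.txt?versionId=SAMPLEVERSIONID")
# => {'Bucket': 'my-bucket', 'Key': 'path/to/file.txt', 'VersionId': 'SAMPLEVERSIONID'}

parse_s3_object_name("no-slash-here", ignore_errors=True)
# => None (a ValueError would be raised without ignore_errors=True)
```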
68 changes: 67 additions & 1 deletion dcicutils/common.py
@@ -1,13 +1,23 @@
import os

from typing import Dict, Union, Tuple, List, Any
from typing import (
    Any, Dict, List, Optional, Tuple, Union,
    # Notes on use of TypedDict at https://peps.python.org/pep-0589/ (and Final at https://peps.python.org/pep-0591/)
    # TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
    # Final, TypedDict,
)
from typing_extensions import Literal


# ===== Useful constants =====

REGION = 'us-east-1'

# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
#
# APP_CGAP: Final = 'cgap'
# APP_FOURFRONT: Final = 'fourfront'

APP_CGAP = 'cgap'
APP_FOURFRONT = 'fourfront'

@@ -18,6 +28,11 @@

ORCHESTRATED_APPS = [APP_CGAP, APP_FOURFRONT]

# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
#
# CHALICE_STAGE_DEV: Final = 'dev'
# CHALICE_STAGE_PROD: Final = 'prod'

CHALICE_STAGE_DEV = 'dev'
CHALICE_STAGE_PROD = 'prod'

@@ -30,7 +45,14 @@
# Nicknames for enumerated sets of symbols. Note that these values must be syntactic literals,
# so they can't use the variables defined above.

# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
# ChaliceStage = Literal[CHALICE_STAGE_DEV, CHALICE_STAGE_PROD]

ChaliceStage = Literal['dev', 'prod']

# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
# OrchestratedApp = Literal[APP_CGAP, APP_FOURFRONT]

OrchestratedApp = Literal['cgap', 'fourfront']

LIBRARY_DIR = os.path.dirname(__file__)
@@ -39,8 +61,24 @@

AuthStr = str


# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
# class SimpleAuthDict(TypedDict):
#     key: str
#     secret: str

SimpleAuthDict = Dict[Literal['key', 'secret'], str]


# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
# class ServerAuthDict(TypedDict):
#     key: str
#     secret: str
#     server: str

ServerAuthDict = Dict[Literal['key', 'secret', 'server'], str]


AuthDict = Union[SimpleAuthDict, ServerAuthDict]

LegacyAuthDict = Dict[Literal['default'], AuthDict]
@@ -55,6 +93,12 @@

AnyJsonData = Union[Dict[str, 'AnyJsonData'], List['AnyJsonData'], str, bool, int, float, None]


# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
# class KeyValueDict(TypedDict):
#     Key: str
#     Value: Any

KeyValueDict = Dict[Literal['Key', 'Value'], Any]
KeyValueDictList = List[KeyValueDict]

@@ -81,6 +125,18 @@
# plus the intelligent tiering. Most of the others have a latency issue or are otherwise
# fragile. In practice, we just want to not overly warn about normal kinds of storage.

# Commonly used storage classes
STANDARD = 'STANDARD'
REDUCED_REDUNDANCY = 'REDUCED_REDUNDANCY'
STANDARD_IA = 'STANDARD_IA'
ONEZONE_IA = 'ONEZONE_IA'
INTELLIGENT_TIERING = 'INTELLIGENT_TIERING'
GLACIER = 'GLACIER'
DEEP_ARCHIVE = 'DEEP_ARCHIVE'
OUTPOSTS = 'OUTPOSTS'
GLACIER_IR = 'GLACIER_IR'


ALL_S3_STORAGE_CLASSES = [
    'STANDARD', 'REDUCED_REDUNDANCY', 'STANDARD_IA', 'ONEZONE_IA', 'INTELLIGENT_TIERING',
    'GLACIER', 'DEEP_ARCHIVE', 'OUTPOSTS', 'GLACIER_IR',
@@ -117,6 +173,16 @@
]


# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
# class S3ObjectNameDict(TypedDict):
#     Bucket: str
#     Key: str
#     VersionId: Optional[str]

S3ObjectNameDict = Dict[Literal['Bucket', 'Key', 'VersionId'], Optional[str]]
S3ObjectNameSpec = Union[str, S3ObjectNameDict]


# This constant is used in our Lifecycle management system to automatically transition objects
ENCODED_LIFECYCLE_TAG_KEY = 'Lifecycle'

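As a hypothetical illustration (this helper is not part of the PR), `S3ObjectNameSpec` is the shape an API would accept in order to take either the string or dictionary form and normalize it with the new parser:

```python
from dcicutils.bucket_utils import parse_s3_object_name
from dcicutils.common import S3ObjectNameDict, S3ObjectNameSpec


def normalize_s3_object_name(spec: S3ObjectNameSpec) -> S3ObjectNameDict:
    # Strings are parsed as "bucket/key[?versionId=...]"; dictionaries are
    # assumed to already be in {'Bucket': ..., 'Key': ...} form.
    if isinstance(spec, str):
        return parse_s3_object_name(spec)
    return spec
```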
5 changes: 3 additions & 2 deletions dcicutils/ff_mocks.py
@@ -135,7 +135,8 @@ def mocked_s3utils(environments=None, require_sse=False, other_access_key_names=

    def write_config(config_name, record):
        record_string = json.dumps(record)
-       s3_client.s3_files.files[f"{LEGACY_GLOBAL_ENV_BUCKET}/{config_name}"] = bytes(record_string.encode('utf-8'))
+       s3_client.s3_files.set_file_content_for_testing(f"{LEGACY_GLOBAL_ENV_BUCKET}/{config_name}",
+                                                       record_string.encode('utf-8'))

ecosystem_file = "main.ecosystem"
for environment in environments:
@@ -200,7 +201,7 @@ def mocked_s3utils_with_sse(beanstalks=None, environments=None, require_sse=True
    s3 = mock_boto3.client('s3')
    assert isinstance(s3, MockBotoS3Client)
    for filename, string in (files or {}).items():
-       s3.s3_files.files[filename] = string.encode('utf-8')
+       s3.s3_files.set_file_content_for_testing(filename, string.encode('utf-8'))
    yield mock_boto3


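Both hunks above route mock-file writes through the new `set_file_content_for_testing` entry point instead of assigning into the mock file system's `files` dict. A minimal sketch of that pattern in a test, using the same setup as `mocked_s3utils` (bucket name and content are illustrative):

```python
from dcicutils.qa_utils import MockBoto3, MockBotoS3Client

mock_boto3 = MockBoto3()
s3 = mock_boto3.client('s3')
assert isinstance(s3, MockBotoS3Client)

# Seed mock S3 state through the centralized entry point rather than
# poking the mock file system's internal dict directly.
s3.s3_files.set_file_content_for_testing("my-bucket/config.json",
                                         b'{"setting": "value"}')
```

Centralizing writes this way is what lets the new content abstractions (`MockAbstractContent`, `MockBigContent`, `MockPartableBytes`) plug in without every caller knowing how content is represented.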