Revamp AWS file support to add support for multipart uploads in glacier testing #257

Open · wants to merge 16 commits into base: master
53 changes: 53 additions & 0 deletions CHANGELOG.rst
@@ -6,6 +6,59 @@ dcicutils
Change Log
----------

7.5.0
=====

* In new module ``bucket_utils.py``:

  * ``parse_s3_object_name``

* In ``common.py``:

  * New glacier-related constants:

    * ``STANDARD``
    * ``REDUCED_REDUNDANCY``
    * ``STANDARD_IA``
    * ``ONEZONE_IA``
    * ``INTELLIGENT_TIERING``
    * ``GLACIER``
    * ``DEEP_ARCHIVE``
    * ``OUTPOSTS``
    * ``GLACIER_IR``

  * New type hint ``S3ObjectNameSpec``

* In ``glacier_utils.py``:

  * Allow a ``version_id=`` argument to ``GlacierUtils.is_restore_finished``.

  * Some improved error messages.

  * Some small code refactors.

* In ``misc_utils.py``:

  * Make ``make_counter`` thread-safe so that threaded code can call it safely.

* In ``qa_utils.py``:

  * Support for mock glacier testing in ``MockBotoS3Client`` for these methods
    (the multipart copy sequence they emulate is sketched just after this diff):

    * ``create_multipart_upload``
    * ``upload_part_copy``
    * ``complete_multipart_upload``

  * Revamp the abstractions for managing ``MockFileSystem`` so that changes needed
    to handle new file content types can be made centrally, such as:

    * ``MockAbstractContent``

    * ``MockBigContent`` for mocking large files quickly and space-efficiently.

    * ``MockPartableBytes`` for mocking small content whose tests still need to
      exercise piecewise copying in support of the multipart upload protocol.


7.4.1
=====
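The three mock methods listed for ``qa_utils.py`` mirror boto3's real multipart-upload API, which is how S3 objects get copied piecewise (for example, when moving a large object between storage classes in glacier testing). A minimal sketch of the call sequence the mocks emulate; bucket and key names are illustrative, and in tests a `MockBotoS3Client` would stand in for the real client:

```python
import boto3

s3 = boto3.client('s3')  # in tests, a MockBotoS3Client stands in here

# Start a multipart upload at the destination.
mpu = s3.create_multipart_upload(Bucket='dst-bucket', Key='dst-key',
                                 StorageClass='STANDARD')

# Copy the source object over in one or more parts (a single part here for brevity).
part = s3.upload_part_copy(Bucket='dst-bucket', Key='dst-key',
                           UploadId=mpu['UploadId'], PartNumber=1,
                           CopySource={'Bucket': 'src-bucket', 'Key': 'src-key'})

# Stitch the uploaded parts together into the final object.
s3.complete_multipart_upload(
    Bucket='dst-bucket', Key='dst-key', UploadId=mpu['UploadId'],
    MultipartUpload={'Parts': [
        {'PartNumber': 1, 'ETag': part['CopyPartResult']['ETag']},
    ]})
```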
32 changes: 32 additions & 0 deletions dcicutils/bucket_utils.py
@@ -0,0 +1,32 @@
import re

from dcicutils.common import S3ObjectNameDict
from typing import Optional


# NOTE: This could be done with urllib's parsing tech, but it accepts a variety of things we don't want,
# so the error-checking would be more complicated. The documentation says particular string formats
# are accepted, so that's what we're using for now. -kmp 16-May-2023
LOCATION_STRING_PATTERN = re.compile("^([^/?]+)/([^?]+)(?:[?]versionId=([^&]*))?$")


def parse_s3_object_name(object_name, ignore_errors=False) -> Optional[S3ObjectNameDict]:
    """
    Parses a string of the form bucket/key or bucket/key?versionId=version, returning a dictionary
    of the form {"Bucket": bucket, "Key": key} or {"Bucket": bucket, "Key": key, "VersionId": version_id}.

    :param object_name: a string specifying a bucket, key, and optionally a version
    :param ignore_errors: if True, return None for a malformed object name instead of raising an error
    :return: a dictionary, or None if the object name is malformed and ignore_errors is True
    """
    location_data = LOCATION_STRING_PATTERN.match(object_name)
    if not location_data:
        if ignore_errors:
            return None
        else:
            raise ValueError(f"Not a valid S3 object name: {object_name!r}."
                             f" Format must be bucket/key or bucket/key?versionId=version.")
    bucket, key, version_id = location_data.groups()
    result: S3ObjectNameDict = {'Bucket': bucket, 'Key': key}
    if version_id:
        result['VersionId'] = version_id
    return result
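For reference, a brief usage sketch of the new parser (values are illustrative; the behavior follows directly from the docstring and regex above):

```python
from dcicutils.bucket_utils import parse_s3_object_name

parse_s3_object_name("my-bucket/path/to/file.txt")
# => {'Bucket': 'my-bucket', 'Key': 'path/to/file.txt'}

parse_s3_object_name("my-bucket/path/to/file.txt?versionId=SAMPLEVERSIONID")
# => {'Bucket': 'my-bucket', 'Key': 'path/to/file.txt', 'VersionId': 'SAMPLEVERSIONID'}

parse_s3_object_name("no-slash-here", ignore_errors=True)
# => None (a ValueError would be raised without ignore_errors=True)
```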
68 changes: 67 additions & 1 deletion dcicutils/common.py
@@ -1,13 +1,23 @@
import os

from typing import Dict, Union, Tuple, List, Any
from typing import (
    Any, Dict, List, Optional, Tuple, Union,
    # Notes on use of TypedDict at https://peps.python.org/pep-0589/ (and Final at https://peps.python.org/pep-0591/)
    # TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
    # Final, TypedDict,
)
from typing_extensions import Literal


# ===== Useful constants =====

REGION = 'us-east-1'

# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
#
# APP_CGAP: Final = 'cgap'
# APP_FOURFRONT: Final = 'fourfront'

APP_CGAP = 'cgap'
APP_FOURFRONT = 'fourfront'

@@ -18,6 +28,11 @@

ORCHESTRATED_APPS = [APP_CGAP, APP_FOURFRONT]

# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
#
# CHALICE_STAGE_DEV: Final = 'dev'
# CHALICE_STAGE_PROD: Final = 'prod'

CHALICE_STAGE_DEV = 'dev'
CHALICE_STAGE_PROD = 'prod'

@@ -30,7 +45,14 @@
# Nicknames for enumerated sets of symbols. Note that these values must be syntactic literals,
# so they can't use the variables defined above.

# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
# ChaliceStage = Literal[CHALICE_STAGE_DEV, CHALICE_STAGE_PROD]

ChaliceStage = Literal['dev', 'prod']

# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
# OrchestratedApp = Literal[APP_CGAP, APP_FOURFRONT]

OrchestratedApp = Literal['cgap', 'fourfront']

LIBRARY_DIR = os.path.dirname(__file__)
@@ -39,8 +61,24 @@

AuthStr = str


# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
# class SimpleAuthDict(TypedDict):
#     key: str
#     secret: str

SimpleAuthDict = Dict[Literal['key', 'secret'], str]


# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
# class ServerAuthDict(TypedDict):
#     key: str
#     secret: str
#     server: str

ServerAuthDict = Dict[Literal['key', 'secret', 'server'], str]


AuthDict = Union[SimpleAuthDict, ServerAuthDict]

LegacyAuthDict = Dict[Literal['default'], AuthDict]
@@ -55,6 +93,12 @@

AnyJsonData = Union[Dict[str, 'AnyJsonData'], List['AnyJsonData'], str, bool, int, float, None]


# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
# class KeyValueDict(TypedDict):
#     Key: str
#     Value: Any

KeyValueDict = Dict[Literal['Key', 'Value'], Any]
KeyValueDictList = List[KeyValueDict]

@@ -81,6 +125,18 @@
# plus the intelligent tiering. Most of the others have a latency issue or are otherwise
# fragile. In practice, we just want to not overly warn about normal kinds of storage.

# Commonly used storage classes
STANDARD = 'STANDARD'
REDUCED_REDUNDANCY = 'REDUCED_REDUNDANCY'
STANDARD_IA = 'STANDARD_IA'
ONEZONE_IA = 'ONEZONE_IA'
INTELLIGENT_TIERING = 'INTELLIGENT_TIERING'
GLACIER = 'GLACIER'
DEEP_ARCHIVE = 'DEEP_ARCHIVE'
OUTPOSTS = 'OUTPOSTS'
GLACIER_IR = 'GLACIER_IR'


ALL_S3_STORAGE_CLASSES = [
    'STANDARD', 'REDUCED_REDUNDANCY', 'STANDARD_IA', 'ONEZONE_IA', 'INTELLIGENT_TIERING',
    'GLACIER', 'DEEP_ARCHIVE', 'OUTPOSTS', 'GLACIER_IR',
@@ -117,6 +173,16 @@
]


# TODO: Available in Python 3.8 (i.e., when we drop Python 3.7 support)
# class S3ObjectNameDict(TypedDict):
#     Bucket: str
#     Key: str
#     VersionId: Optional[str]

S3ObjectNameDict = Dict[Literal['Bucket', 'Key', 'VersionId'], Optional[str]]
S3ObjectNameSpec = Union[str, S3ObjectNameDict]


# This constant is used in our Lifecycle management system to automatically transition objects
ENCODED_LIFECYCLE_TAG_KEY = 'Lifecycle'

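As a hypothetical illustration (this helper is not part of the PR), `S3ObjectNameSpec` is the shape an API would accept in order to take either the string or dictionary form and normalize it with the new parser:

```python
from dcicutils.bucket_utils import parse_s3_object_name
from dcicutils.common import S3ObjectNameDict, S3ObjectNameSpec


def normalize_s3_object_name(spec: S3ObjectNameSpec) -> S3ObjectNameDict:
    # Strings are parsed as "bucket/key[?versionId=...]"; dictionaries are
    # assumed to already be in {'Bucket': ..., 'Key': ...} form.
    if isinstance(spec, str):
        return parse_s3_object_name(spec)
    return spec
```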
5 changes: 3 additions & 2 deletions dcicutils/ff_mocks.py
@@ -135,7 +135,8 @@ def mocked_s3utils(environments=None, require_sse=False, other_access_key_names=

    def write_config(config_name, record):
        record_string = json.dumps(record)
-       s3_client.s3_files.files[f"{LEGACY_GLOBAL_ENV_BUCKET}/{config_name}"] = bytes(record_string.encode('utf-8'))
+       s3_client.s3_files.set_file_content_for_testing(f"{LEGACY_GLOBAL_ENV_BUCKET}/{config_name}",
+                                                       record_string.encode('utf-8'))

ecosystem_file = "main.ecosystem"
for environment in environments:
@@ -200,7 +201,7 @@ def mocked_s3utils_with_sse(beanstalks=None, environments=None, require_sse=True
    s3 = mock_boto3.client('s3')
    assert isinstance(s3, MockBotoS3Client)
    for filename, string in (files or {}).items():
-       s3.s3_files.files[filename] = string.encode('utf-8')
+       s3.s3_files.set_file_content_for_testing(filename, string.encode('utf-8'))
    yield mock_boto3


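Both hunks above route mock-file writes through the new `set_file_content_for_testing` entry point instead of assigning into the mock file system's `files` dict. A minimal sketch of that pattern in a test, using the same setup as `mocked_s3utils` (bucket name and content are illustrative):

```python
from dcicutils.qa_utils import MockBoto3, MockBotoS3Client

mock_boto3 = MockBoto3()
s3 = mock_boto3.client('s3')
assert isinstance(s3, MockBotoS3Client)

# Seed mock S3 state through the centralized entry point rather than
# poking the mock file system's internal dict directly.
s3.s3_files.set_file_content_for_testing("my-bucket/config.json",
                                         b'{"setting": "value"}')
```

Centralizing writes this way is what lets the new content abstractions (`MockAbstractContent`, `MockBigContent`, `MockPartableBytes`) plug in without every caller knowing how content is represented.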