diff --git a/pypicloud/storage/s3.py b/pypicloud/storage/s3.py
index a05ed95..1b27de2 100644
--- a/pypicloud/storage/s3.py
+++ b/pypicloud/storage/s3.py
@@ -18,7 +18,6 @@ from pypicloud.dateutil import utcnow
 from pypicloud.models import Package
 from pypicloud.util import (
-    EnvironSettings,
     PackageParseError,
     normalize_metadata,
     parse_filename,
@@ -32,13 +31,27 @@ class S3Storage(ObjectStoreStorage):
-    """Storage backend that uses S3"""
+    """Storage backend that uses S3 and supports running on EC2 instances with instance profiles.
+
+    bucket_name is effectively required, but it keeps a default value: making it
+    positional would change the argument order unless we could also confirm that
+    request is never omitted.
+    """
 
     test = False
 
-    def __init__(self, request=None, bucket=None, **kwargs):
+    def __init__(
+        self,
+        request=None,
+        bucket_name=None,
+        storage_config=None,
+        resource_config=None,
+        **kwargs,
+    ):
         super(S3Storage, self).__init__(request=request, **kwargs)
-        self.bucket = bucket
+        self.bucket_name = bucket_name if bucket_name is not None else ""
+        self.storage_config = storage_config if storage_config is not None else {}
+        self.resource_config = resource_config if resource_config is not None else {}
+        self._bucket = None
 
     @classmethod
     def _subclass_specific_config(cls, settings, common_config):
@@ -55,12 +68,6 @@ def _subclass_specific_config(cls, settings, common_config):
         if bucket_name is None:
             raise ValueError("You must specify the 'storage.bucket'")
 
-        return {"sse": sse, "bucket": cls.get_bucket(bucket_name, settings)}
-
-    @classmethod
-    def get_bucket(
-        cls, bucket_name: str, settings: EnvironSettings
-    ) -> "boto3.s3.Bucket":
         config_settings = settings.get_as_dict(
             "storage.",
             region_name=str,
@@ -80,6 +87,7 @@ def get_bucket(
             addressing_style=str,
             signature_version=str,
         )
+
         config = Config(**config_settings)
 
         def verify_value(val):
@@ -90,31 +98,40 @@ def verify_value(val):
             else:
                 return str(val)
 
-        s3conn = boto3.resource(
-            "s3",
-            config=config,
-            **settings.get_as_dict(
-                "storage.",
-                region_name=str,
-                api_version=str,
-                use_ssl=asbool,
-                verify=verify_value,
-                endpoint_url=str,
-                aws_access_key_id=str,
-                aws_secret_access_key=str,
-                aws_session_token=str,
-            ),
+        resource_config = settings.get_as_dict(
+            "storage.",
+            region_name=str,
+            api_version=str,
+            use_ssl=asbool,
+            verify=verify_value,
+            endpoint_url=str,
+            aws_access_key_id=str,
+            aws_secret_access_key=str,
+            aws_session_token=str,
         )
-        bucket = s3conn.Bucket(bucket_name)
+        return {
+            "sse": sse,
+            "bucket_name": bucket_name,
+            "storage_config": config,
+            "resource_config": resource_config,
+        }
+
+    def create_bucket_if_not_exist(self):
+        """Build the Bucket resource, creating the bucket in S3 if it is missing."""
+        s3_resource = boto3.resource(
+            "s3", config=self.storage_config, **self.resource_config
+        )
+        bucket = s3_resource.Bucket(self.bucket_name)
         try:
-            s3conn.meta.client.head_bucket(Bucket=bucket_name)
+            s3_resource.meta.client.head_bucket(Bucket=self.bucket_name)
         except ClientError as e:
             if e.response["Error"]["Code"] == "404":
-                LOG.info("Creating S3 bucket %s", bucket_name)
+                LOG.info("Creating S3 bucket %s", self.bucket_name)
-                if config.region_name:
-                    location = {"LocationConstraint": config.region_name}
+                if self.region_name and self.region_name != "us-east-1":
+                    location = {"LocationConstraint": self.region_name}
                     bucket.create(CreateBucketConfiguration=location)
                 else:
                     bucket.create()
@@ -150,6 +167,27 @@ def package_from_object(cls, obj, factory):
             name, version, filename, obj.last_modified, path=obj.key, **metadata
         )
 
+    @property
+    def bucket(self):
+        """Dynamically create the boto3.s3.Bucket resource so that automatically refreshing credentials work.
+
+        This allows the credentials to be rotated when necessary, e.g. when
+        deployed to an EC2 instance using an instance profile. boto3 refreshes
+        the credentials automatically, but the resource itself cannot be kept
+        alive forever, or subsequent calls fail with expired-credentials errors.
+
+        The head_bucket check in create_bucket_if_not_exist() only runs on the
+        first access; later accesses just build a fresh resource, so exposing
+        bucket as a property does not add unnecessary head calls.
+        """
+        if self._bucket is None:
+            self._bucket = self.create_bucket_if_not_exist()
+        else:
+            s3_resource = boto3.resource(
+                "s3", config=self.storage_config, **self.resource_config
+            )
+            self._bucket = s3_resource.Bucket(self.bucket_name)
+        return self._bucket
+
     def list(self, factory=Package):
         keys = self.bucket.objects.filter(Prefix=self.bucket_prefix)
         for summary in keys:
@@ -160,21 +198,25 @@ def list(self, factory=Package):
             yield pkg
 
     def _generate_url(self, package):
-        """Generate a signed url to the S3 file"""
+        """Generate a signed url to the S3 file
+
+        TODO: confirm whether this implementation also works when a custom
+        endpoint_url is configured.
+        """
         if self.public_url:
             if self.region_name:
                 return "https://s3.{0}.amazonaws.com/{1}/{2}".format(
-                    self.region_name, self.bucket.name, self.get_path(package)
+                    self.region_name, self.bucket_name, self.get_path(package)
                 )
             else:
-                if "." in self.bucket.name:
+                if "." in self.bucket_name:
                     self._log_region_warning()
                 return "https://{0}.s3.amazonaws.com/{1}".format(
-                    self.bucket.name, self.get_path(package)
+                    self.bucket_name, self.get_path(package)
                 )
         url = self.bucket.meta.client.generate_presigned_url(
             "get_object",
-            Params={"Bucket": self.bucket.name, "Key": self.get_path(package)},
+            Params={"Bucket": self.bucket_name, "Key": self.get_path(package)},
             ExpiresIn=self.expire_after,
         )
         # There is a special case if your bucket has a '.' in the name. The
@@ -182,7 +224,7 @@ def _generate_url(self, package):
         # If you provide a region_name, boto should correctly generate a url in
         # the form of `s3.<region>.amazonaws.com`
         # See https://github.com/stevearc/pypicloud/issues/145
-        if "." in self.bucket.name:
+        if "." in self.bucket_name:
             pieces = urlparse(url)
             if pieces.netloc == "s3.amazonaws.com" and self.region_name is None:
                 self._log_region_warning()
@@ -198,7 +240,7 @@ def _log_region_warning(self):
         )
 
     def get_uri(self, package):
-        return f"s3://{self.bucket.name}/{self.get_path(package)}"
+        return f"s3://{self.bucket_name}/{self.get_path(package)}"
 
     def upload(self, package, datastream):
         kwargs = {}
@@ -242,7 +284,7 @@ def delete(self, package):
 
     def check_health(self):
         try:
-            self.bucket.meta.client.head_bucket(Bucket=self.bucket.name)
+            self.bucket.meta.client.head_bucket(Bucket=self.bucket_name)
         except ClientError as e:
             return False, str(e)
         else:
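
A rough sketch of how the refactored pieces fit together: `_subclass_specific_config` now returns plain constructor kwargs instead of a live `Bucket`, and the first touch of `storage.bucket` performs the `head_bucket` check. This is illustrative only: the bucket name and region are placeholders, `sse` is omitted, and it assumes the base-class defaults are enough to construct the storage object outside a real Pyramid request.

```python
from botocore.config import Config

from pypicloud.storage.s3 import S3Storage

# Roughly the shape of what _subclass_specific_config() now returns
# (placeholder values, "sse" omitted for brevity):
kwargs = {
    "bucket_name": "my-pypi-bucket",
    "storage_config": Config(region_name="us-west-2"),
    "resource_config": {"region_name": "us-west-2"},
}

storage = S3Storage(request=None, **kwargs)

# First access runs create_bucket_if_not_exist(): one head_bucket call,
# plus a create on 404. Every later access rebuilds the Bucket resource
# with freshly resolved credentials.
bucket = storage.bucket
```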
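The heart of the change is the `bucket` property. Per the rationale in its docstring, a resource cached for the life of the process can outlive the instance-profile credentials it was created with, so every access after the first rebuilds the resource. A minimal before/after illustration of that idea (the bucket name is a placeholder, and the failure mode is the one the docstring describes, not something reproduced here):

```python
import boto3

# Previous behavior: one Bucket resource created at startup and cached for
# the life of the process. On an EC2 instance profile, a long-running
# process can eventually see expired-credentials errors through it.
cached_bucket = boto3.resource("s3").Bucket("my-pypi-bucket")

# What the property's else-branch does instead: build a fresh resource on
# each access, so boto3 re-resolves credentials (e.g. from instance metadata).
def fresh_bucket(name):
    return boto3.resource("s3").Bucket(name)
```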
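One behavior change worth calling out, since the diff adds it without comment: bucket creation now skips the `LocationConstraint` for `us-east-1`. That matches the S3 API, where `CreateBucket` rejects an explicit `LocationConstraint` of `us-east-1` because it is the default location. A standalone sketch of the rule (function and names are hypothetical):

```python
import boto3

def create_in_region(bucket_name, region_name=None):
    bucket = boto3.resource("s3").Bucket(bucket_name)
    if region_name and region_name != "us-east-1":
        # Every region other than us-east-1 must be named explicitly.
        bucket.create(
            CreateBucketConfiguration={"LocationConstraint": region_name}
        )
    else:
        # us-east-1 is the default; naming it explicitly is rejected
        # (InvalidLocationConstraint).
        bucket.create()
    return bucket
```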