This repository has been archived by the owner on Aug 27, 2023. It is now read-only.

Investigation: Add support for instance profiles #322

Closed
116 changes: 79 additions & 37 deletions pypicloud/storage/s3.py
@@ -18,7 +18,6 @@
from pypicloud.dateutil import utcnow
from pypicloud.models import Package
from pypicloud.util import (
EnvironSettings,
PackageParseError,
normalize_metadata,
parse_filename,
@@ -32,13 +31,27 @@

class S3Storage(ObjectStoreStorage):

"""Storage backend that uses S3"""
"""Storage backend that uses S3 and support running on EC2 instances with instance profiles.

bucket_name is not really optional here, but we have to treat it as optional unless
we can validate that request isn't actually optional here either, or risk changing argument order.
"""

test = False

def __init__(self, request=None, bucket=None, **kwargs):
def __init__(
self,
request=None,
bucket_name=None,
storage_config=None,
resource_config=None,
**kwargs,
):
super(S3Storage, self).__init__(request=request, **kwargs)
self.bucket = bucket
self.bucket_name = bucket_name if bucket_name is not None else ""
self.storage_config = storage_config if storage_config is not None else {}
self.resource_config = resource_config if resource_config is not None else {}
self._bucket = None
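For orientation, a hypothetical construction of this backend on an EC2 instance with an instance profile (the bucket name, region, and Config values below are assumptions, not taken from this diff, and the real constructor is normally driven by pypicloud's settings machinery): leaving aws_access_key_id / aws_secret_access_key out of resource_config lets boto3 fall back to its default credential chain, which includes the instance-metadata provider.

from botocore.client import Config
from pypicloud.storage.s3 import S3Storage

# Hypothetical values; the instance profile supplies the credentials, so no
# static keys are passed in resource_config.
storage = S3Storage(
    request=None,
    bucket_name="example-pypicloud-packages",
    storage_config=Config(signature_version="s3v4"),
    resource_config={"region_name": "us-east-1"},
)
bucket = storage.bucket  # first access runs create_bucket_if_not_exist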

@classmethod
def _subclass_specific_config(cls, settings, common_config):
@@ -55,12 +68,6 @@ def _subclass_specific_config(cls, settings, common_config):
if bucket_name is None:
raise ValueError("You must specify the 'storage.bucket'")

return {"sse": sse, "bucket": cls.get_bucket(bucket_name, settings)}

@classmethod
def get_bucket(
cls, bucket_name: str, settings: EnvironSettings
) -> "boto3.s3.Bucket":
config_settings = settings.get_as_dict(
"storage.",
region_name=str,
@@ -80,6 +87,7 @@ def get_bucket(
addressing_style=str,
signature_version=str,
)

config = Config(**config_settings)

def verify_value(val):
@@ -90,31 +98,40 @@ def verify_value(val):
else:
return str(val)

s3conn = boto3.resource(
"s3",
config=config,
**settings.get_as_dict(
"storage.",
region_name=str,
api_version=str,
use_ssl=asbool,
verify=verify_value,
endpoint_url=str,
aws_access_key_id=str,
aws_secret_access_key=str,
aws_session_token=str,
),
resource_config = settings.get_as_dict(
"storage.",
region_name=str,
api_version=str,
use_ssl=asbool,
verify=verify_value,
endpoint_url=str,
aws_access_key_id=str,
aws_secret_access_key=str,
aws_session_token=str,
)

bucket = s3conn.Bucket(bucket_name)
return {
"sse": sse,
"bucket_name": bucket_name,
"storage_config": config,
"resource_config": resource_config,
}

def create_bucket_if_not_exist(self):

s3Resource = boto3.resource(
"s3", config=self.storage_config, **self.resource_config
)

bucket = s3Resource.Bucket(self.bucket_name)
try:
s3conn.meta.client.head_bucket(Bucket=bucket_name)
s3Resource.meta.client.head_bucket(Bucket=self.bucket_name)
except ClientError as e:
if e.response["Error"]["Code"] == "404":
LOG.info("Creating S3 bucket %s", bucket_name)
LOG.info("Creating S3 bucket %s", self.bucket_name)

if config.region_name:
location = {"LocationConstraint": config.region_name}
if self.region_name and self.region_name != "us-east-1":
location = {"LocationConstraint": self.region_name}
bucket.create(CreateBucketConfiguration=location)
else:
bucket.create()
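The us-east-1 exclusion above reflects an S3 API rule: CreateBucket rejects a LocationConstraint of "us-east-1" because that region is the default location. A minimal standalone sketch of the same rule (bucket and region names are made up):

import boto3

s3 = boto3.resource("s3")
region = "eu-central-1"  # assumed region
bucket = s3.Bucket("example-bucket")
if region != "us-east-1":
    bucket.create(CreateBucketConfiguration={"LocationConstraint": region})
else:
    # us-east-1 is the default; passing it as a LocationConstraint is rejected
    bucket.create()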
@@ -150,6 +167,27 @@ def package_from_object(cls, obj, factory):
name, version, filename, obj.last_modified, path=obj.key, **metadata
)

@property
def bucket(self):
"""Dynamically creates boto3.s3.Bucket resource to ensure automatically refreshing credentials work.

Taking this approach allows for the credentials to be rotated if they need to be.
E.g. when deployed to an EC2 instance using an instance profile.
boto3 will handle updating the credentials automatically, but the resource itself can't be kept alive forever, else subsequent calls
result in expired credentials errors.

Separating create_bucket_if_not_exists here from returning a new bucket resource to avoid unnecessary increase in bucket.head calls
that would be introduced by implementing self.bucket as a property.
"""
if self._bucket is None:
self._bucket = self.create_bucket_if_not_exist()
else:
s3Resource = boto3.resource(
"s3", config=self.storage_config, **self.resource_config
)
self._bucket = s3Resource.Bucket(self.bucket_name)
return self._bucket
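As a usage sketch (the loop below is illustrative, not part of the diff): because every access to .bucket rebuilds the boto3 resource, a long-running server keeps working even after the instance-profile credentials it started with have been rotated.

import time

# Assuming `storage` is a configured S3Storage instance.
while True:
    bucket = storage.bucket  # first call also creates the bucket if missing;
                             # later calls just build a fresh Bucket resource
    for summary in bucket.objects.filter(Prefix=storage.bucket_prefix):
        print(summary.key)
    time.sleep(3600)  # credentials may rotate between iterations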

def list(self, factory=Package):
keys = self.bucket.objects.filter(Prefix=self.bucket_prefix)
for summary in keys:
@@ -160,29 +198,33 @@ def list(self, factory=Package):
yield pkg

def _generate_url(self, package):
"""Generate a signed url to the S3 file"""
"""Generate a signed url to the S3 file


? question: Does this implementation work if someone is specifying an endpoint_url?
"""
if self.public_url:
if self.region_name:
return "https://s3.{0}.amazonaws.com/{1}/{2}".format(
self.region_name, self.bucket.name, self.get_path(package)
self.region_name, self.bucket_name, self.get_path(package)
)
else:
if "." in self.bucket.name:
if "." in self.bucket_name:
self._log_region_warning()
return "https://{0}.s3.amazonaws.com/{1}".format(
self.bucket.name, self.get_path(package)
self.bucket_name, self.get_path(package)
)
url = self.bucket.meta.client.generate_presigned_url(
"get_object",
Params={"Bucket": self.bucket.name, "Key": self.get_path(package)},
Params={"Bucket": self.bucket_name, "Key": self.get_path(package)},
ExpiresIn=self.expire_after,
)
# There is a special case if your bucket has a '.' in the name. The
# generated URL will return a 301 and the pip downloads will fail.
# If you provide a region_name, boto should correctly generate a url in
# the form of `s3.<region>.amazonaws.com`
# See https://github.com/stevearc/pypicloud/issues/145
if "." in self.bucket.name:
if "." in self.bucket_name:
pieces = urlparse(url)
if pieces.netloc == "s3.amazonaws.com" and self.region_name is None:
self._log_region_warning()
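A possible mitigation for the dotted-bucket-name case, beyond what this change does, is to force path-style addressing (pypicloud already exposes an addressing_style option, as seen in the config settings above); with a region set, presigned URLs then take the s3.<region>.amazonaws.com/<bucket>/<key> form and avoid the redirect. A hedged sketch with made-up bucket, key, and region:

import boto3
from botocore.client import Config

client = boto3.client(
    "s3",
    region_name="us-west-2",
    config=Config(s3={"addressing_style": "path"}),
)
url = client.generate_presigned_url(
    "get_object",
    Params={"Bucket": "my.dotted.bucket", "Key": "packages/pkg-1.0.tar.gz"},
    ExpiresIn=3600,
)
# roughly: https://s3.us-west-2.amazonaws.com/my.dotted.bucket/packages/pkg-1.0.tar.gz?X-Amz-...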
@@ -198,7 +240,7 @@ def _log_region_warning(self):
)

def get_uri(self, package):
return f"s3://{self.bucket.name}/{self.get_path(package)}"
return f"s3://{self.bucket_name}/{self.get_path(package)}"

def upload(self, package, datastream):
kwargs = {}
@@ -242,7 +284,7 @@ def delete(self, package):

def check_health(self):
try:
self.bucket.meta.client.head_bucket(Bucket=self.bucket.name)
self.bucket.meta.client.head_bucket(Bucket=self.bucket_name)
except ClientError as e:
return False, str(e)
else: