"""
bucketstore module
"""
import io
import os
import os.path
import boto3
import botocore
from typing import BinaryIO, Callable, List, Union

AWS_DEFAULT_REGION = "us-east-1"
__version__ = "VERSION"


class S3Key:
    """An Amazon S3 Key"""

    def __init__(self, bucket: "S3Bucket", name: str) -> None:
        """constructor"""
        super().__init__()
        self.bucket = bucket
        self.name = name

    def __repr__(self) -> str:
        """str representation of an s3key"""
        return f"<S3Key name={self.name} bucket={self.bucket.name}>"

    def __len__(self) -> int:
        """returns the size of the s3 object of this key in bytes"""
        return self.size()

    @property
    def _boto_object(self):  # noqa: ANN202
        """the underlying boto3 s3 key object"""
        return self.bucket._boto_s3.Object(self.bucket.name, self.name)

    def get(self) -> str:
        """Gets the value of the key."""
        return self._boto_object.get()["Body"].read()

    def download(self, file: Union[str, BinaryIO], callback: Callable = None) -> None:
        """download the key to the given path or file object"""
        if self.name not in self.bucket:
            raise Exception("this key does not exist!")
        _download = self.bucket._boto_s3.meta.client.download_fileobj
        if isinstance(file, str):
            with open(file, "wb") as data:
                _download(self.bucket.name, self.name, data, Callback=callback)
        elif isinstance(file, io.IOBase):
            _download(self.bucket.name, self.name, file, Callback=callback)

    def upload(self, file: Union[str, BinaryIO], callback: Callable = None) -> None:
        """upload the file or file obj at the given path to this key"""
        _upload = self.bucket._boto_s3.meta.client.upload_fileobj
        if isinstance(file, str):
            if not os.path.isfile(file):
                raise Exception("file does not exist!")
            with open(file, "rb") as data:
                _upload(data, self.bucket.name, self.name, Callback=callback)
        elif isinstance(file, io.IOBase):
            _upload(file, self.bucket.name, self.name, Callback=callback)

    def size(self) -> int:
        """get the size of this object in s3"""
        total = 0
        for key in self.bucket._boto_bucket.objects.filter(Prefix=self.name):
            total += key.size
        return total

    def set(self, value: str, metadata: dict = None, content_type: str = "") -> dict:
        """Sets the key to the given value."""
        if not metadata:
            metadata = {}
        return self._boto_object.put(Body=value, Metadata=metadata, ContentType=content_type)

    def rename(self, new_name: str) -> None:
        """renames the key to a given new name"""
        # copy the item to avoid pulling and pushing
        self.bucket._boto_s3.Object(self.bucket.name, new_name).copy_from(
            CopySource=f"{self.bucket.name}/{self.name}"
        )
        # Delete the current key.
        self.delete()
        # Set the new name.
        self.name = new_name

    def delete(self) -> dict:
        """Deletes the key."""
        return self._boto_object.delete()

    @property
    def is_public(self) -> bool:
        """returns True if the public-read ACL is set for the Key."""
        for grant in self._boto_object.Acl().grants:
            if "AllUsers" in grant["Grantee"].get("URI", "") and grant["Permission"] == "READ":
                return True
        return False

    def make_public(self) -> dict:
        """sets the 'public-read' ACL for the key."""
        if not self.is_public:
            return self._boto_object.Acl().put(ACL="public-read")
        return {}

    @property
    def meta(self) -> dict:
        """returns the metadata for the key."""
        return self._boto_object.get()["Metadata"]

    @meta.setter
    def meta(self, value: dict) -> None:
        """sets the metadata for the key."""
        self.set(self.get(), value)

    @property
    def url(self) -> str:
        """returns the public URL for the given key."""
        if self.is_public:
            endpoint = self.bucket._boto_s3.meta.client.meta.endpoint_url
            return f"{endpoint}/{self.bucket.name}/{self.name}"
        raise ValueError(
            f"{self.name} does not have the public-read ACL set. "
            "Use the make_public() method to allow for "
            "public URL sharing."
        )

    def temp_url(self, duration: int = 120) -> str:
        """returns a temporary URL for the given key."""
        return self.bucket._boto_s3.meta.client.generate_presigned_url(
            "get_object",
            Params={"Bucket": self.bucket.name, "Key": self.name},
            ExpiresIn=duration,
        )
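
# Key-level sketch (illustrative; the bucket and file names are placeholders,
# and the bucket is assumed to already exist):
#
#     bucket = bucketstore.get("my-bucket")
#     key = bucket.key("reports/2024.csv")
#     key.upload("local-report.csv")        # upload from a local path (or an open binary file)
#     key.download("copy-of-report.csv")    # download to a local path (or an open binary file)
#     print(key.size(), key.meta)
#     print(key.temp_url(duration=300))     # presigned URL, valid for 300 seconds
#     key.rename("reports/latest.csv")
#     key.delete()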


class S3Bucket:
    """An Amazon S3 Bucket."""

    def __init__(
        self,
        name: str,
        create: bool = False,
        region: str = "",
        endpoint_url: str = None,
    ) -> None:
        super().__init__()
        self.name = name
        self.region = region or os.getenv("AWS_DEFAULT_REGION", AWS_DEFAULT_REGION)
        env_endpoint_url = os.getenv("AWS_ENDPOINT_URL", "")
        # Prefer an explicitly passed endpoint_url, then the environment variable,
        # and fall back to None so boto3 uses its default endpoint.
        self.endpoint_url = endpoint_url or env_endpoint_url or None
        self._boto_s3 = boto3.resource("s3", self.region, endpoint_url=self.endpoint_url)
        self._boto_bucket = self._boto_s3.Bucket(self.name)

        # Check if the bucket exists.
        if self._boto_s3.Bucket(self.name) not in self._boto_s3.buckets.all():
            if create:
                # Create the bucket.
                self._boto_s3.create_bucket(Bucket=self.name)
            else:
                raise ValueError(f"The bucket {self.name} doesn't exist!")

    def __getitem__(self, key: str) -> str:
        """allows for accessing keys with the array syntax"""
        return self.get(key)

    def __setitem__(self, key: str, value: str) -> dict:
        """allows for setting/uploading keys with the array syntax"""
        return self.set(key, value)

    def __delitem__(self, key: str) -> dict:
        """allow for deletion of keys via the del operator"""
        return self.delete(key)

    def __contains__(self, item: str) -> bool:
        """allows for use of the in keyword on the bucket object"""
        try:
            self._boto_s3.Object(self.name, item).load()
            return True
        except botocore.exceptions.ClientError as exception:
            if exception.response["Error"]["Code"] == "404":
                # The object does not exist.
                return False
            raise  # pragma: no cover

    def list(self, prefix: str = None, legacy_api: bool = False) -> List:
        """returns a list of keys in the bucket."""
        if prefix:
            if legacy_api:
                paginator = self._boto_s3.meta.client.get_paginator("list_objects")
            else:
                paginator = self._boto_s3.meta.client.get_paginator("list_objects_v2")
            objects = []
            for page in paginator.paginate(Bucket=self.name, Prefix=prefix):
                for obj in page.get("Contents", []):
                    objects.append(obj["Key"])
            return objects
        return [k.key for k in self._boto_bucket.objects.all()]

    @property
    def is_public(self) -> bool:
        """returns True if the public-read ACL is set for the bucket."""
        for grant in self._boto_bucket.Acl().grants:
            if "AllUsers" in grant["Grantee"].get("URI", "") and grant["Permission"] == "READ":
                return True
        return False

    def make_public(self) -> dict:
        """Makes the bucket public-readable."""
        return self._boto_bucket.Acl().put(ACL="public-read")

    def key(self, key: str) -> S3Key:
        """returns a given key from the bucket."""
        return S3Key(self, key)

    def all(self) -> List[S3Key]:
        """returns all keys in the bucket."""
        return [self.key(k) for k in self.list()]

    def get(self, key: str) -> str:
        """get the contents of the given key"""
        selected_key = self.key(key)
        return selected_key.get()

    def set(self, key: str, value: str, metadata: dict = None, content_type: str = "") -> dict:
        """creates/edits a key in the s3 bucket"""
        if not metadata:
            metadata = {}
        new_key = self.key(key)
        return new_key.set(value, metadata, content_type)

    def delete(self, key: str = None) -> dict:
        """Deletes the given key, or the whole bucket."""
        # Delete the whole bucket.
        if key is None:
            # Delete everything in the bucket.
            for each_key in self.all():
                each_key.delete()
            # Delete the bucket.
            return self._boto_bucket.delete()
        # If a key was passed, delete the key.
        k = self.key(key)
        return k.delete()

    def __repr__(self) -> str:
        """representation of an s3bucket object"""
        return f"<S3Bucket name={self.name}>"


def list() -> List[str]:  # pylint: disable=redefined-builtin
    """lists buckets, by name."""
    s3_resource = boto3.resource("s3")
    return [bucket.name for bucket in s3_resource.buckets.all()]


def get(bucket_name: str, create: bool = False) -> S3Bucket:
    """get an s3bucket object by name"""
    return S3Bucket(bucket_name, create=create)


def login(
    access_key_id: str,
    secret_access_key: str,
    region: str = AWS_DEFAULT_REGION,
    endpoint_url: str = "",
) -> None:
    """sets environment variables for boto3."""
    os.environ["AWS_ACCESS_KEY_ID"] = access_key_id
    os.environ["AWS_SECRET_ACCESS_KEY"] = secret_access_key
    os.environ["AWS_DEFAULT_REGION"] = region
    os.environ["AWS_ENDPOINT_URL"] = endpoint_url
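

# Endpoint-override sketch (illustrative; the URL below is a placeholder for any
# S3-compatible service, such as a local MinIO or LocalStack instance):
#
#     bucketstore.login(
#         "access_key_id",
#         "secret_access_key",
#         endpoint_url="http://localhost:9000",
#     )
#     bucket = bucketstore.S3Bucket("my-bucket", create=True)
#
# The same override can also be passed per bucket instead of via login():
#
#     bucket = bucketstore.S3Bucket(
#         "my-bucket", create=True, endpoint_url="http://localhost:9000"
#     )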