-
Notifications
You must be signed in to change notification settings - Fork 0
/
sync_with_s3_boto.py
executable file
·94 lines (75 loc) · 2.75 KB
/
sync_with_s3_boto.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python
import boto3
import subprocess
import sys
import json
import hashlib
import re
import os
from utils import *
class S3Bucket(object):
    """Convenience methods for S3 buckets.

    Thin wrapper around the boto3 ``Bucket`` resource obtained from a
    session; every method talks to the bucket named at construction time.
    """

    # S3's multi-object delete accepts at most this many keys per request,
    # so larger deletions are split into several calls.
    _MAX_DELETE_BATCH = 1000

    def __init__(self, session, name):
        """Bind to bucket *name* through the ``s3`` resource of *session*.

        session -- object exposing ``resource('s3')`` (a boto3 session).
        name    -- bucket name; kept on ``self.name`` for later copy calls.
        """
        resource = session.resource('s3')
        self.bucket = resource.Bucket(name)
        self.name = name

    def info(self):
        """Return ``{key: {"Key", "Size", "ETag"}}`` for every object.

        The values are taken verbatim from each object summary's ``key``,
        ``size`` and ``e_tag`` attributes.
        """
        return {
            o.key: {"Key": o.key, "Size": o.size, "ETag": o.e_tag}
            for o in self.bucket.objects.all()
        }

    def set_mimetype(self, key, mime):
        """Rewrite the Content-Type of *key* by copying the object onto itself."""
        copy_source = {'Bucket': self.name, 'Key': key}
        # Without MetadataDirective=REPLACE, S3 keeps the source object's
        # metadata on a copy, so the new ContentType would not take effect.
        # NOTE(review): REPLACE also drops any other metadata not restated
        # here -- confirm no custom metadata has to be preserved.
        self.bucket.copy(
            copy_source,
            key,
            ExtraArgs={'ContentType': mime, 'MetadataDirective': 'REPLACE'},
        )

    def delete_objects(self, objs):
        """Delete every key in *objs* from the bucket.

        Does nothing when *objs* is empty.  Keys are sent in batches of at
        most ``_MAX_DELETE_BATCH`` and each request is echoed to stdout
        before it is issued.
        """
        keys = list(objs)
        if not keys:
            return
        for start in range(0, len(keys), self._MAX_DELETE_BATCH):
            batch = keys[start:start + self._MAX_DELETE_BATCH]
            obj_list = [{"Key": obj} for obj in batch]
            print('bucket.delete_objects(Delete={"Objects":%s, "Quiet":False})' %
                  repr(obj_list))
            self.bucket.delete_objects(
                Delete={"Objects": obj_list, "Quiet": False})

    def put_objects(self, objs):
        """Upload each local file in *objs*, using its path as the S3 key.

        The Content-Type comes from ``my_guess_mimetype`` (defined outside
        this class).  Each upload is echoed to stdout before it is issued.
        """
        for obj in objs:
            mime_type = my_guess_mimetype(obj)
            # Binary mode so the uploaded bytes match the file on disk; the
            # context manager closes the handle once the upload returns.
            with open(obj, 'rb') as f:
                print("bucket.put_object(Key=%s, Body=f, ContentType=%s)" %
                      (repr(obj), repr(mime_type)))
                self.bucket.put_object(Key=obj, Body=f, ContentType=mime_type)
##############################################################################
def local_info():
    """Return ``{path: {"ETag": md5_hexdigest}}`` for the local files.

    The files are whatever ``find_files()`` (defined outside this block)
    yields.  Each result key is the file path with its first two characters
    removed -- presumably a leading ``./``; confirm against ``find_files``.
    """
    result = {}
    for path in find_files():
        digest = hashlib.md5()
        # Hash the raw bytes: text mode could translate newlines or fail to
        # decode, and hashlib only accepts bytes on Python 3.  Reading in
        # chunks keeps memory flat, and ``with`` closes the handle promptly.
        with open(path, 'rb') as stream:
            for chunk in iter(lambda: stream.read(65536), b''):
                digest.update(chunk)
        result[path[2:]] = {"ETag": digest.hexdigest()}
    return result
def main():
    """Sync the local files with the S3 bucket named in ``sys.argv[1]``.

    Steps:
      1. Read the bucket name from the first command-line argument; print a
         usage message and exit with status 1 when it is missing.
      2. Open a boto3 session using the profile named by the
         ``IEEEVIS_AWS_USER`` environment variable (``KeyError`` if unset).
      3. Diff ``local_info()`` against the bucket's ``info()`` with
         ``diff_local_remote_buckets``.
      4. Upload the ``to_insert`` and ``to_update`` entries, delete the
         ``to_delete`` entries, and report how many ``same`` entries were
         left alone.

    Every message is emitted with a single pre-formatted argument to
    ``print(...)`` so the output is the same on Python 2 and Python 3.
    """
    # sys.argv is a list, so a missing argument raises IndexError (the
    # original KeyError handler could never fire).
    try:
        target_bucket_name = sys.argv[1]
    except IndexError:
        print("Expected path for target bucket")
        sys.exit(1)

    session = boto3.Session(profile_name=os.environ["IEEEVIS_AWS_USER"])
    bucket = S3Bucket(session, target_bucket_name)

    print("Syncing with %s" % target_bucket_name)
    diff = diff_local_remote_buckets(local_info(), bucket.info())

    files_to_upload = diff['to_insert'] + diff['to_update']
    print("Uploading %s files:" % len(files_to_upload))
    for o in files_to_upload:
        print(" %s" % o)
    bucket.put_objects(files_to_upload)

    files_to_remove = diff['to_delete']
    print("Removing %s files:" % len(files_to_remove))
    for o in files_to_remove:
        print(" %s" % o)
    bucket.delete_objects(files_to_remove)

    # Unchanged files are only counted, not listed, to keep the output short.
    files_to_keep = diff['same']
    print("Not touching %s other files." % len(files_to_keep))

    print("Done!")


# Run the sync only when executed as a script, not when imported.
if __name__ == '__main__':
    main()