-
Notifications
You must be signed in to change notification settings - Fork 0
/
ExternalMigration(re-experiment).py
186 lines (138 loc) · 5.8 KB
/
ExternalMigration(re-experiment).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import subprocess
import time
import datetime
import boto3
from tqdm import tqdm
import csv
from pathlib import Path
# Absolute path of the CSV file that setCsv() overwrites; it is resolved
# relative to the directory containing this script, under infrastructure/.
csv_path = str(Path(__file__).resolve().parent) + '/infrastructure/CPU Feature Visualization - minimized aws group(all).csv'
import ssh_scripts.playbook as playbook
from pprint import pprint
# EC2 client and resource handles, both bound to the us-west-2 region.
ec2_client = boto3.client('ec2', region_name='us-west-2')
ec2_resource = boto3.resource('ec2', region_name='us-west-2')
# S3 client plus the bucket name and key prefix that getReExp() lists to find
# which result CSV files already exist.
s3_client = boto3.client('s3')
bucket_name = 'migration-compatibility'
prefix = 'Migration-between-groups/pyxgboost/'
def _allInstancesHealthy():
    """Return True when every non-terminated 'migration-test_*' instance is 'ok'.

    Instances are looked up by their ``Name`` tag. Terminated instances are
    ignored; any other instance must report an instance status of ``'ok'``.
    """
    instances = ec2_client.describe_instances(Filters=[
        {
            'Name': 'tag:Name',
            'Values': ['migration-test_*']
        }
    ])

    for reservation in instances['Reservations']:
        for instance in reservation['Instances']:
            instance_id = instance['InstanceId']
            instance_state = ec2_resource.Instance(instance_id).state['Name']

            # Skip terminated instances, but keep checking the remaining
            # instances of this reservation (a `break` here would silently
            # skip their health checks).
            if instance_state == 'terminated':
                continue

            status = ec2_client.describe_instance_status(
                InstanceIds=[instance_id])
            statuses = status.get('InstanceStatuses')
            # The list may be present but empty (by default only running
            # instances are reported), so guard before indexing into it.
            if not statuses or statuses[0]['InstanceStatus']['Status'] != 'ok':
                return False

    return True


def createInfrastructure(CREATE_GROUP, cwd):
    """Create the infrastructure for a group and block until it is healthy.

    Runs ``terraform apply`` twice in ``cwd`` (first targeting only
    ``module.read-instances``, then the whole configuration), writing the
    output of both runs to ``terraform.log`` (the file is truncated first).
    It then sleeps 150 seconds and polls EC2 every 10 seconds until all
    non-terminated ``migration-test_*`` instances pass their status check.

    Args:
        CREATE_GROUP: Value interpolated into the terraform ``group`` variable.
        cwd: Directory containing the terraform configuration.
    """
    # create infrastructure by group
    # NOTE(review): terraform exit codes are not checked; a failed apply is
    # only visible in terraform.log.
    with open('terraform.log', 'w') as f:
        subprocess.run(['terraform', 'apply', '-auto-approve', '-target', 'module.read-instances', '-var',
                        f'group={CREATE_GROUP}'], cwd=cwd, stdout=f, stderr=f, encoding='utf-8')
        subprocess.run(['terraform', 'apply', '-auto-approve', '-var', f'group={CREATE_GROUP}'],
                       cwd=cwd, stdout=f, stderr=f, encoding='utf-8')

    print('\nComplete infrastructure creation')
    print('wating 2.5 minute..')

    time.sleep(150)

    # checking instance status
    print('checking instance status...')
    while not _allInstancesHealthy():
        time.sleep(10)

    print('Pass all instance health checks')
def performTask(CREATE_GROUP):
    """Run the dump playbook and then the restore playbook for the group.

    Args:
        CREATE_GROUP: Group passed through to both playbook calls.
    """
    # Checkpoint phase: kick off the dump through the Ansible playbook helper.
    playbook.externalMigrationDump(CREATE_GROUP, re_exp=True)

    # Restore phase: kick off the restore through the Ansible playbook helper.
    playbook.externalMigrationRestore(CREATE_GROUP, 0, re_exp=True)
def destroyInfrastructure(CREATE_GROUP, cwd):
    """Destroy the infrastructure of a group and wait for terraform to finish.

    Runs ``terraform destroy -auto-approve`` in ``cwd`` and appends its
    stdout and stderr to ``terraform.log``.

    Args:
        CREATE_GROUP: Value interpolated into the terraform ``group`` variable.
        cwd: Directory containing the terraform configuration.
    """
    # destroy infrastructure by groups
    with open('terraform.log', 'a') as f:
        # subprocess.run blocks until terraform exits, matching how
        # createInfrastructure invokes terraform.
        subprocess.run(['terraform', 'destroy', '-auto-approve', '-var',
                        f'group={CREATE_GROUP}'], cwd=cwd, stdout=f, stderr=f)
def getReExp():
    """Find the source/destination pairs whose result CSV is missing in S3.

    For every ordered pair of distinct instance types the expected result file
    is ``<src>_to_<dst>.csv``. File names found under ``prefix`` in
    ``bucket_name`` are compared against that expectation.

    Returns:
        A list of single-key dicts ``{src: [dst, ...]}``, one per source
        instance type that has at least one missing result, in the order of
        the instance list. The list is empty when nothing is missing. The
        result is also pretty-printed.
    """
    instances = [
        "m5a.large", "m5a.2xlarge", "m5a.8xlarge", "c5a.large", "c6a.large",
        "m4.large", "h1.2xlarge", "x1e.xlarge", "r4.large", "i3.large",
        "c5a.24xlarge", "c6a.24xlarge", "c4.8xlarge", "h1.8xlarge",
        "h1.16xlarge", "x1e.8xlarge", "m4.16xlarge", "r4.8xlarge",
        "r4.16xlarge", "c6i.large", "c5.large", "m5n.large", "m5.large",
        "c6i.16xlarge", "c5d.9xlarge", "m5zn.6xlarge", "c5.9xlarge",
    ]

    # List every object under the prefix. A single list_objects_v2 call
    # returns at most 1000 keys, so iterate over all pages; otherwise existing
    # results beyond the first page would be reported as missing.
    paginator = s3_client.get_paginator('list_objects_v2')
    file_names = set()
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        for obj in page.get('Contents', []):
            # Keep only the object's file name (last path component).
            file_names.add(obj['Key'].split('/')[-1])
    # Keys ending in '/' yield an empty file name.
    file_names.discard('')

    # Group missing destinations by source; dict insertion order keeps the
    # sources in the same order as the instance list.
    missing = {}
    for src in instances:
        for dst in instances:
            if src == dst:
                continue
            if f'{src}_to_{dst}.csv' not in file_names:
                missing.setdefault(src, []).append(dst)

    reExpCases = [{src: dsts} for src, dsts in missing.items()]

    pprint(reExpCases, width=80)
    return reExpCases
def setCsv(cases):
    """Overwrite the CSV at ``csv_path`` with the instances of one case.

    The file gets a ``'feature groups'`` header row, then one row holding the
    keys of ``cases``, then one single-column row per item of each value.

    Args:
        cases: Mapping whose values are iterables of items to list.
    """
    rows = [['feature groups'], list(cases)]
    for members in cases.values():
        rows.extend([member] for member in members)

    # Save as a CSV file
    with open(csv_path, 'w', newline='') as out:
        csv.writer(out).writerows(rows)
if __name__ == '__main__':
    # Script entry point: for every source instance type that still has
    # missing results, create the infrastructure, run the dump/restore task,
    # and tear the infrastructure down again.
    playbook.setWorkload()

    print('Select experiment option')
    print('1. On-Demand\n2. Spot-Instance')
    try:
        option = int(input()) - 1
    except ValueError:
        # Non-numeric input is handled like any other invalid choice.
        option = -1

    if option == 0:
        cwd = 'infrastructure/external_migration'
    elif option == 1:
        cwd = 'infrastructure/external_migration_on_spot'
    else:
        print('invalid option')
        raise SystemExit

    start_time = datetime.datetime.now()

    reExpCases = getReExp()

    with tqdm(total=len(reExpCases), unit='Processing') as pbar:
        for reExpCase in reExpCases:
            setCsv(reExpCase)

            # values(dst instances) count + src instance count
            length = len(list(reExpCase.values())[0]) + 1
            CREATE_GROUP = list(range(length))

            try:
                createInfrastructure(CREATE_GROUP, cwd)
                performTask(CREATE_GROUP)
            finally:
                # Always tear down, even when creation or the task fails, so
                # no instances are left running.
                destroyInfrastructure(CREATE_GROUP, cwd)

            pbar.update(1)
            time.sleep(5)

    end_time = datetime.datetime.now()

    elapsed_time = end_time - start_time
    total_seconds = elapsed_time.total_seconds()
    print(f'total time : {total_seconds}')