Commit

Merge pull request #444 from ddps-lab/gcp-collector
Add a function for automatic association.json update
red0sena authored Aug 11, 2023
2 parents bf50e7d + c805a93 commit 5ab46f2
Showing 2 changed files with 18 additions and 1 deletion.
4 changes: 3 additions & 1 deletion collector/spot-dataset/gcp/lambda/lambda_function.py
@@ -10,7 +10,7 @@
 from const_config import GcpCollector, Storage
 from load_pricelist import get_price, preprocessing_price, drop_negative
 from get_metadata import get_aggregated_list, parsing_data_from_aggragated_list
-from s3_management import save_raw, update_latest, upload_timestream, load_metadata
+from s3_management import save_raw, update_latest, upload_timestream, load_metadata, update_query_selector
 from compare_data import compare
 from utility import slack_msg_sender

@@ -93,6 +93,8 @@ def gcp_collect(timestamp):
 
     changed_df, removed_df = compare(df_previous, df_current, workload_cols, feature_cols)
 
+    update_query_selector(changed_df)
+
     # write timestream
     upload_timestream(changed_df, timestamp)
     upload_timestream(removed_df, timestamp)
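In short, lambda_function.py now imports update_query_selector from s3_management and invokes it on changed_df right after compare() and before the Timestream uploads, so every collection round also refreshes the query-selector file with newly observed rows. The function itself is added in s3_management.py below.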
15 changes: 15 additions & 0 deletions collector/spot-dataset/gcp/lambda/s3_management.py
@@ -100,6 +100,21 @@ def update_latest(data, timestamp):
     response = object_acl.put(ACL='public-read')
 
 
+def update_query_selector(changed_df):
+    filename = 'query-selector-gcp.json'
+    s3_path = f'query-selector/{filename}'
+    s3 = session.resource('s3')
+    query_selector_gcp = pd.DataFrame(json.loads(s3.Object(STORAGE_CONST.BUCKET_NAME, s3_path).get()['Body'].read()))
+    query_selector_gcp = pd.concat([query_selector_gcp[['InstanceType', 'Region']], changed_df[['InstanceType', 'Region']]], axis=0, ignore_index=True).dropna().drop_duplicates(['InstanceType', 'Region']).reset_index(drop=True)
+    result = query_selector_gcp.to_json(f"/tmp/{filename}", orient="records")
+    s3 = session.client('s3')
+    with open(f"/tmp/{filename}", 'rb') as f:
+        s3.upload_fileobj(f, STORAGE_CONST.BUCKET_NAME, s3_path)
+    s3 = session.resource('s3')
+    object_acl = s3.ObjectAcl(STORAGE_CONST.BUCKET_NAME, s3_path)
+    response = object_acl.put(ACL='public-read')
+
+
 def save_raw(data, timestamp):
     SAVE_FILENAME = f"{LOCAL_PATH}/spotlake_" + f"{timestamp}.csv.gz"
     data['Savings'] = round(
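To see what the new function does to the data, here is a minimal, self-contained sketch of its merge step, run on in-memory sample rows instead of S3. The column names and the pandas chain are taken from the diff; the instance types, regions, and the merged variable name are hypothetical.

# Toy demonstration of the merge/dedup logic in update_query_selector.
# In the real function the first frame is loaded from
# query-selector-gcp.json on S3 and the second frame is changed_df.
import pandas as pd

query_selector_gcp = pd.DataFrame([
    {'InstanceType': 'n1-standard-1', 'Region': 'us-central1'},   # hypothetical existing entry
])
changed_df = pd.DataFrame([
    {'InstanceType': 'n1-standard-1', 'Region': 'us-central1'},   # duplicate pair, dropped
    {'InstanceType': 'e2-medium', 'Region': 'asia-east1'},        # new pair, kept
])

merged = pd.concat(
    [query_selector_gcp[['InstanceType', 'Region']], changed_df[['InstanceType', 'Region']]],
    axis=0, ignore_index=True,
).dropna().drop_duplicates(['InstanceType', 'Region']).reset_index(drop=True)

print(merged.to_json(orient="records"))
# Each (InstanceType, Region) pair appears exactly once; the duplicate is removed.

Because the function only ever appends changed rows and deduplicates, the query-selector file accumulates every (InstanceType, Region) pair observed so far rather than being rebuilt from the full dataset on each run.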

0 comments on commit 5ab46f2
