From 17009e7eb066771d7f2bcf8f93df97395e6da983 Mon Sep 17 00:00:00 2001
From: chris0765 <66048830+chris0765@users.noreply.github.com>
Date: Thu, 10 Aug 2023 06:04:27 +0900
Subject: [PATCH 1/2] Update s3_management.py

---
 .../spot-dataset/gcp/lambda/s3_management.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/collector/spot-dataset/gcp/lambda/s3_management.py b/collector/spot-dataset/gcp/lambda/s3_management.py
index f83ac86..a398387 100644
--- a/collector/spot-dataset/gcp/lambda/s3_management.py
+++ b/collector/spot-dataset/gcp/lambda/s3_management.py
@@ -100,6 +100,43 @@ def update_latest(data, timestamp):
     response = object_acl.put(ACL='public-read')
 
 
+def update_query_selector(changed_df):
+    """Merge changed (InstanceType, Region) pairs into the public query selector.
+
+    Reads the selector JSON from ``query-selector/query-selector-gcp.json``,
+    appends the pairs found in ``changed_df``, drops rows with missing values
+    and duplicate pairs, and writes the result back as a ``public-read``
+    JSON array of records.
+
+    Args:
+        changed_df: DataFrame that has at least the ``InstanceType`` and
+            ``Region`` columns.
+    """
+    key_cols = ['InstanceType', 'Region']
+    filename = 'query-selector-gcp.json'
+    s3_path = f'query-selector/{filename}'
+    selector_object = session.resource('s3').Object(STORAGE_CONST.BUCKET_NAME, s3_path)
+
+    # Passing columns= keeps both key columns present even when the stored
+    # selector is an empty list, which otherwise raises KeyError on selection.
+    stored_records = json.loads(selector_object.get()['Body'].read())
+    stored_pairs = pd.DataFrame(stored_records, columns=key_cols)
+
+    merged_pairs = (
+        pd.concat([stored_pairs, changed_df[key_cols]], axis=0, ignore_index=True)
+        .dropna()
+        .drop_duplicates(key_cols)
+        .reset_index(drop=True)
+    )
+
+    # Write the body and the ACL in one request: no /tmp round trip and no
+    # window where the refreshed object exists without public-read access.
+    selector_object.put(
+        Body=merged_pairs.to_json(orient="records").encode('utf-8'),
+        ACL='public-read',
+    )
+
+
 def save_raw(data, timestamp):
     SAVE_FILENAME = f"{LOCAL_PATH}/spotlake_" + f"{timestamp}.csv.gz"
     data['Savings'] = round(

From c805a9327d78db497f101cae384cf76dc5ae79b2 Mon Sep 17 00:00:00 2001
From: chris0765 <66048830+chris0765@users.noreply.github.com>
Date: Thu, 10 Aug 2023 06:05:36 +0900
Subject: [PATCH 2/2] Update lambda_function.py

---
 collector/spot-dataset/gcp/lambda/lambda_function.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git 
a/collector/spot-dataset/gcp/lambda/lambda_function.py b/collector/spot-dataset/gcp/lambda/lambda_function.py index 588eb28..24e9724 100644 --- a/collector/spot-dataset/gcp/lambda/lambda_function.py +++ b/collector/spot-dataset/gcp/lambda/lambda_function.py @@ -10,7 +10,7 @@ from const_config import GcpCollector, Storage from load_pricelist import get_price, preprocessing_price, drop_negative from get_metadata import get_aggregated_list, parsing_data_from_aggragated_list -from s3_management import save_raw, update_latest, upload_timestream, load_metadata +from s3_management import save_raw, update_latest, upload_timestream, load_metadata, update_query_selector from compare_data import compare from utility import slack_msg_sender @@ -93,6 +93,8 @@ def gcp_collect(timestamp): changed_df, removed_df = compare(df_previous, df_current, workload_cols, feature_cols) + update_query_selector(changed_df) + # wirte timestream upload_timestream(changed_df, timestamp) upload_timestream(removed_df, timestamp)