Commit

Merge pull request #444 from ddps-lab/gcp-collector
Add a function for automatic association.json update
red0sena authored Aug 11, 2023
2 parents bf50e7d + c805a93 commit 5ab46f2
Showing 2 changed files with 18 additions and 1 deletion.
4 changes: 3 additions & 1 deletion collector/spot-dataset/gcp/lambda/lambda_function.py
@@ -10,7 +10,7 @@
 from const_config import GcpCollector, Storage
 from load_pricelist import get_price, preprocessing_price, drop_negative
 from get_metadata import get_aggregated_list, parsing_data_from_aggragated_list
-from s3_management import save_raw, update_latest, upload_timestream, load_metadata
+from s3_management import save_raw, update_latest, upload_timestream, load_metadata, update_query_selector
 from compare_data import compare
 from utility import slack_msg_sender

@@ -93,6 +93,8 @@ def gcp_collect(timestamp):
 
     changed_df, removed_df = compare(df_previous, df_current, workload_cols, feature_cols)
 
+    update_query_selector(changed_df)
+
     # write timestream
     upload_timestream(changed_df, timestamp)
     upload_timestream(removed_df, timestamp)
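In short, lambda_function.py now imports update_query_selector from s3_management and invokes it on changed_df right after compare() and before the Timestream uploads, so every collection round also refreshes the query-selector file with newly observed rows. The function itself is added in s3_management.py below.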
15 changes: 15 additions & 0 deletions collector/spot-dataset/gcp/lambda/s3_management.py
@@ -100,6 +100,21 @@ def update_latest(data, timestamp):
     response = object_acl.put(ACL='public-read')
 
 
+def update_query_selector(changed_df):
+    filename = 'query-selector-gcp.json'
+    s3_path = f'query-selector/{filename}'
+    s3 = session.resource('s3')
+    query_selector_gcp = pd.DataFrame(json.loads(s3.Object(STORAGE_CONST.BUCKET_NAME, s3_path).get()['Body'].read()))
+    query_selector_gcp = pd.concat([query_selector_gcp[['InstanceType', 'Region']], changed_df[['InstanceType', 'Region']]], axis=0, ignore_index=True).dropna().drop_duplicates(['InstanceType', 'Region']).reset_index(drop=True)
+    result = query_selector_gcp.to_json(f"/tmp/{filename}", orient="records")
+    s3 = session.client('s3')
+    with open(f"/tmp/{filename}", 'rb') as f:
+        s3.upload_fileobj(f, STORAGE_CONST.BUCKET_NAME, s3_path)
+    s3 = session.resource('s3')
+    object_acl = s3.ObjectAcl(STORAGE_CONST.BUCKET_NAME, s3_path)
+    response = object_acl.put(ACL='public-read')
+
+
 def save_raw(data, timestamp):
     SAVE_FILENAME = f"{LOCAL_PATH}/spotlake_" + f"{timestamp}.csv.gz"
     data['Savings'] = round(
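To see what the new function does to the data, here is a minimal, self-contained sketch of its merge step, run on in-memory sample rows instead of S3. The column names and the pandas chain are taken from the diff; the instance types, regions, and the merged variable name are hypothetical.

# Toy demonstration of the merge/dedup logic in update_query_selector.
# In the real function the first frame is loaded from
# query-selector-gcp.json on S3 and the second frame is changed_df.
import pandas as pd

query_selector_gcp = pd.DataFrame([
    {'InstanceType': 'n1-standard-1', 'Region': 'us-central1'},   # hypothetical existing entry
])
changed_df = pd.DataFrame([
    {'InstanceType': 'n1-standard-1', 'Region': 'us-central1'},   # duplicate pair, dropped
    {'InstanceType': 'e2-medium', 'Region': 'asia-east1'},        # new pair, kept
])

merged = pd.concat(
    [query_selector_gcp[['InstanceType', 'Region']], changed_df[['InstanceType', 'Region']]],
    axis=0, ignore_index=True,
).dropna().drop_duplicates(['InstanceType', 'Region']).reset_index(drop=True)

print(merged.to_json(orient="records"))
# Each (InstanceType, Region) pair appears exactly once; the duplicate is removed.

Because the function only ever appends changed rows and deduplicates, the query-selector file accumulates every (InstanceType, Region) pair observed so far rather than being rebuilt from the full dataset on each run.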

0 comments on commit 5ab46f2
