From f5902c3093530d8abb213113d7744292ec83c1df Mon Sep 17 00:00:00 2001 From: chris0765 Date: Mon, 26 Sep 2022 17:32:49 +0900 Subject: [PATCH 1/4] add id column in latest data json file --- collection/aws/ec2_collector/upload_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/collection/aws/ec2_collector/upload_data.py b/collection/aws/ec2_collector/upload_data.py index 596097f..7ed9f3e 100644 --- a/collection/aws/ec2_collector/upload_data.py +++ b/collection/aws/ec2_collector/upload_data.py @@ -73,6 +73,7 @@ def upload_timestream(data, timestamp): def update_latest(data): filename = 'latest_aws.json' + data = data.reset_index().rename(columns={'index':'id'}) result = data.to_json(f"{LOCAL_PATH}/{filename}") s3_path = f'latest_data/{filename}' session = boto3.Session() From 57026c8c38facf2d929894e9f674aa2bcbff13cf Mon Sep 17 00:00:00 2001 From: chris0765 Date: Wed, 28 Sep 2022 17:37:14 +0900 Subject: [PATCH 2/4] update latest columns and rows' --- collection/aws/ec2_collector/aws_collect.py | 2 +- collection/aws/ec2_collector/upload_data.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/collection/aws/ec2_collector/aws_collect.py b/collection/aws/ec2_collector/aws_collect.py index 85c7272..27fd116 100644 --- a/collection/aws/ec2_collector/aws_collect.py +++ b/collection/aws/ec2_collector/aws_collect.py @@ -51,7 +51,7 @@ current_df = build_join_df(spot_price_df, ondemand_price_df, spotinfo_df, sps_df) -update_latest(current_df) # upload current data to S3 +update_latest(current_df, timestamp) # upload current data to S3 save_raw(current_df, timestamp) if 'latest_df.pkl' not in os.listdir(f'{LOCAL_PATH}/'): diff --git a/collection/aws/ec2_collector/upload_data.py b/collection/aws/ec2_collector/upload_data.py index 7ed9f3e..23eaec9 100644 --- a/collection/aws/ec2_collector/upload_data.py +++ b/collection/aws/ec2_collector/upload_data.py @@ -71,9 +71,11 @@ def upload_timestream(data, timestamp): print(f"end : {counter}") -def update_latest(data): +def update_latest(data, timestamp): filename = 'latest_aws.json' - data = data.reset_index().rename(columns={'index':'id'}) + data = data.drop(data[(data['AZ'].isna()) | (data['Region'].isna()) | (data['InstanceType'].isna())].index) + data['time'] = timestamp.strftime("%Y-%m-%d %H:%M:%S") + data['id'] = data.index+1 result = data.to_json(f"{LOCAL_PATH}/{filename}") s3_path = f'latest_data/{filename}' session = boto3.Session() From 71f96557919d11c1b571ecc27f39d3e3adf9d5ff Mon Sep 17 00:00:00 2001 From: chris0765 Date: Thu, 29 Sep 2022 15:20:18 +0900 Subject: [PATCH 3/4] use to_json with orient='records' --- collection/aws/ec2_collector/upload_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collection/aws/ec2_collector/upload_data.py b/collection/aws/ec2_collector/upload_data.py index 23eaec9..0348a25 100644 --- a/collection/aws/ec2_collector/upload_data.py +++ b/collection/aws/ec2_collector/upload_data.py @@ -76,7 +76,7 @@ def update_latest(data, timestamp): data = data.drop(data[(data['AZ'].isna()) | (data['Region'].isna()) | (data['InstanceType'].isna())].index) data['time'] = timestamp.strftime("%Y-%m-%d %H:%M:%S") data['id'] = data.index+1 - result = data.to_json(f"{LOCAL_PATH}/{filename}") + result = data.to_json(f"{LOCAL_PATH}/{filename}", orient="records") s3_path = f'latest_data/{filename}' session = boto3.Session() s3 = session.client('s3') From 1ce685e799bc32bc3d3cade4daa586e93b917d84 Mon Sep 17 00:00:00 2001 From: chris0765 Date: Fri, 30 Sep 2022 16:08:37 +0900 Subject: [PATCH 4/4] Change the NA value to fill with -1, not zero --- collection/aws/ec2_collector/join_data.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/collection/aws/ec2_collector/join_data.py b/collection/aws/ec2_collector/join_data.py index e22c846..ee9c893 100644 --- a/collection/aws/ec2_collector/join_data.py +++ b/collection/aws/ec2_collector/join_data.py @@ -21,11 +21,11 @@ def build_join_df(spot_price_df, ondemand_price_df, spotinfo_df, sps_df): join_df = pd.merge(join_df, spotinfo_df, how="outer") join_df['Savings'] = 100.0 - (join_df['SpotPrice'] * 100 / join_df['OndemandPrice']) - join_df['Savings'] = join_df['Savings'].fillna(0) - join_df['SPS'] = join_df['SPS'].fillna(0) - join_df['SpotPrice'] = join_df['SpotPrice'].fillna(0) - join_df['OndemandPrice'] = join_df['OndemandPrice'].fillna(0) - join_df['IF'] = join_df['IF'].fillna(0) + join_df['Savings'] = join_df['Savings'].fillna(-1) + join_df['SPS'] = join_df['SPS'].fillna(-1) + join_df['SpotPrice'] = join_df['SpotPrice'].fillna(-1) + join_df['OndemandPrice'] = join_df['OndemandPrice'].fillna(-1) + join_df['IF'] = join_df['IF'].fillna(-1) join_df['Savings'] = join_df['Savings'].astype('int') join_df['SPS'] = join_df['SPS'].astype('int')