diff --git a/collection/aws/ec2_collector/aws_collect.py b/collection/aws/ec2_collector/aws_collect.py index 85c7272..27fd116 100644 --- a/collection/aws/ec2_collector/aws_collect.py +++ b/collection/aws/ec2_collector/aws_collect.py @@ -51,7 +51,7 @@ current_df = build_join_df(spot_price_df, ondemand_price_df, spotinfo_df, sps_df) -update_latest(current_df) # upload current data to S3 +update_latest(current_df, timestamp) # upload current data to S3 save_raw(current_df, timestamp) if 'latest_df.pkl' not in os.listdir(f'{LOCAL_PATH}/'): diff --git a/collection/aws/ec2_collector/join_data.py b/collection/aws/ec2_collector/join_data.py index e22c846..ee9c893 100644 --- a/collection/aws/ec2_collector/join_data.py +++ b/collection/aws/ec2_collector/join_data.py @@ -21,11 +21,11 @@ def build_join_df(spot_price_df, ondemand_price_df, spotinfo_df, sps_df): join_df = pd.merge(join_df, spotinfo_df, how="outer") join_df['Savings'] = 100.0 - (join_df['SpotPrice'] * 100 / join_df['OndemandPrice']) - join_df['Savings'] = join_df['Savings'].fillna(0) - join_df['SPS'] = join_df['SPS'].fillna(0) - join_df['SpotPrice'] = join_df['SpotPrice'].fillna(0) - join_df['OndemandPrice'] = join_df['OndemandPrice'].fillna(0) - join_df['IF'] = join_df['IF'].fillna(0) + join_df['Savings'] = join_df['Savings'].fillna(-1) + join_df['SPS'] = join_df['SPS'].fillna(-1) + join_df['SpotPrice'] = join_df['SpotPrice'].fillna(-1) + join_df['OndemandPrice'] = join_df['OndemandPrice'].fillna(-1) + join_df['IF'] = join_df['IF'].fillna(-1) join_df['Savings'] = join_df['Savings'].astype('int') join_df['SPS'] = join_df['SPS'].astype('int') diff --git a/collection/aws/ec2_collector/upload_data.py b/collection/aws/ec2_collector/upload_data.py index 596097f..0348a25 100644 --- a/collection/aws/ec2_collector/upload_data.py +++ b/collection/aws/ec2_collector/upload_data.py @@ -71,9 +71,12 @@ def upload_timestream(data, timestamp): print(f"end : {counter}") -def update_latest(data): +def update_latest(data, timestamp): filename = 'latest_aws.json' - result = data.to_json(f"{LOCAL_PATH}/{filename}") + data = data.drop(data[(data['AZ'].isna()) | (data['Region'].isna()) | (data['InstanceType'].isna())].index) + data['time'] = timestamp.strftime("%Y-%m-%d %H:%M:%S") + data['id'] = data.index+1 + result = data.to_json(f"{LOCAL_PATH}/{filename}", orient="records") s3_path = f'latest_data/{filename}' session = boto3.Session() s3 = session.client('s3')