diff --git a/collector/spot-dataset/azure/lambda/current_collector/load_price.py b/collector/spot-dataset/azure/lambda/current_collector/load_price.py
index 6ffd7e8..3b4770d 100644
--- a/collector/spot-dataset/azure/lambda/current_collector/load_price.py
+++ b/collector/spot-dataset/azure/lambda/current_collector/load_price.py
@@ -137,6 +137,6 @@ def collect_price_with_multithreading():
     price_df = pd.DataFrame(price_list)
     savings_df = preprocessing_price(price_df)
 
-    savings_df = savings_df.drop_duplicates(subset=['InstanceTier', 'InstanceType', 'Region'], keep='first')
+    # NOTE(review): with inplace=True drop_duplicates returns None, so the
+    # result must NOT be reassigned — `savings_df = ...drop_duplicates(...,
+    # inplace=True)` would rebind savings_df to None and make this function
+    # return None. Mutate in place and return the (deduplicated) DataFrame.
+    savings_df.drop_duplicates(subset=['InstanceTier', 'InstanceType', 'Region'], keep='first', inplace=True)
 
     return savings_df
diff --git a/collector/spot-dataset/azure/lambda/current_collector/upload_data.py b/collector/spot-dataset/azure/lambda/current_collector/upload_data.py
index 3e94db6..ff05359 100644
--- a/collector/spot-dataset/azure/lambda/current_collector/upload_data.py
+++ b/collector/spot-dataset/azure/lambda/current_collector/upload_data.py
@@ -111,7 +111,7 @@ def update_latest(data, timestamp):
 # Save raw data in S3
 def save_raw(data, timestamp):
     data['Time'] = timestamp.strftime("%Y-%m-%d %H:%M:%S")
-    data = data[['Time','InstanceTier','InstanceType','OndemandPrice','SpotPrice', 'IF', 'Savings']]
+    data = data[['Time','InstanceTier','InstanceType', 'Region', 'OndemandPrice','SpotPrice', 'IF', 'Savings']]
     data.to_csv(f"{AZURE_CONST.SERVER_SAVE_DIR}/{timestamp}.csv.gz", index=False, compression="gzip")