From a26be9ffac7e7ac4f9b24947895199996be64e30 Mon Sep 17 00:00:00 2001 From: HongSena Date: Fri, 29 Sep 2023 23:33:37 +0900 Subject: [PATCH 1/4] update azure collector avoid 429 error --- .../azure/lambda/current_collector/load_price.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/collector/spot-dataset/azure/lambda/current_collector/load_price.py b/collector/spot-dataset/azure/lambda/current_collector/load_price.py index eb32fa0..49f9f84 100644 --- a/collector/spot-dataset/azure/lambda/current_collector/load_price.py +++ b/collector/spot-dataset/azure/lambda/current_collector/load_price.py @@ -1,3 +1,5 @@ +import time +import random import requests import pandas as pd import numpy as np @@ -14,6 +16,7 @@ MAX_SKIP = 2000 SKIP_NUM_LIST = [i*100 for i in range(AZURE_CONST.MAX_SKIP)] event = threading.Event() +lock = threading.Lock() # get instancetier from armSkuName @@ -54,6 +57,9 @@ def get_instaceType(armSkuName): # get price data using the API def get_price(skip_num): + sleep_time = random.uniform(0, 5) + time.sleep(sleep_time) + get_link = AZURE_CONST.GET_PRICE_URL + str(skip_num) response = requests.get(get_link) @@ -74,7 +80,9 @@ def get_price(skip_num): return + lock.acquire() price_list.extend(price_data) + lock.release() return From bf4476d0121717c8ace3e995abaf688ff1a350f8 Mon Sep 17 00:00:00 2001 From: red0sena Date: Tue, 17 Oct 2023 12:25:25 +0900 Subject: [PATCH 2/4] update azure-collector change skip_num and rest api url --- .../spot-dataset/azure/lambda/current_collector/load_price.py | 4 ++-- const_config.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/collector/spot-dataset/azure/lambda/current_collector/load_price.py b/collector/spot-dataset/azure/lambda/current_collector/load_price.py index 49f9f84..8f1299d 100644 --- a/collector/spot-dataset/azure/lambda/current_collector/load_price.py +++ b/collector/spot-dataset/azure/lambda/current_collector/load_price.py @@ -13,8 +13,7 @@ price_list = [] response_dict = 
{} -MAX_SKIP = 2000 -SKIP_NUM_LIST = [i*100 for i in range(AZURE_CONST.MAX_SKIP)] +SKIP_NUM_LIST = [i*1000 for i in range(AZURE_CONST.MAX_SKIP)] event = threading.Event() lock = threading.Lock() @@ -143,5 +142,6 @@ def collect_price_with_multithreading(): price_df = pd.DataFrame(price_list) savings_df = preprocessing_price(price_df) + savings_df = savings_df.drop_duplicates(subset=['InstanceTier', 'InstanceType', 'Region'], keep='first') return savings_df diff --git a/const_config.py b/const_config.py index 0d4143c..105ab6f 100644 --- a/const_config.py +++ b/const_config.py @@ -104,7 +104,7 @@ def SERVER_SAVE_FILENAME(): @constant def GET_PRICE_URL(): - return "https://prices.azure.com:443/api/retail/prices?$filter=serviceName%20eq%20%27Virtual%20Machines%27%20and%20priceType%20eq%20%27Consumption%27%20and%20unitOfMeasure%20eq%20%271%20Hour%27&$skip=" + return "https://prices.azure.com:443/api/retail/prices?$filter=serviceName eq 'Virtual Machines' and priceType eq 'Consumption' and unitOfMeasure eq '1 Hour' and contains(productName, 'Windows') eq false and contains(meterName, 'Low Priority') eq false and contains(meterName, 'Expired') eq false and contains(location, 'Gov') eq false and contains(location, 'ATT') eq false &$skip=" @constant def FILTER_LOCATIONS(): @@ -112,7 +112,7 @@ def FILTER_LOCATIONS(): @constant def MAX_SKIP(): - return 2000 + return 200 class GcpCollector(object): @constant From 44d77764669505b0f2eeb9bd9bf3f95fe5775164 Mon Sep 17 00:00:00 2001 From: red0sena Date: Tue, 17 Oct 2023 12:29:15 +0900 Subject: [PATCH 3/4] update azure collector remove sleep --- .../spot-dataset/azure/lambda/current_collector/load_price.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/collector/spot-dataset/azure/lambda/current_collector/load_price.py b/collector/spot-dataset/azure/lambda/current_collector/load_price.py index 8f1299d..4182096 100644 --- a/collector/spot-dataset/azure/lambda/current_collector/load_price.py +++ 
b/collector/spot-dataset/azure/lambda/current_collector/load_price.py @@ -56,9 +56,6 @@ def get_instaceType(armSkuName): # get price data using the API def get_price(skip_num): - sleep_time = random.uniform(0, 5) - time.sleep(sleep_time) - get_link = AZURE_CONST.GET_PRICE_URL + str(skip_num) response = requests.get(get_link) From 68626793698de74f027b0b63ae2977b9b3f64393 Mon Sep 17 00:00:00 2001 From: red0sena Date: Tue, 17 Oct 2023 13:12:17 +0900 Subject: [PATCH 4/4] update azure collector price module remove imports --- .../spot-dataset/azure/lambda/current_collector/load_price.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/collector/spot-dataset/azure/lambda/current_collector/load_price.py b/collector/spot-dataset/azure/lambda/current_collector/load_price.py index 4182096..6ffd7e8 100644 --- a/collector/spot-dataset/azure/lambda/current_collector/load_price.py +++ b/collector/spot-dataset/azure/lambda/current_collector/load_price.py @@ -1,5 +1,3 @@ -import time -import random import requests import pandas as pd import numpy as np