From ed6af9e552d8f2b2149059a6d4f67fbf3d681496 Mon Sep 17 00:00:00 2001 From: a-maumau Date: Fri, 2 Oct 2020 03:15:15 +0900 Subject: [PATCH] v1.2.4 update --- README.md | 31 ++++++++- examples/local_settings.yaml | 3 + gpu_info_sender.py | 3 +- requirements.txt | 3 +- vesta/__version__.py | 2 +- vesta/send_gpu_info.py | 118 +++++++++++++++++++++++++---------- vesta/server.py | 88 ++++++++++++++------------ 7 files changed, 171 insertions(+), 77 deletions(-) diff --git a/README.md b/README.md index 04582a9..2f490d5 100644 --- a/README.md +++ b/README.md @@ -33,10 +33,35 @@ TIMESTAMP_FORMAT: "DMY" # it will be fed in python `re.search()`, so you can use regular expressions VALID_NETWORK: "192.168.11.(129|1[3-9][0-9]|2[0-5][0-9])" # this allows 192.168.11.129~255 - +... ``` Example is in `example/local_settings.yaml` +`nvidia-smi`'s information printing format has been changed, so you need to specify a parsing version for the client (which is sending the GPU information) script. +Please specify the format version (1 or 2) using `--nvidia-smi_parse_version` or write `NVIDIA_SMI_PARSE_VER` in local .yaml file. 
+ +version: 1 is for format of following +``` ++-----------------------------------------------------------------------------+ +| Processes: GPU Memory | +| GPU PID Type Process name Usage | +|=============================================================================| +| 0 16163 C python 240MiB | +| 1 16163 C python 8522MiB | ++-----------------------------------------------------------------------------+ +``` +version: 2 is for format of following (this is default now) +``` ++-----------------------------------------------------------------------------+ +| Processes: | +| GPU GI CI PID Type Process name GPU Memory | +| ID ID Usage | +|=============================================================================| +| 0 N/A N/A 24898 C python 17939MiB | +| 1 N/A N/A 24899 C python 17063MiB | ++-----------------------------------------------------------------------------+ +``` + # Usage You can use simple wrapper, for Server @@ -57,7 +82,7 @@ You will get like ``` $ curl "http://0.0.0.0:8080/?term=true" +------------------------------------------------------------------------------+ -| vesta ver. 1.0.1 gpu info. | +| vesta ver. 1.2.4 gpu info. | +------------------+------------------------+-----------------+--------+-------+ | host | gpu | memory usage | volat. | temp. | +------------------+------------------------+-----------------+--------+-------+ @@ -78,7 +103,7 @@ If you want to see detail information you can use `detail` option like `http://< You will get like ``` $ curl "http://0.0.0.0:8080/?term=true&detail=true" -vesta ver. 1.0.1 +vesta ver. 
1.2.4 #### mau_local :: 127.0.0.1 #################################################### last update: 24/03/2019 20:27:10 diff --git a/examples/local_settings.yaml b/examples/local_settings.yaml index f026455..6ef1509 100644 --- a/examples/local_settings.yaml +++ b/examples/local_settings.yaml @@ -12,6 +12,9 @@ TOKEN: '0000' # how many information to read in each page PAGE_PER_HOST_NUM: 8 +# nvidia-smi parsing version +NVIDIA_SMI_PARSE_VER: 2 + MAIN_PAGE_TITLE: "AWSOME GPUs" MAIN_PAGE_DESCRIPTION: "awsome description" TABLE_PAGE_TITLE: "AWSOME Table" diff --git a/gpu_info_sender.py b/gpu_info_sender.py index 07daa16..96d7bc1 100644 --- a/gpu_info_sender.py +++ b/gpu_info_sender.py @@ -18,6 +18,7 @@ parser.add_argument('--yaml_dir', dest='YAML_DIR', type=str, default="data", help='the dir of yaml which token is saved.') parser.add_argument('--yaml_name', dest='YAML_NAME', type=str, default="token", help='path of yaml file.') parser.add_argument('--nvidia-smi', dest='NVIDIA_SMI', type=str, default="nvidia-smi", help='if you want to specify nvidia-smi command.') + parser.add_argument('--nvidia-smi_parse_version', dest='NVIDIA_SMI_PARSE_VER', type=int, default=2, help="since nvidia-smi's process information has changed, you need to set to a suitable verson.\n 1: (GPU, PID, Type, Process name, Usage) format\n 2: (GPU, GI, CI, PID, Type, Process name, GPU Memory) format\nto see more detail, see send_gpu_info.py's get_gpu_info()\n default is 2.") parser.add_argument('--use_https', dest='USE_HTTPS', action="store_true", default=False, help='') settings = parser.parse_args() @@ -25,7 +26,7 @@ if settings.local_settings_yaml_path is not None: try: with open(settings.local_settings_yaml_path, "r") as yaml_file: - yaml_data = yaml.load(yaml_file, yaml.safe_load) + yaml_data = yaml.load(yaml_file, yaml.FullLoader) except Exception as e: print(e) yaml_data = [] diff --git a/requirements.txt b/requirements.txt index 6433be7..3cb1dff 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -4,4 +4,5 @@ PyYAML==5.1 schedule==0.5.0 gevent==1.2.1 gevent-websocket==0.10.1 -slackclient==2.5.0 +slackclient==2.9.1 +nest-asyncio==1.4.1 diff --git a/vesta/__version__.py b/vesta/__version__.py index 528a6c2..712796d 100644 --- a/vesta/__version__.py +++ b/vesta/__version__.py @@ -1,4 +1,4 @@ __title__ = 'vesta' __description__ = 'simple gpu monitoring script' __url__ = 'https://github.com/a-maumau/vesta' -__version__ = '1.2.2' +__version__ = '1.2.4' diff --git a/vesta/send_gpu_info.py b/vesta/send_gpu_info.py index 055db9a..66beca8 100644 --- a/vesta/send_gpu_info.py +++ b/vesta/send_gpu_info.py @@ -26,37 +26,63 @@ def get_gpu_info(settings): """ - example output of this function + parsing nvidia-smi: + version: 1 is for format of following + +-----------------------------------------------------------------------------+ + | Processes: GPU Memory | + | GPU PID Type Process name Usage | + |=============================================================================| + | 0 16163 C python 240MiB | + | 1 16163 C super_python 8522MiB | + +-----------------------------------------------------------------------------+ + + at least, around less version than 440.100 has this format + + version: 2 is for format of following + +-----------------------------------------------------------------------------+ + | Processes: | + | GPU GI CI PID Type Process name GPU Memory | + | ID ID Usage | + |=============================================================================| + | 0 N/A N/A 24898 C nython 17939MiB | + | 1 N/A N/A 24899 C rython 17063MiB | + +-----------------------------------------------------------------------------+ + + around more and eq than 450.51.06 has this format + + these version differences will effect the parsing using awk command - {'gpu:0', - {'available_memory': '10934', - 'device_num': '0', - 'gpu_name': 'GeForce GTX 1080 Ti', - 'gpu_volatile': '0', - 'processes': [{'name': '/usr/bin/X', - 'pid': '1963', - 'used_memory': 
'148', - 'user': 'root'}, - {'name': 'compiz', - 'pid': '3437', - 'used_memory': '84', - 'user': 'user1'}], - 'temperature': '36', - 'timestamp': '2018/11/30 23:29:47.115', - 'total_memory': '11169', - 'used_memory': '235', - 'uuid': 'GPU-...'}), - {'gpu:1', - {'available_memory': '11170', - 'device_num': '1', - 'gpu_name': 'GeForce GTX 1080 Ti', - 'gpu_volatile': '0', - 'processes': [], - 'temperature': '38', - 'timestamp': '2018/11/30 23:29:47.117', - 'total_memory': '11172', - 'used_memory': '2', - 'uuid': 'GPU-...'}} + + example output of this function + {'gpu:0', + {'available_memory': '10934', + 'device_num': '0', + 'gpu_name': 'GeForce GTX 1080 Ti', + 'gpu_volatile': '0', + 'processes': [{'name': '/usr/bin/X', + 'pid': '1963', + 'used_memory': '148', + 'user': 'root'}, + {'name': 'compiz', + 'pid': '3437', + 'used_memory': '84', + 'user': 'user1'}], + 'temperature': '36', + 'timestamp': '2018/11/30 23:29:47.115', + 'total_memory': '11169', + 'used_memory': '235', + 'uuid': 'GPU-...'}), + {'gpu:1', + {'available_memory': '11170', + 'device_num': '1', + 'gpu_name': 'GeForce GTX 1080 Ti', + 'gpu_volatile': '0', + 'processes': [], + 'temperature': '38', + 'timestamp': '2018/11/30 23:29:47.117', + 'total_memory': '11172', + 'used_memory': '2', + 'uuid': 'GPU-...'}} """ # for me @@ -103,7 +129,35 @@ def get_gpu_info(settings): gpu_info_dict["gpu:{}".format(line[0])] = {k:int(v) if k in NUMBERS else v for k, v in zip(alias_list+["processes"], line+[[]])} # get gpu processes ################################################################## - cmd = "nvidia-smi | awk '$2==\"Processes:\" {{p=1}} p && $2 ~ /[0-9]+/ && $3 > 0 {{print $2,$3,$5,$6}}'".format(settings.NVIDIA_SMI) + if settings.NVIDIA_SMI_PARSE_VER == 1: + """ + parse + +-----------------------------------------------------------------------------+ + | Processes: GPU Memory | + | GPU PID Type Process name Usage | + |=============================================================================| + | 0 16163 C 
python 240MiB | + | 1 16163 C super_python 8522MiB | + +-----------------------------------------------------------------------------+ + """ + cmd = "nvidia-smi | awk '$2==\"Processes:\" {{p=1}} p && $2 ~ /[0-9]+/ && $3 > 0 {{print $2,$3,$5,$6}}'".format(settings.NVIDIA_SMI) + elif settings.NVIDIA_SMI_PARSE_VER == 2: + """ + parse + +-----------------------------------------------------------------------------+ + | Processes: | + | GPU GI CI PID Type Process name GPU Memory | + | ID ID Usage | + |=============================================================================| + | 0 N/A N/A 24898 C nython 17939MiB | + | 1 N/A N/A 24899 C rython 17063MiB | + +-----------------------------------------------------------------------------+ + """ + cmd = "nvidia-smi | awk '$2==\"Processes:\" {{p=1}} p && $2 ~ /[0-9]+/ && $3 > 0 {{print $2,$5,$7,$8}}'".format(settings.NVIDIA_SMI) + else: + # this is same as NVIDIA_SMI_PARSE_VER == 1 + cmd = "nvidia-smi | awk '$2==\"Processes:\" {{p=1}} p && $2 ~ /[0-9]+/ && $3 > 0 {{print $2,$3,$5,$6}}'".format(settings.NVIDIA_SMI) + output = subprocess.check_output(cmd, shell=True).decode("utf-8") lines = output.split('\n') lines = [ line.strip().split(" ") for line in lines if line.strip() != '' ] @@ -179,7 +233,7 @@ def send_info(settings): if path_exist(yaml_path): with open(yaml_path, "r") as f: - yaml_data = yaml.load(f, yaml.safe_load) + yaml_data = yaml.load(f, yaml.FullLoader) if yaml_data is not None: token = yaml_data["hash_key"] else: diff --git a/vesta/server.py b/vesta/server.py index 25f7723..999991e 100644 --- a/vesta/server.py +++ b/vesta/server.py @@ -333,49 +333,59 @@ def client_get_update(self): client_ip = request.remote_addr self.client_update[client_ip] = {"page": 1, "queue":set()} + keep_update = True - while True: - # wait 1sec for client, - # and check if new page number is requested or not - page_num = None - with Timeout(self.settings.WS_RECEIVE_TIMEOUT, False): - page_num = ws.receive() - - if page_num is None: 
- pass - # if new page number was requested - else: - page_num = int(page_num) - if page_num < 1: - page_num = 1 - - if page_num != self.client_update[client_ip]["page"]: - self.client_update[client_ip]["page"] = page_num - - page_host_list = self.database.get_page_host_names(self.client_update[client_ip]["page"]) - update_data = {"update":self.fetch_update(page_host_list), - "page_name_list":page_host_list, + try: + while keep_update: + # wait 1sec for client, + # and check if new page number is requested or not + page_num = None + with Timeout(self.settings.WS_RECEIVE_TIMEOUT, False): + page_num = ws.receive() + + if page_num is None: + pass + # if new page number was requested + else: + page_num = int(page_num) + if page_num < 1: + page_num = 1 + + if page_num != self.client_update[client_ip]["page"]: + self.client_update[client_ip]["page"] = page_num + + page_host_list = self.database.get_page_host_names(self.client_update[client_ip]["page"]) + update_data = {"update":self.fetch_update(page_host_list), + "page_name_list":page_host_list, + "total_page_num":self.database.total_page} + + self.client_update[client_ip]["queue"] = set() + try: + ws.send(json.dumps(update_data)) + except: + del self.client_update[client_ip] + keep_update = False + + if ws.closed: + if client_ip in self.client_update: + try: + del self.client_update[client_ip] + except: + pass + keep_update = False + else: + update_data = {"update":self.fetch_cache_update(self.client_update[client_ip]["queue"]), + "page_name_list":self.database.get_page_host_names(self.client_update[client_ip]["page"]), "total_page_num":self.database.total_page} - self.client_update[client_ip]["queue"] = set() - ws.send(json.dumps(update_data)) - - if ws.closed: - if client_ip in self.client_update: - del self.client_update[client_ip] + if update_data["update"] != {}: + self.client_update[client_ip]["queue"] = set() + ws.send(json.dumps(update_data)) - break - else: - update_data = 
{"update":self.fetch_cache_update(self.client_update[client_ip]["queue"]), - "page_name_list":self.database.get_page_host_names(self.client_update[client_ip]["page"]), - "total_page_num":self.database.total_page} - - if update_data["update"] != {}: - self.client_update[client_ip]["queue"] = set() - ws.send(json.dumps(update_data)) - - # return empty content - return ('', 204) + # return empty content + return ('', 204) + except Exception as e: + print(e) else: abort(405)