Merge pull request #23 from a-maumau/v1.2.4
v1.2.4 update
a-maumau authored Oct 1, 2020
2 parents 6c78558 + ed6af9e commit e0e305d
Showing 7 changed files with 171 additions and 77 deletions.
31 changes: 28 additions & 3 deletions README.md
@@ -33,10 +33,35 @@ TIMESTAMP_FORMAT: "DMY"
# it will be fed in python `re.search()`, so you can use regular expressions
VALID_NETWORK: "192.168.11.(129|1[3-9][0-9]|2[0-5][0-9])"
# this allows 192.168.11.129~255
...
```
An example is provided in `examples/local_settings.yaml`.

`nvidia-smi`'s process information output format has changed, so you need to specify a parsing version for the client script (the one that sends the GPU information).
Please specify the format version (1 or 2) using `--nvidia-smi_parse_version`, or set `NVIDIA_SMI_PARSE_VER` in the local settings .yaml file.

Version 1 is for the following format:
```
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| 0 16163 C python 240MiB |
| 1 16163 C python 8522MiB |
+-----------------------------------------------------------------------------+
```
Version 2 is for the following format (this is now the default):
```
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 24898 C python 17939MiB |
| 1 N/A N/A 24899 C python 17063MiB |
+-----------------------------------------------------------------------------+
```
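
If you are not sure which table your hosts print, the driver version is a reasonable hint: the notes in `send_gpu_info.py` observe that drivers up to around 440.100 print the version-1 table and drivers from around 450.51.06 print the version-2 table. A minimal sketch (not part of vesta) that guesses the value to pass to `--nvidia-smi_parse_version`:
```
import subprocess

def guess_parse_version(nvidia_smi="nvidia-smi"):
    """Guess the nvidia-smi process-table format (1 or 2) from the driver version.

    Assumption: the 440/450 boundary described above; adjust if your driver
    falls between the two versions mentioned there.
    """
    out = subprocess.check_output(
        [nvidia_smi, "--query-gpu=driver_version", "--format=csv,noheader"]
    ).decode("utf-8")
    major = int(out.strip().splitlines()[0].split(".")[0])
    return 2 if major >= 450 else 1
```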

# Usage
You can use a simple wrapper,
for Server
@@ -57,7 +82,7 @@ You will get like
```
$ curl "http://0.0.0.0:8080/?term=true"
+------------------------------------------------------------------------------+
| vesta ver. 1.0.1 gpu info. |
| vesta ver. 1.2.4 gpu info. |
+------------------+------------------------+-----------------+--------+-------+
| host | gpu | memory usage | volat. | temp. |
+------------------+------------------------+-----------------+--------+-------+
@@ -78,7 +103,7 @@ If you want to see detail information you can use `detail` option like `http://<
You will get something like
```
$ curl "http://0.0.0.0:8080/?term=true&detail=true"
vesta ver. 1.0.1
vesta ver. 1.2.4
#### mau_local :: 127.0.0.1 ####################################################
last update: 24/03/2019 20:27:10
3 changes: 3 additions & 0 deletions examples/local_settings.yaml
@@ -12,6 +12,9 @@ TOKEN: '0000'
# how many information to read in each page
PAGE_PER_HOST_NUM: 8

# nvidia-smi parsing version
NVIDIA_SMI_PARSE_VER: 2

MAIN_PAGE_TITLE: "AWSOME GPUs"
MAIN_PAGE_DESCRIPTION: "awsome description"
TABLE_PAGE_TITLE: "AWSOME Table"
3 changes: 2 additions & 1 deletion gpu_info_sender.py
@@ -18,14 +18,15 @@
parser.add_argument('--yaml_dir', dest='YAML_DIR', type=str, default="data", help='the dir of yaml which token is saved.')
parser.add_argument('--yaml_name', dest='YAML_NAME', type=str, default="token", help='path of yaml file.')
parser.add_argument('--nvidia-smi', dest='NVIDIA_SMI', type=str, default="nvidia-smi", help='if you want to specify nvidia-smi command.')
parser.add_argument('--nvidia-smi_parse_version', dest='NVIDIA_SMI_PARSE_VER', type=int, default=2, help="since nvidia-smi's process information format has changed, you need to set a suitable version.\n 1: (GPU, PID, Type, Process name, Usage) format\n 2: (GPU, GI, CI, PID, Type, Process name, GPU Memory) format\nfor more detail, see send_gpu_info.py's get_gpu_info()\n default is 2.")
parser.add_argument('--use_https', dest='USE_HTTPS', action="store_true", default=False, help='')

settings = parser.parse_args()

if settings.local_settings_yaml_path is not None:
try:
with open(settings.local_settings_yaml_path, "r") as yaml_file:
yaml_data = yaml.load(yaml_file, yaml.safe_load)
yaml_data = yaml.load(yaml_file, yaml.FullLoader)
except Exception as e:
print(e)
yaml_data = []
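
This hunk also fixes the loader argument: the old call passed `yaml.safe_load` (a function, not a Loader class) as the second argument to `yaml.load`, which PyYAML cannot use as a loader, so the `except` branch printed the error and fell back to an empty settings list. A minimal sketch of the two working alternatives (assuming PyYAML >= 5.1, as pinned in requirements.txt):
```
import yaml

# FullLoader (used in this commit): resolves standard YAML tags, needs PyYAML >= 5.1
with open("examples/local_settings.yaml", "r") as yaml_file:
    yaml_data = yaml.load(yaml_file, Loader=yaml.FullLoader)

# safe_load: the more restrictive alternative for untrusted files
with open("examples/local_settings.yaml", "r") as yaml_file:
    yaml_data = yaml.safe_load(yaml_file)
```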
3 changes: 2 additions & 1 deletion requirements.txt
@@ -4,4 +4,5 @@ PyYAML==5.1
schedule==0.5.0
gevent==1.2.1
gevent-websocket==0.10.1
slackclient==2.5.0
slackclient==2.9.1
nest-asyncio==1.4.1
2 changes: 1 addition & 1 deletion vesta/__version__.py
@@ -1,4 +1,4 @@
__title__ = 'vesta'
__description__ = 'simple gpu monitoring script'
__url__ = 'https://github.com/a-maumau/vesta'
__version__ = '1.2.2'
__version__ = '1.2.4'
118 changes: 86 additions & 32 deletions vesta/send_gpu_info.py
@@ -26,37 +26,63 @@

def get_gpu_info(settings):
"""
parsing nvidia-smi:
version 1 is for the following format
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| 0 16163 C python 240MiB |
| 1 16163 C super_python 8522MiB |
+-----------------------------------------------------------------------------+
at least, driver versions up to around 440.100 have this format
version 2 is for the following format
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 24898 C nython 17939MiB |
| 1 N/A N/A 24899 C rython 17063MiB |
+-----------------------------------------------------------------------------+
driver versions around 450.51.06 and newer have this format
these version differences affect the parsing done with the awk command
example output of this function
{'gpu:0',
{'available_memory': '10934',
'device_num': '0',
'gpu_name': 'GeForce GTX 1080 Ti',
'gpu_volatile': '0',
'processes': [{'name': '/usr/bin/X',
'pid': '1963',
'used_memory': '148',
'user': 'root'},
{'name': 'compiz',
'pid': '3437',
'used_memory': '84',
'user': 'user1'}],
'temperature': '36',
'timestamp': '2018/11/30 23:29:47.115',
'total_memory': '11169',
'used_memory': '235',
'uuid': 'GPU-...'}),
{'gpu:1',
{'available_memory': '11170',
'device_num': '1',
'gpu_name': 'GeForce GTX 1080 Ti',
'gpu_volatile': '0',
'processes': [],
'temperature': '38',
'timestamp': '2018/11/30 23:29:47.117',
'total_memory': '11172',
'used_memory': '2',
'uuid': 'GPU-...'}}
"""

# for me
@@ -103,7 +129,35 @@ def get_gpu_info(settings):
gpu_info_dict["gpu:{}".format(line[0])] = {k:int(v) if k in NUMBERS else v for k, v in zip(alias_list+["processes"], line+[[]])}

# get gpu processes ##################################################################
cmd = "nvidia-smi | awk '$2==\"Processes:\" {{p=1}} p && $2 ~ /[0-9]+/ && $3 > 0 {{print $2,$3,$5,$6}}'".format(settings.NVIDIA_SMI)
if settings.NVIDIA_SMI_PARSE_VER == 1:
"""
parse
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| 0 16163 C python 240MiB |
| 1 16163 C super_python 8522MiB |
+-----------------------------------------------------------------------------+
"""
cmd = "nvidia-smi | awk '$2==\"Processes:\" {{p=1}} p && $2 ~ /[0-9]+/ && $3 > 0 {{print $2,$3,$5,$6}}'".format(settings.NVIDIA_SMI)
elif settings.NVIDIA_SMI_PARSE_VER == 2:
"""
parse
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 24898 C nython 17939MiB |
| 1 N/A N/A 24899 C rython 17063MiB |
+-----------------------------------------------------------------------------+
"""
cmd = "nvidia-smi | awk '$2==\"Processes:\" {{p=1}} p && $2 ~ /[0-9]+/ && $3 > 0 {{print $2,$5,$7,$8}}'".format(settings.NVIDIA_SMI)
else:
# this is same as NVIDIA_SMI_PARSE_VER == 1
cmd = "nvidia-smi | awk '$2==\"Processes:\" {{p=1}} p && $2 ~ /[0-9]+/ && $3 > 0 {{print $2,$3,$5,$6}}'".format(settings.NVIDIA_SMI)

output = subprocess.check_output(cmd, shell=True).decode("utf-8")
lines = output.split('\n')
lines = [ line.strip().split(" ") for line in lines if line.strip() != '' ]
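
The only difference between the two branches is which awk fields are printed: in the version-1 table the PID is awk's `$3` (the leading `|` counts as `$1`), while the extra GI/CI ID columns in the version-2 table push it to `$5`, so the printed fields change from `$2,$3,$5,$6` to `$2,$5,$7,$8` (GPU index, PID, process name, memory usage in both cases). Note that the command string is passed through `.format(settings.NVIDIA_SMI)` even though it hard-codes `nvidia-smi`, which is why the awk braces are doubled as `{{ }}`. A pure-Python sketch of the same extraction (not vesta code; column positions follow the tables shown above):
```
def parse_process_rows(nvidia_smi_output, parse_version=2):
    """Extract (gpu, pid, process_name, used_memory) tuples from nvidia-smi output.

    A sketch of what the awk one-liners above do, not vesta code.
    Column positions follow the tables shown above:
      version 1: GPU PID Type Name Usage
      version 2: GPU GI CI PID Type Name Usage
    """
    picks = (0, 1, 3, 4) if parse_version == 1 else (0, 3, 5, 6)
    rows, in_processes = [], False
    for line in nvidia_smi_output.splitlines():
        if "Processes:" in line:
            in_processes = True          # only rows below this header are process rows
            continue
        cols = line.strip("| \n").split()
        # a process row starts with a numeric GPU index
        if in_processes and cols and cols[0].isdigit() and len(cols) > picks[-1]:
            rows.append(tuple(cols[i] for i in picks))
    return rows
```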
@@ -179,7 +233,7 @@ def send_info(settings):

if path_exist(yaml_path):
with open(yaml_path, "r") as f:
yaml_data = yaml.load(f, yaml.safe_load)
yaml_data = yaml.load(f, yaml.FullLoader)
if yaml_data is not None:
token = yaml_data["hash_key"]
else:
88 changes: 49 additions & 39 deletions vesta/server.py
@@ -333,49 +333,59 @@ def client_get_update(self):
client_ip = request.remote_addr

self.client_update[client_ip] = {"page": 1, "queue":set()}
keep_update = True

while True:
# wait 1sec for client,
# and check if new page number is requested or not
page_num = None
with Timeout(self.settings.WS_RECEIVE_TIMEOUT, False):
page_num = ws.receive()

if page_num is None:
pass
# if new page number was requested
else:
page_num = int(page_num)
if page_num < 1:
page_num = 1

if page_num != self.client_update[client_ip]["page"]:
self.client_update[client_ip]["page"] = page_num

page_host_list = self.database.get_page_host_names(self.client_update[client_ip]["page"])
update_data = {"update":self.fetch_update(page_host_list),
"page_name_list":page_host_list,
try:
while keep_update:
# wait 1sec for client,
# and check if new page number is requested or not
page_num = None
with Timeout(self.settings.WS_RECEIVE_TIMEOUT, False):
page_num = ws.receive()

if page_num is None:
pass
# if new page number was requested
else:
page_num = int(page_num)
if page_num < 1:
page_num = 1

if page_num != self.client_update[client_ip]["page"]:
self.client_update[client_ip]["page"] = page_num

page_host_list = self.database.get_page_host_names(self.client_update[client_ip]["page"])
update_data = {"update":self.fetch_update(page_host_list),
"page_name_list":page_host_list,
"total_page_num":self.database.total_page}

self.client_update[client_ip]["queue"] = set()
try:
ws.send(json.dumps(update_data))
except:
del self.client_update[client_ip]
keep_update = False

if ws.closed:
if client_ip in self.client_update:
try:
del self.client_update[client_ip]
except:
pass
keep_update = False
else:
update_data = {"update":self.fetch_cache_update(self.client_update[client_ip]["queue"]),
"page_name_list":self.database.get_page_host_names(self.client_update[client_ip]["page"]),
"total_page_num":self.database.total_page}

self.client_update[client_ip]["queue"] = set()
ws.send(json.dumps(update_data))

if ws.closed:
if client_ip in self.client_update:
del self.client_update[client_ip]
if update_data["update"] != {}:
self.client_update[client_ip]["queue"] = set()
ws.send(json.dumps(update_data))

break
else:
update_data = {"update":self.fetch_cache_update(self.client_update[client_ip]["queue"]),
"page_name_list":self.database.get_page_host_names(self.client_update[client_ip]["page"]),
"total_page_num":self.database.total_page}

if update_data["update"] != {}:
self.client_update[client_ip]["queue"] = set()
ws.send(json.dumps(update_data))

# return empty content
return ('', 204)
# return empty content
return ('', 204)
except Exception as e:
print(e)
else:
abort(405)
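
For context, the reworked loop implements a simple protocol: the browser sends a requested page number as text over the websocket, and the server replies with a JSON object holding `update`, `page_name_list` and `total_page_num` (immediately on a page change, or whenever queued host updates arrive). A hypothetical client-side sketch — the endpoint path and port are placeholders, not taken from this diff, and it assumes the third-party `websocket-client` package rather than the browser code vesta actually ships:
```
import json
import websocket  # pip install websocket-client

# NOTE: "/update" and the port are hypothetical placeholders; the real route
# is registered elsewhere in vesta/server.py.
ws = websocket.create_connection("ws://127.0.0.1:8080/update")
ws.send("2")                    # request page 2
reply = json.loads(ws.recv())   # blocks until the server pushes the next update
print(reply["total_page_num"], list(reply["update"].keys()))
ws.close()
```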

