Merge pull request #23 from a-maumau/v1.2.4
v1.2.4 update
a-maumau authored Oct 1, 2020
2 parents 6c78558 + ed6af9e commit e0e305d
Showing 7 changed files with 171 additions and 77 deletions.
31 changes: 28 additions & 3 deletions README.md
@@ -33,10 +33,35 @@ TIMESTAMP_FORMAT: "DMY"
# it will be fed in python `re.search()`, so you can use regular expressions
VALID_NETWORK: "192.168.11.(129|1[3-9][0-9]|2[0-5][0-9])"
# this allows 192.168.11.129~255
...
```
An example is provided in `examples/local_settings.yaml`.

`nvidia-smi`'s process information output format has changed, so you need to specify a parsing version for the client script (the one that sends the GPU information).
Please specify the format version (1 or 2) using `--nvidia-smi_parse_version`, or set `NVIDIA_SMI_PARSE_VER` in the local settings .yaml file.

Version 1 is for the following format:
```
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| 0 16163 C python 240MiB |
| 1 16163 C python 8522MiB |
+-----------------------------------------------------------------------------+
```
Version 2 is for the following format (this is now the default):
```
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 24898 C python 17939MiB |
| 1 N/A N/A 24899 C python 17063MiB |
+-----------------------------------------------------------------------------+
```
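
If you are not sure which table your hosts print, the driver version is a reasonable hint: the notes in `send_gpu_info.py` observe that drivers up to around 440.100 print the version-1 table and drivers from around 450.51.06 print the version-2 table. A minimal sketch (not part of vesta) that guesses the value to pass to `--nvidia-smi_parse_version`:
```
import subprocess

def guess_parse_version(nvidia_smi="nvidia-smi"):
    """Guess the nvidia-smi process-table format (1 or 2) from the driver version.

    Assumption: the 440/450 boundary described above; adjust if your driver
    falls between the two versions mentioned there.
    """
    out = subprocess.check_output(
        [nvidia_smi, "--query-gpu=driver_version", "--format=csv,noheader"]
    ).decode("utf-8")
    major = int(out.strip().splitlines()[0].split(".")[0])
    return 2 if major >= 450 else 1
```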

# Usage
You can use a simple wrapper,
for Server
@@ -57,7 +82,7 @@ You will get like
```
$ curl "http://0.0.0.0:8080/?term=true"
+------------------------------------------------------------------------------+
| vesta ver. 1.0.1 gpu info. |
| vesta ver. 1.2.4 gpu info. |
+------------------+------------------------+-----------------+--------+-------+
| host | gpu | memory usage | volat. | temp. |
+------------------+------------------------+-----------------+--------+-------+
@@ -78,7 +103,7 @@ If you want to see detail information you can use `detail` option like `http://<
You will get something like
```
$ curl "http://0.0.0.0:8080/?term=true&detail=true"
vesta ver. 1.0.1
vesta ver. 1.2.4
#### mau_local :: 127.0.0.1 ####################################################
last update: 24/03/2019 20:27:10
3 changes: 3 additions & 0 deletions examples/local_settings.yaml
@@ -12,6 +12,9 @@ TOKEN: '0000'
# how many information to read in each page
PAGE_PER_HOST_NUM: 8

# nvidia-smi parsing version
NVIDIA_SMI_PARSE_VER: 2

MAIN_PAGE_TITLE: "AWSOME GPUs"
MAIN_PAGE_DESCRIPTION: "awsome description"
TABLE_PAGE_TITLE: "AWSOME Table"
3 changes: 2 additions & 1 deletion gpu_info_sender.py
@@ -18,14 +18,15 @@
parser.add_argument('--yaml_dir', dest='YAML_DIR', type=str, default="data", help='the dir of yaml which token is saved.')
parser.add_argument('--yaml_name', dest='YAML_NAME', type=str, default="token", help='path of yaml file.')
parser.add_argument('--nvidia-smi', dest='NVIDIA_SMI', type=str, default="nvidia-smi", help='if you want to specify nvidia-smi command.')
parser.add_argument('--nvidia-smi_parse_version', dest='NVIDIA_SMI_PARSE_VER', type=int, default=2, help="since nvidia-smi's process information format has changed, you need to set a suitable version.\n 1: (GPU, PID, Type, Process name, Usage) format\n 2: (GPU, GI, CI, PID, Type, Process name, GPU Memory) format\nfor more detail, see send_gpu_info.py's get_gpu_info()\n default is 2.")
parser.add_argument('--use_https', dest='USE_HTTPS', action="store_true", default=False, help='')

settings = parser.parse_args()

if settings.local_settings_yaml_path is not None:
try:
with open(settings.local_settings_yaml_path, "r") as yaml_file:
yaml_data = yaml.load(yaml_file, yaml.safe_load)
yaml_data = yaml.load(yaml_file, yaml.FullLoader)
except Exception as e:
print(e)
yaml_data = []
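
This hunk also fixes the loader argument: the old call passed `yaml.safe_load` (a function, not a Loader class) as the second argument to `yaml.load`, which PyYAML cannot use as a loader, so the `except` branch printed the error and fell back to an empty settings list. A minimal sketch of the two working alternatives (assuming PyYAML >= 5.1, as pinned in requirements.txt):
```
import yaml

# FullLoader (used in this commit): resolves standard YAML tags, needs PyYAML >= 5.1
with open("examples/local_settings.yaml", "r") as yaml_file:
    yaml_data = yaml.load(yaml_file, Loader=yaml.FullLoader)

# safe_load: the more restrictive alternative for untrusted files
with open("examples/local_settings.yaml", "r") as yaml_file:
    yaml_data = yaml.safe_load(yaml_file)
```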
3 changes: 2 additions & 1 deletion requirements.txt
@@ -4,4 +4,5 @@ PyYAML==5.1
schedule==0.5.0
gevent==1.2.1
gevent-websocket==0.10.1
slackclient==2.5.0
slackclient==2.9.1
nest-asyncio==1.4.1
2 changes: 1 addition & 1 deletion vesta/__version__.py
@@ -1,4 +1,4 @@
__title__ = 'vesta'
__description__ = 'simple gpu monitoring script'
__url__ = 'https://github.com/a-maumau/vesta'
__version__ = '1.2.2'
__version__ = '1.2.4'
118 changes: 86 additions & 32 deletions vesta/send_gpu_info.py
@@ -26,37 +26,63 @@

def get_gpu_info(settings):
"""
parsing nvidia-smi:
version 1 is for the following format
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| 0 16163 C python 240MiB |
| 1 16163 C super_python 8522MiB |
+-----------------------------------------------------------------------------+
at least, driver versions up to around 440.100 have this format
version 2 is for the following format
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 24898 C nython 17939MiB |
| 1 N/A N/A 24899 C rython 17063MiB |
+-----------------------------------------------------------------------------+
driver versions around 450.51.06 and newer have this format
these version differences affect the parsing done with the awk command
example output of this function
{'gpu:0',
{'available_memory': '10934',
'device_num': '0',
'gpu_name': 'GeForce GTX 1080 Ti',
'gpu_volatile': '0',
'processes': [{'name': '/usr/bin/X',
'pid': '1963',
'used_memory': '148',
'user': 'root'},
{'name': 'compiz',
'pid': '3437',
'used_memory': '84',
'user': 'user1'}],
'temperature': '36',
'timestamp': '2018/11/30 23:29:47.115',
'total_memory': '11169',
'used_memory': '235',
'uuid': 'GPU-...'}),
{'gpu:1',
{'available_memory': '11170',
'device_num': '1',
'gpu_name': 'GeForce GTX 1080 Ti',
'gpu_volatile': '0',
'processes': [],
'temperature': '38',
'timestamp': '2018/11/30 23:29:47.117',
'total_memory': '11172',
'used_memory': '2',
'uuid': 'GPU-...'}}
"""

# for me
@@ -103,7 +129,35 @@ def get_gpu_info(settings):
gpu_info_dict["gpu:{}".format(line[0])] = {k:int(v) if k in NUMBERS else v for k, v in zip(alias_list+["processes"], line+[[]])}

# get gpu processes ##################################################################
cmd = "nvidia-smi | awk '$2==\"Processes:\" {{p=1}} p && $2 ~ /[0-9]+/ && $3 > 0 {{print $2,$3,$5,$6}}'".format(settings.NVIDIA_SMI)
if settings.NVIDIA_SMI_PARSE_VER == 1:
"""
parse
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| 0 16163 C python 240MiB |
| 1 16163 C super_python 8522MiB |
+-----------------------------------------------------------------------------+
"""
cmd = "nvidia-smi | awk '$2==\"Processes:\" {{p=1}} p && $2 ~ /[0-9]+/ && $3 > 0 {{print $2,$3,$5,$6}}'".format(settings.NVIDIA_SMI)
elif settings.NVIDIA_SMI_PARSE_VER == 2:
"""
parse
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 24898 C nython 17939MiB |
| 1 N/A N/A 24899 C rython 17063MiB |
+-----------------------------------------------------------------------------+
"""
cmd = "nvidia-smi | awk '$2==\"Processes:\" {{p=1}} p && $2 ~ /[0-9]+/ && $3 > 0 {{print $2,$5,$7,$8}}'".format(settings.NVIDIA_SMI)
else:
# this is same as NVIDIA_SMI_PARSE_VER == 1
cmd = "nvidia-smi | awk '$2==\"Processes:\" {{p=1}} p && $2 ~ /[0-9]+/ && $3 > 0 {{print $2,$3,$5,$6}}'".format(settings.NVIDIA_SMI)

output = subprocess.check_output(cmd, shell=True).decode("utf-8")
lines = output.split('\n')
lines = [ line.strip().split(" ") for line in lines if line.strip() != '' ]
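
The only difference between the two branches is which awk fields are printed: in the version-1 table the PID is awk's `$3` (the leading `|` counts as `$1`), while the extra GI/CI ID columns in the version-2 table push it to `$5`, so the printed fields change from `$2,$3,$5,$6` to `$2,$5,$7,$8` (GPU index, PID, process name, memory usage in both cases). Note that the command string is passed through `.format(settings.NVIDIA_SMI)` even though it hard-codes `nvidia-smi`, which is why the awk braces are doubled as `{{ }}`. A pure-Python sketch of the same extraction (not vesta code; column positions follow the tables shown above):
```
def parse_process_rows(nvidia_smi_output, parse_version=2):
    """Extract (gpu, pid, process_name, used_memory) tuples from nvidia-smi output.

    A sketch of what the awk one-liners above do, not vesta code.
    Column positions follow the tables shown above:
      version 1: GPU PID Type Name Usage
      version 2: GPU GI CI PID Type Name Usage
    """
    picks = (0, 1, 3, 4) if parse_version == 1 else (0, 3, 5, 6)
    rows, in_processes = [], False
    for line in nvidia_smi_output.splitlines():
        if "Processes:" in line:
            in_processes = True          # only rows below this header are process rows
            continue
        cols = line.strip("| \n").split()
        # a process row starts with a numeric GPU index
        if in_processes and cols and cols[0].isdigit() and len(cols) > picks[-1]:
            rows.append(tuple(cols[i] for i in picks))
    return rows
```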
@@ -179,7 +233,7 @@ def send_info(settings):

if path_exist(yaml_path):
with open(yaml_path, "r") as f:
yaml_data = yaml.load(f, yaml.safe_load)
yaml_data = yaml.load(f, yaml.FullLoader)
if yaml_data is not None:
token = yaml_data["hash_key"]
else:
88 changes: 49 additions & 39 deletions vesta/server.py
@@ -333,49 +333,59 @@ def client_get_update(self):
client_ip = request.remote_addr

self.client_update[client_ip] = {"page": 1, "queue":set()}
keep_update = True

while True:
# wait 1sec for client,
# and check if new page number is requested or not
page_num = None
with Timeout(self.settings.WS_RECEIVE_TIMEOUT, False):
page_num = ws.receive()

if page_num is None:
pass
# if new page number was requested
else:
page_num = int(page_num)
if page_num < 1:
page_num = 1

if page_num != self.client_update[client_ip]["page"]:
self.client_update[client_ip]["page"] = page_num

page_host_list = self.database.get_page_host_names(self.client_update[client_ip]["page"])
update_data = {"update":self.fetch_update(page_host_list),
"page_name_list":page_host_list,
try:
while keep_update:
# wait 1sec for client,
# and check if new page number is requested or not
page_num = None
with Timeout(self.settings.WS_RECEIVE_TIMEOUT, False):
page_num = ws.receive()

if page_num is None:
pass
# if new page number was requested
else:
page_num = int(page_num)
if page_num < 1:
page_num = 1

if page_num != self.client_update[client_ip]["page"]:
self.client_update[client_ip]["page"] = page_num

page_host_list = self.database.get_page_host_names(self.client_update[client_ip]["page"])
update_data = {"update":self.fetch_update(page_host_list),
"page_name_list":page_host_list,
"total_page_num":self.database.total_page}

self.client_update[client_ip]["queue"] = set()
try:
ws.send(json.dumps(update_data))
except:
del self.client_update[client_ip]
keep_update = False

if ws.closed:
if client_ip in self.client_update:
try:
del self.client_update[client_ip]
except:
pass
keep_update = False
else:
update_data = {"update":self.fetch_cache_update(self.client_update[client_ip]["queue"]),
"page_name_list":self.database.get_page_host_names(self.client_update[client_ip]["page"]),
"total_page_num":self.database.total_page}

self.client_update[client_ip]["queue"] = set()
ws.send(json.dumps(update_data))

if ws.closed:
if client_ip in self.client_update:
del self.client_update[client_ip]
if update_data["update"] != {}:
self.client_update[client_ip]["queue"] = set()
ws.send(json.dumps(update_data))

break
else:
update_data = {"update":self.fetch_cache_update(self.client_update[client_ip]["queue"]),
"page_name_list":self.database.get_page_host_names(self.client_update[client_ip]["page"]),
"total_page_num":self.database.total_page}

if update_data["update"] != {}:
self.client_update[client_ip]["queue"] = set()
ws.send(json.dumps(update_data))

# return empty content
return ('', 204)
# return empty content
return ('', 204)
except Exception as e:
print(e)
else:
abort(405)
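
For context, the reworked loop implements a simple protocol: the browser sends a requested page number as text over the websocket, and the server replies with a JSON object holding `update`, `page_name_list` and `total_page_num` (immediately on a page change, or whenever queued host updates arrive). A hypothetical client-side sketch — the endpoint path and port are placeholders, not taken from this diff, and it assumes the third-party `websocket-client` package rather than the browser code vesta actually ships:
```
import json
import websocket  # pip install websocket-client

# NOTE: "/update" and the port are hypothetical placeholders; the real route
# is registered elsewhere in vesta/server.py.
ws = websocket.create_connection("ws://127.0.0.1:8080/update")
ws.send("2")                    # request page 2
reply = json.loads(ws.recv())   # blocks until the server pushes the next update
print(reply["total_page_num"], list(reply["update"].keys()))
ws.close()
```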

