Skip to content

Commit 79abaa8

Browse files
Merge pull request #16 from acm-uiuc/gpu_memory_data
New GPU Memory data methods
2 parents a1855aa + 422faba commit 79abaa8

File tree

1 file changed

+44
-22
lines changed

1 file changed

+44
-22
lines changed

nvdocker/nvdocker.py

+44-22
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,24 @@
33
from subprocess import check_output
44
import re
55
import docker
6+
from py3nvml.py3nvml import *
67

78
class NVDockerClient:
89

10+
# Class-level flag: set to True once nvmlInit() has been called in this process,
# so NVML is loaded at most once no matter how many clients are created.
nvml_initialized = False
11+
912
def __init__(self):
    """Create a client bound to the local Docker daemon and ensure NVML is loaded."""
    client = docker.from_env(version="auto")
    self.docker_client = client
    # Load the NVIDIA management library up front so later GPU queries can't fail lazily.
    NVDockerClient.__check_nvml_init()
15+
16+
@staticmethod
def __check_nvml_init():
    """Load NVML on first use; subsequent calls are no-ops.

    Initializes the NVIDIA Management Library exactly once per process,
    prints the detected driver version on that first load, and records
    the fact in the class-level ``nvml_initialized`` flag.
    """
    # NOTE: the original placed this text ABOVE the def, where it was a
    # no-op string statement rather than a docstring, and omitted
    # @staticmethod (so instance access would wrongly bind self).
    if not NVDockerClient.nvml_initialized:
        nvmlInit()
        print("NVIDIA Driver Version:", nvmlSystemGetDriverVersion())
        NVDockerClient.nvml_initialized = True
1124

1225
#TODO: Testing on MultiGPU
1326
def create_container(self, image, **kwargs):
@@ -151,28 +164,37 @@ def exec_run(self, cid, cmd):
151164
return c.exec_run(cmd)
152165

153166
@staticmethod
def gpu_info():
    """Enumerate visible GPUs via NVML.

    Returns:
        dict mapping GPU index -> {"gpu_handle": <NVML handle>,
        "gpu_name": <device name>}.
    """
    NVDockerClient.__check_nvml_init()
    devices = {}
    for index in range(nvmlDeviceGetCount()):
        handle = nvmlDeviceGetHandleByIndex(index)
        devices[index] = {"gpu_handle": handle, "gpu_name": nvmlDeviceGetName(handle)}
    return devices
163176

164177
@staticmethod
def gpu_memory_usage(id):
    """Report memory usage for one GPU.

    Args:
        id: GPU index as returned by :meth:`gpu_info`. (The name shadows
            the ``id`` builtin but is kept for backward compatibility.)

    Returns:
        dict with keys "used_mb" and "free_mb" (floats, decimal
        megabytes), or ``None`` when ``id`` is not a known GPU index.
    """
    gpus = NVDockerClient.gpu_info()
    if id not in gpus:  # idiomatic membership test; was `not in gpus.keys()`
        return None
    memory = nvmlDeviceGetMemoryInfo(gpus[id]["gpu_handle"])
    # NVML reports bytes; convert to megabytes.
    return {"used_mb": memory.used / 1e6, "free_mb": memory.free / 1e6}
189+
190+
@staticmethod
def least_used_gpu():
    """Return the index of the GPU with the least used memory.

    Returns:
        The GPU index (a key of :meth:`gpu_info`) with the smallest
        "used_mb" figure, or ``None`` when no GPUs are present.
        Ties keep the first (lowest-index) GPU, matching the original
        manual scan.
    """
    # Fixes vs. original: dropped the stray C-style semicolons and the
    # dead 1e9 sentinel (the `lowest_key is None` guard always fired
    # first), replacing the hand-rolled scan with min(..., key=...).
    gpus = NVDockerClient.gpu_info()
    if not gpus:
        return None
    return min(
        gpus,
        key=lambda gpu_id: NVDockerClient.gpu_memory_usage(gpu_id)["used_mb"],
    )

0 commit comments

Comments (0)