Commit
Passing no return test
isVoid committed Dec 3, 2024
1 parent 81118de commit 8805e99
Showing 3 changed files with 29 additions and 23 deletions.
5 changes: 3 additions & 2 deletions numba_cuda/numba/cuda/dispatcher.py
@@ -341,8 +341,6 @@ def launch(self, args, griddim, blockdim, stream=0, sharedmem=0):
         # Prepare kernel
         cufunc = self._codelibrary.get_cufunc()

-        rtsys.allocate()
-
         if self.debug:
             excname = cufunc.name + "__errcode__"
             excmem, excsz = cufunc.module.get_global_symbol(excname)
@@ -364,8 +362,11 @@ def launch(self, args, griddim, blockdim, stream=0, sharedmem=0):

         stream_handle = stream and stream.handle or zero_stream

+        rtsys.allocate(stream_handle)
         rtsys.set_memsys_to_module(cufunc.module, stream_handle)
         rtsys.initialize(stream_handle)
+        rtsys.enable(stream_handle)
+        rtsys.print_memsys(0)

         # Invoke kernel
         driver.launch_kernel(cufunc.handle,
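For orientation (not part of the commit), a minimal sketch of the per-launch NRT setup sequence that launch() now performs, written against the rtsys API shown in nrt.py below; the helper name setup_nrt_for_launch is hypothetical, and cufunc / stream_handle are assumed to be prepared exactly as in launch():

# Hypothetical helper mirroring the launch-time sequence in the hunk above.
from numba.cuda.runtime.nrt import rtsys

def setup_nrt_for_launch(cufunc, stream_handle):
    rtsys.allocate(stream_handle)                             # device buffer backing NRT_MemSys
    rtsys.set_memsys_to_module(cufunc.module, stream_handle)  # point the kernel module at that buffer
    rtsys.initialize(stream_handle)                           # zero the counters
    rtsys.enable(stream_handle)                               # start counting alloc/free events
    rtsys.print_memsys(0)                                     # debug dump on stream 0, as in the diff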
43 changes: 23 additions & 20 deletions numba_cuda/numba/cuda/runtime/nrt.py
@@ -38,13 +38,13 @@ def _compile_memsys_module(self):

         self._memsys_module = module

-    def _ensure_allocate(self):
+    def _ensure_allocate(self, stream):
         if self._memsys is not None:
             return

-        self.allocate()
+        self.allocate(stream)

-    def allocate(self):
+    def allocate(self, stream):
         from numba.cuda import device_array

         if self._memsys_module is None:
@@ -53,7 +53,10 @@ def allocate(self):
         if self._memsys is None:
             # Allocate space for NRT_MemSys
             # TODO: determine the size of NRT_MemSys at runtime
-            self._memsys = device_array((40,), dtype="i1")
+            self._memsys = device_array((40,), dtype="i1", stream=stream)
+            # TODO: Memsys module needs a stream that's consistent with the
+            # system's stream.
+            self.set_memsys_to_module(self._memsys_module, stream=stream)

     def _single_thread_launch(self, module, stream, name, params=()):
         func = module.get_function(name)
@@ -84,18 +87,17 @@ def initialize(self, stream):

     def enable(self, stream):
         self._single_thread_launch(
-            self._memsys_module, stream, "NR_MemSys_enable")
+            self._memsys_module, stream, "NRT_MemSys_enable")

     def disable(self, stream):
         self._single_thread_launch(
-            self._memsys_module, stream, "NR_MemSys_disable")
+            self._memsys_module, stream, "NRT_MemSys_disable")

     def _copy_memsys_to_host(self, stream=0):
-        self._ensure_allocate()
+        self._ensure_allocate(stream)
         self._ensure_initialize(stream)

-        # Q: What stream should we execute this on?
         # read the stats
         dt = np.dtype([
             ('alloc', np.uint64),
             ('free', np.uint64),
@@ -116,33 +118,34 @@ def _copy_memsys_to_host(self, stream=0):
         return stats_for_read[0]

     def get_allocation_stats(self):
+        # This is commented out to test the 700 error code from cuda.
+        # if self._memsys is None or (not self._initialized):
+        #     return _nrt_mstats(
+        #         alloc=0,
+        #         free=0,
+        #         mi_alloc=0,
+        #         mi_free=0
+        #     )
         memsys = self._copy_memsys_to_host()
         return _nrt_mstats(
-            alloc=memsys.alloc,
-            free=memsys.free,
-            mi_alloc=memsys.mi_alloc,
-            mi_free=memsys.mi_free
+            alloc=memsys["alloc"],
+            free=memsys["free"],
+            mi_alloc=memsys["mi_alloc"],
+            mi_free=memsys["mi_free"]
         )

     def set_memsys_to_module(self, module, stream):
         if self._memsys is None:
             raise RuntimeError(
                 "Please allocate NRT Memsys first before initializing.")

+        print(f"Setting {self._memsys.device_ctypes_pointer} to {module}")
         self._single_thread_launch(
             module,
             stream,
             "NRT_MemSys_set",
             [self._memsys.device_ctypes_pointer,]
         )

     def print_memsys(self, stream):
         cuda.synchronize()
         self._single_thread_launch(
             self._memsys_module,
             stream,
             "NRT_MemSys_print"
         )


 rtsys = _Runtime()
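A side note on the change from attribute access (memsys.alloc) to item access (memsys["alloc"]) in get_allocation_stats: _copy_memsys_to_host returns stats_for_read[0], a plain NumPy structured scalar, and its fields are read by name with ["field"], not as attributes. A small CUDA-free illustration, assuming the dtype fields beyond 'alloc'/'free' mirror the _nrt_mstats keys:

import numpy as np

# Same layout as the dtype built in _copy_memsys_to_host (the trailing two
# fields are assumed to match the _nrt_mstats keys).
dt = np.dtype([
    ('alloc', np.uint64),
    ('free', np.uint64),
    ('mi_alloc', np.uint64),
    ('mi_free', np.uint64),
])
stats_for_read = np.zeros(1, dtype=dt)
rec = stats_for_read[0]           # numpy.void structured scalar
print(rec["alloc"], rec["free"])  # item access by field name works
# rec.alloc would raise AttributeError unless the array were viewed as np.recarray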
4 changes: 3 additions & 1 deletion numba_cuda/numba/cuda/tests/nrt/test_nrt.py
@@ -25,7 +25,7 @@ def test_no_return(self):
"""
n = 10

@cuda.jit
@cuda.jit(debug=True)
def kernel():
for i in range(n):
temp = cuda_empty(2, np.float64) # noqa: F841
@@ -35,6 +35,8 @@ def kernel():

         with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
             kernel[1,1]()
+            print("After kernel launch...")
+            rtsys.print_memsys(0)
             cur_stats = rtsys.get_allocation_stats()
             self.assertEqual(cur_stats.alloc - init_stats.alloc, n)
             self.assertEqual(cur_stats.free - init_stats.free, n)
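For reference, the patching idiom the test relies on: create=True lets unittest.mock.patch set numba.config.CUDA_ENABLE_NRT even when that attribute is not defined in the installed numba, and removes (or restores) it on exit. A standalone sketch of just that behaviour:

from unittest.mock import patch

import numba

# create=True allows patching an attribute that may not exist yet.
with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
    assert numba.config.CUDA_ENABLE_NRT is True   # visible inside the block
# On exit, patch deletes the attribute again (or restores a pre-existing value).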