Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use cuda.bindings and cuda.core for Linker #133

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions numba_cuda/numba/cuda/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,11 @@ def get_cufunc(self):
return cufunc

cubin = self.get_cubin(cc=device.compute_capability)
module = ctx.create_module_image(cubin)
#module = ctx.create_module_image(cubin)

# Load
cufunc = module.get_function(self._entry_name)
#cufunc = module.get_function(self._entry_name)
cufunc = cubin.get_kernel(self._entry_name)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we switched to the context-independent loading API, the CUDADispatcher.bind method should probably be renamed, because calling get_cufunc no longer binds a context.


# Populate caches
self._cufunc_cache[device.id] = cufunc
Expand Down
109 changes: 107 additions & 2 deletions numba_cuda/numba/cuda/cudadrv/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@
import numpy as np
from collections import namedtuple, deque

from cuda.core.experimental import (
Linker as _CUDALinker,
LinkerOptions as _CUDALinkerOptions,
ObjectCode,
Program,
ProgramOptions
)

from numba import mviewbuf
from numba.core import utils, serialize, config
from .error import CudaSupportError, CudaDriverError
Expand Down Expand Up @@ -2597,10 +2605,11 @@ def new(cls,
linker = PyNvJitLinker

elif config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
# TODO - who handles MVC now?
linker = MVCLinker
else:
if USE_NV_BINDING:
linker = CudaPythonLinker
linker = CUDALinker
else:
linker = CtypesLinker

Expand Down Expand Up @@ -2637,7 +2646,6 @@ def add_cu(self, cu, name):
with driver.get_active_context() as ac:
dev = driver.get_device(ac.devnum)
cc = dev.compute_capability

ptx, log = nvrtc.compile(cu, name, cc)

if config.DUMP_ASSEMBLY:
Expand Down Expand Up @@ -2749,6 +2757,103 @@ def complete(self):
"""


class CUDALinker(Linker):
    """Linker implemented on top of ``cuda.core.experimental``.

    CUDA C++ and cubin inputs are collected as ``ObjectCode`` objects by the
    ``add_*`` methods and linked into a cubin by :meth:`complete`. PTX input
    is handled differently: :meth:`add_ptx` compiles it straight to a cubin
    and :meth:`complete` then returns that cubin as-is.

    NOTE(review): because of that short-circuit, any ``ObjectCode`` collected
    via :meth:`add_cu` / :meth:`add_cubin` is ignored once :meth:`add_ptx`
    has been called. Confirm whether PTX should instead be linked together
    with the other inputs.
    """

    def __init__(self, max_registers=None, lineinfo=False, cc=None):
        """Record the link options; no linker object is created yet.

        :param max_registers: forwarded as ``max_register_count`` to both the
            linker options and the program options.
        :param lineinfo: forwarded as ``lineinfo`` to both option objects.
        :param cc: compute capability as a ``(major, minor)`` pair; used to
            build the ``sm_XY`` architecture string.
        :raises RuntimeError: if *cc* is ``None``, since the architecture
            string cannot be derived without it.
        """
        if cc is None:
            # Fail with a clear message instead of a TypeError on cc[0].
            raise RuntimeError(
                "CUDALinker requires Compute Capability to be specified, "
                "but cc is None"
            )

        arch = f"sm_{cc[0] * 10 + cc[1]}"
        self.options = _CUDALinkerOptions(
            max_register_count=max_registers,
            lineinfo=lineinfo,
            arch=arch
        )

        self.max_registers = max_registers
        self.lineinfo = lineinfo
        self.cc = cc
        self.arch = arch
        self.lto = False

        self._complete = False
        self._object_codes = []
        # Result of linking; must exist before complete() inspects it.
        self._linked = None
        # Set once something has actually been linked (need at least one
        # program); the log properties read from it.
        self.linker = None

    @property
    def info_log(self):
        """Info log of the underlying linker.

        :raises ValueError: if no linker object exists yet.
        """
        if not self.linker:
            raise ValueError("Not Initialized")
        return self.linker.get_info_log()

    @property
    def error_log(self):
        """Error log of the underlying linker.

        :raises ValueError: if no linker object exists yet.
        """
        if not self.linker:
            raise ValueError("Not Initialized")
        return self.linker.get_error_log()

    @staticmethod
    def _as_text(source):
        """Return *source* as ``str``, decoding UTF-8 when given bytes."""
        if isinstance(source, (bytes, bytearray)):
            return source.decode('utf-8')
        return source

    def _program_options(self):
        """Build the ``ProgramOptions`` shared by PTX and CUDA C++ inputs."""
        return ProgramOptions(
            arch=self.arch,
            lineinfo=self.lineinfo,
            max_register_count=self.max_registers
        )

    def add_ptx(self, ptx, name='<cudapy-ptx>'):
        """Compile PTX to a cubin and store it as the link result.

        :param ptx: PTX source, as UTF-8 ``bytes`` or as ``str``.
        :param name: accepted for interface compatibility; currently unused.
        """
        prog = Program(self._as_text(ptx), 'ptx', self._program_options())

        # NOTE(review): compiling a PTX program to a cubin presumably runs a
        # link step internally -- confirm against the cuda.core docs.
        obj = prog.compile('cubin')
        self._complete = True
        self._linked = obj
        # The program is deliberately not closed here: its linker is kept so
        # that info_log / error_log have something to read from.
        # NOTE(review): `_linker` is a private attribute of Program.
        self.linker = prog._linker

    def add_cu(self, cu, name='<cudapy-cu>'):
        """Compile CUDA C++ source to PTX and queue it for linking.

        :param cu: CUDA C++ source, as UTF-8 ``bytes`` or as ``str``.
        :param name: accepted for interface compatibility; currently unused.
        """
        prog = Program(self._as_text(cu), 'c++', self._program_options())
        try:
            obj = prog.compile('ptx')
        finally:
            # Release the program even when compilation fails.
            prog.close()
        self._object_codes.append(obj)

    def add_cubin(self, cubin, name='<cudapy-cubin>'):
        """Wrap a cubin in an ``ObjectCode`` and queue it for linking.

        :param cubin: cubin data, passed to ``ObjectCode.from_cubin``.
        :param name: accepted for interface compatibility; currently unused.
        """
        obj = ObjectCode.from_cubin(cubin)
        self._object_codes.append(obj)

    def add_file(self, path, kind):
        """Read *path* and add its contents according to *kind*.

        :param path: path of the file to add.
        :param kind: ``FILE_EXTENSION_MAP['ptx']``,
            ``FILE_EXTENSION_MAP['cubin']`` or the string ``'cu'``.
        :raises LinkerError: if the file does not exist or *kind* is not one
            of the supported values.
        """
        try:
            with open(path, 'rb') as f:
                data = f.read()
        except FileNotFoundError as err:
            raise LinkerError(f'{path} not found') from err

        name = pathlib.Path(path).name
        if kind == FILE_EXTENSION_MAP['ptx']:
            fn = self.add_ptx
        elif kind == FILE_EXTENSION_MAP['cubin']:
            fn = self.add_cubin
        elif kind == 'cu':
            fn = self.add_cu
        else:
            raise LinkerError(f"Don't know how to link {kind}")

        fn(data, name)

    def complete(self):
        """Return the linked cubin, linking the queued inputs if needed.

        If :meth:`add_ptx` already produced a cubin (or a previous call to
        this method did), that result is returned unchanged. Otherwise all
        queued ``ObjectCode`` objects are linked into a cubin.

        :raises LinkerError: if nothing has been added to the linker.
        """
        if self._linked is not None:
            return self._linked

        if not self._object_codes:
            raise LinkerError("No code has been added to the linker")

        # Keep the linker on the instance so info_log / error_log can read
        # from it, including after a failed link.
        # NOTE(review): the linker is not closed explicitly, because closing
        # it here would make the logs unreadable; confirm that cuda.core
        # releases the handle when the object is garbage collected.
        self.linker = _CUDALinker(*self._object_codes, options=self.options)
        self._linked = self.linker.link('cubin')
        self._complete = True
        return self._linked


class MVCLinker(Linker):
"""
Linker supporting Minor Version Compatibility, backed by the cubinlinker
Expand Down
3 changes: 1 addition & 2 deletions numba_cuda/numba/cuda/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ def __init__(self, py_func, argtypes, link=None, debug=False,
self.debug = debug
self.lineinfo = lineinfo
self.extensions = extensions or []

nvvm_options = {
'fastmath': fastmath,
'opt': 3 if opt else 0
Expand Down Expand Up @@ -406,7 +405,7 @@ def launch(self, args, griddim, blockdim, stream=0, sharedmem=0):
stream_handle = stream and stream.handle or zero_stream

# Invoke kernel
driver.launch_kernel(cufunc.handle,
driver.launch_kernel(cufunc._handle,
*griddim,
*blockdim,
sharedmem,
Expand Down