Skip to content

Commit

Permalink
GPU type 3 in Python interface (#588)
Browse files Browse the repository at this point in the history
* py: add basic type 3 interface for CUDA

* cuda: fix stray printf

* py: add type 3 testing for GPU

* py: fix error msg for type 3 plan
  • Loading branch information
janden authored Jan 24, 2025
1 parent a6bf689 commit 622296e
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 14 deletions.
8 changes: 4 additions & 4 deletions python/cufinufft/cufinufft/_cufinufft.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,14 +116,14 @@ class NufftOpts(ctypes.Structure):

_set_pts = lib.cufinufft_setpts
_set_pts.argtypes = [
c_void_p, c_int64, c_void_p, c_void_p, c_void_p, ctypes.c_int, c_double_p,
c_double_p, c_double_p]
c_void_p, c_int64, c_void_p, c_void_p, c_void_p, ctypes.c_int64, c_void_p,
c_void_p, c_void_p]
_set_pts.restype = c_int

_set_ptsf = lib.cufinufftf_setpts
_set_ptsf.argtypes = [
c_void_p, c_int64, c_void_p, c_void_p, c_void_p, ctypes.c_int, c_float_p,
c_float_p, c_float_p]
c_void_p, c_int64, c_void_p, c_void_p, c_void_p, ctypes.c_int64, c_void_p,
c_void_p, c_void_p]
_set_ptsf.restype = c_int

_exec_plan = lib.cufinufft_execute
Expand Down
59 changes: 50 additions & 9 deletions python/cufinufft/cufinufft/_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,25 @@ def __init__(self, nufft_type, n_modes, n_trans=1, eps=1e-6, isign=None,
else:
raise TypeError("Expected complex64 or complex128.")

if isinstance(n_modes, numbers.Integral):
n_modes = (n_modes,)
elif isinstance(n_modes, collections.abc.Iterable):
n_modes = tuple(n_modes)
if nufft_type == 3:
if isinstance(n_modes, numbers.Integral):
dim = n_modes
n_modes = (1,) * dim # Ignored, can be anything
else:
raise ValueError("For a type 3 plan, n_modes_or_dim must be a single number, the dimension")
else:
raise ValueError(f"Invalid n_modes '{n_modes}'")
if isinstance(n_modes, numbers.Integral):
n_modes = (n_modes,)
elif isinstance(n_modes, collections.abc.Iterable):
n_modes = tuple(n_modes)
else:
raise ValueError(f"Invalid n_modes '{n_modes}'")
dim = len(n_modes)

if dim not in [1, 2, 3]:
raise ValueError("Only dimensions 1, 2, and 3 supported")

self.dim = len(n_modes)
self.dim = dim
self.type = nufft_type
self.isign = isign
self.eps = float(eps)
Expand Down Expand Up @@ -218,6 +229,17 @@ def setpts(self, x, y=None, z=None, s=None, t=None, u=None):

M = _compat.get_array_size(_x)

if self.type == 3:
_s = _ensure_array_type(s, "s", self.real_dtype)
_t = _ensure_array_type(t, "t", self.real_dtype)
_u = _ensure_array_type(u, "u", self.real_dtype)

_s, _t, _u = _ensure_valid_pts(_s, _t, _u, self.dim)

N = _compat.get_array_size(_s)
else:
N = 0

# Because FINUFFT/cufinufft are internally column major,
# we will reorder the pts axes. Reordering references
# save us from having to actually transpose signal data
Expand All @@ -235,15 +257,31 @@ def setpts(self, x, y=None, z=None, s=None, t=None, u=None):
if self.dim >= 2:
fpts_axes.insert(0, _compat.get_array_ptr(_y))
self._references.append(_y)

if self.dim >= 3:
fpts_axes.insert(0, _compat.get_array_ptr(_z))
self._references.append(_z)

# Do the same for type 3
if self.type == 3:
fpts_axes_t3 = [_compat.get_array_ptr(_s), None, None]
self._references.append(_s)
if self.dim >= 2:
fpts_axes_t3.insert(0, _compat.get_array_ptr(_t))
self._references.append(_t)

if self.dim >= 3:
fpts_axes_t3.insert(0, _compat.get_array_ptr(_u))
self._references.append(_u)
else:
fpts_axes_t3 = [None] * 3

# Then take three items off the stack as our reordered axis.
ier = self._setpts(self._plan, M, *fpts_axes[:3], 0, None, None, None)
ier = self._setpts(self._plan,
M, *fpts_axes[:3],
N, *fpts_axes_t3[:3])

self.nj = M
self.nk = N

if ier != 0:
raise RuntimeError('Error setting non-uniform points.')
Expand Down Expand Up @@ -280,6 +318,9 @@ def execute(self, data, out=None):
elif self.type == 2:
req_data_shape = (self.n_trans, *self.n_modes)
req_out_shape = (self.nj,)
elif self.type == 3:
req_data_shape = (self.n_trans, self.nj)
req_out_shape = (self.nk,)

_data, data_shape = _ensure_array_shape(_data, "data", req_data_shape,
allow_reshape=True)
Expand All @@ -295,7 +336,7 @@ def execute(self, data, out=None):
else:
_out = _ensure_array_shape(_out, "out", req_out_shape)

if self.type == 1:
if self.type in [1, 3]:
ier = self._exec_plan(self._plan, _compat.get_array_ptr(_data),
_compat.get_array_ptr(_out))
elif self.type == 2:
Expand Down
38 changes: 38 additions & 0 deletions python/cufinufft/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,44 @@ def _execute(*args, **kwargs):
utils.verify_type2(k, fk, c, tol)


@pytest.mark.parametrize("dtype", DTYPES)
@pytest.mark.parametrize("dim", list(set(len(shape) for shape in SHAPES)))
@pytest.mark.parametrize("n_source_pts", MS)
@pytest.mark.parametrize("n_target_pts", MS)
@pytest.mark.parametrize("output_arg", OUTPUT_ARGS)
def test_type3(to_gpu, to_cpu, dtype, dim, n_source_pts, n_target_pts, output_arg):
if dtype == np.float32 and dim >= 2 and min(n_source_pts, n_target_pts) > 4000:
pytest.xfail("Garbage result for larger numbers of pts in single precision type 3")
# Strangely, this does not reproduce if we isolate the single case. To
# trigger it, we must run many other tests preceding this test case.
# So it's related to some global state of the library.

complex_dtype = utils._complex_dtype(dtype)

source_pts, source_coefs, target_pts = utils.type3_problem(complex_dtype,
dim, n_source_pts, n_target_pts)

plan = Plan(3, dim, dtype=complex_dtype)

source_pts_gpu = to_gpu(source_pts)
target_pts_gpu = to_gpu(target_pts)

source_coefs_gpu = to_gpu(source_coefs)

plan.setpts(*source_pts_gpu, *((None,) * (3 - dim)), *target_pts_gpu)

if not output_arg:
target_coefs_gpu = plan.execute(source_coefs_gpu)
else:
target_coefs_gpu = _compat.array_empty_like(source_coefs_gpu,
n_target_pts, dtype=complex_dtype)
plan.execute(source_coefs_gpu, out=target_coefs_gpu)

target_coefs = to_cpu(target_coefs_gpu)

utils.verify_type3(source_pts, source_coefs, target_pts, target_coefs, 1e-6)


def test_opts(to_gpu, to_cpu, shape=(8, 8, 8), M=32, tol=1e-3):
dtype = np.float32

Expand Down
40 changes: 40 additions & 0 deletions python/cufinufft/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,16 @@ def type2_problem(dtype, shape, M, n_trans=()):
return k, fk


def type3_problem(dtype, dim, n_source_pts, n_target_pts, n_trans=()):
real_dtype = _real_dtype(dtype)

source_pts = gen_nu_pts(n_source_pts, dim=dim).astype(real_dtype)
source_coefs = gen_nonuniform_data(n_source_pts, n_trans=n_trans).astype(dtype)
target_pts = gen_nu_pts(n_target_pts, dim=dim).astype(real_dtype)

return source_pts, source_coefs, target_pts


def make_grid(shape):
dim = len(shape)
shape = shape
Expand Down Expand Up @@ -97,6 +107,17 @@ def direct_type2(fk, k, dim):
return c


def direct_type3(source_pts, source_coefs, target_pts, ind):
target_pt = target_pts[:, ind[-1]]
target_pt = target_pt[:, np.newaxis]

_source_coef = source_coefs[ind[:-1]]

target_coef = np.sum(np.exp(1j * np.sum(target_pt * source_pts, axis=0)) * _source_coef)

return target_coef


def verify_type1(k, c, fk, tol):
dim = fk.ndim - (c.ndim - 1)

Expand Down Expand Up @@ -128,6 +149,25 @@ def verify_type2(k, fk, c, tol):
assert type2_rel_err < 25 * tol


def verify_type3(source_pts, source_coef, target_pts, target_coef, tol):
dim = source_pts.shape[0]

n_source_pts = source_pts.shape[-1]
n_target_pts = target_pts.shape[-1]
n_tr = source_coef.shape[:-1]

assert target_coef.shape == n_tr + (n_target_pts,)

ind = (int(0.1789 * n_target_pts),)

target_est = target_coef[ind]
target_true = direct_type3(source_pts, source_coef, target_pts, ind)

type3_rel_err = np.linalg.norm(target_est - target_true) / np.linalg.norm(target_true)

assert type3_rel_err < 100 * tol


def transfer_funcs(module_name):
if module_name == "pycuda":
import pycuda.autoinit # NOQA:401
Expand Down
1 change: 0 additions & 1 deletion src/cuda/3d/cufinufft3d.cu
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ int cufinufft3d3_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk,
cuda_complex<T> *d_cstart;
cuda_complex<T> *d_fkstart;
const auto stream = d_plan->stream;
printf("[cufinufft] d_plan->ntransf = %d\n", d_plan->ntransf);
for (int i = 0; i * d_plan->batchsize < d_plan->ntransf; i++) {
int blksize = min(d_plan->ntransf - i * d_plan->batchsize, d_plan->batchsize);
d_cstart = d_c + i * d_plan->batchsize * d_plan->M;
Expand Down

0 comments on commit 622296e

Please sign in to comment.