diff --git a/python/cufinufft/cufinufft/_cufinufft.py b/python/cufinufft/cufinufft/_cufinufft.py
index 944417518..1c87bb4c8 100644
--- a/python/cufinufft/cufinufft/_cufinufft.py
+++ b/python/cufinufft/cufinufft/_cufinufft.py
@@ -116,14 +116,14 @@ class NufftOpts(ctypes.Structure):
 
 _set_pts = lib.cufinufft_setpts
 _set_pts.argtypes = [
-    c_void_p, c_int64, c_void_p, c_void_p, c_void_p, ctypes.c_int, c_double_p,
-    c_double_p, c_double_p]
+    c_void_p, c_int64, c_void_p, c_void_p, c_void_p, ctypes.c_int64, c_void_p,
+    c_void_p, c_void_p]
 _set_pts.restype = c_int
 
 _set_ptsf = lib.cufinufftf_setpts
 _set_ptsf.argtypes = [
-    c_void_p, c_int64, c_void_p, c_void_p, c_void_p, ctypes.c_int, c_float_p,
-    c_float_p, c_float_p]
+    c_void_p, c_int64, c_void_p, c_void_p, c_void_p, ctypes.c_int64, c_void_p,
+    c_void_p, c_void_p]
 _set_ptsf.restype = c_int
 
 _exec_plan = lib.cufinufft_execute
diff --git a/python/cufinufft/cufinufft/_plan.py b/python/cufinufft/cufinufft/_plan.py
index 67bcee59b..882d77f59 100644
--- a/python/cufinufft/cufinufft/_plan.py
+++ b/python/cufinufft/cufinufft/_plan.py
@@ -112,14 +112,25 @@ def __init__(self, nufft_type, n_modes, n_trans=1, eps=1e-6, isign=None,
         else:
             raise TypeError("Expected complex64 or complex128.")
 
-        if isinstance(n_modes, numbers.Integral):
-            n_modes = (n_modes,)
-        elif isinstance(n_modes, collections.abc.Iterable):
-            n_modes = tuple(n_modes)
+        if nufft_type == 3:
+            if isinstance(n_modes, numbers.Integral):
+                dim = n_modes
+                n_modes = (1,) * dim  # Ignored, can be anything
+            else:
+                raise ValueError("For a type 3 plan, n_modes_or_dim must be a single number, the dimension")
         else:
-            raise ValueError(f"Invalid n_modes '{n_modes}'")
+            if isinstance(n_modes, numbers.Integral):
+                n_modes = (n_modes,)
+            elif isinstance(n_modes, collections.abc.Iterable):
+                n_modes = tuple(n_modes)
+            else:
+                raise ValueError(f"Invalid n_modes '{n_modes}'")
+            dim = len(n_modes)
+
+        if dim not in [1, 2, 3]:
+            raise ValueError("Only dimensions 1, 2, and 3 supported")
 
-        self.dim = len(n_modes)
+        self.dim = dim
         self.type = nufft_type
         self.isign = isign
         self.eps = float(eps)
@@ -218,6 +229,17 @@ def setpts(self, x, y=None, z=None, s=None, t=None, u=None):
 
         M = _compat.get_array_size(_x)
 
+        if self.type == 3:
+            _s = _ensure_array_type(s, "s", self.real_dtype)
+            _t = _ensure_array_type(t, "t", self.real_dtype)
+            _u = _ensure_array_type(u, "u", self.real_dtype)
+
+            _s, _t, _u = _ensure_valid_pts(_s, _t, _u, self.dim)
+
+            N = _compat.get_array_size(_s)
+        else:
+            N = 0
+
         # Because FINUFFT/cufinufft are internally column major,
         # we will reorder the pts axes. Reordering references
         # save us from having to actually transpose signal data
@@ -235,15 +257,31 @@ def setpts(self, x, y=None, z=None, s=None, t=None, u=None):
         if self.dim >= 2:
             fpts_axes.insert(0, _compat.get_array_ptr(_y))
             self._references.append(_y)
-
         if self.dim >= 3:
             fpts_axes.insert(0, _compat.get_array_ptr(_z))
             self._references.append(_z)
 
+        # Do the same for type 3
+        if self.type == 3:
+            fpts_axes_t3 = [_compat.get_array_ptr(_s), None, None]
+            self._references.append(_s)
+            if self.dim >= 2:
+                fpts_axes_t3.insert(0, _compat.get_array_ptr(_t))
+                self._references.append(_t)
+
+            if self.dim >= 3:
+                fpts_axes_t3.insert(0, _compat.get_array_ptr(_u))
+                self._references.append(_u)
+        else:
+            fpts_axes_t3 = [None] * 3
+
         # Then take three items off the stack as our reordered axis.
-        ier = self._setpts(self._plan, M, *fpts_axes[:3], 0, None, None, None)
+        ier = self._setpts(self._plan,
+                           M, *fpts_axes[:3],
+                           N, *fpts_axes_t3[:3])
 
         self.nj = M
+        self.nk = N
 
         if ier != 0:
             raise RuntimeError('Error setting non-uniform points.')
@@ -280,6 +318,9 @@ def execute(self, data, out=None):
         elif self.type == 2:
             req_data_shape = (self.n_trans, *self.n_modes)
             req_out_shape = (self.nj,)
+        elif self.type == 3:
+            req_data_shape = (self.n_trans, self.nj)
+            req_out_shape = (self.nk,)
 
         _data, data_shape = _ensure_array_shape(_data, "data", req_data_shape,
                                                 allow_reshape=True)
@@ -295,7 +336,7 @@ def execute(self, data, out=None):
         else:
             _out = _ensure_array_shape(_out, "out", req_out_shape)
 
-        if self.type == 1:
+        if self.type in [1, 3]:
             ier = self._exec_plan(self._plan, _compat.get_array_ptr(_data),
                                   _compat.get_array_ptr(_out))
         elif self.type == 2:
diff --git a/python/cufinufft/tests/test_basic.py b/python/cufinufft/tests/test_basic.py
index b66e00f18..f826f6ef2 100644
--- a/python/cufinufft/tests/test_basic.py
+++ b/python/cufinufft/tests/test_basic.py
@@ -107,6 +107,44 @@ def _execute(*args, **kwargs):
     utils.verify_type2(k, fk, c, tol)
 
 
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("dim", list(set(len(shape) for shape in SHAPES)))
+@pytest.mark.parametrize("n_source_pts", MS)
+@pytest.mark.parametrize("n_target_pts", MS)
+@pytest.mark.parametrize("output_arg", OUTPUT_ARGS)
+def test_type3(to_gpu, to_cpu, dtype, dim, n_source_pts, n_target_pts, output_arg):
+    if dtype == np.float32 and dim >= 2 and min(n_source_pts, n_target_pts) > 4000:
+        pytest.xfail("Garbage result for larger numbers of pts in single precision type 3")
+        # Strangely, this does not reproduce if we isolate the single case. To
+        # trigger it, we must run many other tests preceding this test case.
+        # So it's related to some global state of the library.
+
+    complex_dtype = utils._complex_dtype(dtype)
+
+    source_pts, source_coefs, target_pts = utils.type3_problem(complex_dtype,
+            dim, n_source_pts, n_target_pts)
+
+    plan = Plan(3, dim, dtype=complex_dtype)
+
+    source_pts_gpu = to_gpu(source_pts)
+    target_pts_gpu = to_gpu(target_pts)
+
+    source_coefs_gpu = to_gpu(source_coefs)
+
+    plan.setpts(*source_pts_gpu, *((None,) * (3 - dim)), *target_pts_gpu)
+
+    if not output_arg:
+        target_coefs_gpu = plan.execute(source_coefs_gpu)
+    else:
+        target_coefs_gpu = _compat.array_empty_like(source_coefs_gpu,
+                n_target_pts, dtype=complex_dtype)
+        plan.execute(source_coefs_gpu, out=target_coefs_gpu)
+
+    target_coefs = to_cpu(target_coefs_gpu)
+
+    utils.verify_type3(source_pts, source_coefs, target_pts, target_coefs, 1e-6)
+
+
 def test_opts(to_gpu, to_cpu, shape=(8, 8, 8), M=32, tol=1e-3):
     dtype = np.float32
 
diff --git a/python/cufinufft/tests/utils.py b/python/cufinufft/tests/utils.py
index 9ea3281f3..8224fce64 100644
--- a/python/cufinufft/tests/utils.py
+++ b/python/cufinufft/tests/utils.py
@@ -65,6 +65,16 @@ def type2_problem(dtype, shape, M, n_trans=()):
     return k, fk
 
 
+def type3_problem(dtype, dim, n_source_pts, n_target_pts, n_trans=()):
+    real_dtype = _real_dtype(dtype)
+
+    source_pts = gen_nu_pts(n_source_pts, dim=dim).astype(real_dtype)
+    source_coefs = gen_nonuniform_data(n_source_pts, n_trans=n_trans).astype(dtype)
+    target_pts = gen_nu_pts(n_target_pts, dim=dim).astype(real_dtype)
+
+    return source_pts, source_coefs, target_pts
+
+
 def make_grid(shape):
     dim = len(shape)
     shape = shape
@@ -97,6 +107,17 @@ def direct_type2(fk, k, dim):
     return c
 
 
+def direct_type3(source_pts, source_coefs, target_pts, ind):
+    target_pt = target_pts[:, ind[-1]]
+    target_pt = target_pt[:, np.newaxis]
+
+    _source_coef = source_coefs[ind[:-1]]
+
+    target_coef = np.sum(np.exp(1j * np.sum(target_pt * source_pts, axis=0)) * _source_coef)
+
+    return target_coef
+
+
 def verify_type1(k, c, fk, tol):
     dim = fk.ndim - (c.ndim - 1)
 
@@ -128,6 +149,25 @@ def verify_type2(k, fk, c, tol):
     assert type2_rel_err < 25 * tol
 
 
+def verify_type3(source_pts, source_coef, target_pts, target_coef, tol):
+    dim = source_pts.shape[0]
+
+    n_source_pts = source_pts.shape[-1]
+    n_target_pts = target_pts.shape[-1]
+    n_tr = source_coef.shape[:-1]
+
+    assert target_coef.shape == n_tr + (n_target_pts,)
+
+    ind = (int(0.1789 * n_target_pts),)
+
+    target_est = target_coef[ind]
+    target_true = direct_type3(source_pts, source_coef, target_pts, ind)
+
+    type3_rel_err = np.linalg.norm(target_est - target_true) / np.linalg.norm(target_true)
+
+    assert type3_rel_err < 100 * tol
+
+
 def transfer_funcs(module_name):
     if module_name == "pycuda":
         import pycuda.autoinit  # NOQA:401
diff --git a/src/cuda/3d/cufinufft3d.cu b/src/cuda/3d/cufinufft3d.cu
index 9f376297c..805cdf4bb 100644
--- a/src/cuda/3d/cufinufft3d.cu
+++ b/src/cuda/3d/cufinufft3d.cu
@@ -144,7 +144,6 @@ int cufinufft3d3_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk,
   cuda_complex<T> *d_cstart;
   cuda_complex<T> *d_fkstart;
   const auto stream = d_plan->stream;
-  printf("[cufinufft] d_plan->ntransf = %d\n", d_plan->ntransf);
   for (int i = 0; i * d_plan->batchsize < d_plan->ntransf; i++) {
     int blksize = min(d_plan->ntransf - i * d_plan->batchsize, d_plan->batchsize);
     d_cstart = d_c + i * d_plan->batchsize * d_plan->M;
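For reference, a minimal usage sketch of the type 3 path exercised by test_type3 above. This snippet is not part of the patch: the CuPy arrays, point counts, and dtype are illustrative assumptions; only Plan(3, dim, ...), setpts, and execute reflect the interface added in this diff.

# Usage sketch (not part of the patch): type 3 transform with the new API.
# Assumes a CuPy-backed workflow; point counts and dtype are arbitrary.
import numpy as np
import cupy as cp

from cufinufft import Plan

dim = 2
n_source_pts, n_target_pts = 1000, 2000

# Nonuniform source locations (x, y) and nonuniform target frequencies (s, t).
x = cp.random.uniform(-np.pi, np.pi, n_source_pts)
y = cp.random.uniform(-np.pi, np.pi, n_source_pts)
s = cp.random.uniform(-10, 10, n_target_pts)
t = cp.random.uniform(-10, 10, n_target_pts)
c = (cp.random.standard_normal(n_source_pts)
     + 1j * cp.random.standard_normal(n_source_pts)).astype(np.complex128)

# For a type 3 plan, the second argument is the dimension, not the mode sizes.
plan = Plan(3, dim, dtype=np.complex128)

# Unused spatial slots are passed as None (here z), mirroring test_type3 above.
plan.setpts(x, y, None, s, t)

f = plan.execute(c)  # target coefficients, shape (n_target_pts,)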