diff --git a/examples/spreadinterponly1d.cpp b/examples/spreadinterponly1d.cpp new file mode 100644 index 000000000..1ec51d984 --- /dev/null +++ b/examples/spreadinterponly1d.cpp @@ -0,0 +1,83 @@ +// this is all you must include for the finufft lib... +#include + +// also used in this example... +#include +#include +#include +#include +#include +#include +using namespace std; +using namespace std::chrono; + +int main(int argc, char *argv[]) +/* Example of double-prec spread/interp only tasks, with basic math test (total mass, + ie testing the zero-frequency component). A better math test would be, + ironically, to use this spreader/interpolator as part of a NUFFT :) + Complex I/O arrays, but kernel is real. Barnett 1/8/25 + See: spreadtestnd for demos of non-FINUFFT interface to spread/interp module. + + Compile and run (static library case): + + g++ spreadinterponly1d.cpp -I../include ../lib-static/libfinufft.a -o + spreadinterponly1d -lfftw3 -lfftw3_omp && ./spreadinterponly1d +*/ +{ + int M = 1e7; // number of nonuniform points + int N = 1e7; // size of regular grid + finufft_opts opts; + finufft_default_opts(&opts); + opts.spreadinterponly = 1; // task: the following two control kernel used... + double tol = 1e-9; // tolerance for (real) kernel shape design only + opts.upsampfac = 2.0; // pretend upsampling factor (really no upsampling) + // opts.spread_kerevalmeth = 0; // would be needed for nonstd upsampfacs + + complex I = complex(0.0, 1.0); // the imaginary unit + vector x(M); // input + vector> c(M); // input + vector> F(N); // output (spread to this array) + + // first spread M=1 single unit-strength at the origin, to get its total mass... + x[0] = 0.0; + c[0] = 1.0; + int unused = 1; + int ier = finufft1d1(1, &x[0], &c[0], unused, tol, N, &F[0], &opts); + if (ier > 1) return ier; + complex kersum = 0.0; + for (auto Fk : F) kersum += Fk; // kernel mass + + // Now generate random nonuniform points (x) and complex strengths (c)... + for (int j = 0; j < M; ++j) { + x[j] = M_PI * (2 * ((double)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi) + c[j] = + 2 * ((double)rand() / RAND_MAX) - 1 + I * (2 * ((double)rand() / RAND_MAX) - 1); + } + + opts.debug = 1; + auto t0 = steady_clock::now(); // now spread with all M pts... (dir=1) + ier = finufft1d1(M, &x[0], &c[0], unused, tol, N, &F[0], &opts); + double t = (steady_clock::now() - t0) / 1.0s; + if (ier > 1) return ier; + complex csum = 0.0; // tot input strength + for (auto cj : c) csum += cj; + complex mass = 0.0; // tot output mass + for (auto Fk : F) mass += Fk; + double relerr = abs(mass - kersum * csum) / abs(mass); + printf("1D spread-only, double-prec, %.3g s (%.3g NU pt/sec), ier=%d, mass err %.3g\n", + t, M / t, ier, relerr); + + for (auto &Fk : F) Fk = complex{1.0, 0.0}; // unit grid input + opts.debug = 0; + t0 = steady_clock::now(); // now interp to all M pts... (dir=2) + ier = finufft1d2(M, &x[0], &c[0], unused, tol, N, &F[0], &opts); + t = (steady_clock::now() - t0) / 1.0s; + if (ier > 1) return ier; + csum = 0.0; // tot output + for (auto cj : c) csum += cj; + double maxerr = 0.0; + for (auto cj : c) maxerr = max(maxerr, abs(cj - kersum)); + printf("1D interp-only, double-prec, %.3g s (%.3g NU pt/sec), ier=%d, max err %.3g\n", + t, M / t, ier, maxerr / abs(kersum)); + return 0; +} diff --git a/include/cufinufft/impl.h b/include/cufinufft/impl.h index 543634f15..20c39934a 100644 --- a/include/cufinufft/impl.h +++ b/include/cufinufft/impl.h @@ -157,7 +157,7 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran } if (d_plan->opts.gpu_spreadinterponly) { // XNOR implementation below with boolean logic. - if ((d_plan->opts.upsampfac != 1) == (type != 3)) { + if ((d_plan->opts.upsampfac != 1.0) == (type != 3)) { ier = FINUFFT_ERR_SPREADONLY_UPSAMP_INVALID; goto finalize; } @@ -197,7 +197,6 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran printf("[cufinufft] shared memory required for the spreader: %ld\n", mem_required); } - // dynamically request the maximum amount of shared memory available // for the spreader @@ -765,8 +764,8 @@ int cufinufft_setpts_impl(int M, T *d_kx, T *d_ky, T *d_kz, int N, T *d_s, T *d_ thrust::cuda::par.on(stream), phase_iterator, phase_iterator + N, d_plan->deconv, d_plan->deconv, [c1, c2, c3, d1, d2, d3, realsign] __host__ __device__( - const thrust::tuple tuple, cuda_complex deconv) - -> cuda_complex { + const thrust::tuple tuple, + cuda_complex deconv) -> cuda_complex { // d2 and d3 are 0 if dim < 2 and dim < 3 const auto phase = c1 * (thrust::get<0>(tuple) + d1) + c2 * (thrust::get<1>(tuple) + d2) + diff --git a/src/cuda/cufinufft.cu b/src/cuda/cufinufft.cu index fa4990285..85acb7aca 100644 --- a/src/cuda/cufinufft.cu +++ b/src/cuda/cufinufft.cu @@ -99,8 +99,6 @@ void cufinufft_default_opts(cufinufft_opts *opts) The resulting struct may then be passed (instead of NULL) to the last argument of cufinufft_plan(). - Options with prefix "gpu_" are used for gpu code. - Notes: 1) Values set in this function for different type and dimensions are preferable based on experiments. User can experiment with different settings by @@ -112,17 +110,15 @@ void cufinufft_default_opts(cufinufft_opts *opts) { // sphinx tag (don't remove): @gpu_defopts_start // data handling opts... - opts->modeord = 0; - opts->gpu_device_id = 0; - - // diagnostic opts... + opts->modeord = 0; + opts->gpu_device_id = 0; opts->gpu_spreadinterponly = 0; // algorithm performance opts... opts->gpu_method = 0; opts->gpu_sort = 1; opts->gpu_kerevalmeth = 1; - opts->upsampfac = 0; + opts->upsampfac = 0.0; opts->gpu_maxsubprobsize = 1024; opts->gpu_obinsizex = 0; opts->gpu_obinsizey = 0; diff --git a/src/finufft_core.cpp b/src/finufft_core.cpp index 0b076c3d7..8dff843b4 100644 --- a/src/finufft_core.cpp +++ b/src/finufft_core.cpp @@ -652,27 +652,42 @@ FINUFFT_PLAN_T::FINUFFT_PLAN_T(int type_, int dim_, const BIGINT *n_modes, i // Note: batchSize not used since might be only 1. spopts.spread_direction = type; + constexpr TF EPSILON = std::numeric_limits::epsilon(); - constexpr TF EPSILON = std::numeric_limits::epsilon(); - if (opts.showwarn) { // user warn round-off error (due to prob condition #)... - for (int idim = 0; idim < dim; ++idim) - if (EPSILON * mstu[idim] > 1.0) - fprintf(stderr, - "%s warning: rounding err (due to cond # of prob) eps_mach*N%d = %.3g " - "> 1 !\n", - __func__, idim, (double)(EPSILON * mstu[idim])); - } + if (opts.spreadinterponly) { // (unusual case of no NUFFT, just report) - // determine fine grid sizes, sanity check them... - for (int idim = 0; idim < dim; ++idim) { - int nfier = set_nf_type12(mstu[idim], opts, spopts, &nfdim[idim]); - if (nfier) throw nfier; // nf too big; we're done - } + // spreadinterp grid will simply be the user's "mode" grid... + for (int idim = 0; idim < dim; ++idim) nfdim[idim] = mstu[idim]; - if (!opts.spreadinterponly) { // ..... eval Fourier series, alloc workspace ..... + if (opts.debug) { // "long long" here is to avoid warnings with printf... + printf("[%s] %dd spreadinterponly(dir=%d): (ms,mt,mu)=(%lld,%lld,%lld)" + "\n ntrans=%d nthr=%d batchSize=%d kernel width ns=%d", + __func__, dim, type, (long long)mstu[0], (long long)mstu[1], + (long long)mstu[2], ntrans, nthr, batchSize, spopts.nspread); + if (batchSize == 1) // spread_thread has no effect in this case + printf("\n"); + else + printf(" spread_thread=%d\n", opts.spread_thread); + } - for (int idim = 0; idim < dim; ++idim) + } else { // ..... usual NUFFT: eval Fourier series, alloc workspace ..... + + if (opts.showwarn) { // user warn round-off error (due to prob condition #)... + for (int idim = 0; idim < dim; ++idim) + if (EPSILON * mstu[idim] > 1.0) + fprintf( + stderr, + "%s warning: rounding err (due to cond # of prob) eps_mach*N%d = %.3g " + "> 1 !\n", + __func__, idim, (double)(EPSILON * mstu[idim])); + } + + // determine fine grid sizes, sanity check, then alloc... + for (int idim = 0; idim < dim; ++idim) { + int nfier = set_nf_type12(mstu[idim], opts, spopts, &nfdim[idim]); + if (nfier) throw nfier; // nf too big; we're done phiHat[idim].resize(nfdim[idim] / 2 + 1); // alloc fseries + } if (opts.debug) { // "long long" here is to avoid warnings with printf... printf("[%s] %dd%d: (ms,mt,mu)=(%lld,%lld,%lld) " @@ -1018,7 +1033,8 @@ int FINUFFT_PLAN_T::execute(std::complex *cj, std::complex *fk) { spreadinterpSortedBatch(thisBatchSize, this, fwBatch_or_fkb, cjb); t_sprint += timer.elapsedsec(); } - // Release the fwBatch vector to prevent double freeing of memory (explain!) + // Release the fwBatch vector to prevent double freeing of memory (explain or + // delete!) } // ........end b loop if (opts.debug) { // report total times in their natural order... diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index be1abfeb0..038abd7db 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -2161,7 +2161,7 @@ FINUFFT_EXPORT int FINUFFT_CDECL setup_spreader(finufft_spread_opts &opts, T eps return FINUFFT_ERR_HORNER_WRONG_BETA; } if (upsampfac < 1.0) { // no digits would result - fprintf(stderr, "FINUFFT setup_spreader: error, upsampfac=%.3g is <=1.0\n", + fprintf(stderr, "FINUFFT setup_spreader: error, upsampfac=%.3g is < 1.0\n", upsampfac); return FINUFFT_ERR_UPSAMPFAC_TOO_SMALL; }