Merge pull request #34 from mrphys/points-range
Add new options to manage points range
jmontalt authored Oct 12, 2022
2 parents 7400bb0 + 5814895 commit 14f6394
Showing 18 changed files with 1,584 additions and 914 deletions.
4 changes: 2 additions & 2 deletions Makefile
@@ -110,10 +110,10 @@ all: lib wheel
lib: proto $(TARGET_LIB)

%.cu.o: %.cu.cc
$(NVCC) -ccbin $(CXX) -dc -x cu $(CUFLAGS) -t 0 -o $@ -c $<
$(NVCC) --compiler-bindir $(CXX) --device-c -x cu $(CUFLAGS) --threads 0 --output-file $@ --compile $<

$(TARGET_DLINK): $(CUOBJECTS)
$(NVCC) -ccbin $(CXX) -dlink $(CUFLAGS) -t 0 -o $@ $^
$(NVCC) --compiler-bindir $(CXX) --device-link $(CUFLAGS) --threads 0 --output-file $@ $^

$(TARGET_LIB): $(CXXSOURCES) $(PROTO_OBJECTS) $(CUOBJECTS) $(TARGET_DLINK)
$(CXX) -shared $(CXXFLAGS) -o $@ $^ $(LDFLAGS)
1 change: 1 addition & 0 deletions README.md
@@ -43,6 +43,7 @@ versions of TensorFlow and TensorFlow NUFFT according to the table below.

| TensorFlow NUFFT Version | TensorFlow Compatibility | Release Date |
| ------------------------ | ------------------------ | ------------ |
| v0.11.0 | v2.10.x | Oct 12, 2022 |
| v0.10.1 | v2.10.x | Sep 26, 2022 |
| v0.10.0 | v2.10.x | Sep 7, 2022 |
| v0.9.0 | v2.9.x | Sep 5, 2022 |
19 changes: 16 additions & 3 deletions RELEASE.md
@@ -1,6 +1,19 @@
# Release 0.10.1
# Release 0.11.0

## Major Features and Improvements

- Added new option `points_range` to control the range supported by the
  algorithm. This option provides a trade-off between flexibility and
  performance (see the sketch after this list).
- Added new option `debugging.check_points_range` to assert that the input
points lie within the supported range.
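
A minimal usage sketch of the two new options, in Python. The attribute names mirror the option names above (`points_range`, `debugging.check_points_range`); the `PointsRange` member used here and the exact `tfft.nufft` keyword arguments are assumptions, not taken from this commit.

```python
import tensorflow as tf
import tensorflow_nufft as tfft

# Configure the new options; attribute names assumed to mirror the proto fields.
options = tfft.Options()
options.points_range = tfft.PointsRange.EXTENDED  # hypothetical enum member
options.debugging.check_points_range = True       # assert points lie in range

# 1,000 nonuniform 2D points in [-pi, pi] and one complex sample per point.
points = tf.random.uniform([1000, 2], minval=-3.14159, maxval=3.14159)
source = tf.complex(tf.random.normal([1000]), tf.random.normal([1000]))

# Type-1 (nonuniform to uniform) transform onto a 128x128 grid.
target = tfft.nufft(source, points, grid_shape=[128, 128],
                    transform_type='type_1', options=options)
```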

## Bug Fixes and Other Changes

- Fixed a bug where the `options` were not being passed to the NUFFT op when
  computing the gradient with respect to the points.
- `nufft` type-1 will now raise an error when `source` and `points` have
incompatible samples dimensions. Previously the computation would have
proceeded, ignoring any additional samples in `source`.
- Improved error reporting for invalid `grid_shape` arguments. `nufft` will
  now raise an informative error when `grid_shape` has an invalid length or
  when the user fails to provide it for type-1 transforms. Previously, `nufft`
  would have behaved erratically or crashed (see the sketch below).
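
A sketch of the stricter type-1 validation described in the last two items (the call signature is assumed; the checks themselves mirror those added in `nufft_kernels.cc` below):

```python
import tensorflow as tf
import tensorflow_nufft as tfft

points = tf.random.uniform([200, 2], minval=-3.14159, maxval=3.14159)
source = tf.complex(tf.random.normal([200]), tf.random.normal([200]))
short_source = tf.complex(tf.random.normal([150]), tf.random.normal([150]))

# OK: one sample per point, and grid_shape has length 2 for 2D points.
tfft.nufft(source, points, grid_shape=[64, 64], transform_type='type_1')

# Raises tf.errors.InvalidArgumentError:
# source.shape[-1] (150) does not match points.shape[-2] (200).
# tfft.nufft(short_source, points, grid_shape=[64, 64], transform_type='type_1')

# Raises an informative error: grid_shape is required for type-1 transforms
# and must have one entry per dimension (two, here).
# tfft.nufft(source, points, transform_type='type_1')
```
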
2 changes: 2 additions & 0 deletions docs/api_docs/tfft.md
@@ -15,9 +15,11 @@ template: class.md
nosignatures:
---
DebuggingOptions
FftwOptions
FftwPlanningRigor
Options
PointsRange
```

## Functions
2 changes: 1 addition & 1 deletion tensorflow_nufft/__about__.py
@@ -28,7 +28,7 @@
__summary__ = "A fast, native non-uniform FFT op for TensorFlow."
__uri__ = "https://github.com/mrphys/tensorflow-nufft"

__version__ = "0.10.1"
__version__ = "0.11.0"

__author__ = "Javier Montalt Tordera"
__email__ = "[email protected]"
50 changes: 24 additions & 26 deletions tensorflow_nufft/cc/kernels/nufft_kernels.cc
@@ -25,7 +25,6 @@ limitations under the License.
#include "tensorflow_nufft/cc/kernels/nufft_plan.h"
#include "tensorflow_nufft/cc/kernels/reverse_functor.h"
#include "tensorflow_nufft/cc/kernels/transpose_functor.h"
#include "tensorflow_nufft/proto/nufft_options.pb.h"


namespace tensorflow {
@@ -84,6 +83,15 @@ class NUFFTBaseOp : public OpKernel {
"grid_shape must be 1D, but got shape: ",
grid_shape_tensor.shape().DebugString()));

// Check that `grid_shape` has length equal to rank.
OP_REQUIRES(ctx, grid_shape_tensor.dim_size(0) == rank,
errors::InvalidArgument(
"grid_shape must have length ", rank,
" for a ", rank, "D transform ",
"(as inferred from points), but got length: ",
grid_shape_tensor.dim_size(0)));

// Check that `grid_shape` is of integer dtype.
if (grid_shape_tensor.dtype() == DT_INT32) {
OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape(
grid_shape_tensor.vec<int32>(), &grid_shape));
@@ -96,6 +104,14 @@
LOG(FATAL) << "shape must have type int32 or int64";
}

// Check that `source` has the same number of points as `points`.
OP_REQUIRES(ctx, source.dim_size(source.dims() - 1) == num_points,
errors::InvalidArgument(
"source and points must have equal samples ",
"dimensions for type-1 transforms, but got ",
"source.shape[-1] = ",
source.dim_size(source.dims() - 1),
" and points.shape[-2] = ", num_points));
break;
}
case TransformType::TYPE_2: { // uniform to nonuniform
@@ -431,30 +447,12 @@ class NUFFTBaseOp : public OpKernel {

// NUFFT options.
InternalOptions options;
// Read in user options.
options.max_batch_size = this->options_.max_batch_size();
switch (this->options_.fftw().planning_rigor()) {
case FftwPlanningRigor::AUTO: {
options.fftw_flags = FFTW_MEASURE;
break;
}
case FftwPlanningRigor::ESTIMATE: {
options.fftw_flags = FFTW_ESTIMATE;
break;
}
case FftwPlanningRigor::MEASURE: {
options.fftw_flags = FFTW_MEASURE;
break;
}
case FftwPlanningRigor::PATIENT: {
options.fftw_flags = FFTW_PATIENT;
break;
}
case FftwPlanningRigor::EXHAUSTIVE: {
options.fftw_flags = FFTW_EXHAUSTIVE;
break;
}
}
options.mutable_debugging()->set_check_points_range(
this->options_.debugging().check_points_range());
options.mutable_fftw()->set_planning_rigor(
this->options_.fftw().planning_rigor());
options.set_max_batch_size(this->options_.max_batch_size());
options.set_points_range(this->options_.points_range());

if (op_type != OpType::NUFFT) {
options.spread_only = true;
@@ -540,7 +538,7 @@ class NUFFTBaseOp : public OpKernel {
break;
}
}
return Status::OK();
return OkStatus();
}

protected:
31 changes: 18 additions & 13 deletions tensorflow_nufft/cc/kernels/nufft_options.h
@@ -37,6 +37,9 @@ limitations under the License.
#include "third_party/gpus/cuda/include/vector_types.h"
#endif // GOOGLE_CUDA

#include "tensorflow_nufft/proto/nufft_options.pb.h"


namespace tensorflow {
namespace nufft {

@@ -73,22 +76,25 @@ enum class SpreadMethod {
BLOCK_GATHER = 3
};

// Specifies the measurement unit for the non-uniform points.
enum class PointsUnit {
CYCLES = 0, // points in [-N/2, N/2] - unimplemented
CYCLES_PER_SAMPLE = 1, // points in [-0.5, 0.5] - unimplemented
RADIANS_PER_SAMPLE = 2 // points in [-pi, pi]
};

// InternalOptions for the NUFFT operations. This class is used for both the
// CPU and the GPU implementation, although some options are only used by one
// or the other.
// TODO(jmontalt): Consider splitting into two classes, one for CPU and one for
// GPU, derived from a common base.
// TODO(jmontalt): Consider replacing entirely by proto options.
struct InternalOptions {
class InternalOptions : public Options {
public:
// The mode order to use. See enum above. Applies only to type 1 and type 2
// transforms. Applies only to the CPU kernel.
ModeOrder mode_order = ModeOrder::CMCL;

// Whether to check if the NUFFT points are in the correct range
// [-3*pi, 3*pi]. This check has a small performance penalty. Applies only to
// the CPU kernel.
bool check_bounds = true;

// The verbosity level. 0 means silent, 1 means some timing/debug, and 2 means
// more debug. Applies to the CPU and the GPU kernels.
int verbosity = 0;
@@ -100,9 +106,6 @@ struct InternalOptions {
// appropriate number. Applies only to the CPU kernel.
int num_threads = 0;

// FFTW flags. Applies only to the CPU kernel.
int fftw_flags = FFTW_ESTIMATE;

// Whether to sort the non-uniform points. See enum above. Used by CPU and GPU
// kernels.
SortPoints sort_points = SortPoints::AUTO;
@@ -121,14 +124,13 @@
// the GPU kernels.
double upsampling_factor = 0.0;

// The kernel width.
int kernel_width = 0.0;

// The spreader threading strategy. See enum above. Only relevant if the
// number of threads is larger than 1. Applies only to the CPU kernel.
SpreadThreading spread_threading = SpreadThreading::AUTO;

// The maximum batch size for the vectorized NUFFT. A value of 0 means the
// batch size is automatically chosen. Applies to CPU and GPU kernels.
int max_batch_size = 0;

// The number of threads above which the spreader OMP critical goes atomic.
// Applies only to the CPU kernel.
int num_threads_for_atomic_spread = -1;
@@ -142,6 +144,9 @@
// The CUDA interpolation/spreading method.
SpreadMethod spread_method = SpreadMethod::AUTO;

// The input unit for the nonuniform points. See enum above.
PointsUnit points_unit = PointsUnit::RADIANS_PER_SAMPLE;

#if GOOGLE_CUDA

// Maximum subproblem size.