Merge pull request #34 from mrphys/points-range
Add new options to manage points range
jmontalt authored Oct 12, 2022
2 parents 7400bb0 + 5814895 commit 14f6394
Showing 18 changed files with 1,584 additions and 914 deletions.
4 changes: 2 additions & 2 deletions Makefile
@@ -110,10 +110,10 @@ all: lib wheel
lib: proto $(TARGET_LIB)

%.cu.o: %.cu.cc
$(NVCC) -ccbin $(CXX) -dc -x cu $(CUFLAGS) -t 0 -o $@ -c $<
$(NVCC) --compiler-bindir $(CXX) --device-c -x cu $(CUFLAGS) --threads 0 --output-file $@ --compile $<

$(TARGET_DLINK): $(CUOBJECTS)
$(NVCC) -ccbin $(CXX) -dlink $(CUFLAGS) -t 0 -o $@ $^
$(NVCC) --compiler-bindir $(CXX) --device-link $(CUFLAGS) --threads 0 --output-file $@ $^

$(TARGET_LIB): $(CXXSOURCES) $(PROTO_OBJECTS) $(CUOBJECTS) $(TARGET_DLINK)
$(CXX) -shared $(CXXFLAGS) -o $@ $^ $(LDFLAGS)
1 change: 1 addition & 0 deletions README.md
@@ -43,6 +43,7 @@ versions of TensorFlow and TensorFlow NUFFT according to the table below.

| TensorFlow NUFFT Version | TensorFlow Compatibility | Release Date |
| ------------------------ | ------------------------ | ------------ |
| v0.11.0 | v2.10.x | Oct 12, 2022 |
| v0.10.1 | v2.10.x | Sep 26, 2022 |
| v0.10.0 | v2.10.x | Sep 7, 2022 |
| v0.9.0 | v2.9.x | Sep 5, 2022 |
19 changes: 16 additions & 3 deletions RELEASE.md
@@ -1,6 +1,19 @@
# Release 0.10.1
# Release 0.11.0

## Major Features and Improvements

- Added new option `points_range` to control the range supported by the
  algorithm. This option provides a trade-off between flexibility and
  performance (see the sketch after this list).
- Added new option `debugging.check_points_range` to assert that the input
points lie within the supported range.
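
A minimal usage sketch of the two new options, in Python. The attribute names mirror the option names above (`points_range`, `debugging.check_points_range`); the `PointsRange` member used here and the exact `tfft.nufft` keyword arguments are assumptions, not taken from this commit.

```python
import tensorflow as tf
import tensorflow_nufft as tfft

# Configure the new options; attribute names assumed to mirror the proto fields.
options = tfft.Options()
options.points_range = tfft.PointsRange.EXTENDED  # hypothetical enum member
options.debugging.check_points_range = True       # assert points lie in range

# 1,000 nonuniform 2D points in [-pi, pi] and one complex sample per point.
points = tf.random.uniform([1000, 2], minval=-3.14159, maxval=3.14159)
source = tf.complex(tf.random.normal([1000]), tf.random.normal([1000]))

# Type-1 (nonuniform to uniform) transform onto a 128x128 grid.
target = tfft.nufft(source, points, grid_shape=[128, 128],
                    transform_type='type_1', options=options)
```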

## Bug Fixes and Other Changes

- Fixed a bug where the `options` were not being passed to the NUFFT op when
  computing the gradient with respect to the points.
- `nufft` type-1 will now raise an error when `source` and `points` have
incompatible samples dimensions. Previously the computation would have
proceeded, ignoring any additional samples in `source`.
- Improved error reporting for invalid `grid_shape` arguments. `nufft` will
  now raise an informative error when `grid_shape` has an invalid length or
  when the user fails to provide it for type-1 transforms. Previously, `nufft`
  would have behaved erratically or crashed (see the sketch below).
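
A sketch of the stricter type-1 validation described in the last two items (the call signature is assumed; the checks themselves mirror those added in `nufft_kernels.cc` below):

```python
import tensorflow as tf
import tensorflow_nufft as tfft

points = tf.random.uniform([200, 2], minval=-3.14159, maxval=3.14159)
source = tf.complex(tf.random.normal([200]), tf.random.normal([200]))
short_source = tf.complex(tf.random.normal([150]), tf.random.normal([150]))

# OK: one sample per point, and grid_shape has length 2 for 2D points.
tfft.nufft(source, points, grid_shape=[64, 64], transform_type='type_1')

# Raises tf.errors.InvalidArgumentError:
# source.shape[-1] (150) does not match points.shape[-2] (200).
# tfft.nufft(short_source, points, grid_shape=[64, 64], transform_type='type_1')

# Raises an informative error: grid_shape is required for type-1 transforms
# and must have one entry per dimension (two, here).
# tfft.nufft(source, points, transform_type='type_1')
```
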
2 changes: 2 additions & 0 deletions docs/api_docs/tfft.md
@@ -15,9 +15,11 @@ template: class.md
nosignatures:
---
DebuggingOptions
FftwOptions
FftwPlanningRigor
Options
PointsRange
```

## Functions
2 changes: 1 addition & 1 deletion tensorflow_nufft/__about__.py
@@ -28,7 +28,7 @@
__summary__ = "A fast, native non-uniform FFT op for TensorFlow."
__uri__ = "https://github.com/mrphys/tensorflow-nufft"

__version__ = "0.10.1"
__version__ = "0.11.0"

__author__ = "Javier Montalt Tordera"
__email__ = "[email protected]"
50 changes: 24 additions & 26 deletions tensorflow_nufft/cc/kernels/nufft_kernels.cc
@@ -25,7 +25,6 @@ limitations under the License.
#include "tensorflow_nufft/cc/kernels/nufft_plan.h"
#include "tensorflow_nufft/cc/kernels/reverse_functor.h"
#include "tensorflow_nufft/cc/kernels/transpose_functor.h"
#include "tensorflow_nufft/proto/nufft_options.pb.h"


namespace tensorflow {
@@ -84,6 +83,15 @@ class NUFFTBaseOp : public OpKernel {
"grid_shape must be 1D, but got shape: ",
grid_shape_tensor.shape().DebugString()));

// Check that `grid_shape` has length equal to rank.
OP_REQUIRES(ctx, grid_shape_tensor.dim_size(0) == rank,
errors::InvalidArgument(
"grid_shape must have length ", rank,
" for a ", rank, "D transform ",
"(as inferred from points), but got length: ",
grid_shape_tensor.dim_size(0)));

// Check that `grid_shape` is of integer dtype.
if (grid_shape_tensor.dtype() == DT_INT32) {
OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape(
grid_shape_tensor.vec<int32>(), &grid_shape));
@@ -96,6 +104,14 @@
LOG(FATAL) << "shape must have type int32 or int64";
}

// Check that `source` has the same number of points as `points`.
OP_REQUIRES(ctx, source.dim_size(source.dims() - 1) == num_points,
errors::InvalidArgument(
"source and points must have equal samples ",
"dimensions for type-1 transforms, but got ",
"source.shape[-1] = ",
source.dim_size(source.dims() - 1),
" and points.shape[-2] = ", num_points));
break;
}
case TransformType::TYPE_2: { // uniform to nonuniform
@@ -431,30 +447,12 @@ class NUFFTBaseOp : public OpKernel {

// NUFFT options.
InternalOptions options;
// Read in user options.
options.max_batch_size = this->options_.max_batch_size();
switch (this->options_.fftw().planning_rigor()) {
case FftwPlanningRigor::AUTO: {
options.fftw_flags = FFTW_MEASURE;
break;
}
case FftwPlanningRigor::ESTIMATE: {
options.fftw_flags = FFTW_ESTIMATE;
break;
}
case FftwPlanningRigor::MEASURE: {
options.fftw_flags = FFTW_MEASURE;
break;
}
case FftwPlanningRigor::PATIENT: {
options.fftw_flags = FFTW_PATIENT;
break;
}
case FftwPlanningRigor::EXHAUSTIVE: {
options.fftw_flags = FFTW_EXHAUSTIVE;
break;
}
}
options.mutable_debugging()->set_check_points_range(
this->options_.debugging().check_points_range());
options.mutable_fftw()->set_planning_rigor(
this->options_.fftw().planning_rigor());
options.set_max_batch_size(this->options_.max_batch_size());
options.set_points_range(this->options_.points_range());

if (op_type != OpType::NUFFT) {
options.spread_only = true;
@@ -540,7 +538,7 @@ class NUFFTBaseOp : public OpKernel {
break;
}
}
return Status::OK();
return OkStatus();
}

protected:
31 changes: 18 additions & 13 deletions tensorflow_nufft/cc/kernels/nufft_options.h
@@ -37,6 +37,9 @@ limitations under the License.
#include "third_party/gpus/cuda/include/vector_types.h"
#endif // GOOGLE_CUDA

#include "tensorflow_nufft/proto/nufft_options.pb.h"


namespace tensorflow {
namespace nufft {

@@ -73,22 +76,25 @@ enum class SpreadMethod {
BLOCK_GATHER = 3
};

// Specifies the measurement unit for the non-uniform points.
enum class PointsUnit {
CYCLES = 0, // points in [-N/2, N/2] - unimplemented
CYCLES_PER_SAMPLE = 1, // points in [-0.5, 0.5] - unimplemented
RADIANS_PER_SAMPLE = 2 // points in [-pi, pi]
};

// InternalOptions for the NUFFT operations. This class is used for both the
// CPU and the GPU implementation, although some options are only used by one
// or the other.
// TODO(jmontalt): Consider splitting into two classes, one for CPU and one for
// GPU, derived from a common base.
// TODO(jmontalt): Consider replacing entirely by proto options.
struct InternalOptions {
class InternalOptions : public Options {
public:
// The mode order to use. See enum above. Applies only to type 1 and type 2
// transforms. Applies only to the CPU kernel.
ModeOrder mode_order = ModeOrder::CMCL;

// Whether to check if the NUFFT points are in the correct range
// [-3*pi, 3*pi]. This check has a small performance penalty. Applies only to
// the CPU kernel.
bool check_bounds = true;

// The verbosity level. 0 means silent, 1 means some timing/debug, and 2 means
// more debug. Applies to the CPU and the GPU kernels.
int verbosity = 0;
@@ -100,9 +106,6 @@ struct InternalOptions {
// appropriate number. Applies only to the CPU kernel.
int num_threads = 0;

// FFTW flags. Applies only to the CPU kernel.
int fftw_flags = FFTW_ESTIMATE;

// Whether to sort the non-uniform points. See enum above. Used by CPU and GPU
// kernels.
SortPoints sort_points = SortPoints::AUTO;
@@ -121,14 +124,13 @@
// the GPU kernels.
double upsampling_factor = 0.0;

// The kernel width.
int kernel_width = 0.0;

// The spreader threading strategy. See enum above. Only relevant if the
// number of threads is larger than 1. Applies only to the CPU kernel.
SpreadThreading spread_threading = SpreadThreading::AUTO;

// The maximum batch size for the vectorized NUFFT. A value of 0 means the
// batch size is automatically chosen. Applies to CPU and GPU kernels.
int max_batch_size = 0;

// The number of threads above which the spreader OMP critical goes atomic.
// Applies only to the CPU kernel.
int num_threads_for_atomic_spread = -1;
@@ -142,6 +144,9 @@
// The CUDA interpolation/spreading method.
SpreadMethod spread_method = SpreadMethod::AUTO;

// The input unit for the nonuniform points. See enum above.
PointsUnit points_unit = PointsUnit::RADIANS_PER_SAMPLE;

#if GOOGLE_CUDA

// Maximum subproblem size.