diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 07707c4..c27e191 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -33,5 +33,5 @@ jobs: working-directory: ${{github.workspace}}/build # Execute tests defined by the CMake configuration. # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail - run: ctest --output-on-failure -R intp-* + run: ctest --output-on-failure -L intp-regular diff --git a/CMakeLists.txt b/CMakeLists.txt index aa45bf8..3b4c90b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.12.0) project( BSplineInterpolation - VERSION 1.3.1 + VERSION 1.4.0 HOMEPAGE_URL "https://github.com/12ff54e/BSplineInterpolation.git" LANGUAGES CXX) include(CTest) diff --git a/README.md b/README.md index 614884c..88e0ff1 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,7 @@ You can control the bahavior of this library by defining the following macros: - `INTP_ENABLE_ASSERTION`: Add assertion to ensure data to be interplolated is valid. - `INTP_PERIODIC_NO_DUMMY_POINT`: Whether to accept dummy point when interpolating periodic data. If this macro is defined, number of data point is one less than the specified dimension, since the last point is implicitly set as the same of the first one. - `INTP_STACK_ALLOCATOR`: Use stack allocator in some small dynamic memory allocation scenario. +- `INTP_CELL_LAYOUT`: Store weights in cell layout redundantly to speed up evaluation. By default all of the above macros is not defined. diff --git a/src/include/BSpline.hpp b/src/include/BSpline.hpp index a653cd5..b871df8 100644 --- a/src/include/BSpline.hpp +++ b/src/include/BSpline.hpp @@ -9,6 +9,10 @@ #include // is_same, is_arithmatic #include +#ifdef INTP_MULTITHREAD +#include "DedicatedThreadPool.hpp" +#endif + #ifdef INTP_DEBUG #include #endif @@ -33,12 +37,21 @@ class BSpline { using knot_type = U; using KnotContainer = std::vector; + using ControlPointContainer = Mesh>>; +#ifdef INTP_CELL_LAYOUT + using ControlPointCellContainer = + Mesh>>; +#endif using BaseSpline = std::vector; using diff_type = typename KnotContainer::iterator::difference_type; @@ -156,45 +169,6 @@ class BSpline { return {get_knot_iter(indices, coords.first, coords.second)...}; } - private: - size_type order_; - - DimArray periodicity_; - - DimArray knots_; - ControlPointContainer control_points_; - - DimArray> range_; - - size_type buf_size_; - - // maximum stack buffer size - // This buffer is for storing weights when calculating spline derivative - // value. - constexpr static size_type MAX_BUF_SIZE_ = 1000; - - // auxiliary methods - - /** - * @brief Calculate base spline value of each dimension - * - * @tparam Coords knot_type ... - * @tparam indices 0, 1, ... - * @param knot_iters an array of knot iters - * @param spline_order an array of spline order - * @param coords a bunch of coordinates - */ - template - inline DimArray calc_base_spline_vals( - util::index_sequence, - const DimArray& knot_iters, - const DimArray& spline_order, - Coords... coords) const { - return {base_spline_value(indices, knot_iters[indices], coords, - spline_order[indices])...}; - } - - public: /** * @brief Construct a new BSpline object, with periodicity of each dimension * specified. @@ -217,19 +191,23 @@ class BSpline { template BSpline(size_type spline_order, DimArray periodicity, - ControlPointContainer ctrl_points, + ControlPointContainer ctrl_pts, std::pair... knot_iter_pairs) : order_(spline_order), periodicity_(periodicity), knots_{ KnotContainer(knot_iter_pairs.first, knot_iter_pairs.second)...}, - control_points_(std::move(ctrl_points)), +#ifdef INTP_CELL_LAYOUT + control_points_(generate_cell_layout(ctrl_pts)), +#else + control_points_(std::move(ctrl_pts)), +#endif range_{std::make_pair( (knot_iter_pairs.first)[order_], (knot_iter_pairs.second)[-static_cast(order_) - 1])...}, buf_size_(util::pow(order_ + 1, dim)) { for (size_type d = 0; d < dim; ++d) { - INTP_ASSERT(knots_[d].size() - control_points_.dim_size(d) == + INTP_ASSERT(knots_[d].size() - ctrl_pts.dim_size(d) == (periodicity_[d] ? 2 * order_ + 1 : order_ + 1), std::string("Inconsistency between knot number and " "control point number at dimension ") + @@ -258,14 +236,20 @@ class BSpline { knots_[dim_ind][knots_[dim_ind].size() - order_ - (2 - order_ % 2)]; } +#ifdef INTP_CELL_LAYOUT + void load_ctrlPts(const ControlPointContainer& control_points) { + control_points_ = generate_cell_layout(control_points); + } +#else template typename std::enable_if< std::is_same::type, ControlPointContainer>::value, void>::type - load_ctrlPts(C&& _control_points) { - control_points_ = std::forward(_control_points); + load_ctrlPts(C&& control_points) { + control_points_ = std::forward(control_points); } +#endif /** * @brief Get spline value at given pairs of coordinate and position hint @@ -298,12 +282,26 @@ class BSpline { // combine control points and basic spline values to get spline value val_type v{}; +#ifdef INTP_CELL_LAYOUT + std::array ind_arr{}; + for (size_type d = 0; d < dim; ++d) { + ind_arr[d] = static_cast( + distance(knots_begin(d), knot_iters[d])) - + order_; + } + auto cell_iter = control_points_.begin(dim, ind_arr); for (size_type i = 0; i < buf_size_; ++i) { - DimArray ind_arr; + knot_type coef = 1; for (size_type d = 0, combined_ind = i; d < dim; ++d) { - ind_arr[d] = combined_ind % (order_ + 1); + coef *= base_spline_values_1d[d][combined_ind % (order_ + 1)]; combined_ind /= (order_ + 1); } + v += coef * (*cell_iter++); + } +#else + MeshDimension local_mesh_dim(order_ + 1); + for (size_type i = 0; i < buf_size_; ++i) { + DimArray ind_arr = local_mesh_dim.dimwise_indices(i); knot_type coef = 1; for (size_type d = 0; d < dim; ++d) { @@ -329,6 +327,7 @@ class BSpline { v += coef * control_points_(ind_arr); } +#endif return v; } @@ -402,6 +401,15 @@ class BSpline { #endif // get local control points and basic spline values + +#ifdef INTP_CELL_LAYOUT + std::array ind_arr{}; + for (size_type d = 0; d < dim; ++d) { + ind_arr[d] = static_cast( + distance(knots_begin(d), knot_iters[d])) - + order_; + } + auto cell_iter = control_points_.begin(dim, ind_arr); for (size_type i = 0; i < buf_size_; ++i) { DimArray local_ind_arr{}; for (size_type d = 0, combined_ind = i; d < dim; ++d) { @@ -409,7 +417,23 @@ class BSpline { combined_ind /= (order_ + 1); } - val_type coef = 1; + knot_type coef = 1; + for (size_type d = 0; d < dim; ++d) { + coef *= base_spline_values_1d[d][local_ind_arr[d]]; + } + + local_spline_val(local_ind_arr) = coef; + local_control_points(local_ind_arr) = *cell_iter++; + } +#else + for (size_type i = 0; i < buf_size_; ++i) { + DimArray local_ind_arr{}; + for (size_type d = 0, combined_ind = i; d < dim; ++d) { + local_ind_arr[d] = combined_ind % (order_ + 1); + combined_ind /= (order_ + 1); + } + + knot_type coef = 1; DimArray ind_arr{}; for (size_type d = 0; d < dim; ++d) { coef *= base_spline_values_1d[d][local_ind_arr[d]]; @@ -422,7 +446,8 @@ class BSpline { knots_begin(d), knot_iters[d])) - order_); - // check periodicity, put out-of-right-boundary index to left + // check periodicity, put out-of-right-boundary index to + // left if (periodicity_[d]) { ind_arr[d] %= control_points_.dim_size(d); } @@ -431,6 +456,7 @@ class BSpline { local_spline_val(local_ind_arr) = coef; local_control_points(local_ind_arr) = control_points_(ind_arr); } +#endif for (size_type d = 0; d < dim; ++d) { if (spline_order[d] == order_) { continue; } @@ -571,6 +597,133 @@ class BSpline { std::cout << '\n'; } #endif + + private: + size_type order_; + + DimArray periodicity_; + + DimArray knots_; +#ifdef INTP_CELL_LAYOUT + ControlPointCellContainer control_points_; +#else + ControlPointContainer control_points_; +#endif + + DimArray> range_; + + size_type buf_size_; + + // maximum stack buffer size + // This buffer is for storing weights when calculating spline derivative + // value. + constexpr static size_type MAX_BUF_SIZE_ = 1000; + + // auxiliary methods + + /** + * @brief Calculate base spline value of each dimension + * + * @tparam Coords knot_type ... + * @tparam indices 0, 1, ... + * @param knot_iters an array of knot iters + * @param spline_order an array of spline order + * @param coords a bunch of coordinates + */ + template + inline DimArray calc_base_spline_vals( + util::index_sequence, + const DimArray& knot_iters, + const DimArray& spline_order, + Coords... coords) const { + return {base_spline_value(indices, knot_iters[indices], coords, + spline_order[indices])...}; + } + +#ifdef INTP_CELL_LAYOUT + ControlPointCellContainer generate_cell_layout( + const ControlPointContainer& ctrl_pts) const { + MeshDimension cell_container_dim( + util::pow(order_ + 1, dim - 1)); + for (size_type d = 0; d < dim; ++d) { + cell_container_dim.dim_size(d) = ctrl_pts.dim_size(d) - + (d == dim - 1 ? 0 : order_) + + (periodicity(d) ? order_ : 0); + } + + // Size of the last two dimension of control point cell container. The + // (dim-1)th dimention of cell container has the same length (order + // points more in periodic case) as the last dimension (which is also + // the (dim-1)th dimension) of the origin container, while other + // dimensions are #order shorter, except the last dimension with length + // (order+1)^(dim-1). + const auto line_size = cell_container_dim.dim_size(dim - 1) * + cell_container_dim.dim_size(dim); + + ControlPointCellContainer control_point_cell(cell_container_dim); + // size of hyperplane orthogonal to last 2 dimension + const size_type hyper_surface_size = + control_point_cell.size() / line_size; + + auto fill_cell = [&](size_type begin, size_type end) { + // iterate over hyperplane + for (size_type h_ind = begin; h_ind < end; ++h_ind) { + const auto ind_arr_on_hyper_surface = + control_point_cell.dimension().dimwise_indices(h_ind * + line_size); + const auto line_begin = + control_point_cell.begin(dim - 1, ind_arr_on_hyper_surface); + const auto line_end = + control_point_cell.end(dim - 1, ind_arr_on_hyper_surface); + // iterate along the (dim-1)th dimension + for (auto iter = line_begin; iter != line_end; ++iter) { + auto cell_ind_arr = control_point_cell.iter_indices(iter); + MeshDimension cell_dim(order_ + 1); + // iterate the (dim)th dimension + for (size_type i = 0; i < cell_dim.size(); ++i) { + auto local_shift_ind = cell_dim.dimwise_indices(i); + cell_ind_arr[dim] = i; + auto cp_ind_arr = + typename ControlPointContainer::index_type{}; + for (size_type d = 0; d < dim; ++d) { + cp_ind_arr[d] = + (cell_ind_arr[d] + + (d == dim - 1 + ? 0 + : local_shift_ind[dim - 2 - d])) % + ctrl_pts.dim_size(d); + } + control_point_cell(cell_ind_arr) = ctrl_pts(cp_ind_arr); + } + } + } + }; +#ifdef INTP_MULTITHREAD + const size_type block_num = static_cast( + std::sqrt(static_cast(hyper_surface_size))); + const size_type task_per_block = block_num == 0 + ? hyper_surface_size + : hyper_surface_size / block_num; + + auto& thread_pool = DedicatedThreadPool::get_instance(8); + std::vector> res; + + for (size_type i = 0; i < block_num; ++i) { + res.push_back(thread_pool.queue_task([=]() { + fill_cell(i * task_per_block, (i + 1) * task_per_block); + })); + } + // main thread deals with the remaining part in case hyper_surface_size + // not divisible by thread_num + fill_cell(block_num * task_per_block, hyper_surface_size); + // wait for all tasks are complete + for (auto&& f : res) { f.get(); } +#else + fill_cell(0, hyper_surface_size); +#endif // INTP_MULTITHREAD + return control_point_cell; + } +#endif // INTP_CELL_LAYOUT }; } // namespace intp diff --git a/src/include/Interpolation.hpp b/src/include/Interpolation.hpp index 41a9499..b3a4e3b 100644 --- a/src/include/Interpolation.hpp +++ b/src/include/Interpolation.hpp @@ -5,8 +5,10 @@ #include #ifdef INTP_TRACE +#ifndef INTP_DEBUG #define INTP_DEBUG #endif +#endif #include "InterpolationTemplate.hpp" @@ -23,227 +25,11 @@ class InterpolationFunction { // TODO: Add integration constexpr static size_type dim = D; - private: - size_type order_; - spline_type spline_; - template using DimArray = std::array; - DimArray dx_; - DimArray uniform_; - friend class InterpolationFunctionTemplate; - // auxiliary methods - - template - inline val_type call_op_helper(util::index_sequence, - DimArray c) const { - return spline_(std::make_pair( - c[di], - uniform_[di] - ? std::min( - spline_.knots_num(di) - order_ - 2, - static_cast(std::ceil(std::max( - coord_type{0.}, - (c[di] - range(di).first) / dx_[di] - - (periodicity(di) - ? coord_type{1.} - : coord_type{.5} * static_cast( - order_ + 1))))) + - order_) - : order_)...); - } - - template - inline val_type derivative_helper(util::index_sequence, - DimArray c, - DimArray d) const { - return spline_.derivative_at(std::make_tuple( - static_cast(c[di]), static_cast(d[di]), - uniform_[di] - ? std::min(spline_.knots_num(di) - order_ - 2, - static_cast(std::ceil(std::max( - 0., (c[di] - range(di).first) / dx_[di] - - (periodicity(di) - ? 1. - : .5 * static_cast( - order_ + 1))))) + - order_) - : order_)...); - } - - // overload for uniform knots - template - typename std::enable_if::value>::type - create_knot_vector_(size_type dim_ind, - const MeshDimension& mesh_dimension, - DimArray&, - std::pair x_range) { - uniform_[dim_ind] = true; - const auto periodic = periodicity(dim_ind); - const size_type n = mesh_dimension.dim_size(dim_ind) -#ifdef INTP_PERIODIC_NO_DUMMY_POINT - + (periodic ? 1 : 0) -#endif - ; - dx_[dim_ind] = - (x_range.second - x_range.first) / static_cast(n - 1); - - const size_t extra = - periodic ? 2 * order_ + (1 - order_ % 2) : order_ + 1; - - std::vector xs(n + extra, x_range.first); - - if (periodic) { - for (size_type i = 0; i < xs.size(); ++i) { - xs[i] = x_range.first + - (static_cast(i) - - coord_type{.5} * static_cast(extra)) * - dx_[dim_ind]; - } - } else { - for (size_type i = order_ + 1; i < xs.size() - order_ - 1; ++i) { - xs[i] = x_range.first + - (static_cast(i) - - coord_type{.5} * static_cast(extra)) * - dx_[dim_ind]; - } - for (size_type i = xs.size() - order_ - 1; i < xs.size(); ++i) { - xs[i] = x_range.second; - } - } - - spline_.load_knots(dim_ind, std::move(xs)); - } - - // overload for nonuniform knots, given by iterator pair - template - typename std::enable_if::iterator_category, - std::input_iterator_tag>::value>::type - create_knot_vector_( - size_type dim_ind, - const MeshDimension& mesh_dimension, - DimArray& input_coords, - std::pair x_range) { - uniform_[dim_ind] = false; - const auto periodic = periodicity(dim_ind); - - const size_type n{static_cast( - std::distance(x_range.first, x_range.second))}; - INTP_ASSERT(n == mesh_dimension.dim_size(dim_ind) -#ifdef INTP_PERIODIC_NO_DUMMY_POINT - + (periodic ? 1 : 0) -#endif - , - std::string("Inconsistency between knot number and " - "interpolated value number at dimension ") + - std::to_string(dim_ind)); -#ifndef INTP_ENABLE_ASSERTION - // suppress unused parameter warning - (void)mesh_dimension; -#endif - typename spline_type::KnotContainer xs( - periodic ? n + 2 * order_ + (1 - order_ % 2) : n + order_ + 1); - - auto& input_coord = input_coords[dim_ind]; - input_coord.reserve(n); - // The x_range may be given by input iterators, which can not be - // multi-passed. - if (periodic) { - // In periodic case, the knots are data points, shifted by half of - // local grid size if spline order is odd. - - auto iter = x_range.first; - input_coord.push_back(*iter); - for (size_type i = order_ + 1; i < order_ + n; ++i) { - coord_type present = *(++iter); - xs[i] = order_ % 2 == 0 ? .5 * (input_coord.back() + present) - : present; - input_coord.push_back(present); - } - coord_type period = input_coord.back() - input_coord.front(); - for (size_type i = 0; i < order_ + 1; ++i) { - xs[i] = xs[n + i - 1] - period; - xs[xs.size() - i - 1] = xs[xs.size() - i - n] + period; - } - } else { - // In aperiodic case, the internal knots are moving average of data - // points with windows size equal to spline order. - - auto it = x_range.first; - auto l_knot = *it; - // fill leftmost *order+1* identical knots - for (size_type i = 0; i < order_ + 1; ++i) { xs[i] = l_knot; } - // first knot is same as first input coordinate - input_coord.emplace_back(l_knot); - // Every knot in middle is average of *order* input - // coordinates. This var is to track the sum of a moving window with - // width *order*. - coord_type window_sum{}; - for (size_type i = 1; i < order_; ++i) { - input_coord.emplace_back(*(++it)); - window_sum += input_coord[i]; - } - for (size_type i = order_ + 1; i < n; ++i) { - input_coord.emplace_back(*(++it)); - window_sum += input_coord[i - 1]; - xs[i] = window_sum / static_cast(order_); - window_sum -= input_coord[i - order_]; - } - auto r_knot = *(++it); - // fill rightmost *order+1* identical knots - for (size_type i = n; i < n + order_ + 1; ++i) { xs[i] = r_knot; } - // last knot is same as last input coordinate - input_coord.emplace_back(r_knot); - } - // check whether input coordinates is monotonic - for (std::size_t i = 0; i < input_coord.size() - 1; ++i) { - INTP_ASSERT(input_coord[i + 1] > input_coord[i], - std::string("Given coordinate is not monotonically " - "increasing at dimension ") + - std::to_string(dim_ind)); - } - -#ifdef INTP_TRACE - std::cout << "[TRACE] Nonuniform knots along dimension" << dim_ind - << ":\n"; - for (auto& c : xs) { std::cout << "[TRACE] " << c << '\n'; } - std::cout << std::endl; -#endif - - spline_.load_knots(dim_ind, std::move(xs)); - } - - template - void create_knots_( - util::index_sequence, - MeshDimension mesh_dimension, - DimArray& input_coords, - std::pair... x_ranges) { -#if __cplusplus >= 201703L - (create_knot_vector_(di, mesh_dimension, input_coords, x_ranges), ...); -#else - // polyfill of C++17 fold expression over comma - static_cast(std::array{ - (create_knot_vector_(di, mesh_dimension, input_coords, x_ranges), - nullptr)...}); -#endif - } - - inline void boundary_check_(const DimArray& coord) const { - for (size_type d = 0; d < dim; ++d) { - if (!periodicity(d) && - (coord[d] < range(d).first || coord[d] > range(d).second)) { - throw std::domain_error( - "Given coordinate out of interpolation function range!"); - } - } - } - - public: /** * @brief Construct a new 1D Interpolation Function object, mimicking * Mathematica's `Interpolation` function, with option `Method->"Spline"`. @@ -465,9 +251,7 @@ class InterpolationFunction { // TODO: Add integration return spline_.periodicity(dim_ind); } - inline bool uniform(size_type dim_ind) const { - return uniform_[dim_ind]; - } + inline bool uniform(size_type dim_ind) const { return uniform_[dim_ind]; } const std::pair& @@ -480,12 +264,226 @@ class InterpolationFunction { // TODO: Add integration * * @return spline_type& */ - const spline_type& spline() const { - return spline_; + const spline_type& spline() const { return spline_; } + + size_type order() const { return order_; } + + private: + size_type order_; + spline_type spline_; + + DimArray dx_; + DimArray uniform_; + + // auxiliary methods + + template + inline val_type call_op_helper(util::index_sequence, + DimArray c) const { + return spline_(std::make_pair( + c[di], + uniform_[di] + ? std::min( + spline_.knots_num(di) - order_ - 2, + static_cast(std::ceil(std::max( + coord_type{0.}, + (c[di] - range(di).first) / dx_[di] - + (periodicity(di) + ? coord_type{1.} + : coord_type{.5} * static_cast( + order_ + 1))))) + + order_) + : order_)...); + } + + template + inline val_type derivative_helper(util::index_sequence, + DimArray c, + DimArray d) const { + return spline_.derivative_at(std::make_tuple( + static_cast(c[di]), static_cast(d[di]), + uniform_[di] + ? std::min(spline_.knots_num(di) - order_ - 2, + static_cast(std::ceil(std::max( + 0., (c[di] - range(di).first) / dx_[di] - + (periodicity(di) + ? 1. + : .5 * static_cast( + order_ + 1))))) + + order_) + : order_)...); + } + + // overload for uniform knots + template + typename std::enable_if::value>::type + create_knot_vector_(size_type dim_ind, + const MeshDimension& mesh_dimension, + DimArray&, + std::pair x_range) { + uniform_[dim_ind] = true; + const auto periodic = periodicity(dim_ind); + const size_type n = mesh_dimension.dim_size(dim_ind) +#ifdef INTP_PERIODIC_NO_DUMMY_POINT + + (periodic ? 1 : 0) +#endif + ; + dx_[dim_ind] = + (x_range.second - x_range.first) / static_cast(n - 1); + + const size_t extra = + periodic ? 2 * order_ + (1 - order_ % 2) : order_ + 1; + + std::vector xs(n + extra, x_range.first); + + if (periodic) { + for (size_type i = 0; i < xs.size(); ++i) { + xs[i] = x_range.first + + (static_cast(i) - + coord_type{.5} * static_cast(extra)) * + dx_[dim_ind]; + } + } else { + for (size_type i = order_ + 1; i < xs.size() - order_ - 1; ++i) { + xs[i] = x_range.first + + (static_cast(i) - + coord_type{.5} * static_cast(extra)) * + dx_[dim_ind]; + } + for (size_type i = xs.size() - order_ - 1; i < xs.size(); ++i) { + xs[i] = x_range.second; + } + } + + spline_.load_knots(dim_ind, std::move(xs)); + } + + // overload for nonuniform knots, given by iterator pair + template + typename std::enable_if::iterator_category, + std::input_iterator_tag>::value>::type + create_knot_vector_( + size_type dim_ind, + const MeshDimension& mesh_dimension, + DimArray& input_coords, + std::pair x_range) { + uniform_[dim_ind] = false; + const auto periodic = periodicity(dim_ind); + + const size_type n{static_cast( + std::distance(x_range.first, x_range.second))}; +#ifdef INTP_PERIODIC_NO_DUMMY_POINT + INTP_ASSERT(n == mesh_dimension.dim_size(dim_ind) + (periodic ? 1 : 0), + std::string("Inconsistency between knot number and " + "interpolated value number at dimension ") + + std::to_string(dim_ind)); +#else + INTP_ASSERT(n == mesh_dimension.dim_size(dim_ind), + std::string("Inconsistency between knot number and " + "interpolated value number at dimension ") + + std::to_string(dim_ind)); +#endif +#ifndef INTP_ENABLE_ASSERTION + // suppress unused parameter warning + (void)mesh_dimension; +#endif + typename spline_type::KnotContainer xs( + periodic ? n + 2 * order_ + (1 - order_ % 2) : n + order_ + 1); + + auto& input_coord = input_coords[dim_ind]; + input_coord.reserve(n); + // The x_range may be given by input iterators, which can not be + // multi-passed. + if (periodic) { + // In periodic case, the knots are data points, shifted by half of + // local grid size if spline order is odd. + + auto iter = x_range.first; + input_coord.push_back(*iter); + for (size_type i = order_ + 1; i < order_ + n; ++i) { + coord_type present = *(++iter); + xs[i] = order_ % 2 == 0 ? .5 * (input_coord.back() + present) + : present; + input_coord.push_back(present); + } + coord_type period = input_coord.back() - input_coord.front(); + for (size_type i = 0; i < order_ + 1; ++i) { + xs[i] = xs[n + i - 1] - period; + xs[xs.size() - i - 1] = xs[xs.size() - i - n] + period; + } + } else { + // In aperiodic case, the internal knots are moving average of data + // points with windows size equal to spline order. + + auto it = x_range.first; + auto l_knot = *it; + // fill leftmost *order+1* identical knots + for (size_type i = 0; i < order_ + 1; ++i) { xs[i] = l_knot; } + // first knot is same as first input coordinate + input_coord.emplace_back(l_knot); + // Every knot in middle is average of *order* input + // coordinates. This var is to track the sum of a moving window with + // width *order*. + coord_type window_sum{}; + for (size_type i = 1; i < order_; ++i) { + input_coord.emplace_back(*(++it)); + window_sum += input_coord[i]; + } + for (size_type i = order_ + 1; i < n; ++i) { + input_coord.emplace_back(*(++it)); + window_sum += input_coord[i - 1]; + xs[i] = window_sum / static_cast(order_); + window_sum -= input_coord[i - order_]; + } + auto r_knot = *(++it); + // fill rightmost *order+1* identical knots + for (size_type i = n; i < n + order_ + 1; ++i) { xs[i] = r_knot; } + // last knot is same as last input coordinate + input_coord.emplace_back(r_knot); + } + // check whether input coordinates is monotonic + for (std::size_t i = 0; i < input_coord.size() - 1; ++i) { + INTP_ASSERT(input_coord[i + 1] > input_coord[i], + std::string("Given coordinate is not monotonically " + "increasing at dimension ") + + std::to_string(dim_ind)); + } + +#ifdef INTP_TRACE + std::cout << "[TRACE] Nonuniform knots along dimension" << dim_ind + << ":\n"; + for (auto& c : xs) { std::cout << "[TRACE] " << c << '\n'; } + std::cout << std::endl; +#endif + + spline_.load_knots(dim_ind, std::move(xs)); + } + + template + void create_knots_( + util::index_sequence, + MeshDimension mesh_dimension, + DimArray& input_coords, + std::pair... x_ranges) { +#if __cplusplus >= 201703L + (create_knot_vector_(di, mesh_dimension, input_coords, x_ranges), ...); +#else + // polyfill of C++17 fold expression over comma + static_cast(std::array{ + (create_knot_vector_(di, mesh_dimension, input_coords, x_ranges), + nullptr)...}); +#endif } - size_type order() const { - return order_; + inline void boundary_check_(const DimArray& coord) const { + for (size_type d = 0; d < dim; ++d) { + if (!periodicity(d) && + (coord[d] < range(d).first || coord[d] > range(d).second)) { + throw std::domain_error( + "Given coordinate out of interpolation function range!"); + } + } } }; diff --git a/src/include/Mesh.hpp b/src/include/Mesh.hpp index c331a8f..7870bc5 100644 --- a/src/include/Mesh.hpp +++ b/src/include/Mesh.hpp @@ -38,6 +38,7 @@ class MeshDimension { } size_type dim_size(size_type dim_ind) const { return dim_size_[dim_ind]; } + size_type& dim_size(size_type dim_ind) { return dim_size_[dim_ind]; } size_type dim_acc_size(size_type dim_ind) const { size_type das = 1; @@ -120,6 +121,7 @@ class Mesh { using value_type = U; using difference_type = std::ptrdiff_t; using pointer = U*; + using const_pointer = const U*; using reference = U&; using iterator_category = std::random_access_iterator_tag; @@ -132,13 +134,9 @@ class Mesh { : ptr_(ptr), step_length_(step_length) {} // allow iterator to const_iterator conversion - template - skip_iterator( - skip_iterator::type>:: - value, - V>::type> other) - : ptr_(other.ptr_), step_length_(other.step_length_) {} + operator skip_iterator() { return {ptr_, step_length_}; } + // cast to (const) underlying pointer type + explicit operator const_pointer() const { return ptr_; } // forward iterator requirement @@ -301,6 +299,14 @@ class Mesh { return storage_[dimension_.indexing(indices...)]; } + val_type& operator()(index_type indices) { + return storage_[dimension_.indexing(indices)]; + } + + const val_type& operator()(index_type indices) const { + return storage_[dimension_.indexing(indices)]; + } + const val_type* data() const { return storage_.data(); } // iterator @@ -352,6 +358,13 @@ class Mesh { return dimension_.dimwise_indices( static_cast(std::distance(begin(), iter))); } + + index_type iter_indices(skip_iterator skip_iter) const { + return dimension_.dimwise_indices(static_cast( + static_cast::pointer>( + skip_iter) - + data())); + } }; } // namespace intp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9c52bdd..928664a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -24,30 +24,51 @@ else() endif() endif() -add_compile_definitions($<$:INTP_TRACE> - $<$,$>:INTP_DEBUG> - $<$,$>:INTP_MULTITHREAD>) +add_compile_definitions($<$,$>:INTP_DEBUG> + $<$:INTP_MULTITHREAD>) -add_compile_definitions(INTP_PERIODIC_NO_DUMMY_POINT) +add_compile_definitions(INTP_PERIODIC_NO_DUMMY_POINT + INTP_CELL_LAYOUT) + +#add_compile_definitions(INTP_TRACE) # Specify tests -list(APPEND tests "util-test" "mesh-test" "band-matrix-and-solver-test" "bspline-test" "interpolation-test" "interpolation-speed-test" "interpolation-template-test") +list(APPEND regular_tests "util-test" "mesh-test" "band-matrix-and-solver-test" "bspline-test" "interpolation-test") +list(APPEND speed_tests "interpolation-speed-test" "interpolation-template-test") -list(LENGTH tests test_num) +list(LENGTH regular_tests test_num) message(STATUS) message(STATUS ${test_num} " tests found: ") enable_testing() -foreach(test ${tests}) +foreach(test ${regular_tests}) string(JOIN "" test_src_file src/ ${test} .cpp) add_executable(${test} ${test_src_file}) - target_compile_features(${test} PRIVATE cxx_std_17) target_include_directories( ${test} PRIVATE $) string(JOIN "-" test_name intp ${test}) add_test(${test_name} ${test}) + set_tests_properties(${test_name} PROPERTIES LABELS intp-regular) message(STATUS ${test} " from source file " ${test_src_file}) -endforeach(test ${tests}) +endforeach(test ${regular_tests}) + +if(CMAKE_BUILD_TYPE STREQUAL Release) + message(STATUS) + list(LENGTH speed_tests test_num) + message(STATUS ${test_num} " speed tests found: ") + foreach(test ${speed_tests}) + string(JOIN "" test_src_file src/ ${test} .cpp) + add_executable(${test} ${test_src_file}) + target_include_directories( + ${test} PRIVATE $) + + string(JOIN "-" test_name intp ${test}) + add_test(${test_name} ${test}) + set_tests_properties(${test_name} PROPERTIES LABELS intp-speed) + message(STATUS ${test} " from source file " ${test_src_file}) + endforeach(test ${speed_tests}) +endif() + # target_compile_options(interpolation-speed-test PRIVATE $<$:-march=native>) diff --git a/test/src/interpolation-speed-test.cpp b/test/src/interpolation-speed-test.cpp index 81e3150..98ca7db 100644 --- a/test/src/interpolation-speed-test.cpp +++ b/test/src/interpolation-speed-test.cpp @@ -2,6 +2,7 @@ #include "include/Assertion.hpp" #include "include/rel_err.hpp" +#include #include #include #include @@ -40,26 +41,28 @@ int main() { } Assertion assertion; - constexpr size_t len = 256; + constexpr size_t len_power = 6 * 4; constexpr size_t eval_count = 1 << 20; - std::vector eval_coord_x; - std::vector eval_coord_y; - std::vector eval_coord_z; - eval_coord_x.reserve(eval_count); - eval_coord_y.reserve(eval_count); - eval_coord_z.reserve(eval_count); - for (size_t i = 0; i < eval_count; ++i) { - eval_coord_x.push_back(rand_dist(rand_gen)); - eval_coord_y.push_back(rand_dist(rand_gen)); - eval_coord_z.push_back(rand_dist2(rand_gen)); + std::vector eval_coord_1d; + std::vector> eval_coord_2d; + std::vector> eval_coord_3d; + { + eval_coord_1d.reserve(eval_count); + eval_coord_2d.reserve(eval_count); + eval_coord_3d.reserve(eval_count); + for (size_t i = 0; i < eval_count; ++i) { + eval_coord_1d.push_back(rand_dist(rand_gen)); + eval_coord_2d.push_back({rand_dist(rand_gen), rand_dist(rand_gen)}); + eval_coord_3d.push_back({rand_dist(rand_gen), rand_dist(rand_gen), + rand_dist2(rand_gen)}); + } } - // 1D case { const auto t_start_1d = high_resolution_clock::now(); - constexpr size_t len_1d = len * len * len; + constexpr size_t len_1d = 1 << len_power; constexpr double dx = 2 * M_PI / (len_1d); std::vector vec_1d{}; @@ -88,10 +91,20 @@ int main() { const auto t_after_interpolation = high_resolution_clock::now(); - for (size_t i = 0; i < eval_count; ++i) { interp1d(eval_coord_x[i]); } + for (size_t i = 0; i < eval_count; ++i) { interp1d(eval_coord_1d[i]); } + // for (auto& x : eval_coord_1d) { interp1d(x); } const auto t_after_eval = high_resolution_clock::now(); + std::sort(eval_coord_1d.begin(), eval_coord_1d.end()); + + const auto t_before_seq_eval = high_resolution_clock::now(); + + for (size_t i = 0; i < eval_count; ++i) { interp1d(eval_coord_1d[i]); } + // for (auto& x : eval_coord_1d) { interp1d(x); } + + const auto t_after_seq_eval = high_resolution_clock::now(); + double err_1d = rel_err(interp1d, std::make_pair(coord_1d.begin(), coord_1d.end()), std::make_pair(vals_1d.begin(), vals_1d.end())); @@ -115,10 +128,15 @@ int main() { t_after_interpolation - t_after_vec) .count() << "ms\n"; - std::cout << "Evaluate\t\t" + std::cout << "Evaluate(random)\t" << duration( t_after_eval - t_after_interpolation) .count() + << "ms\n"; + std::cout << "Evaluate(sequential)\t" + << duration(t_after_seq_eval - + t_before_seq_eval) + .count() << "ms\n\n"; } @@ -126,8 +144,8 @@ int main() { { const auto t_start_2d = high_resolution_clock::now(); - const size_t len_2d = static_cast(std::pow(len, 1.5)); - const double dt = 2 * M_PI / static_cast(len_2d); + constexpr size_t len_2d = 1 << (len_power / 2); + constexpr double dt = 2 * M_PI / static_cast(len_2d); #ifdef INTP_PERIODIC_NO_DUMMY_POINT Mesh trig_mesh_2d(len_2d); @@ -164,12 +182,28 @@ int main() { const auto t_after_interpolation = high_resolution_clock::now(); - for (size_t i = 0; i < eval_count; ++i) { - interp2d(eval_coord_y[i], eval_coord_y[i]); - } + for (size_t i = 0; i < eval_count; ++i) { interp2d(eval_coord_2d[i]); } const auto t_after_eval = high_resolution_clock::now(); + std::sort(eval_coord_2d.begin(), eval_coord_2d.end(), + [](const std::array& p1, + const std::array& p2) { + const auto x_1 = + static_cast(std::floor((p1[0] + M_PI) / dt)); + const auto x_2 = + static_cast(std::floor((p2[0] + M_PI) / dt)); + return x_1 < x_2 || (x_1 == x_2 && + std::floor((p1[1] + M_PI) / dt) <= + std::floor((p2[1] + M_PI) / dt)); + }); + + const auto t_before_seq_eval = high_resolution_clock::now(); + + for (size_t i = 0; i < eval_count; ++i) { interp2d(eval_coord_2d[i]); } + + const auto t_after_seq_eval = high_resolution_clock::now(); + double err_2d = rel_err(interp2d, std::make_pair(coord_2d.begin(), coord_2d.end()), std::make_pair(vals_2d.begin(), vals_2d.end())); @@ -195,10 +229,15 @@ int main() { t_after_interpolation - t_after_mesh) .count() << "ms\n"; - std::cout << "Evaluate\t\t" + std::cout << "Evaluate(random)\t" << duration( t_after_eval - t_after_interpolation) .count() + << "ms\n"; + std::cout << "Evaluate(sequential)\t" + << duration(t_after_seq_eval - + t_before_seq_eval) + .count() << "ms\n\n"; } @@ -206,14 +245,15 @@ int main() { { const auto t_start_3d = high_resolution_clock::now(); - constexpr double dt_3d = 2 * M_PI / len; - constexpr double dt_3d_aperiodic = 1. / (len - 1); + constexpr size_t len_3d = 1 << (len_power / 3); + constexpr double dt_3d = 2 * M_PI / len_3d; + constexpr double dt_3d_aperiodic = 1. / (len_3d - 1); #ifdef INTP_PERIODIC_NO_DUMMY_POINT - Mesh mesh_3d{len, len, len}; - for (size_t i = 0; i < len; ++i) { - for (size_t j = 0; j < len; ++j) { - for (size_t k = 0; k < len; ++k) { + Mesh mesh_3d{len_3d, len_3d, len_3d}; + for (size_t i = 0; i < len_3d; ++i) { + for (size_t j = 0; j < len_3d; ++j) { + for (size_t k = 0; k < len_3d; ++k) { mesh_3d(i, j, k) = std::sin(static_cast(i) * dt_3d - M_PI) * std::cos(static_cast(j) * dt_3d - M_PI) * @@ -223,10 +263,10 @@ int main() { } } #else - Mesh mesh_3d{len + 1, len + 1, len}; - for (size_t i = 0; i <= len; ++i) { - for (size_t j = 0; j <= len; ++j) { - for (size_t k = 0; k < len; ++k) { + Mesh mesh_3d{len_3d + 1, len_3d + 1, len_3d}; + for (size_t i = 0; i <= len_3d; ++i) { + for (size_t j = 0; j <= len_3d; ++j) { + for (size_t k = 0; k < len_3d; ++k) { mesh_3d(i, j, k) = std::sin(static_cast(i) * dt_3d - M_PI) * std::cos(static_cast(j) * dt_3d - M_PI) * @@ -253,17 +293,38 @@ int main() { const auto t_after_interpolation = high_resolution_clock::now(); - for (size_t i = 0; i < eval_count; ++i) { - interp3d(eval_coord_x[i], eval_coord_y[i], eval_coord_z[i]); - } + for (size_t i = 0; i < eval_count; ++i) { interp3d(eval_coord_3d[i]); } const auto t_after_eval = high_resolution_clock::now(); + std::sort(eval_coord_3d.begin(), eval_coord_3d.end(), + [](const std::array& p1, + const std::array& p2) { + const auto x_1 = + static_cast(std::floor((p1[0] + M_PI) / dt_3d)); + const auto x_2 = + static_cast(std::floor((p2[0] + M_PI) / dt_3d)); + const auto y_1 = + static_cast(std::floor((p1[1] + M_PI) / dt_3d)); + const auto y_2 = + static_cast(std::floor((p2[1] + M_PI) / dt_3d)); + return x_1 < x_2 || (x_1 == x_2 && y_1 < y_2) || + (x_1 == x_2 && y_1 == y_2 && + std::floor((p1[2] + .5) / dt_3d_aperiodic) <= + std::floor((p2[2] + .5) / dt_3d_aperiodic)); + }); + + const auto t_before_seq_eval = high_resolution_clock::now(); + + for (size_t i = 0; i < eval_count; ++i) { interp3d(eval_coord_3d[i]); } + + const auto t_after_seq_eval = high_resolution_clock::now(); + double err_3d = rel_err(interp3d, std::make_pair(coord_3d.begin(), coord_3d.end()), std::make_pair(vals_3d.begin(), vals_3d.end())); - const double eps = std::pow(2 * M_PI / len, 4) / 2; + const double eps = std::pow(2 * M_PI / len_3d, 4) / 2; assertion(err_3d < eps); std::cout << "Interpolation 3d trig-exp function with err = " << err_3d << '\n'; @@ -282,11 +343,16 @@ int main() { t_after_interpolation - t_after_mesh_3d) .count() << "ms\n"; - std::cout << "Evaluate\t\t" + std::cout << "Evaluate(random)\t" << duration( t_after_eval - t_after_interpolation) .count() << "ms\n"; + std::cout << "Evaluate(sequential)\t" + << duration(t_after_seq_eval - + t_before_seq_eval) + .count() + << "ms\n\n"; } return assertion.status();