From edf3e1e4d56be0ea7fe71f70b12f9ecc5389a3a2 Mon Sep 17 00:00:00 2001 From: qzhong <12ff54e@gmail.com> Date: Tue, 9 Apr 2024 11:59:45 +0800 Subject: [PATCH 01/11] Refactor some code --- src/include/BSpline.hpp | 77 +++--- src/include/Interpolation.hpp | 439 +++++++++++++++++----------------- 2 files changed, 254 insertions(+), 262 deletions(-) diff --git a/src/include/BSpline.hpp b/src/include/BSpline.hpp index a653cd5..11a2224 100644 --- a/src/include/BSpline.hpp +++ b/src/include/BSpline.hpp @@ -156,45 +156,6 @@ class BSpline { return {get_knot_iter(indices, coords.first, coords.second)...}; } - private: - size_type order_; - - DimArray periodicity_; - - DimArray knots_; - ControlPointContainer control_points_; - - DimArray> range_; - - size_type buf_size_; - - // maximum stack buffer size - // This buffer is for storing weights when calculating spline derivative - // value. - constexpr static size_type MAX_BUF_SIZE_ = 1000; - - // auxiliary methods - - /** - * @brief Calculate base spline value of each dimension - * - * @tparam Coords knot_type ... - * @tparam indices 0, 1, ... - * @param knot_iters an array of knot iters - * @param spline_order an array of spline order - * @param coords a bunch of coordinates - */ - template - inline DimArray calc_base_spline_vals( - util::index_sequence, - const DimArray& knot_iters, - const DimArray& spline_order, - Coords... coords) const { - return {base_spline_value(indices, knot_iters[indices], coords, - spline_order[indices])...}; - } - - public: /** * @brief Construct a new BSpline object, with periodicity of each dimension * specified. @@ -571,6 +532,44 @@ class BSpline { std::cout << '\n'; } #endif + + private: + size_type order_; + + DimArray periodicity_; + + DimArray knots_; + ControlPointContainer control_points_; + + DimArray> range_; + + size_type buf_size_; + + // maximum stack buffer size + // This buffer is for storing weights when calculating spline derivative + // value. + constexpr static size_type MAX_BUF_SIZE_ = 1000; + + // auxiliary methods + + /** + * @brief Calculate base spline value of each dimension + * + * @tparam Coords knot_type ... + * @tparam indices 0, 1, ... + * @param knot_iters an array of knot iters + * @param spline_order an array of spline order + * @param coords a bunch of coordinates + */ + template + inline DimArray calc_base_spline_vals( + util::index_sequence, + const DimArray& knot_iters, + const DimArray& spline_order, + Coords... coords) const { + return {base_spline_value(indices, knot_iters[indices], coords, + spline_order[indices])...}; + } }; } // namespace intp diff --git a/src/include/Interpolation.hpp b/src/include/Interpolation.hpp index 41a9499..6e678c0 100644 --- a/src/include/Interpolation.hpp +++ b/src/include/Interpolation.hpp @@ -23,227 +23,11 @@ class InterpolationFunction { // TODO: Add integration constexpr static size_type dim = D; - private: - size_type order_; - spline_type spline_; - template using DimArray = std::array; - DimArray dx_; - DimArray uniform_; - friend class InterpolationFunctionTemplate; - // auxiliary methods - - template - inline val_type call_op_helper(util::index_sequence, - DimArray c) const { - return spline_(std::make_pair( - c[di], - uniform_[di] - ? std::min( - spline_.knots_num(di) - order_ - 2, - static_cast(std::ceil(std::max( - coord_type{0.}, - (c[di] - range(di).first) / dx_[di] - - (periodicity(di) - ? coord_type{1.} - : coord_type{.5} * static_cast( - order_ + 1))))) + - order_) - : order_)...); - } - - template - inline val_type derivative_helper(util::index_sequence, - DimArray c, - DimArray d) const { - return spline_.derivative_at(std::make_tuple( - static_cast(c[di]), static_cast(d[di]), - uniform_[di] - ? std::min(spline_.knots_num(di) - order_ - 2, - static_cast(std::ceil(std::max( - 0., (c[di] - range(di).first) / dx_[di] - - (periodicity(di) - ? 1. - : .5 * static_cast( - order_ + 1))))) + - order_) - : order_)...); - } - - // overload for uniform knots - template - typename std::enable_if::value>::type - create_knot_vector_(size_type dim_ind, - const MeshDimension& mesh_dimension, - DimArray&, - std::pair x_range) { - uniform_[dim_ind] = true; - const auto periodic = periodicity(dim_ind); - const size_type n = mesh_dimension.dim_size(dim_ind) -#ifdef INTP_PERIODIC_NO_DUMMY_POINT - + (periodic ? 1 : 0) -#endif - ; - dx_[dim_ind] = - (x_range.second - x_range.first) / static_cast(n - 1); - - const size_t extra = - periodic ? 2 * order_ + (1 - order_ % 2) : order_ + 1; - - std::vector xs(n + extra, x_range.first); - - if (periodic) { - for (size_type i = 0; i < xs.size(); ++i) { - xs[i] = x_range.first + - (static_cast(i) - - coord_type{.5} * static_cast(extra)) * - dx_[dim_ind]; - } - } else { - for (size_type i = order_ + 1; i < xs.size() - order_ - 1; ++i) { - xs[i] = x_range.first + - (static_cast(i) - - coord_type{.5} * static_cast(extra)) * - dx_[dim_ind]; - } - for (size_type i = xs.size() - order_ - 1; i < xs.size(); ++i) { - xs[i] = x_range.second; - } - } - - spline_.load_knots(dim_ind, std::move(xs)); - } - - // overload for nonuniform knots, given by iterator pair - template - typename std::enable_if::iterator_category, - std::input_iterator_tag>::value>::type - create_knot_vector_( - size_type dim_ind, - const MeshDimension& mesh_dimension, - DimArray& input_coords, - std::pair x_range) { - uniform_[dim_ind] = false; - const auto periodic = periodicity(dim_ind); - - const size_type n{static_cast( - std::distance(x_range.first, x_range.second))}; - INTP_ASSERT(n == mesh_dimension.dim_size(dim_ind) -#ifdef INTP_PERIODIC_NO_DUMMY_POINT - + (periodic ? 1 : 0) -#endif - , - std::string("Inconsistency between knot number and " - "interpolated value number at dimension ") + - std::to_string(dim_ind)); -#ifndef INTP_ENABLE_ASSERTION - // suppress unused parameter warning - (void)mesh_dimension; -#endif - typename spline_type::KnotContainer xs( - periodic ? n + 2 * order_ + (1 - order_ % 2) : n + order_ + 1); - - auto& input_coord = input_coords[dim_ind]; - input_coord.reserve(n); - // The x_range may be given by input iterators, which can not be - // multi-passed. - if (periodic) { - // In periodic case, the knots are data points, shifted by half of - // local grid size if spline order is odd. - - auto iter = x_range.first; - input_coord.push_back(*iter); - for (size_type i = order_ + 1; i < order_ + n; ++i) { - coord_type present = *(++iter); - xs[i] = order_ % 2 == 0 ? .5 * (input_coord.back() + present) - : present; - input_coord.push_back(present); - } - coord_type period = input_coord.back() - input_coord.front(); - for (size_type i = 0; i < order_ + 1; ++i) { - xs[i] = xs[n + i - 1] - period; - xs[xs.size() - i - 1] = xs[xs.size() - i - n] + period; - } - } else { - // In aperiodic case, the internal knots are moving average of data - // points with windows size equal to spline order. - - auto it = x_range.first; - auto l_knot = *it; - // fill leftmost *order+1* identical knots - for (size_type i = 0; i < order_ + 1; ++i) { xs[i] = l_knot; } - // first knot is same as first input coordinate - input_coord.emplace_back(l_knot); - // Every knot in middle is average of *order* input - // coordinates. This var is to track the sum of a moving window with - // width *order*. - coord_type window_sum{}; - for (size_type i = 1; i < order_; ++i) { - input_coord.emplace_back(*(++it)); - window_sum += input_coord[i]; - } - for (size_type i = order_ + 1; i < n; ++i) { - input_coord.emplace_back(*(++it)); - window_sum += input_coord[i - 1]; - xs[i] = window_sum / static_cast(order_); - window_sum -= input_coord[i - order_]; - } - auto r_knot = *(++it); - // fill rightmost *order+1* identical knots - for (size_type i = n; i < n + order_ + 1; ++i) { xs[i] = r_knot; } - // last knot is same as last input coordinate - input_coord.emplace_back(r_knot); - } - // check whether input coordinates is monotonic - for (std::size_t i = 0; i < input_coord.size() - 1; ++i) { - INTP_ASSERT(input_coord[i + 1] > input_coord[i], - std::string("Given coordinate is not monotonically " - "increasing at dimension ") + - std::to_string(dim_ind)); - } - -#ifdef INTP_TRACE - std::cout << "[TRACE] Nonuniform knots along dimension" << dim_ind - << ":\n"; - for (auto& c : xs) { std::cout << "[TRACE] " << c << '\n'; } - std::cout << std::endl; -#endif - - spline_.load_knots(dim_ind, std::move(xs)); - } - - template - void create_knots_( - util::index_sequence, - MeshDimension mesh_dimension, - DimArray& input_coords, - std::pair... x_ranges) { -#if __cplusplus >= 201703L - (create_knot_vector_(di, mesh_dimension, input_coords, x_ranges), ...); -#else - // polyfill of C++17 fold expression over comma - static_cast(std::array{ - (create_knot_vector_(di, mesh_dimension, input_coords, x_ranges), - nullptr)...}); -#endif - } - - inline void boundary_check_(const DimArray& coord) const { - for (size_type d = 0; d < dim; ++d) { - if (!periodicity(d) && - (coord[d] < range(d).first || coord[d] > range(d).second)) { - throw std::domain_error( - "Given coordinate out of interpolation function range!"); - } - } - } - - public: /** * @brief Construct a new 1D Interpolation Function object, mimicking * Mathematica's `Interpolation` function, with option `Method->"Spline"`. @@ -465,9 +249,7 @@ class InterpolationFunction { // TODO: Add integration return spline_.periodicity(dim_ind); } - inline bool uniform(size_type dim_ind) const { - return uniform_[dim_ind]; - } + inline bool uniform(size_type dim_ind) const { return uniform_[dim_ind]; } const std::pair& @@ -480,12 +262,223 @@ class InterpolationFunction { // TODO: Add integration * * @return spline_type& */ - const spline_type& spline() const { - return spline_; + const spline_type& spline() const { return spline_; } + + size_type order() const { return order_; } + + private: + size_type order_; + spline_type spline_; + + DimArray dx_; + DimArray uniform_; + + // auxiliary methods + + template + inline val_type call_op_helper(util::index_sequence, + DimArray c) const { + return spline_(std::make_pair( + c[di], + uniform_[di] + ? std::min( + spline_.knots_num(di) - order_ - 2, + static_cast(std::ceil(std::max( + coord_type{0.}, + (c[di] - range(di).first) / dx_[di] - + (periodicity(di) + ? coord_type{1.} + : coord_type{.5} * static_cast( + order_ + 1))))) + + order_) + : order_)...); + } + + template + inline val_type derivative_helper(util::index_sequence, + DimArray c, + DimArray d) const { + return spline_.derivative_at(std::make_tuple( + static_cast(c[di]), static_cast(d[di]), + uniform_[di] + ? std::min(spline_.knots_num(di) - order_ - 2, + static_cast(std::ceil(std::max( + 0., (c[di] - range(di).first) / dx_[di] - + (periodicity(di) + ? 1. + : .5 * static_cast( + order_ + 1))))) + + order_) + : order_)...); + } + + // overload for uniform knots + template + typename std::enable_if::value>::type + create_knot_vector_(size_type dim_ind, + const MeshDimension& mesh_dimension, + DimArray&, + std::pair x_range) { + uniform_[dim_ind] = true; + const auto periodic = periodicity(dim_ind); + const size_type n = mesh_dimension.dim_size(dim_ind) +#ifdef INTP_PERIODIC_NO_DUMMY_POINT + + (periodic ? 1 : 0) +#endif + ; + dx_[dim_ind] = + (x_range.second - x_range.first) / static_cast(n - 1); + + const size_t extra = + periodic ? 2 * order_ + (1 - order_ % 2) : order_ + 1; + + std::vector xs(n + extra, x_range.first); + + if (periodic) { + for (size_type i = 0; i < xs.size(); ++i) { + xs[i] = x_range.first + + (static_cast(i) - + coord_type{.5} * static_cast(extra)) * + dx_[dim_ind]; + } + } else { + for (size_type i = order_ + 1; i < xs.size() - order_ - 1; ++i) { + xs[i] = x_range.first + + (static_cast(i) - + coord_type{.5} * static_cast(extra)) * + dx_[dim_ind]; + } + for (size_type i = xs.size() - order_ - 1; i < xs.size(); ++i) { + xs[i] = x_range.second; + } + } + + spline_.load_knots(dim_ind, std::move(xs)); } - size_type order() const { - return order_; + // overload for nonuniform knots, given by iterator pair + template + typename std::enable_if::iterator_category, + std::input_iterator_tag>::value>::type + create_knot_vector_( + size_type dim_ind, + const MeshDimension& mesh_dimension, + DimArray& input_coords, + std::pair x_range) { + uniform_[dim_ind] = false; + const auto periodic = periodicity(dim_ind); + + const size_type n{static_cast( + std::distance(x_range.first, x_range.second))}; + INTP_ASSERT(n == mesh_dimension.dim_size(dim_ind) +#ifdef INTP_PERIODIC_NO_DUMMY_POINT + + (periodic ? 1 : 0) +#endif + , + std::string("Inconsistency between knot number and " + "interpolated value number at dimension ") + + std::to_string(dim_ind)); +#ifndef INTP_ENABLE_ASSERTION + // suppress unused parameter warning + (void)mesh_dimension; +#endif + typename spline_type::KnotContainer xs( + periodic ? n + 2 * order_ + (1 - order_ % 2) : n + order_ + 1); + + auto& input_coord = input_coords[dim_ind]; + input_coord.reserve(n); + // The x_range may be given by input iterators, which can not be + // multi-passed. + if (periodic) { + // In periodic case, the knots are data points, shifted by half of + // local grid size if spline order is odd. + + auto iter = x_range.first; + input_coord.push_back(*iter); + for (size_type i = order_ + 1; i < order_ + n; ++i) { + coord_type present = *(++iter); + xs[i] = order_ % 2 == 0 ? .5 * (input_coord.back() + present) + : present; + input_coord.push_back(present); + } + coord_type period = input_coord.back() - input_coord.front(); + for (size_type i = 0; i < order_ + 1; ++i) { + xs[i] = xs[n + i - 1] - period; + xs[xs.size() - i - 1] = xs[xs.size() - i - n] + period; + } + } else { + // In aperiodic case, the internal knots are moving average of data + // points with windows size equal to spline order. + + auto it = x_range.first; + auto l_knot = *it; + // fill leftmost *order+1* identical knots + for (size_type i = 0; i < order_ + 1; ++i) { xs[i] = l_knot; } + // first knot is same as first input coordinate + input_coord.emplace_back(l_knot); + // Every knot in middle is average of *order* input + // coordinates. This var is to track the sum of a moving window with + // width *order*. + coord_type window_sum{}; + for (size_type i = 1; i < order_; ++i) { + input_coord.emplace_back(*(++it)); + window_sum += input_coord[i]; + } + for (size_type i = order_ + 1; i < n; ++i) { + input_coord.emplace_back(*(++it)); + window_sum += input_coord[i - 1]; + xs[i] = window_sum / static_cast(order_); + window_sum -= input_coord[i - order_]; + } + auto r_knot = *(++it); + // fill rightmost *order+1* identical knots + for (size_type i = n; i < n + order_ + 1; ++i) { xs[i] = r_knot; } + // last knot is same as last input coordinate + input_coord.emplace_back(r_knot); + } + // check whether input coordinates is monotonic + for (std::size_t i = 0; i < input_coord.size() - 1; ++i) { + INTP_ASSERT(input_coord[i + 1] > input_coord[i], + std::string("Given coordinate is not monotonically " + "increasing at dimension ") + + std::to_string(dim_ind)); + } + +#ifdef INTP_TRACE + std::cout << "[TRACE] Nonuniform knots along dimension" << dim_ind + << ":\n"; + for (auto& c : xs) { std::cout << "[TRACE] " << c << '\n'; } + std::cout << std::endl; +#endif + + spline_.load_knots(dim_ind, std::move(xs)); + } + + template + void create_knots_( + util::index_sequence, + MeshDimension mesh_dimension, + DimArray& input_coords, + std::pair... x_ranges) { +#if __cplusplus >= 201703L + (create_knot_vector_(di, mesh_dimension, input_coords, x_ranges), ...); +#else + // polyfill of C++17 fold expression over comma + static_cast(std::array{ + (create_knot_vector_(di, mesh_dimension, input_coords, x_ranges), + nullptr)...}); +#endif + } + + inline void boundary_check_(const DimArray& coord) const { + for (size_type d = 0; d < dim; ++d) { + if (!periodicity(d) && + (coord[d] < range(d).first || coord[d] > range(d).second)) { + throw std::domain_error( + "Given coordinate out of interpolation function range!"); + } + } } }; From a96176c8cc521c3206eda917f04b0aee035c00a7 Mon Sep 17 00:00:00 2001 From: qzhong <12ff54e@gmail.com> Date: Wed, 10 Apr 2024 22:39:56 +0800 Subject: [PATCH 02/11] Adjust micros in CMakeLists in tests --- src/include/Interpolation.hpp | 2 ++ test/CMakeLists.txt | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/include/Interpolation.hpp b/src/include/Interpolation.hpp index 6e678c0..eb02eae 100644 --- a/src/include/Interpolation.hpp +++ b/src/include/Interpolation.hpp @@ -5,8 +5,10 @@ #include #ifdef INTP_TRACE +#ifndef INTP_DEBUG #define INTP_DEBUG #endif +#endif #include "InterpolationTemplate.hpp" diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9c52bdd..33fc37b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -24,11 +24,13 @@ else() endif() endif() -add_compile_definitions($<$:INTP_TRACE> - $<$,$>:INTP_DEBUG> - $<$,$>:INTP_MULTITHREAD>) +add_compile_definitions($<$,$>:INTP_DEBUG> + $<$:INTP_MULTITHREAD>) -add_compile_definitions(INTP_PERIODIC_NO_DUMMY_POINT) +add_compile_definitions(INTP_PERIODIC_NO_DUMMY_POINT + INTP_CELL_LAYOUT) + +#add_compile_definitions(INTP_TRACE) # Specify tests list(APPEND tests "util-test" "mesh-test" "band-matrix-and-solver-test" "bspline-test" "interpolation-test" "interpolation-speed-test" "interpolation-template-test") From 0af66d56ac93b555a7a7c9b25e1c33e34ad89cd2 Mon Sep 17 00:00:00 2001 From: qzhong <12ff54e@gmail.com> Date: Thu, 11 Apr 2024 19:19:16 +0800 Subject: [PATCH 03/11] Get coordinates from a skip_iterator --- src/include/Mesh.hpp | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/include/Mesh.hpp b/src/include/Mesh.hpp index c331a8f..7870bc5 100644 --- a/src/include/Mesh.hpp +++ b/src/include/Mesh.hpp @@ -38,6 +38,7 @@ class MeshDimension { } size_type dim_size(size_type dim_ind) const { return dim_size_[dim_ind]; } + size_type& dim_size(size_type dim_ind) { return dim_size_[dim_ind]; } size_type dim_acc_size(size_type dim_ind) const { size_type das = 1; @@ -120,6 +121,7 @@ class Mesh { using value_type = U; using difference_type = std::ptrdiff_t; using pointer = U*; + using const_pointer = const U*; using reference = U&; using iterator_category = std::random_access_iterator_tag; @@ -132,13 +134,9 @@ class Mesh { : ptr_(ptr), step_length_(step_length) {} // allow iterator to const_iterator conversion - template - skip_iterator( - skip_iterator::type>:: - value, - V>::type> other) - : ptr_(other.ptr_), step_length_(other.step_length_) {} + operator skip_iterator() { return {ptr_, step_length_}; } + // cast to (const) underlying pointer type + explicit operator const_pointer() const { return ptr_; } // forward iterator requirement @@ -301,6 +299,14 @@ class Mesh { return storage_[dimension_.indexing(indices...)]; } + val_type& operator()(index_type indices) { + return storage_[dimension_.indexing(indices)]; + } + + const val_type& operator()(index_type indices) const { + return storage_[dimension_.indexing(indices)]; + } + const val_type* data() const { return storage_.data(); } // iterator @@ -352,6 +358,13 @@ class Mesh { return dimension_.dimwise_indices( static_cast(std::distance(begin(), iter))); } + + index_type iter_indices(skip_iterator skip_iter) const { + return dimension_.dimwise_indices(static_cast( + static_cast::pointer>( + skip_iter) - + data())); + } }; } // namespace intp From d4e6c5b4ce40dcaf37dc58413b48602fb12664a8 Mon Sep 17 00:00:00 2001 From: qzhong <12ff54e@gmail.com> Date: Fri, 12 Apr 2024 09:59:55 +0800 Subject: [PATCH 04/11] Add cell layout to speed up interpolation --- src/include/BSpline.hpp | 143 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 134 insertions(+), 9 deletions(-) diff --git a/src/include/BSpline.hpp b/src/include/BSpline.hpp index 11a2224..a475678 100644 --- a/src/include/BSpline.hpp +++ b/src/include/BSpline.hpp @@ -33,12 +33,21 @@ class BSpline { using knot_type = U; using KnotContainer = std::vector; + using ControlPointContainer = Mesh>>; +#ifdef INTP_CELL_LAYOUT + using ControlPointCellContainer = + Mesh>>; +#endif using BaseSpline = std::vector; using diff_type = typename KnotContainer::iterator::difference_type; @@ -178,19 +187,23 @@ class BSpline { template BSpline(size_type spline_order, DimArray periodicity, - ControlPointContainer ctrl_points, + ControlPointContainer ctrl_pts, std::pair... knot_iter_pairs) : order_(spline_order), periodicity_(periodicity), knots_{ KnotContainer(knot_iter_pairs.first, knot_iter_pairs.second)...}, - control_points_(std::move(ctrl_points)), +#ifdef INTP_CELL_LAYOUT + control_points_(generate_cell_layout(ctrl_pts)), +#else + control_points_(std::move(ctrl_pts)), +#endif range_{std::make_pair( (knot_iter_pairs.first)[order_], (knot_iter_pairs.second)[-static_cast(order_) - 1])...}, buf_size_(util::pow(order_ + 1, dim)) { for (size_type d = 0; d < dim; ++d) { - INTP_ASSERT(knots_[d].size() - control_points_.dim_size(d) == + INTP_ASSERT(knots_[d].size() - ctrl_pts.dim_size(d) == (periodicity_[d] ? 2 * order_ + 1 : order_ + 1), std::string("Inconsistency between knot number and " "control point number at dimension ") + @@ -219,14 +232,20 @@ class BSpline { knots_[dim_ind][knots_[dim_ind].size() - order_ - (2 - order_ % 2)]; } +#ifdef INTP_CELL_LAYOUT + void load_ctrlPts(const ControlPointContainer& control_points) { + control_points_ = generate_cell_layout(control_points); + } +#else template typename std::enable_if< std::is_same::type, ControlPointContainer>::value, void>::type - load_ctrlPts(C&& _control_points) { - control_points_ = std::forward(_control_points); + load_ctrlPts(C&& control_points) { + control_points_ = std::forward(control_points); } +#endif /** * @brief Get spline value at given pairs of coordinate and position hint @@ -259,12 +278,26 @@ class BSpline { // combine control points and basic spline values to get spline value val_type v{}; +#ifdef INTP_CELL_LAYOUT + std::array ind_arr{}; + for (size_type d = 0; d < dim; ++d) { + ind_arr[d] = static_cast( + distance(knots_begin(d), knot_iters[d])) - + order_; + } + auto cell_iter = control_points_.begin(dim, ind_arr); for (size_type i = 0; i < buf_size_; ++i) { - DimArray ind_arr; + knot_type coef = 1; for (size_type d = 0, combined_ind = i; d < dim; ++d) { - ind_arr[d] = combined_ind % (order_ + 1); + coef *= base_spline_values_1d[d][combined_ind % (order_ + 1)]; combined_ind /= (order_ + 1); } + v += coef * (*cell_iter++); + } +#else + MeshDimension local_mesh_dim(order_ + 1); + for (size_type i = 0; i < buf_size_; ++i) { + DimArray ind_arr = local_mesh_dim.dimwise_indices(i); knot_type coef = 1; for (size_type d = 0; d < dim; ++d) { @@ -290,6 +323,7 @@ class BSpline { v += coef * control_points_(ind_arr); } +#endif return v; } @@ -363,6 +397,15 @@ class BSpline { #endif // get local control points and basic spline values + +#ifdef INTP_CELL_LAYOUT + std::array ind_arr{}; + for (size_type d = 0; d < dim; ++d) { + ind_arr[d] = static_cast( + distance(knots_begin(d), knot_iters[d])) - + order_; + } + auto cell_iter = control_points_.begin(dim, ind_arr); for (size_type i = 0; i < buf_size_; ++i) { DimArray local_ind_arr{}; for (size_type d = 0, combined_ind = i; d < dim; ++d) { @@ -370,7 +413,23 @@ class BSpline { combined_ind /= (order_ + 1); } - val_type coef = 1; + knot_type coef = 1; + for (size_type d = 0; d < dim; ++d) { + coef *= base_spline_values_1d[d][local_ind_arr[d]]; + } + + local_spline_val(local_ind_arr) = coef; + local_control_points(local_ind_arr) = *cell_iter++; + } +#else + for (size_type i = 0; i < buf_size_; ++i) { + DimArray local_ind_arr{}; + for (size_type d = 0, combined_ind = i; d < dim; ++d) { + local_ind_arr[d] = combined_ind % (order_ + 1); + combined_ind /= (order_ + 1); + } + + knot_type coef = 1; DimArray ind_arr{}; for (size_type d = 0; d < dim; ++d) { coef *= base_spline_values_1d[d][local_ind_arr[d]]; @@ -383,7 +442,8 @@ class BSpline { knots_begin(d), knot_iters[d])) - order_); - // check periodicity, put out-of-right-boundary index to left + // check periodicity, put out-of-right-boundary index to + // left if (periodicity_[d]) { ind_arr[d] %= control_points_.dim_size(d); } @@ -392,6 +452,7 @@ class BSpline { local_spline_val(local_ind_arr) = coef; local_control_points(local_ind_arr) = control_points_(ind_arr); } +#endif for (size_type d = 0; d < dim; ++d) { if (spline_order[d] == order_) { continue; } @@ -539,7 +600,11 @@ class BSpline { DimArray periodicity_; DimArray knots_; +#ifdef INTP_CELL_LAYOUT + ControlPointCellContainer control_points_; +#else ControlPointContainer control_points_; +#endif DimArray> range_; @@ -570,6 +635,66 @@ class BSpline { return {base_spline_value(indices, knot_iters[indices], coords, spline_order[indices])...}; } + +#ifdef INTP_CELL_LAYOUT + ControlPointCellContainer generate_cell_layout( + const ControlPointContainer& ctrl_pts) const { + MeshDimension cell_container_dim( + util::pow(order_ + 1, dim - 1)); + for (size_type d = 0; d < dim; ++d) { + cell_container_dim.dim_size(d) = ctrl_pts.dim_size(d) - + (d == dim - 1 ? 0 : order_) + + (periodicity(d) ? order_ : 0); + } + + // Size of the last two dimension of control point cell container. The + // (dim-1)th dimention of cell container has the same length (order + // points more in periodic case) as the last dimension (which is also + // the (dim-1)th dimension) of the origin container, while other + // dimensions are #order shorter, except the last dimension with length + // (order+1)^(dim-1). + const auto line_size = cell_container_dim.dim_size(dim - 1) * + cell_container_dim.dim_size(dim); + + ControlPointCellContainer control_point_cell(cell_container_dim); + // size of hyperplane orthogonal to last 2 dimension + const size_type hyper_surface_size = + control_point_cell.size() / line_size; + + // iterate over hyperplane + for (size_type h_ind = 0; h_ind < hyper_surface_size; ++h_ind) { + const auto ind_arr_on_hyper_surface = + control_point_cell.dimension().dimwise_indices(h_ind * + line_size); + const auto line_begin = + control_point_cell.begin(dim - 1, ind_arr_on_hyper_surface); + const auto line_end = + control_point_cell.end(dim - 1, ind_arr_on_hyper_surface); + // iterate along the (dim-1)th dimension + for (auto iter = line_begin; iter != line_end; ++iter) { + auto cell_ind_arr = control_point_cell.iter_indices(iter); + MeshDimension cell_dim(order_ + 1); + // iterate the (dim)th dimension + for (size_type i = 0; i < cell_dim.size(); ++i) { + auto local_shift_ind = cell_dim.dimwise_indices(i); + cell_ind_arr[dim] = i; + auto cp_ind_arr = + typename ControlPointContainer::index_type{}; + for (size_type d = 0; d < dim; ++d) { + cp_ind_arr[d] = + (cell_ind_arr[d] + + (d == dim - 1 ? 0 + : local_shift_ind[dim - 2 - d])) % + ctrl_pts.dim_size(d); + } + // cp_ind_arr[dim - 1] %= ctrl_pts.dim_size(dim - 1); + control_point_cell(cell_ind_arr) = ctrl_pts(cp_ind_arr); + } + } + } + return control_point_cell; + } +#endif // INTP_CELL_LAYOUT }; } // namespace intp From bbebc69f54f1ed9f6a66e1dd72d02dca47800380 Mon Sep 17 00:00:00 2001 From: qzhong <12ff54e@gmail.com> Date: Fri, 12 Apr 2024 11:44:58 +0800 Subject: [PATCH 05/11] Use multi-thread to speed cell filling --- src/include/BSpline.hpp | 83 +++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 27 deletions(-) diff --git a/src/include/BSpline.hpp b/src/include/BSpline.hpp index a475678..b871df8 100644 --- a/src/include/BSpline.hpp +++ b/src/include/BSpline.hpp @@ -9,6 +9,10 @@ #include // is_same, is_arithmatic #include +#ifdef INTP_MULTITHREAD +#include "DedicatedThreadPool.hpp" +#endif + #ifdef INTP_DEBUG #include #endif @@ -661,37 +665,62 @@ class BSpline { const size_type hyper_surface_size = control_point_cell.size() / line_size; - // iterate over hyperplane - for (size_type h_ind = 0; h_ind < hyper_surface_size; ++h_ind) { - const auto ind_arr_on_hyper_surface = - control_point_cell.dimension().dimwise_indices(h_ind * - line_size); - const auto line_begin = - control_point_cell.begin(dim - 1, ind_arr_on_hyper_surface); - const auto line_end = - control_point_cell.end(dim - 1, ind_arr_on_hyper_surface); - // iterate along the (dim-1)th dimension - for (auto iter = line_begin; iter != line_end; ++iter) { - auto cell_ind_arr = control_point_cell.iter_indices(iter); - MeshDimension cell_dim(order_ + 1); - // iterate the (dim)th dimension - for (size_type i = 0; i < cell_dim.size(); ++i) { - auto local_shift_ind = cell_dim.dimwise_indices(i); - cell_ind_arr[dim] = i; - auto cp_ind_arr = - typename ControlPointContainer::index_type{}; - for (size_type d = 0; d < dim; ++d) { - cp_ind_arr[d] = - (cell_ind_arr[d] + - (d == dim - 1 ? 0 - : local_shift_ind[dim - 2 - d])) % - ctrl_pts.dim_size(d); + auto fill_cell = [&](size_type begin, size_type end) { + // iterate over hyperplane + for (size_type h_ind = begin; h_ind < end; ++h_ind) { + const auto ind_arr_on_hyper_surface = + control_point_cell.dimension().dimwise_indices(h_ind * + line_size); + const auto line_begin = + control_point_cell.begin(dim - 1, ind_arr_on_hyper_surface); + const auto line_end = + control_point_cell.end(dim - 1, ind_arr_on_hyper_surface); + // iterate along the (dim-1)th dimension + for (auto iter = line_begin; iter != line_end; ++iter) { + auto cell_ind_arr = control_point_cell.iter_indices(iter); + MeshDimension cell_dim(order_ + 1); + // iterate the (dim)th dimension + for (size_type i = 0; i < cell_dim.size(); ++i) { + auto local_shift_ind = cell_dim.dimwise_indices(i); + cell_ind_arr[dim] = i; + auto cp_ind_arr = + typename ControlPointContainer::index_type{}; + for (size_type d = 0; d < dim; ++d) { + cp_ind_arr[d] = + (cell_ind_arr[d] + + (d == dim - 1 + ? 0 + : local_shift_ind[dim - 2 - d])) % + ctrl_pts.dim_size(d); + } + control_point_cell(cell_ind_arr) = ctrl_pts(cp_ind_arr); } - // cp_ind_arr[dim - 1] %= ctrl_pts.dim_size(dim - 1); - control_point_cell(cell_ind_arr) = ctrl_pts(cp_ind_arr); } } + }; +#ifdef INTP_MULTITHREAD + const size_type block_num = static_cast( + std::sqrt(static_cast(hyper_surface_size))); + const size_type task_per_block = block_num == 0 + ? hyper_surface_size + : hyper_surface_size / block_num; + + auto& thread_pool = DedicatedThreadPool::get_instance(8); + std::vector> res; + + for (size_type i = 0; i < block_num; ++i) { + res.push_back(thread_pool.queue_task([=]() { + fill_cell(i * task_per_block, (i + 1) * task_per_block); + })); } + // main thread deals with the remaining part in case hyper_surface_size + // not divisible by thread_num + fill_cell(block_num * task_per_block, hyper_surface_size); + // wait for all tasks are complete + for (auto&& f : res) { f.get(); } +#else + fill_cell(0, hyper_surface_size); +#endif // INTP_MULTITHREAD return control_point_cell; } #endif // INTP_CELL_LAYOUT From 06d67025548d63b3265f1d50963b1d3a4c252ee3 Mon Sep 17 00:00:00 2001 From: qzhong <12ff54e@gmail.com> Date: Fri, 12 Apr 2024 11:45:27 +0800 Subject: [PATCH 06/11] Separate speed tests from regualr tests --- .github/workflows/cmake.yml | 2 +- test/CMakeLists.txt | 29 ++++++++++++++++++++++++----- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 07707c4..c27e191 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -33,5 +33,5 @@ jobs: working-directory: ${{github.workspace}}/build # Execute tests defined by the CMake configuration. # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail - run: ctest --output-on-failure -R intp-* + run: ctest --output-on-failure -L intp-regular diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 33fc37b..928664a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -33,23 +33,42 @@ add_compile_definitions(INTP_PERIODIC_NO_DUMMY_POINT #add_compile_definitions(INTP_TRACE) # Specify tests -list(APPEND tests "util-test" "mesh-test" "band-matrix-and-solver-test" "bspline-test" "interpolation-test" "interpolation-speed-test" "interpolation-template-test") +list(APPEND regular_tests "util-test" "mesh-test" "band-matrix-and-solver-test" "bspline-test" "interpolation-test") +list(APPEND speed_tests "interpolation-speed-test" "interpolation-template-test") -list(LENGTH tests test_num) +list(LENGTH regular_tests test_num) message(STATUS) message(STATUS ${test_num} " tests found: ") enable_testing() -foreach(test ${tests}) +foreach(test ${regular_tests}) string(JOIN "" test_src_file src/ ${test} .cpp) add_executable(${test} ${test_src_file}) - target_compile_features(${test} PRIVATE cxx_std_17) target_include_directories( ${test} PRIVATE $) string(JOIN "-" test_name intp ${test}) add_test(${test_name} ${test}) + set_tests_properties(${test_name} PROPERTIES LABELS intp-regular) message(STATUS ${test} " from source file " ${test_src_file}) -endforeach(test ${tests}) +endforeach(test ${regular_tests}) + +if(CMAKE_BUILD_TYPE STREQUAL Release) + message(STATUS) + list(LENGTH speed_tests test_num) + message(STATUS ${test_num} " speed tests found: ") + foreach(test ${speed_tests}) + string(JOIN "" test_src_file src/ ${test} .cpp) + add_executable(${test} ${test_src_file}) + target_include_directories( + ${test} PRIVATE $) + + string(JOIN "-" test_name intp ${test}) + add_test(${test_name} ${test}) + set_tests_properties(${test_name} PROPERTIES LABELS intp-speed) + message(STATUS ${test} " from source file " ${test_src_file}) + endforeach(test ${speed_tests}) +endif() + # target_compile_options(interpolation-speed-test PRIVATE $<$:-march=native>) From 466b93795e3af407d89c9d2098536b3ae20af255 Mon Sep 17 00:00:00 2001 From: qzhong <12ff54e@gmail.com> Date: Fri, 12 Apr 2024 22:03:22 +0800 Subject: [PATCH 07/11] Update readme for new feature --- CMakeLists.txt | 2 +- README.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aa45bf8..3b4c90b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.12.0) project( BSplineInterpolation - VERSION 1.3.1 + VERSION 1.4.0 HOMEPAGE_URL "https://github.com/12ff54e/BSplineInterpolation.git" LANGUAGES CXX) include(CTest) diff --git a/README.md b/README.md index 614884c..88e0ff1 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,7 @@ You can control the bahavior of this library by defining the following macros: - `INTP_ENABLE_ASSERTION`: Add assertion to ensure data to be interplolated is valid. - `INTP_PERIODIC_NO_DUMMY_POINT`: Whether to accept dummy point when interpolating periodic data. If this macro is defined, number of data point is one less than the specified dimension, since the last point is implicitly set as the same of the first one. - `INTP_STACK_ALLOCATOR`: Use stack allocator in some small dynamic memory allocation scenario. +- `INTP_CELL_LAYOUT`: Store weights in cell layout redundantly to speed up evaluation. By default all of the above macros is not defined. From 77e6cf4d4bcc52b7a51459a4b6c19011fced439c Mon Sep 17 00:00:00 2001 From: qzhong <12ff54e@gmail.com> Date: Sat, 13 Apr 2024 13:19:05 +0800 Subject: [PATCH 08/11] Minor change in speed test --- test/src/interpolation-speed-test.cpp | 29 ++++++++++++++------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/test/src/interpolation-speed-test.cpp b/test/src/interpolation-speed-test.cpp index 81e3150..78578af 100644 --- a/test/src/interpolation-speed-test.cpp +++ b/test/src/interpolation-speed-test.cpp @@ -40,7 +40,7 @@ int main() { } Assertion assertion; - constexpr size_t len = 256; + constexpr size_t len_power = 6 * 4; constexpr size_t eval_count = 1 << 20; std::vector eval_coord_x; @@ -59,7 +59,7 @@ int main() { { const auto t_start_1d = high_resolution_clock::now(); - constexpr size_t len_1d = len * len * len; + constexpr size_t len_1d = 1 << len_power; constexpr double dx = 2 * M_PI / (len_1d); std::vector vec_1d{}; @@ -126,7 +126,7 @@ int main() { { const auto t_start_2d = high_resolution_clock::now(); - const size_t len_2d = static_cast(std::pow(len, 1.5)); + const size_t len_2d = 1 << (len_power / 2); const double dt = 2 * M_PI / static_cast(len_2d); #ifdef INTP_PERIODIC_NO_DUMMY_POINT @@ -206,14 +206,15 @@ int main() { { const auto t_start_3d = high_resolution_clock::now(); - constexpr double dt_3d = 2 * M_PI / len; - constexpr double dt_3d_aperiodic = 1. / (len - 1); + constexpr size_t len_3d = 1 << (len_power / 3); + constexpr double dt_3d = 2 * M_PI / len_3d; + constexpr double dt_3d_aperiodic = 1. / (len_3d - 1); #ifdef INTP_PERIODIC_NO_DUMMY_POINT - Mesh mesh_3d{len, len, len}; - for (size_t i = 0; i < len; ++i) { - for (size_t j = 0; j < len; ++j) { - for (size_t k = 0; k < len; ++k) { + Mesh mesh_3d{len_3d, len_3d, len_3d}; + for (size_t i = 0; i < len_3d; ++i) { + for (size_t j = 0; j < len_3d; ++j) { + for (size_t k = 0; k < len_3d; ++k) { mesh_3d(i, j, k) = std::sin(static_cast(i) * dt_3d - M_PI) * std::cos(static_cast(j) * dt_3d - M_PI) * @@ -223,10 +224,10 @@ int main() { } } #else - Mesh mesh_3d{len + 1, len + 1, len}; - for (size_t i = 0; i <= len; ++i) { - for (size_t j = 0; j <= len; ++j) { - for (size_t k = 0; k < len; ++k) { + Mesh mesh_3d{len_3d + 1, len_3d + 1, len_3d}; + for (size_t i = 0; i <= len_3d; ++i) { + for (size_t j = 0; j <= len_3d; ++j) { + for (size_t k = 0; k < len_3d; ++k) { mesh_3d(i, j, k) = std::sin(static_cast(i) * dt_3d - M_PI) * std::cos(static_cast(j) * dt_3d - M_PI) * @@ -263,7 +264,7 @@ int main() { rel_err(interp3d, std::make_pair(coord_3d.begin(), coord_3d.end()), std::make_pair(vals_3d.begin(), vals_3d.end())); - const double eps = std::pow(2 * M_PI / len, 4) / 2; + const double eps = std::pow(2 * M_PI / len_3d, 4) / 2; assertion(err_3d < eps); std::cout << "Interpolation 3d trig-exp function with err = " << err_3d << '\n'; From d4f7107b299d9a499fb71975f04550494fecc34c Mon Sep 17 00:00:00 2001 From: qzhong <12ff54e@gmail.com> Date: Mon, 29 Apr 2024 22:35:50 +0800 Subject: [PATCH 09/11] Add sequential evaluation in speed test --- test/src/interpolation-speed-test.cpp | 107 +++++++++++++++++++++----- 1 file changed, 86 insertions(+), 21 deletions(-) diff --git a/test/src/interpolation-speed-test.cpp b/test/src/interpolation-speed-test.cpp index 78578af..9f34afd 100644 --- a/test/src/interpolation-speed-test.cpp +++ b/test/src/interpolation-speed-test.cpp @@ -2,6 +2,7 @@ #include "include/Assertion.hpp" #include "include/rel_err.hpp" +#include #include #include #include @@ -43,18 +44,20 @@ int main() { constexpr size_t len_power = 6 * 4; constexpr size_t eval_count = 1 << 20; - std::vector eval_coord_x; - std::vector eval_coord_y; - std::vector eval_coord_z; - eval_coord_x.reserve(eval_count); - eval_coord_y.reserve(eval_count); - eval_coord_z.reserve(eval_count); - for (size_t i = 0; i < eval_count; ++i) { - eval_coord_x.push_back(rand_dist(rand_gen)); - eval_coord_y.push_back(rand_dist(rand_gen)); - eval_coord_z.push_back(rand_dist2(rand_gen)); + std::vector eval_coord_1d; + std::vector> eval_coord_2d; + std::vector> eval_coord_3d; + { + eval_coord_1d.reserve(eval_count); + eval_coord_2d.reserve(eval_count); + eval_coord_3d.reserve(eval_count); + for (size_t i = 0; i < eval_count; ++i) { + eval_coord_1d.push_back(rand_dist(rand_gen)); + eval_coord_2d.push_back({rand_dist(rand_gen), rand_dist(rand_gen)}); + eval_coord_3d.push_back({rand_dist(rand_gen), rand_dist(rand_gen), + rand_dist2(rand_gen)}); + } } - // 1D case { const auto t_start_1d = high_resolution_clock::now(); @@ -88,10 +91,20 @@ int main() { const auto t_after_interpolation = high_resolution_clock::now(); - for (size_t i = 0; i < eval_count; ++i) { interp1d(eval_coord_x[i]); } + for (size_t i = 0; i < eval_count; ++i) { interp1d(eval_coord_1d[i]); } + // for (auto& x : eval_coord_1d) { interp1d(x); } const auto t_after_eval = high_resolution_clock::now(); + std::sort(eval_coord_1d.begin(), eval_coord_1d.end()); + + const auto t_before_seq_eval = high_resolution_clock::now(); + + for (size_t i = 0; i < eval_count; ++i) { interp1d(eval_coord_1d[i]); } + // for (auto& x : eval_coord_1d) { interp1d(x); } + + const auto t_after_seq_eval = high_resolution_clock::now(); + double err_1d = rel_err(interp1d, std::make_pair(coord_1d.begin(), coord_1d.end()), std::make_pair(vals_1d.begin(), vals_1d.end())); @@ -115,10 +128,15 @@ int main() { t_after_interpolation - t_after_vec) .count() << "ms\n"; - std::cout << "Evaluate\t\t" + std::cout << "Evaluate(random)\t" << duration( t_after_eval - t_after_interpolation) .count() + << "ms\n"; + std::cout << "Evaluate(sequential)\t" + << duration(t_after_seq_eval - + t_before_seq_eval) + .count() << "ms\n\n"; } @@ -164,12 +182,28 @@ int main() { const auto t_after_interpolation = high_resolution_clock::now(); - for (size_t i = 0; i < eval_count; ++i) { - interp2d(eval_coord_y[i], eval_coord_y[i]); - } + for (size_t i = 0; i < eval_count; ++i) { interp2d(eval_coord_2d[i]); } const auto t_after_eval = high_resolution_clock::now(); + std::sort(eval_coord_2d.begin(), eval_coord_2d.end(), + [dt](const std::array& p1, + const std::array& p2) { + const auto x_1 = + static_cast(std::floor((p1[0] + M_PI) / dt)); + const auto x_2 = + static_cast(std::floor((p2[0] + M_PI) / dt)); + return x_1 < x_2 || (x_1 == x_2 && + std::floor((p1[1] + M_PI) / dt) <= + std::floor((p2[1] + M_PI) / dt)); + }); + + const auto t_before_seq_eval = high_resolution_clock::now(); + + for (size_t i = 0; i < eval_count; ++i) { interp2d(eval_coord_2d[i]); } + + const auto t_after_seq_eval = high_resolution_clock::now(); + double err_2d = rel_err(interp2d, std::make_pair(coord_2d.begin(), coord_2d.end()), std::make_pair(vals_2d.begin(), vals_2d.end())); @@ -195,10 +229,15 @@ int main() { t_after_interpolation - t_after_mesh) .count() << "ms\n"; - std::cout << "Evaluate\t\t" + std::cout << "Evaluate(random)\t" << duration( t_after_eval - t_after_interpolation) .count() + << "ms\n"; + std::cout << "Evaluate(sequential)\t" + << duration(t_after_seq_eval - + t_before_seq_eval) + .count() << "ms\n\n"; } @@ -254,12 +293,33 @@ int main() { const auto t_after_interpolation = high_resolution_clock::now(); - for (size_t i = 0; i < eval_count; ++i) { - interp3d(eval_coord_x[i], eval_coord_y[i], eval_coord_z[i]); - } + for (size_t i = 0; i < eval_count; ++i) { interp3d(eval_coord_3d[i]); } const auto t_after_eval = high_resolution_clock::now(); + std::sort(eval_coord_3d.begin(), eval_coord_3d.end(), + [dt_3d_aperiodic, dt_3d](const std::array& p1, + const std::array& p2) { + const auto x_1 = + static_cast(std::floor((p1[0] + M_PI) / dt_3d)); + const auto x_2 = + static_cast(std::floor((p2[0] + M_PI) / dt_3d)); + const auto y_1 = + static_cast(std::floor((p1[1] + M_PI) / dt_3d)); + const auto y_2 = + static_cast(std::floor((p2[1] + M_PI) / dt_3d)); + return x_1 < x_2 || (x_1 == x_2 && y_1 < y_2) || + (x_1 == x_2 && y_1 == y_2 && + std::floor((p1[2] + .5) / dt_3d_aperiodic) <= + std::floor((p2[2] + .5) / dt_3d_aperiodic)); + }); + + const auto t_before_seq_eval = high_resolution_clock::now(); + + for (size_t i = 0; i < eval_count; ++i) { interp3d(eval_coord_3d[i]); } + + const auto t_after_seq_eval = high_resolution_clock::now(); + double err_3d = rel_err(interp3d, std::make_pair(coord_3d.begin(), coord_3d.end()), std::make_pair(vals_3d.begin(), vals_3d.end())); @@ -283,11 +343,16 @@ int main() { t_after_interpolation - t_after_mesh_3d) .count() << "ms\n"; - std::cout << "Evaluate\t\t" + std::cout << "Evaluate(random)\t" << duration( t_after_eval - t_after_interpolation) .count() << "ms\n"; + std::cout << "Evaluate(sequential)\t" + << duration(t_after_seq_eval - + t_before_seq_eval) + .count() + << "ms\n\n"; } return assertion.status(); From 96ebcea974d8880456dc998879df77cf2b65c932 Mon Sep 17 00:00:00 2001 From: qzhong <12ff54e@gmail.com> Date: Fri, 10 May 2024 17:26:35 +0800 Subject: [PATCH 10/11] Fix a bug in macro use --- src/include/Interpolation.hpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/include/Interpolation.hpp b/src/include/Interpolation.hpp index eb02eae..b3a4e3b 100644 --- a/src/include/Interpolation.hpp +++ b/src/include/Interpolation.hpp @@ -373,14 +373,17 @@ class InterpolationFunction { // TODO: Add integration const size_type n{static_cast( std::distance(x_range.first, x_range.second))}; - INTP_ASSERT(n == mesh_dimension.dim_size(dim_ind) #ifdef INTP_PERIODIC_NO_DUMMY_POINT - + (periodic ? 1 : 0) -#endif - , + INTP_ASSERT(n == mesh_dimension.dim_size(dim_ind) + (periodic ? 1 : 0), + std::string("Inconsistency between knot number and " + "interpolated value number at dimension ") + + std::to_string(dim_ind)); +#else + INTP_ASSERT(n == mesh_dimension.dim_size(dim_ind), std::string("Inconsistency between knot number and " "interpolated value number at dimension ") + std::to_string(dim_ind)); +#endif #ifndef INTP_ENABLE_ASSERTION // suppress unused parameter warning (void)mesh_dimension; From 207d2ef678fa9bacf61aa472840df669d60624c1 Mon Sep 17 00:00:00 2001 From: qzhong <12ff54e@gmail.com> Date: Fri, 10 May 2024 17:27:37 +0800 Subject: [PATCH 11/11] Minor changes in speed test --- test/src/interpolation-speed-test.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/src/interpolation-speed-test.cpp b/test/src/interpolation-speed-test.cpp index 9f34afd..98ca7db 100644 --- a/test/src/interpolation-speed-test.cpp +++ b/test/src/interpolation-speed-test.cpp @@ -144,8 +144,8 @@ int main() { { const auto t_start_2d = high_resolution_clock::now(); - const size_t len_2d = 1 << (len_power / 2); - const double dt = 2 * M_PI / static_cast(len_2d); + constexpr size_t len_2d = 1 << (len_power / 2); + constexpr double dt = 2 * M_PI / static_cast(len_2d); #ifdef INTP_PERIODIC_NO_DUMMY_POINT Mesh trig_mesh_2d(len_2d); @@ -187,8 +187,8 @@ int main() { const auto t_after_eval = high_resolution_clock::now(); std::sort(eval_coord_2d.begin(), eval_coord_2d.end(), - [dt](const std::array& p1, - const std::array& p2) { + [](const std::array& p1, + const std::array& p2) { const auto x_1 = static_cast(std::floor((p1[0] + M_PI) / dt)); const auto x_2 = @@ -298,8 +298,8 @@ int main() { const auto t_after_eval = high_resolution_clock::now(); std::sort(eval_coord_3d.begin(), eval_coord_3d.end(), - [dt_3d_aperiodic, dt_3d](const std::array& p1, - const std::array& p2) { + [](const std::array& p1, + const std::array& p2) { const auto x_1 = static_cast(std::floor((p1[0] + M_PI) / dt_3d)); const auto x_2 =