From 319f597f38a8c510a4901dcdbafbee729bcf1f17 Mon Sep 17 00:00:00 2001 From: ahmed Date: Sun, 2 Jun 2024 21:04:14 -0500 Subject: [PATCH 01/96] replace vector with glm --- apps/SurfaceTracking/CMakeLists.txt | 3 +- apps/SurfaceTracking/collapser.cuh | 2 +- apps/SurfaceTracking/flipper.cuh | 2 +- apps/XPBD/remesh.cuh | 20 +- include/rxmesh/attribute.h | 13 +- .../rxmesh/geometry_util.cuh | 70 +-- include/rxmesh/rxmesh_static.h | 14 +- include/rxmesh/util/vector.h | 544 ------------------ tests/Polyscope_test/test_polyscope.cu | 14 +- tests/RXMesh_test/CMakeLists.txt | 3 +- tests/RXMesh_test/rxmesh_test_main.cu | 1 - tests/RXMesh_test/test_ev_diamond.h | 17 +- tests/RXMesh_test/test_sparse_matrix.cuh | 17 +- tests/RXMesh_test/test_vector.cu | 81 --- 14 files changed, 85 insertions(+), 716 deletions(-) rename apps/SurfaceTracking/primitives.cuh => include/rxmesh/geometry_util.cuh (50%) delete mode 100644 include/rxmesh/util/vector.h delete mode 100644 tests/RXMesh_test/test_vector.cu diff --git a/apps/SurfaceTracking/CMakeLists.txt b/apps/SurfaceTracking/CMakeLists.txt index 721f671f..ab33882a 100644 --- a/apps/SurfaceTracking/CMakeLists.txt +++ b/apps/SurfaceTracking/CMakeLists.txt @@ -9,8 +9,7 @@ set(SOURCE_LIST smoother.cuh frame_stepper.h simulation.h - noise.h - primitives.cuh + noise.h collapser.cuh link_condition.cuh ) diff --git a/apps/SurfaceTracking/collapser.cuh b/apps/SurfaceTracking/collapser.cuh index 6a3e7932..e63e2379 100644 --- a/apps/SurfaceTracking/collapser.cuh +++ b/apps/SurfaceTracking/collapser.cuh @@ -12,7 +12,7 @@ using Vec3 = glm::vec<3, T, glm::defaultp>; #include "rxmesh/cavity_manager.cuh" #include "rxmesh/query.cuh" -#include "primitives.cuh" +#include "rxmesh/geometry_util.cuh" #include "link_condition.cuh" diff --git a/apps/SurfaceTracking/flipper.cuh b/apps/SurfaceTracking/flipper.cuh index e59e23fe..c8c92928 100644 --- a/apps/SurfaceTracking/flipper.cuh +++ b/apps/SurfaceTracking/flipper.cuh @@ -13,7 +13,7 @@ using Vec3 = glm::vec<3, 
T, glm::defaultp>; #include "rxmesh/query.cuh" #include "link_condition.cuh" -#include "primitives.cuh" +#include "rxmesh/geometry_util.cuh" template __global__ static void __launch_bounds__(blockThreads) diff --git a/apps/XPBD/remesh.cuh b/apps/XPBD/remesh.cuh index 12ccec3e..db1c3531 100644 --- a/apps/XPBD/remesh.cuh +++ b/apps/XPBD/remesh.cuh @@ -1,21 +1,9 @@ #pragma once #include "rxmesh/query.cuh" - +#include "rxmesh/util/vector.h" #include "svd.cuh" -template -using vec2 = glm::vec<2, T, glm::defaultp>; - -template -using mat2x2 = glm::mat<2, 2, T, glm::defaultp>; - -template -using mat3x2 = glm::mat<3, 2, T, glm::defaultp>; - -template -using mat2x3 = glm::mat<2, 3, T, glm::defaultp>; - template __inline__ __device__ vec3 normal(const vec3& x0, @@ -127,9 +115,9 @@ __inline__ __device__ mat2x2 compression_metric(const vec3& w0, mat2x2 Sw2 = glm::transpose(UV) * S2 * UV; mat2x2 D = e2 - 4.0 * c * c * perp(Sw2); - //TODO - //https://github.com/taichi-dev/taichi/blob/master/python/taichi/_funcs.py - //return get_positive(-e + sqrt(D)) / (2.0 * sq(c)); + // TODO + // https://github.com/taichi-dev/taichi/blob/master/python/taichi/_funcs.py + // return get_positive(-e + sqrt(D)) / (2.0 * sq(c)); return mat2x2(0); } diff --git a/include/rxmesh/attribute.h b/include/rxmesh/attribute.h index f7481aec..530a3e0b 100644 --- a/include/rxmesh/attribute.h +++ b/include/rxmesh/attribute.h @@ -13,7 +13,12 @@ #include "rxmesh/util/cuda_query.h" #include "rxmesh/util/log.h" #include "rxmesh/util/util.h" -#include "rxmesh/util/vector.h" + + +#define GLM_ENABLE_EXPERIMENTAL +#include +#include +#include class RXMeshTest; @@ -240,9 +245,9 @@ class Attribute : public AttributeBase } /** - * @brief return the amount of allocated memory in megabytes - */ - const double get_memory_mg()const + * @brief return the amount of allocated memory in megabytes + */ + const double get_memory_mg() const { return m_memory_mega_bytes; } diff --git a/apps/SurfaceTracking/primitives.cuh 
b/include/rxmesh/geometry_util.cuh similarity index 50% rename from apps/SurfaceTracking/primitives.cuh rename to include/rxmesh/geometry_util.cuh index 91c01e05..23be142e 100644 --- a/apps/SurfaceTracking/primitives.cuh +++ b/include/rxmesh/geometry_util.cuh @@ -5,17 +5,18 @@ #include template -using Vec3 = glm::vec<3, T, glm::defaultp>; +using vec3 = glm::vec<3, T, glm::defaultp>; +namespace rxmesh { /** * @brief Compute the signed volume of a tetrahedron. */ template -__inline__ __device__ T signed_volume(const Vec3& x0, - const Vec3& x1, - const Vec3& x2, - const Vec3& x3) +__inline__ __device__ __host__ T signed_volume(const vec3& x0, + const vec3& x1, + const vec3& x2, + const vec3& x3) { // Equivalent to triple(x1-x0, x2-x0, x3-x0), six times the signed volume of // the tetrahedron. But, for robustness, we want the result (up to sign) to @@ -38,45 +39,47 @@ __inline__ __device__ T signed_volume(const Vec3& x0, } template -__device__ __inline__ Vec3 tri_normal(const Vec3& p0, - const Vec3& p1, - const Vec3& p2) +__inline__ __device__ __host__ vec3 tri_normal(const vec3& p0, + const vec3& p1, + const vec3& p2) { - const Vec3 u = p1 - p0; - const Vec3 v = p2 - p0; + const vec3 u = p1 - p0; + const vec3 v = p2 - p0; return glm::normalize(glm::cross(u, v)); }; template -__device__ __inline__ T tri_area(const Vec3& p0, - const Vec3& p1, - const Vec3& p2) +__inline__ __device__ __host__ T tri_area(const vec3& p0, + const vec3& p1, + const vec3& p2) { - const Vec3 u = p1 - p0; - const Vec3 v = p2 - p0; + const vec3 u = p1 - p0; + const vec3 v = p2 - p0; return T(0.5) * glm::length(glm::cross(u, v)); }; - +/** + * @brief return the angle at c + */ template -__device__ __inline__ T tri_angle(const Vec3& l, - const Vec3& c, - const Vec3& r) +__inline__ __device__ __host__ T tri_angle(const vec3& l, + const vec3& c, + const vec3& r) { - glm::vec3 ll = glm::normalize(l - c); - glm::vec3 rr = glm::normalize(r - c); + vec3 ll = glm::normalize(l - c); + vec3 rr = 
glm::normalize(r - c); return glm::acos(glm::dot(rr, ll)); }; template -__device__ __inline__ void triangle_angles(const Vec3& a, - const Vec3& b, - const Vec3& c, - T& angle_a, - T& angle_b, - T& angle_c) +__inline__ __device__ __host__ void triangle_angles(const vec3& a, + const vec3& b, + const vec3& c, + T& angle_a, + T& angle_b, + T& angle_c) { angle_a = tri_angle(b, a, c); angle_b = tri_angle(c, b, a); @@ -85,11 +88,11 @@ __device__ __inline__ void triangle_angles(const Vec3& a, template -__device__ __inline__ void triangle_min_max_angle(const Vec3& a, - const Vec3& b, - const Vec3& c, - T& min_angle, - T& max_angle) +__inline__ __device__ __host__ void triangle_min_max_angle(const vec3& a, + const vec3& b, + const vec3& c, + T& min_angle, + T& max_angle) { T angle_a, angle_b, angle_c; triangle_angles(a, b, c, angle_a, angle_b, angle_c); @@ -98,4 +101,5 @@ __device__ __inline__ void triangle_min_max_angle(const Vec3& a, max_angle = std::max(angle_a, angle_b); max_angle = std::max(max_angle, angle_c); -}; \ No newline at end of file +}; +} // namespace rxmesh \ No newline at end of file diff --git a/include/rxmesh/rxmesh_static.h b/include/rxmesh/rxmesh_static.h index 137e0ca5..7f694a05 100644 --- a/include/rxmesh/rxmesh_static.h +++ b/include/rxmesh/rxmesh_static.h @@ -21,6 +21,8 @@ #include "polyscope/surface_mesh.h" #endif +#include + namespace rxmesh { /** @@ -1027,7 +1029,7 @@ class RXMeshStatic : public RXMesh * @param lower bounding box lower corner * @param upper bounding box upper corner */ - void scale(Vector3f lower, Vector3f upper) + void scale(glm::vec3 lower, glm::vec3 upper) { if (lower[0] > upper[0] || lower[1] > upper[1] || lower[2] > upper[2]) { RXMESH_ERROR( @@ -1042,11 +1044,11 @@ class RXMeshStatic : public RXMesh return; } - Vector3f bb_lower, bb_upper; + glm::vec3 bb_lower, bb_upper; bounding_box(bb_lower, bb_upper); - Vector3f factor; + glm::vec3 factor; for (int i = 0; i < 3; ++i) { factor[i] = (upper[i] - lower[i]) / ((bb_upper[i] - 
bb_lower[i]) + @@ -1073,7 +1075,7 @@ class RXMeshStatic : public RXMesh * @param lower * @param upper */ - void bounding_box(Vector3f& lower, Vector3f& upper) + void bounding_box(glm::vec3 lower, glm::vec3 upper) { lower[0] = std::numeric_limits::max(); lower[1] = std::numeric_limits::max(); @@ -1088,7 +1090,7 @@ class RXMeshStatic : public RXMesh for_each_vertex( HOST, [&](const VertexHandle vh) { - Vector3f v(coord(vh, 0), coord(vh, 1), coord(vh, 2)); + glm::vec3 v(coord(vh, 0), coord(vh, 1), coord(vh, 2)); for (int i = 0; i < 3; ++i) { lower[i] = std::min(lower[i], v[i]); upper[i] = std::max(upper[i], v[i]); @@ -1201,7 +1203,7 @@ class RXMeshStatic : public RXMesh file.precision(30); - std::vector obj_coords(get_num_vertices()); + std::vector obj_coords(get_num_vertices()); for_each_vertex( HOST, [&](const VertexHandle vh) { diff --git a/include/rxmesh/util/vector.h b/include/rxmesh/util/vector.h deleted file mode 100644 index 4b8a0ad6..00000000 --- a/include/rxmesh/util/vector.h +++ /dev/null @@ -1,544 +0,0 @@ -#pragma once -#include -#include -#include -namespace rxmesh { - -template -struct Vector -{ - static_assert(N > 0); - - // constructors - __host__ __device__ __forceinline__ Vector() - { - } - __host__ __device__ __forceinline__ Vector(T value) - { - for (uint32_t i = 0; i < N; ++i) { - m_v[i] = value; - } - } - - __host__ __device__ __forceinline__ Vector(const T* source) - { - for (uint32_t i = 0; i < N; ++i) { - m_v[i] = source[i]; - } - } - - __host__ __device__ __forceinline__ Vector(const Vector& source) - { - for (uint32_t i = 0; i < N; ++i) { - m_v[i] = source[i]; - } - } - - __host__ __device__ __forceinline__ Vector(T a0, T a1) - { - assert(N >= 2); - m_v[0] = a0; - m_v[1] = a1; - } - - __host__ __device__ __forceinline__ Vector(T a0, T a1, T a2) - { - assert(N >= 3); - m_v[0] = a0; - m_v[1] = a1; - m_v[2] = a2; - } - - __host__ __device__ __forceinline__ Vector(T a0, T a1, T a2, T a3) - { - assert(N >= 4); - m_v[0] = a0; - m_v[1] = a1; - 
m_v[2] = a2; - m_v[3] = a3; - } - - __host__ __device__ __forceinline__ Vector(T a0, T a1, T a2, T a3, T a4) - { - assert(N >= 5); - m_v[0] = a0; - m_v[1] = a1; - m_v[2] = a2; - m_v[3] = a3; - m_v[4] = a4; - } - - __host__ __device__ __forceinline__ - Vector(T a0, T a1, T a2, T a3, T a4, T a5) - { - assert(N >= 6); - m_v[0] = a0; - m_v[1] = a1; - m_v[2] = a2; - m_v[3] = a3; - m_v[4] = a4; - m_v[5] = a5; - } - - // static functions - __host__ __device__ __forceinline__ static Vector zero() - { - Vector ret; - for (uint32_t i = 0; i < N; ++i) { - ret.m_v[i] = 0; - } - return ret; - } - - __host__ __device__ __forceinline__ static Vector constant(T c) - { - Vector ret; - for (uint32_t i = 0; i < N; ++i) { - ret.m_v[i] = c; - } - return ret; - } - - // indexing - __host__ __device__ __forceinline__ T& operator[](int index) - { - assert(index >= 0 && index < N); - return m_v[index]; - } - - __host__ __device__ __forceinline__ T operator[](int index) const - { - assert(index >= 0 && index < N); - return m_v[index]; - } - - // unary operators - __host__ __device__ __forceinline__ const Vector& operator+() const - { - return *this; - } - - __host__ __device__ __forceinline__ Vector operator-() const - { - Vector ret; - for (uint32_t i = 0; i < N; ++i) { - ret.m_v[i] = -m_v[i]; - } - return ret; - } - - - // binary operators - // plus - __host__ __device__ __forceinline__ Vector& operator+=( - const Vector& v) - { - for (uint32_t i = 0; i < N; ++i) { - m_v[i] += v.m_v[i]; - } - return *this; - } - __host__ __device__ __forceinline__ Vector - operator+(const Vector& v) const - { - Vector ret(*this); - ret += v; - return ret; - } - template - __host__ __device__ __forceinline__ Vector& operator+=(const R c) - { - for (uint32_t i = 0; i < N; ++i) { - m_v[i] += c; - } - return *this; - } - template - __host__ __device__ __forceinline__ Vector operator+(const R c) const - { - Vector ret(*this); - ret += c; - return ret; - } - - // minus - __host__ __device__ __forceinline__ 
Vector& operator-=( - const Vector& v) - { - for (uint32_t i = 0; i < N; ++i) { - m_v[i] -= v.m_v[i]; - } - return *this; - } - __host__ __device__ __forceinline__ Vector - operator-(const Vector& v) const - { - Vector ret(*this); - ret -= v; - return ret; - } - template - __host__ __device__ __forceinline__ Vector& operator-=(const R c) - { - for (uint32_t i = 0; i < N; ++i) { - m_v[i] -= c; - } - return *this; - } - template - __host__ __device__ __forceinline__ Vector operator-(const R c) const - { - Vector ret(*this); - ret -= c; - return ret; - } - - // multiply - __host__ __device__ __forceinline__ Vector& operator*=(const Vector& v) - { - for (uint32_t i = 0; i < N; ++i) { - m_v[i] *= v.m_v[i]; - } - return *this; - } - __host__ __device__ __forceinline__ Vector operator*(const Vector& v) const - { - Vector ret(*this); - ret *= v; - return ret; - } - template - __host__ __device__ __forceinline__ Vector& operator*=(const R c) - { - for (uint32_t i = 0; i < N; ++i) { - m_v[i] *= c; - } - return *this; - } - template - __host__ __device__ __forceinline__ Vector operator*(const R c) const - { - Vector ret(*this); - ret *= c; - return ret; - } - - // division - __host__ __device__ __forceinline__ Vector& operator/=(const Vector& v) - { - for (uint32_t i = 0; i < N; ++i) { - m_v[i] /= v.m_v[i]; - } - return *this; - } - __host__ __device__ __forceinline__ Vector operator/(const Vector& v) - { - Vector ret(*this); - ret /= v; - return ret; - } - template - __host__ __device__ __forceinline__ Vector& operator/=(const R c) - { - for (uint32_t i = 0; i < N; ++i) { - m_v[i] /= c; - } - return *this; - } - template - __host__ __device__ __forceinline__ Vector operator/(const R c) - { - Vector ret(*this); - ret /= c; - return ret; - } - - // equality - __host__ __device__ __forceinline__ bool operator==(const Vector& v) const - { - for (uint32_t i = 0; i < N; ++i) { - if (m_v[i] != v.m_v[i]) { - return false; - } - } - return true; - } - - __host__ __device__ 
__forceinline__ bool operator!=(const Vector& v) const - { - return !(*this == v); - } - - - // operations - __host__ __device__ __forceinline__ T norm() const - { - T len = 0; - for (uint32_t i = 0; i < N; ++i) { - len += m_v[i] * m_v[i]; - } - return sqrt(len); - } - __host__ __device__ __forceinline__ T norm2() const - { - T len = 0; - for (uint32_t i = 0; i < N; ++i) { - len += m_v[i] * m_v[i]; - } - return len; - } - __host__ __device__ __forceinline__ void normalize() - { - T r = norm(); - if (r == T(0.0)) { - for (uint32_t i = 0; i < N; ++i) { - m_v[i] = 0; - } - } else { - r = 1. / r; - (*this) *= r; - } - } - - __host__ __device__ __forceinline__ T sum() - { - T s = 0; - for (uint32_t i = 0; i < N; ++i) { - s += m_v[i]; - } - return s; - } - - __host__ __device__ __forceinline__ void clamp(T low, T high) - { - for (uint32_t i = 0; i < N; ++i) { - m_v[i] = (m_v[i] <= low) ? low : ((m_v[i] >= high) ? high : m_v[i]); - } - } - - __host__ __device__ __forceinline__ T max() - { - T m = m_v[0]; - for (uint32_t i = 1; i < N; ++i) { - m = (m_v[i] > m) ? m_v[i] : m; - } - return m; - } - - __host__ __device__ __forceinline__ T min() - { - T m = m_v[0]; - for (uint32_t i = 1; i < N; ++i) { - m = (m_v[i] < m) ? 
m_v[i] : m; - } - return m; - } - - private: - T m_v[N]; -}; - -// operations on vectors -template -__host__ __device__ __forceinline__ T norm(const Vector& v) -{ - return v.norm(); -} - -template -__host__ __device__ __forceinline__ T norm2(const Vector& v) -{ - return v.norm2(); -} - -template -__host__ __device__ __forceinline__ void normalize(Vector& v) -{ - v.normalize(); -} - -template -__host__ __device__ __forceinline__ Vector<3, T> cross(const Vector<3, T>& u, - const Vector<3, T>& v) -{ - T x = u[1] * v[2] - u[2] * v[1]; - T y = u[2] * v[0] - u[0] * v[2]; - T z = u[0] * v[1] - u[1] * v[0]; - return Vector<3, T>{x, y, z}; -} - - -template -__host__ __device__ __forceinline__ Vector<2, T> cross(const Vector<2, T>& u, - const Vector<2, T>& v) -{ - return u[0] * v[1] - u[1] * v[0]; -} - -template -__host__ __device__ __forceinline__ T dot(const Vector<3, T>& u, - const Vector<3, T>& v) -{ - return u[0] * v[0] + u[1] * v[1] + u[2] * v[2]; -} - -template -__host__ __device__ __forceinline__ T dot(const Vector<2, T>& u, - const Vector<2, T>& v) -{ - return u[0] * v[0] + u[1] * v[1]; -} - -template -__host__ __device__ __forceinline__ T dist2(const Vector& u, - const Vector& v) -{ - T d = 0; - for (uint32_t i = 0; i < N; ++i) { - d += (u[i] - v[i]) * (u[i] - v[i]); - } - return d; -} - -template -__host__ __device__ __forceinline__ T dist(const Vector& u, - const Vector& v) -{ - return sqrt(dist2(u, v)); -} - -template -__host__ __device__ __forceinline__ Vector min(const Vector& u, - const Vector& v) -{ - Vector ret; - for (uint32_t i = 0; i < N; ++i) { - ret[i] = std::min(u[i], v[i]); - } - return ret; -} -template -__host__ __device__ __forceinline__ Vector max(const Vector& u, - const Vector& v) -{ - Vector ret; - for (uint32_t i = 0; i < N; ++i) { - ret[i] = std::max(u[i], v[i]); - } - return ret; -} - -template -inline std::string to_string(const Vector& v) -{ - std::stringstream ss; - ss << "["; - for (uint32_t i = 0; i < N; i++) { - ss << v[i]; - if (i != 
N - 1) { - ss << ", "; - } - } - ss << "]"; - return ss.str(); -} - -template -inline std::ostream& operator<<(std::ostream& output, const Vector& v) -{ - output << to_string(v); - return output; -} - -template -inline std::istream& operator>>(std::istream& input, const Vector& v) -{ - for (uint32_t i = 0; i < N; i++) { - input >> v[i]; - } - return input; -} - -template -__host__ __device__ __forceinline__ Vector operator+( - const Vector& v0, - const Vector& v1) -{ - return Vector(v0) += v1; -} - - -template -__host__ __device__ __forceinline__ Vector operator-( - const Vector& v0, - const Vector& v1) -{ - return Vector(v0) -= v1; -} - -template -__host__ __device__ __forceinline__ Vector operator-( - const Vector& v) -{ - Vector ret; - for (int i = 0; i < N; ++i) { - ret[i] = -v[i]; - } - return ret; -} - -// Alias -using Vector2d = Vector<2, double>; -using Vector2f = Vector<2, float>; -using Vector2i = Vector<2, int32_t>; -using Vector2ui = Vector<2, uint32_t>; -using Vector2s = Vector<2, int16_t>; -using Vector2us = Vector<2, uint16_t>; -using Vector2c = Vector<2, int8_t>; -using Vector2uc = Vector<2, uint8_t>; - -using Vector3d = Vector<3, double>; -using Vector3f = Vector<3, float>; -using Vector3i = Vector<3, int32_t>; -using Vector3ui = Vector<3, uint32_t>; -using Vector3s = Vector<3, int16_t>; -using Vector3us = Vector<3, uint16_t>; -using Vector3c = Vector<3, int8_t>; -using Vector3uc = Vector<3, uint8_t>; - -using Vector4d = Vector<4, double>; -using Vector4f = Vector<4, float>; -using Vector4i = Vector<4, int32_t>; -using Vector4ui = Vector<4, uint32_t>; -using Vector4s = Vector<4, int16_t>; -using Vector4us = Vector<4, uint16_t>; -using Vector4c = Vector<4, int8_t>; -using Vector4uc = Vector<4, uint8_t>; - -using Vector6d = Vector<6, double>; -using Vector6f = Vector<6, float>; -using Vector6i = Vector<6, int32_t>; -using Vector6ui = Vector<6, uint32_t>; -using Vector6s = Vector<6, int16_t>; -using Vector6us = Vector<6, uint16_t>; -using Vector6c = 
Vector<6, int8_t>; -using Vector6uc = Vector<6, uint8_t>; -} // namespace rxmesh - -// Hash -namespace std { - -template -struct hash> -{ - std::size_t operator()(const rxmesh::Vector& v) const - { - std::size_t h = 0; - for (int i = 0; i < N; i++) { - h = std::hash()(v[i]) ^ (h << 1); - } - return h; - } -}; - -} // namespace std \ No newline at end of file diff --git a/tests/Polyscope_test/test_polyscope.cu b/tests/Polyscope_test/test_polyscope.cu index 5794b4f6..df9b7bfb 100644 --- a/tests/Polyscope_test/test_polyscope.cu +++ b/tests/Polyscope_test/test_polyscope.cu @@ -17,15 +17,17 @@ __global__ static void compute_vertex_normal(const rxmesh::Context context, auto vn_lambda = [&](FaceHandle face_id, VertexIterator& fv) { // get the face's three vertices coordinates - Vector<3, T> c0(coords(fv[0], 0), coords(fv[0], 1), coords(fv[0], 2)); - Vector<3, T> c1(coords(fv[1], 0), coords(fv[1], 1), coords(fv[1], 2)); - Vector<3, T> c2(coords(fv[2], 0), coords(fv[2], 1), coords(fv[2], 2)); + glm::fvec3 c0(coords(fv[0], 0), coords(fv[0], 1), coords(fv[0], 2)); + glm::fvec3 c1(coords(fv[1], 0), coords(fv[1], 1), coords(fv[1], 2)); + glm::fvec3 c2(coords(fv[2], 0), coords(fv[2], 1), coords(fv[2], 2)); // compute the face normal - Vector<3, T> n = cross(c1 - c0, c2 - c0); + glm::fvec3 n = cross(c1 - c0, c2 - c0); // the three edges length - Vector<3, T> l(dist2(c0, c1), dist2(c1, c2), dist2(c2, c0)); + glm::fvec3 l(glm::distance2(c0, c1), + glm::distance2(c1, c2), + glm::distance2(c2, c0)); // add the face's normal to its vertices for (uint32_t v = 0; v < 3; ++v) { // for every vertex in this face @@ -93,7 +95,7 @@ int main(int argc, char** argv) polyscope_mesh->addVertexVectorQuantity("vNormal", *vertex_normals); - //polyscope::show(); + polyscope::show(); return 0; } diff --git a/tests/RXMesh_test/CMakeLists.txt b/tests/RXMesh_test/CMakeLists.txt index 16eefd98..a42652be 100644 --- a/tests/RXMesh_test/CMakeLists.txt +++ b/tests/RXMesh_test/CMakeLists.txt @@ -3,8 +3,7 @@ 
add_executable( RXMesh_test ) set( SOURCE_LIST rxmesh_test_main.cu rxmesh_test.h - test_attribute.cuh - test_vector.cu + test_attribute.cuh test_util.cu test_iterator.cu test_queries.h diff --git a/tests/RXMesh_test/rxmesh_test_main.cu b/tests/RXMesh_test/rxmesh_test_main.cu index 5a1c7896..2b89df15 100644 --- a/tests/RXMesh_test/rxmesh_test_main.cu +++ b/tests/RXMesh_test/rxmesh_test_main.cu @@ -2,7 +2,6 @@ #include "gtest/gtest.h" #include "rxmesh/util/log.h" #include "rxmesh/util/report.h" -#include "rxmesh/util/vector.h" using dataT = float; diff --git a/tests/RXMesh_test/test_ev_diamond.h b/tests/RXMesh_test/test_ev_diamond.h index 10387062..bc61a08f 100644 --- a/tests/RXMesh_test/test_ev_diamond.h +++ b/tests/RXMesh_test/test_ev_diamond.h @@ -1,6 +1,7 @@ -#include "gtest/gtest.h" #include +#include "gtest/gtest.h" +#include "rxmesh/geometry_util.cuh" #include "rxmesh/rxmesh_static.h" #include "query_kernel.cuh" @@ -46,12 +47,6 @@ TEST(RXMeshStatic, EVDiamond) auto coords = *rx.get_input_vertex_coordinates(); - auto tri_area = [&](Vector3f& x0, Vector3f& x1, Vector3f& x2) { - float a((x0 - x1).norm()), b((x1 - x2).norm()), c((x2 - x0).norm()); - float s = (a + b + c) / 2; - return std::sqrt(s * (s - a) * (s - b) * (s - c)); - }; - rx.for_each_edge(HOST, [&](const EdgeHandle& eh) { // v3 // / \ @@ -70,10 +65,10 @@ TEST(RXMeshStatic, EVDiamond) if (v0.is_valid() && v1.is_valid() && v2.is_valid() && v3.is_valid()) { - Vector3f x0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - Vector3f x1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); - Vector3f x2(coords(v2, 0), coords(v2, 1), coords(v2, 2)); - Vector3f x3(coords(v3, 0), coords(v3, 1), coords(v3, 2)); + glm::vec3 x0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + glm::vec3 x1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + glm::vec3 x2(coords(v2, 0), coords(v2, 1), coords(v2, 2)); + glm::vec3 x3(coords(v3, 0), coords(v3, 1), coords(v3, 2)); float t0 = tri_area(x0, x1, x2); float t1 = tri_area(x0, x2, x3); diff 
--git a/tests/RXMesh_test/test_sparse_matrix.cuh b/tests/RXMesh_test/test_sparse_matrix.cuh index 7145a9bc..ada5be38 100644 --- a/tests/RXMesh_test/test_sparse_matrix.cuh +++ b/tests/RXMesh_test/test_sparse_matrix.cuh @@ -6,6 +6,8 @@ #include "rxmesh/query.cuh" #include "rxmesh/rxmesh_static.h" +template +using vec3 = glm::vec<3, T, glm::defaultp>; template __global__ static void sparse_mat_test(const rxmesh::Context context, @@ -46,9 +48,9 @@ __global__ static void sparse_mat_edge_len_test( arr_ref[row_index] = 0; sparse_mat(v_id, v_id) = 0; - Vector<3, T> v_coord(coords(v_id, 0), coords(v_id, 1), coords(v_id, 2)); + vec3 v_coord(coords(v_id, 0), coords(v_id, 1), coords(v_id, 2)); for (uint32_t v = 0; v < iter.size(); ++v) { - Vector<3, T> vi_coord( + vec3 vi_coord( coords(iter[v], 0), coords(iter[v], 1), coords(iter[v], 2)); sparse_mat(v_id, iter[v]) = 1; // dist(v_coord, vi_coord); @@ -111,8 +113,7 @@ __global__ static void simple_A_X_B_setup(const rxmesh::Context context, X_mat(row_index, 1) = coords(v_id, 1) * v_weight; X_mat(row_index, 2) = coords(v_id, 2) * v_weight; - Vector<3, float> vi_coord( - coords(v_id, 0), coords(v_id, 1), coords(v_id, 2)); + vec3 vi_coord(coords(v_id, 0), coords(v_id, 1), coords(v_id, 2)); for (uint32_t v = 0; v < iter.size(); ++v) { T e_weight = 1; A_mat(v_id, iter[v]) = time_step * e_weight; @@ -317,12 +318,12 @@ TEST(RXMeshStatic, SparseMatrixSimpleSolve) timer.stop(); RXMESH_TRACE("SPMM_rxmesh() took {} (ms) ", timer.elapsed_millis()); - std::vector h_ret_mat(num_vertices); + std::vector> h_ret_mat(num_vertices); CUDA_ERROR(cudaMemcpy(h_ret_mat.data(), ret_mat.data(), num_vertices * 3 * sizeof(float), cudaMemcpyDeviceToHost)); - std::vector h_B_mat(num_vertices); + std::vector> h_B_mat(num_vertices); CUDA_ERROR(cudaMemcpy(h_B_mat.data(), B_mat.data(), num_vertices * 3 * sizeof(float), @@ -384,12 +385,12 @@ TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) A_mat.denmat_mul(X_mat, ret_mat); - std::vector h_ret_mat(num_vertices); 
+ std::vector> h_ret_mat(num_vertices); CUDA_ERROR(cudaMemcpy(h_ret_mat.data(), ret_mat.data(), num_vertices * 3 * sizeof(float), cudaMemcpyDeviceToHost)); - std::vector h_B_mat(num_vertices); + std::vector> h_B_mat(num_vertices); CUDA_ERROR(cudaMemcpy(h_B_mat.data(), B_mat.data(), num_vertices * 3 * sizeof(float), diff --git a/tests/RXMesh_test/test_vector.cu b/tests/RXMesh_test/test_vector.cu deleted file mode 100644 index 42322431..00000000 --- a/tests/RXMesh_test/test_vector.cu +++ /dev/null @@ -1,81 +0,0 @@ -#include "gtest/gtest.h" -#include "rxmesh/util/vector.h" - -TEST(RXMesh, Vector) -{ - using namespace rxmesh; - - // constrctors - Vector3f v0(0.5f); - EXPECT_TRUE(v0[0] == 0.5f && v0[1] == 0.5f && v0[2] == 0.5f); - - Vector3f v1(v0); - EXPECT_TRUE(v1[0] == 0.5f && v1[1] == 0.5f && v1[2] == 0.5f); - - Vector3i v2(10, 20, 30); - EXPECT_TRUE(v2[0] == 10 && v2[1] == 20 && v2[2] == 30); - - Vector4ui c = Vector4ui::constant(5); - EXPECT_TRUE(c[0] == 5 && c[1] == 5 && c[2] == 5 && c[3] == 5); - - Vector2s z = Vector2s::zero(); - EXPECT_TRUE(z[0] == 0 && z[1] == 0); - - // assignment - z[0] = 10; - z[1] = 20; - EXPECT_TRUE(z[0] == 10 && z[1] == 20); - - // neg - Vector2s neg_z = -z; - EXPECT_TRUE(neg_z[0] == -10 && neg_z[1] == -20); - - // sum - auto sum = z + z; - EXPECT_TRUE(sum[0] == 20 && sum[1] == 40); - - z += z; - EXPECT_TRUE(z[0] == 20 && z[1] == 40); - - // diff - auto diff = neg_z - z; - EXPECT_TRUE(diff[0] == -30 && diff[1] == -60); - - neg_z -= z; - EXPECT_TRUE(neg_z[0] == -30 && neg_z[1] == -60); - - // mul - auto mul = z * z; - EXPECT_TRUE(mul[0] == 20 * 20 && mul[1] == 40 * 40); - - z *= z; - EXPECT_TRUE(z[0] == 20 * 20 && z[1] == 40 * 40); - - // division - auto div = mul / z; - EXPECT_TRUE(div[0] == 1 && div[1] == 1); - - v0 /= 0.2f; - EXPECT_TRUE(v0[0] == 2.5f && v0[1] == 2.5f && v0[2] == 2.5f); - - // equality - EXPECT_TRUE(mul == z); - - // std::cout << "mul= " << mul << " v0= " << v0; - - // norm - EXPECT_TRUE(v0.norm2() == 2.5 * 2.5 * 3); 
- - // sum - EXPECT_EQ(neg_z.sum(), -90); - - // max - EXPECT_EQ(neg_z.max(), -30); - - // min - EXPECT_EQ(neg_z.min(), -60); - - // normalize - normalize(v0); - EXPECT_NEAR(norm(v0), 1, 0.001); -} From 1e5ee83b985ab6a80df40981c0aadf39886d0f2b Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 3 Jun 2024 15:25:41 -0500 Subject: [PATCH 02/96] reorg and fix glm --- apps/Delaunay/delaunay_rxmesh.cuh | 20 ++-- apps/Delaunay/mcf_rxmesh.h | 1 - apps/Delaunay/mcf_rxmesh_kernel.cuh | 19 ++-- apps/MCF/CMakeLists.txt | 1 - apps/MCF/mcf_rxmesh_kernel.cuh | 18 ++-- apps/MCF/mcf_util.h | 111 ----------------------- include/rxmesh/geometry_util.cuh | 108 +++++++++++++++++++++- include/rxmesh/types.h | 27 ++++++ tests/RXMesh_test/test_multi_queries.cuh | 14 ++- tests/RXMesh_test/test_sparse_matrix.cuh | 3 - 10 files changed, 165 insertions(+), 157 deletions(-) delete mode 100644 apps/MCF/mcf_util.h diff --git a/apps/Delaunay/delaunay_rxmesh.cuh b/apps/Delaunay/delaunay_rxmesh.cuh index 6cd552b5..ae1376d2 100644 --- a/apps/Delaunay/delaunay_rxmesh.cuh +++ b/apps/Delaunay/delaunay_rxmesh.cuh @@ -20,7 +20,7 @@ __global__ static void __launch_bounds__(blockThreads) uint32_t* num_sliced) { using namespace rxmesh; - using VecT = glm::vec<3, T, glm::defaultp>; + using vec3 = glm::vec<3, T, glm::defaultp>; auto block = cooperative_groups::this_thread_block(); ShmemAllocator shrd_alloc; CavityManager cavity( @@ -67,17 +67,17 @@ __global__ static void __launch_bounds__(blockThreads) constexpr T PII = 3.14159265358979323f; - const VecT V0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const VecT V1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); - const VecT V2(coords(v2, 0), coords(v2, 1), coords(v2, 2)); - const VecT V3(coords(v3, 0), coords(v3, 1), coords(v3, 2)); + const vec3 V0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const vec3 V1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + const vec3 V2(coords(v2, 0), coords(v2, 1), coords(v2, 2)); + const vec3 V3(coords(v3, 0), coords(v3, 1), 
coords(v3, 2)); // find the angle between S, M, Q vertices (i.e., angle at M) - auto angle_between_three_vertices = [](const VecT& S, - const VecT& M, - const VecT& Q) { - VecT p1 = S - M; - VecT p2 = Q - M; + auto angle_between_three_vertices = [](const vec3& S, + const vec3& M, + const vec3& Q) { + vec3 p1 = S - M; + vec3 p2 = Q - M; T dot_pro = glm::dot(p1, p2); if constexpr (std::is_same_v) { return acosf(dot_pro / (glm::length(p1) * glm::length(p2))); diff --git a/apps/Delaunay/mcf_rxmesh.h b/apps/Delaunay/mcf_rxmesh.h index 7ee2ad12..ea1934f4 100644 --- a/apps/Delaunay/mcf_rxmesh.h +++ b/apps/Delaunay/mcf_rxmesh.h @@ -7,7 +7,6 @@ #include "rxmesh/rxmesh_static.h" #include "rxmesh/util/report.h" #include "rxmesh/util/timer.h" -#include "rxmesh/util/vector.h" struct MCFData { diff --git a/apps/Delaunay/mcf_rxmesh_kernel.cuh b/apps/Delaunay/mcf_rxmesh_kernel.cuh index 6adeac93..990f0277 100644 --- a/apps/Delaunay/mcf_rxmesh_kernel.cuh +++ b/apps/Delaunay/mcf_rxmesh_kernel.cuh @@ -1,10 +1,9 @@ #pragma once -#include "../apps/MCF/mcf_util.h" #include "rxmesh/attribute.h" #include "rxmesh/context.h" +#include "rxmesh/geometry_util.cuh" #include "rxmesh/query.cuh" -#include "rxmesh/util/vector.h" /** * edge_cotan_weight() @@ -21,10 +20,10 @@ edge_cotan_weight(const rxmesh::VertexHandle& p_id, // q and s composes the diamond around p-r using namespace rxmesh; - const Vector<3, T> p(X(p_id, 0), X(p_id, 1), X(p_id, 2)); - const Vector<3, T> r(X(r_id, 0), X(r_id, 1), X(r_id, 2)); - const Vector<3, T> q(X(q_id, 0), X(q_id, 1), X(q_id, 2)); - const Vector<3, T> s(X(s_id, 0), X(s_id, 1), X(s_id, 2)); + const vec3 p(X(p_id, 0), X(p_id, 1), X(p_id, 2)); + const vec3 r(X(r_id, 0), X(r_id, 1), X(r_id, 2)); + const vec3 q(X(q_id, 0), X(q_id, 1), X(q_id, 2)); + const vec3 s(X(s_id, 0), X(s_id, 1), X(s_id, 2)); return edge_cotan_weight(p, r, q, s); } @@ -43,9 +42,9 @@ partial_voronoi_area(const rxmesh::VertexHandle& p_id, // center // the triangle p->q->r (oriented ccw) using 
namespace rxmesh; - const Vector<3, T> p(X(p_id, 0), X(p_id, 1), X(p_id, 2)); - const Vector<3, T> q(X(q_id, 0), X(q_id, 1), X(q_id, 2)); - const Vector<3, T> r(X(r_id, 0), X(r_id, 1), X(r_id, 2)); + const vec3 p(X(p_id, 0), X(p_id, 1), X(p_id, 2)); + const vec3 q(X(q_id, 0), X(q_id, 1), X(q_id, 2)); + const vec3 r(X(r_id, 0), X(r_id, 1), X(r_id, 2)); return partial_voronoi_area(p, q, r); } @@ -136,7 +135,7 @@ __global__ static void rxmesh_matvec(const rxmesh::Context context, auto matvec_lambda = [&](VertexHandle& p_id, const VertexIterator& iter) { T sum_e_weight(0); - Vector<3, T> x(T(0)); + vec3 x(T(0)); // vertex weight T v_weight(0); diff --git a/apps/MCF/CMakeLists.txt b/apps/MCF/CMakeLists.txt index 7c6f93cb..b242058b 100644 --- a/apps/MCF/CMakeLists.txt +++ b/apps/MCF/CMakeLists.txt @@ -5,7 +5,6 @@ set(SOURCE_LIST mcf_rxmesh_kernel.cuh mcf_openmesh.h mcf_rxmesh.h - mcf_util.h mcf_sparse_matrix.cuh ) diff --git a/apps/MCF/mcf_rxmesh_kernel.cuh b/apps/MCF/mcf_rxmesh_kernel.cuh index 425c188c..7d5b31e7 100644 --- a/apps/MCF/mcf_rxmesh_kernel.cuh +++ b/apps/MCF/mcf_rxmesh_kernel.cuh @@ -1,10 +1,8 @@ #pragma once -#include "mcf_util.h" #include "rxmesh/attribute.h" #include "rxmesh/context.h" #include "rxmesh/query.cuh" -#include "rxmesh/util/vector.h" /** * edge_cotan_weight() @@ -21,10 +19,10 @@ edge_cotan_weight(const rxmesh::VertexHandle& p_id, // q and s composes the diamond around p-r using namespace rxmesh; - const Vector<3, T> p(X(p_id, 0), X(p_id, 1), X(p_id, 2)); - const Vector<3, T> r(X(r_id, 0), X(r_id, 1), X(r_id, 2)); - const Vector<3, T> q(X(q_id, 0), X(q_id, 1), X(q_id, 2)); - const Vector<3, T> s(X(s_id, 0), X(s_id, 1), X(s_id, 2)); + const vec3 p(X(p_id, 0), X(p_id, 1), X(p_id, 2)); + const vec3 r(X(r_id, 0), X(r_id, 1), X(r_id, 2)); + const vec3 q(X(q_id, 0), X(q_id, 1), X(q_id, 2)); + const vec3 s(X(s_id, 0), X(s_id, 1), X(s_id, 2)); return edge_cotan_weight(p, r, q, s); } @@ -43,9 +41,9 @@ partial_voronoi_area(const rxmesh::VertexHandle& 
p_id, // center // the triangle p->q->r (oriented ccw) using namespace rxmesh; - const Vector<3, T> p(X(p_id, 0), X(p_id, 1), X(p_id, 2)); - const Vector<3, T> q(X(q_id, 0), X(q_id, 1), X(q_id, 2)); - const Vector<3, T> r(X(r_id, 0), X(r_id, 1), X(r_id, 2)); + const vec3 p(X(p_id, 0), X(p_id, 1), X(p_id, 2)); + const vec3 q(X(q_id, 0), X(q_id, 1), X(q_id, 2)); + const vec3 r(X(r_id, 0), X(r_id, 1), X(r_id, 2)); return partial_voronoi_area(p, q, r); } @@ -136,7 +134,7 @@ __global__ static void rxmesh_matvec(const rxmesh::Context context, auto matvec_lambda = [&](VertexHandle& p_id, const VertexIterator& iter) { T sum_e_weight(0); - Vector<3, T> x(T(0)); + vec3 x(T(0)); // vertex weight T v_weight(0); diff --git a/apps/MCF/mcf_util.h b/apps/MCF/mcf_util.h deleted file mode 100644 index 90d23a26..00000000 --- a/apps/MCF/mcf_util.h +++ /dev/null @@ -1,111 +0,0 @@ -#pragma once -#include "rxmesh/util/vector.h" - -/** - * clamp_cot() - */ -template -__host__ __device__ __forceinline__ void clamp_cot(T& v) -{ - // clamp cotangent values as if angles are in[1, 179] - - const T bound = 19.1; // 3 degrees - v = (v < -bound) ? -bound : ((v > bound) ? 
bound : v); -} - -/** - * partial_voronoi_area() - */ -template -__host__ __device__ __forceinline__ T -partial_voronoi_area(const rxmesh::Vector<3, T>& p, // center - const rxmesh::Vector<3, T>& q, // before center - const rxmesh::Vector<3, T>& r) // after center - -{ - // compute partial Voronoi area of the center vertex that is associated with - // the triangle p->q->r (oriented ccw) - using namespace rxmesh; - - // Edge vector p->q - const Vector<3, T> pq = q - p; - - // Edge vector q->r - const Vector<3, T> qr = r - q; - - // Edge vector p->r - const Vector<3, T> pr = r - p; - - // compute and check triangle area - T triangle_area = cross(pq, pr).norm(); - - if (triangle_area <= std::numeric_limits::min()) { - return -1; - } - - - // dot products for each corner (of its two emanating edge vectors) - T dotp = dot(pq, pr); - T dotq = -dot(qr, pq); - T dotr = dot(qr, pr); - if (dotp < 0.0) { - return 0.25 * triangle_area; - } - - // angle at q or r obtuse - else if (dotq < 0.0 || dotr < 0.0) { - return 0.125 * triangle_area; - } - - // no obtuse angles - else { - // cot(angle) = cos(angle)/sin(angle) = dot(A,B)/norm(cross(A,B)) - T cotq = dotq / triangle_area; - T cotr = dotr / triangle_area; - - // clamp cot(angle) by clamping angle to [1,179] - clamp_cot(cotq); - clamp_cot(cotr); - - return 0.125 * (pr.norm2() * cotq + pq.norm2() * cotr); - } - - return -1; -} - -/** - * edge_cotan_weight() - */ -template -__host__ __device__ __forceinline__ T -edge_cotan_weight(const rxmesh::Vector<3, T>& p, - const rxmesh::Vector<3, T>& r, - const rxmesh::Vector<3, T>& q, - const rxmesh::Vector<3, T>& s) -{ - // Get the edge weight between the two vertices p-r where - // q and s composes the diamond around p-r - using namespace rxmesh; - - auto partial_weight = [&](const Vector<3, T>& v) -> T { - const Vector<3, T> d0 = p - v; - const Vector<3, T> d1 = r - v; - - T triangle_area = cross(d0, d1).norm(); - if (triangle_area > std::numeric_limits::min()) { - T cot = dot(d0, d1) 
/ triangle_area; - clamp_cot(cot); - return cot; - } - return T(0.0); - }; - - T eweight = 0.0; - eweight += partial_weight(q); - eweight += partial_weight(s); - - assert(!isnan(eweight)); - assert(!isinf(eweight)); - - return eweight; -} \ No newline at end of file diff --git a/include/rxmesh/geometry_util.cuh b/include/rxmesh/geometry_util.cuh index 23be142e..c0692ddd 100644 --- a/include/rxmesh/geometry_util.cuh +++ b/include/rxmesh/geometry_util.cuh @@ -1,11 +1,9 @@ #pragma once - #define GLM_ENABLE_EXPERIMENTAL #include #include -template -using vec3 = glm::vec<3, T, glm::defaultp>; +#include "rxmesh/types.h" namespace rxmesh { @@ -102,4 +100,108 @@ __inline__ __device__ __host__ void triangle_min_max_angle(const vec3& a, max_angle = std::max(angle_a, angle_b); max_angle = std::max(max_angle, angle_c); }; + +/** + * clamp_cot() + */ +template +__host__ __device__ __forceinline__ void clamp_cot(T& v) +{ + // clamp cotangent values as if angles are in[1, 179] + + const T bound = 19.1; // 3 degrees + v = (v < -bound) ? -bound : ((v > bound) ? 
bound : v); +} + +/** + * compute partial Voronoi area of the center vertex that is associated with the + * triangle p->q->r (oriented ccw) + */ +template +__host__ __device__ __forceinline__ T +partial_voronoi_area(const vec3& p, // center + const vec3& q, // before center + const vec3& r) // after center + +{ + // Edge vector p->q + const vec3 pq = q - p; + + // Edge vector q->r + const vec3 qr = r - q; + + // Edge vector p->r + const vec3 pr = r - p; + + // compute and check triangle area + T triangle_area = tri_area(p, q, r); + + if (triangle_area <= std::numeric_limits::min()) { + return -1; + } + + + // dot products for each corner (of its two emanating edge vectors) + T dotp = glm::dot(pq, pr); + T dotq = -glm::dot(qr, pq); + T dotr = glm::dot(qr, pr); + if (dotp < 0.0) { + return 0.25 * triangle_area; + } + + // angle at q or r obtuse + else if (dotq < 0.0 || dotr < 0.0) { + return 0.125 * triangle_area; + } + + // no obtuse angles + else { + // cot(angle) = cos(angle)/sin(angle) = dot(A,B)/norm(cross(A,B)) + T cotq = dotq / triangle_area; + T cotr = dotr / triangle_area; + + // clamp cot(angle) by clamping angle to [1,179] + clamp_cot(cotq); + clamp_cot(cotr); + + + return 0.125 * (glm::length2(pr) * cotq + glm::length2(pq) * cotr); + } + + return -1; +} + +/** + * Get the edge weight between the two vertices p-r where q and s composes the + * diamond around p-r + */ +template +__host__ __device__ __forceinline__ T edge_cotan_weight(const vec3& p, + const vec3& r, + const vec3& q, + const vec3& s) +{ + auto partial_weight = [&](const vec3& v) -> T { + const vec3 d0 = p - v; + const vec3 d1 = r - v; + + T triangle_area = tri_area(p, r, v); + + if (triangle_area > std::numeric_limits::min()) { + T cot = glm::dot(d0, d1) / triangle_area; + clamp_cot(cot); + return cot; + } + return T(0.0); + }; + + T eweight = 0.0; + eweight += partial_weight(q); + eweight += partial_weight(s); + + assert(!isnan(eweight)); + assert(!isinf(eweight)); + + return eweight; +} } 
// namespace rxmesh \ No newline at end of file diff --git a/include/rxmesh/types.h b/include/rxmesh/types.h index 8d726e16..e529ca53 100644 --- a/include/rxmesh/types.h +++ b/include/rxmesh/types.h @@ -3,8 +3,35 @@ #include #include "rxmesh/util/macros.h" +#include + namespace rxmesh { +template +using vec1 = glm::vec<1, T, glm::defaultp>; + +template +using vec2 = glm::vec<2, T, glm::defaultp>; + +template +using vec3 = glm::vec<3, T, glm::defaultp>; + +template +using mat2x2 = glm::mat<2, 2, T, glm::defaultp>; + +template +using mat3x2 = glm::mat<3, 2, T, glm::defaultp>; + +template +using mat2x3 = glm::mat<2, 3, T, glm::defaultp>; + +template +using mat3x3 = glm::mat<3, 3, T, glm::defaultp>; + +template +using mat4x4 = glm::mat<4, 4, T, glm::defaultp>; + + /** * @brief Flags for where data resides. Used with Attributes */ diff --git a/tests/RXMesh_test/test_multi_queries.cuh b/tests/RXMesh_test/test_multi_queries.cuh index 459daefd..08a73d41 100644 --- a/tests/RXMesh_test/test_multi_queries.cuh +++ b/tests/RXMesh_test/test_multi_queries.cuh @@ -6,8 +6,6 @@ #include "rxmesh/rxmesh_static.h" -template -using Vec3 = glm::vec<3, T, glm::defaultp>; template __global__ static void sum_edges_ev(const rxmesh::Context context, @@ -19,9 +17,9 @@ __global__ static void sum_edges_ev(const rxmesh::Context context, auto sum_edges = [&](const EdgeHandle& id, const VertexIterator& iter) { - const Vec3 p0( + const vec3 p0( coords(iter[0], 0), coords(iter[0], 1), coords(iter[0], 2)); - const Vec3 p1( + const vec3 p1( coords(iter[1], 0), coords(iter[1], 1), coords(iter[1], 2)); const T edge_len = glm::distance2(p0, p1); @@ -57,7 +55,7 @@ __global__ static void sum_edges_multi_queries( auto sum_edges = [&](const VertexHandle& vertex, const EdgeIterator& eiter) { - const Vec3 p0( + const vec3 p0( coords(vertex, 0), coords(vertex, 1), coords(vertex, 2)); // for each incident to the vertex @@ -76,11 +74,11 @@ __global__ static void sum_edges_multi_queries( assert(vh0 == vertex || vh1 
== vertex); // get the other end vertex coordinates - Vec3 p1; + vec3 p1; if (vertex != vh0) { - p1 = Vec3(coords(vh0, 0), coords(vh0, 1), coords(vh0, 2)); + p1 = vec3(coords(vh0, 0), coords(vh0, 1), coords(vh0, 2)); } else { - p1 = Vec3(coords(vh1, 0), coords(vh1, 1), coords(vh1, 2)); + p1 = vec3(coords(vh1, 0), coords(vh1, 1), coords(vh1, 2)); } const T edge_len = glm::distance2(p0, p1); diff --git a/tests/RXMesh_test/test_sparse_matrix.cuh b/tests/RXMesh_test/test_sparse_matrix.cuh index ada5be38..7125e396 100644 --- a/tests/RXMesh_test/test_sparse_matrix.cuh +++ b/tests/RXMesh_test/test_sparse_matrix.cuh @@ -6,9 +6,6 @@ #include "rxmesh/query.cuh" #include "rxmesh/rxmesh_static.h" -template -using vec3 = glm::vec<3, T, glm::defaultp>; - template __global__ static void sparse_mat_test(const rxmesh::Context context, IndexT* vet_degree) From e4e0bc8e2224002a46a18bde9c19ac8b6466649a Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 3 Jun 2024 15:41:31 -0500 Subject: [PATCH 03/96] fix filtering app --- apps/Filtering/filtering_rxmesh.cuh | 11 ++- apps/Filtering/filtering_rxmesh_kernel.cuh | 89 ++++++++++------------ apps/Filtering/filtering_util.h | 18 ++--- 3 files changed, 56 insertions(+), 62 deletions(-) diff --git a/apps/Filtering/filtering_rxmesh.cuh b/apps/Filtering/filtering_rxmesh.cuh index e6f79236..5ad230c9 100644 --- a/apps/Filtering/filtering_rxmesh.cuh +++ b/apps/Filtering/filtering_rxmesh.cuh @@ -113,12 +113,11 @@ void filtering_rxmesh(const std::string file_path, // Verify const T tol = 0.01; rx.for_each_vertex(HOST, [&](const VertexHandle& vh) { - uint32_t v_id = rx.map_to_global(vh); - const Vector<3, T> gt(ground_truth[v_id][0], - ground_truth[v_id][1], - ground_truth[v_id][2]); - const Vector<3, T> co( - (*coords)(vh, 0), (*coords)(vh, 1), (*coords)(vh, 2)); + uint32_t v_id = rx.map_to_global(vh); + const vec3 gt(ground_truth[v_id][0], + ground_truth[v_id][1], + ground_truth[v_id][2]); + const vec3 co((*coords)(vh, 0), (*coords)(vh, 1), 
(*coords)(vh, 2)); EXPECT_LT(std::fabs((*coords)(vh, 0) - ground_truth[v_id][0]), tol); EXPECT_LT(std::fabs((*coords)(vh, 1) - ground_truth[v_id][1]), tol); diff --git a/apps/Filtering/filtering_rxmesh_kernel.cuh b/apps/Filtering/filtering_rxmesh_kernel.cuh index 51862e5e..e68b19d8 100644 --- a/apps/Filtering/filtering_rxmesh_kernel.cuh +++ b/apps/Filtering/filtering_rxmesh_kernel.cuh @@ -8,10 +8,6 @@ #include "rxmesh/context.h" #include "rxmesh/kernels/query_dispatcher.cuh" #include "rxmesh/query.cuh" -#include "rxmesh/util/vector.h" - -constexpr float EPS = 10e-6; - /** * compute_vertex_normal() @@ -27,13 +23,12 @@ __global__ static void compute_vertex_normal(const rxmesh::Context context, VertexHandle v0(fv[0]), v1(fv[1]), v2(fv[2]); // get the face's three vertices coordinates - Vector<3, T> c0(coords(fv[0], 0), coords(fv[0], 1), coords(fv[0], 2)); - Vector<3, T> c1(coords(fv[1], 0), coords(fv[1], 1), coords(fv[1], 2)); - Vector<3, T> c2(coords(fv[2], 0), coords(fv[2], 1), coords(fv[2], 2)); + vec3 c0(coords(fv[0], 0), coords(fv[0], 1), coords(fv[0], 2)); + vec3 c1(coords(fv[1], 0), coords(fv[1], 1), coords(fv[1], 2)); + vec3 c2(coords(fv[2], 0), coords(fv[2], 1), coords(fv[2], 2)); // compute the face normal - Vector<3, T> n = cross(c1 - c0, c2 - c0); - n.normalize(); + vec3 n = glm::normalize(glm::cross(c1 - c0, c2 - c0)); // add the face's normal to its vertices for (uint32_t v = 0; v < 3; ++v) { // for every vertex in this face @@ -59,8 +54,8 @@ __device__ __inline__ void compute_new_coordinates( const rxmesh::VertexHandle& v_id, const rxmesh::VertexHandle vv[], const uint8_t num_vv, - rxmesh::Vector<3, T>& v, - const rxmesh::Vector<3, T>& n, + rxmesh::vec3& v, + const rxmesh::vec3& n, const T sigma_c_sq, const rxmesh::VertexAttribute& input_coords, rxmesh::VertexAttribute& filtered_coords) @@ -70,11 +65,11 @@ __device__ __inline__ void compute_new_coordinates( T sum = 0; T normalizer = 0; for (uint8_t i = 0; i < num_vv; ++i) { - rxmesh::Vector<3, T> 
q(input_coords(vv[i], 0), - input_coords(vv[i], 1), - input_coords(vv[i], 2)); + rxmesh::vec3 q(input_coords(vv[i], 0), + input_coords(vv[i], 1), + input_coords(vv[i], 2)); q -= v; - T t = q.norm(); + T t = glm::length(q); T h = dot(q, n); T wc = exp(-0.5 * t * t / sigma_c_sq); T ws = exp(-0.5 * h * h / sigma_s_sq); @@ -108,11 +103,11 @@ __launch_bounds__(blockThreads) __global__ uint32_t vv_patch[maxVVSize]; uint16_t vv_local[maxVVSize]; - uint8_t num_vv = 0; - T sigma_c_sq = 0; - T radius = 0; - Vector<3, T> vertex, normal; - uint32_t v_id = INVALID32; + uint8_t num_vv = 0; + T sigma_c_sq = 0; + T radius = 0; + vec3 vertex, normal; + uint32_t v_id = INVALID32; __shared__ uint32_t s_num_patches; __shared__ uint32_t s_block_patches[blockThreads]; @@ -149,10 +144,10 @@ __launch_bounds__(blockThreads) __global__ sigma_c_sq = 1e10; for (uint32_t v = 0; v < iter.size(); ++v) { - const uint32_t vv_id = iter[v]; - const Vector<3, T> q(input_coords(vv_id, 0), - input_coords(vv_id, 1), - input_coords(vv_id, 2)); + const uint32_t vv_id = iter[v]; + const vec3 q(input_coords(vv_id, 0), + input_coords(vv_id, 1), + input_coords(vv_id, 2)); T len = dist2(vertex, q); if (len < sigma_c_sq) { @@ -167,9 +162,9 @@ __launch_bounds__(blockThreads) __global__ for (uint32_t v = 0; v < iter.size(); ++v) { uint32_t vv_id = iter[v]; - const Vector<3, T> vvc(input_coords(vv_id, 0), - input_coords(vv_id, 1), - input_coords(vv_id, 2)); + const vec3 vvc(input_coords(vv_id, 0), + input_coords(vv_id, 1), + input_coords(vv_id, 2)); T dist = dist2(vertex, vvc); @@ -212,9 +207,9 @@ __launch_bounds__(blockThreads) __global__ // make sure that it is not a duplicate if (!linear_search(vv, vvv_id, num_vv)) { - const Vector<3, T> vvv(input_coords(vvv_id, 0), - input_coords(vvv_id, 1), - input_coords(vvv_id, 2)); + const vec3 vvv(input_coords(vvv_id, 0), + input_coords(vvv_id, 1), + input_coords(vvv_id, 2)); T dist = dist2(vvv, vertex); @@ -361,9 +356,9 @@ __launch_bounds__(blockThreads) __global__ // 
make sure that it is not a duplicate if (!linear_search(vv, vvv_id, num_vv)) { - const Vector<3, T> vvv(input_coords(vvv_id, 0), - input_coords(vvv_id, 1), - input_coords(vvv_id, 2)); + const vec3 vvv(input_coords(vvv_id, 0), + input_coords(vvv_id, 1), + input_coords(vvv_id, 2)); T dist = dist2(vvv, vertex); @@ -441,7 +436,7 @@ __global__ static void bilateral_filtering( uint32_t num_vv = 0; T sigma_c_sq = 0; T radius = 0; - Vector<3, T> vertex, normal; + vec3 vertex, normal; VertexHandle v_id; auto first_ring = [&](VertexHandle& p_id, VertexIterator& iter) { @@ -454,7 +449,7 @@ __global__ static void bilateral_filtering( normal[1] = vertex_normals(v_id, 1); normal[2] = vertex_normals(v_id, 2); - normal.normalize(); + normal = glm::normalize(normal); vv[0] = v_id; ++num_vv; @@ -463,11 +458,11 @@ __global__ static void bilateral_filtering( for (uint32_t v = 0; v < iter.size(); ++v) { const VertexHandle vv_id = iter[v]; - const Vector<3, T> q(input_coords(vv_id, 0), - input_coords(vv_id, 1), - input_coords(vv_id, 2)); + const vec3 q(input_coords(vv_id, 0), + input_coords(vv_id, 1), + input_coords(vv_id, 2)); - T len = dist2(vertex, q); + T len = glm::distance2(vertex, q); if (len < sigma_c_sq) { sigma_c_sq = len; } @@ -479,11 +474,11 @@ __global__ static void bilateral_filtering( for (uint32_t v = 0; v < iter.size(); ++v) { const VertexHandle vv_id = iter[v]; - const Vector<3, T> vvc(input_coords(vv_id, 0), - input_coords(vv_id, 1), - input_coords(vv_id, 2)); + const vec3 vvc(input_coords(vv_id, 0), + input_coords(vv_id, 1), + input_coords(vv_id, 2)); - T dist = dist2(vertex, vvc); + T dist = glm::distance2(vertex, vvc); if (dist <= radius) { uint8_t id = num_vv++; @@ -513,12 +508,12 @@ __global__ static void bilateral_filtering( if (vvv_id != v_id) { // make sure that we don't store duplicate outputs if (!linear_search(vv, vvv_id, num_vv)) { - const Vector<3, T> vvv(input_coords(vvv_id, 0), - input_coords(vvv_id, 1), - input_coords(vvv_id, 2)); + const vec3 
vvv(input_coords(vvv_id, 0), + input_coords(vvv_id, 1), + input_coords(vvv_id, 2)); - T dist = dist2(vvv, vertex); + T dist = glm::distance2(vvv, vertex); if (dist <= radius) { uint32_t id = num_vv++; assert(id < maxVVSize); diff --git a/apps/Filtering/filtering_util.h b/apps/Filtering/filtering_util.h index 58915b5a..e4fe5372 100644 --- a/apps/Filtering/filtering_util.h +++ b/apps/Filtering/filtering_util.h @@ -7,15 +7,15 @@ template __device__ __inline__ T compute_sigma_c_sq( const rxmesh::VertexHandle vv[], const uint8_t num_vv, - const rxmesh::Vector<3, T>& v, + const rxmesh::vec3& v, const rxmesh::VertexAttribute& input_coords) { T sigma_c = 1e10; for (uint8_t i = 1; i < num_vv; ++i) { - const rxmesh::Vector<3, T> q(input_coords(vv[i], 0), - input_coords(vv[i], 1), - input_coords(vv[i], 2)); + const rxmesh::vec3 q(input_coords(vv[i], 0), + input_coords(vv[i], 1), + input_coords(vv[i], 2)); T len = dist2(v, q); if (len < sigma_c) { @@ -33,8 +33,8 @@ __device__ __inline__ T compute_sigma_s_sq( const rxmesh::VertexHandle& v_id, const rxmesh::VertexHandle vv[], const uint8_t num_vv, - const rxmesh::Vector<3, T>& v, - const rxmesh::Vector<3, T>& n, + const rxmesh::vec3& v, + const rxmesh::vec3& n, const rxmesh::VertexAttribute& input_coords) { @@ -42,9 +42,9 @@ __device__ __inline__ T compute_sigma_s_sq( T sum_sqs = 0; for (uint32_t i = 0; i < num_vv; ++i) { - rxmesh::Vector<3, T> q(input_coords(vv[i], 0), - input_coords(vv[i], 1), - input_coords(vv[i], 2)); + rxmesh::vec3 q(input_coords(vv[i], 0), + input_coords(vv[i], 1), + input_coords(vv[i], 2)); q -= v; T t = dot(q, n); From eb94a42bb9942d4273d37e5e8804c6b5be5905f0 Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 3 Jun 2024 15:50:20 -0500 Subject: [PATCH 04/96] fix gaussian curvature --- .../gaussian_curvature_kernel.cuh | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/apps/GaussianCurvature/gaussian_curvature_kernel.cuh 
b/apps/GaussianCurvature/gaussian_curvature_kernel.cuh index dcc02952..c1a265ae 100644 --- a/apps/GaussianCurvature/gaussian_curvature_kernel.cuh +++ b/apps/GaussianCurvature/gaussian_curvature_kernel.cuh @@ -2,7 +2,7 @@ #include "rxmesh/attribute.h" #include "rxmesh/context.h" #include "rxmesh/query.cuh" -#include "rxmesh/util/vector.h" + /** * gaussian_curvature() */ @@ -17,17 +17,22 @@ __global__ static void compute_gaussian_curvature( auto gc_lambda = [&](FaceHandle face_id, VertexIterator& fv) { // get the face's three vertices coordinates - Vector<3, T> c0(coords(fv[0], 0), coords(fv[0], 1), coords(fv[0], 2)); - Vector<3, T> c1(coords(fv[1], 0), coords(fv[1], 1), coords(fv[1], 2)); - Vector<3, T> c2(coords(fv[2], 0), coords(fv[2], 1), coords(fv[2], 2)); + vec3 c0(coords(fv[0], 0), coords(fv[0], 1), coords(fv[0], 2)); + vec3 c1(coords(fv[1], 0), coords(fv[1], 1), coords(fv[1], 2)); + vec3 c2(coords(fv[2], 0), coords(fv[2], 1), coords(fv[2], 2)); // the three edges length - Vector<3, T> l(dist2(c0, c1), dist2(c1, c2), dist2(c2, c0)); - T s = cross(c1 - c0, c2 - c0).norm(); - Vector<3, T> c(dot(c1 - c0, c2 - c0), - dot(c2 - c1, c0 - c1), - dot(c0 - c2, c1 - c2)); - Vector<3, T> rads(atan2(s, c[0]), atan2(s, c[1]), atan2(s, c[2])); + vec3 l(glm::distance2(c0, c1), + glm::distance2(c1, c2), + glm::distance2(c2, c0)); + + T s = glm::length(glm::cross(c1 - c0, c2 - c0)); + + vec3 c(glm::dot(c1 - c0, c2 - c0), + glm::dot(c2 - c1, c0 - c1), + glm::dot(c0 - c2, c1 - c2)); + + vec3 rads(atan2(s, c[0]), atan2(s, c[1]), atan2(s, c[2])); bool is_ob = false; for (int i = 0; i < 3; ++i) { From 8720fba7df9c64ce8a8a1c5b55b14318f2552ca5 Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 3 Jun 2024 15:55:43 -0500 Subject: [PATCH 05/96] fix geodesic --- apps/Geodesic/geodesic_kernel.cuh | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/apps/Geodesic/geodesic_kernel.cuh b/apps/Geodesic/geodesic_kernel.cuh index cfae752c..3de8d5ca 
100644 --- a/apps/Geodesic/geodesic_kernel.cuh +++ b/apps/Geodesic/geodesic_kernel.cuh @@ -3,7 +3,7 @@ #include "rxmesh/attribute.h" #include "rxmesh/context.h" #include "rxmesh/query.cuh" -#include "rxmesh/util/vector.h" + /** * update_step() @@ -18,11 +18,11 @@ __device__ __inline__ T update_step( const T infinity_val) { using namespace rxmesh; - const Vector<3, T> v0(coords(v0_id, 0), coords(v0_id, 1), coords(v0_id, 2)); - const Vector<3, T> v1(coords(v1_id, 0), coords(v1_id, 1), coords(v1_id, 2)); - const Vector<3, T> v2(coords(v2_id, 0), coords(v2_id, 1), coords(v2_id, 2)); - const Vector<3, T> x0 = v1 - v0; - const Vector<3, T> x1 = v2 - v0; + const vec3 v0(coords(v0_id, 0), coords(v0_id, 1), coords(v0_id, 2)); + const vec3 v1(coords(v1_id, 0), coords(v1_id, 1), coords(v1_id, 2)); + const vec3 v2(coords(v2_id, 0), coords(v2_id, 1), coords(v2_id, 2)); + const vec3 x0 = v1 - v0; + const vec3 x1 = v2 - v0; T t[2]; t[0] = geo_distance(v1_id); @@ -30,10 +30,10 @@ __device__ __inline__ T update_step( T q[2][2]; - q[0][0] = dot(x0, x0); - q[0][1] = dot(x0, x1); - q[1][0] = dot(x1, x0); - q[1][1] = dot(x1, x1); + q[0][0] = glm::dot(x0, x0); + q[0][1] = glm::dot(x0, x1); + q[1][0] = glm::dot(x1, x0); + q[1][1] = glm::dot(x1, x1); T det = q[0][0] * q[1][1] - q[0][1] * q[1][0]; @@ -50,13 +50,13 @@ __device__ __inline__ T update_step( t[1] * t[1] * Q[1][1] - 1); T p = (delta + std::sqrt(dis)) / (Q[0][0] + Q[0][1] + Q[1][0] + Q[1][1]); T tp[2]; - tp[0] = t[0] - p; - tp[1] = t[1] - p; - const Vector<3, T> n = (x0 * Q[0][0] + x1 * Q[1][0]) * tp[0] + - (x0 * Q[0][1] + x1 * Q[1][1]) * tp[1]; + tp[0] = t[0] - p; + tp[1] = t[1] - p; + const vec3 n = (x0 * Q[0][0] + x1 * Q[1][0]) * tp[0] + + (x0 * Q[0][1] + x1 * Q[1][1]) * tp[1]; T cond[2]; - cond[0] = dot(x0, n); - cond[1] = dot(x1, n); + cond[0] = glm::dot(x0, n); + cond[1] = glm::dot(x1, n); T c[2]; c[0] = cond[0] * Q[0][0] + cond[1] * Q[0][1]; @@ -65,8 +65,8 @@ __device__ __inline__ T update_step( if (t[0] == infinity_val || 
t[1] == infinity_val || dis < 0 || c[0] >= 0 || c[1] >= 0) { T dp[2]; - dp[0] = geo_distance(v1_id) + x0.norm(); - dp[1] = geo_distance(v2_id) + x1.norm(); + dp[0] = geo_distance(v1_id) + glm::length(x0); + dp[1] = geo_distance(v2_id) + glm::length(x1); p = dp[dp[1] < dp[0]]; } return p; From daf58b29ad14aa09c606cbf8ed5d6f6d664741f0 Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 3 Jun 2024 20:08:40 -0500 Subject: [PATCH 06/96] fix MCF --- apps/MCF/mcf_openmesh.h | 31 ++++++++++++++++--------------- apps/MCF/mcf_rxmesh.h | 2 +- apps/MCF/mcf_rxmesh_kernel.cuh | 1 + apps/MCF/mcf_sparse_matrix.cuh | 1 - apps/Remesh/remesh_kernels.cuh | 11 ++++------- 5 files changed, 22 insertions(+), 24 deletions(-) diff --git a/apps/MCF/mcf_openmesh.h b/apps/MCF/mcf_openmesh.h index 56984251..9550d5ea 100644 --- a/apps/MCF/mcf_openmesh.h +++ b/apps/MCF/mcf_openmesh.h @@ -1,9 +1,9 @@ #pragma once #include "../common/openmesh_report.h" #include "../common/openmesh_trimesh.h" -#include "mcf_util.h" #include "rxmesh/util/timer.h" -#include "rxmesh/util/vector.h" + +#include "rxmesh/geometry_util.cuh" /** * axpy3() @@ -41,7 +41,8 @@ T dot3(const std::vector>& A, T ret = 0; int size = static_cast(A.size()); -#pragma omp parallel for schedule(static) num_threads(num_omp_threads) reduction(+ : ret) +#pragma omp parallel for schedule(static) num_threads(num_omp_threads) \ + reduction(+ : ret) for (int i = 0; i < size; ++i) { T partial = 0; for (size_t j = 0; j < A[i].size(); ++j) { @@ -73,14 +74,14 @@ T partial_voronoi_area(const int p_id, // center assert((*q_it).idx() == q_id); assert((*r_it).idx() == r_id); - const rxmesh::Vector<3, T> p( + const rxmesh::vec3 p( mesh.point(*p_it)[0], mesh.point(*p_it)[1], mesh.point(*p_it)[2]); - const rxmesh::Vector<3, T> q( + const rxmesh::vec3 q( mesh.point(*q_it)[0], mesh.point(*q_it)[1], mesh.point(*q_it)[2]); - const rxmesh::Vector<3, T> r( + const rxmesh::vec3 r( mesh.point(*r_it)[0], mesh.point(*r_it)[1], mesh.point(*r_it)[2]); - return 
partial_voronoi_area(p, q, r); + return rxmesh::partial_voronoi_area(p, q, r); } /** @@ -102,16 +103,16 @@ T edge_cotan_weight(const int p_id, TriMesh::VertexIter q_it = mesh.vertices_begin() + q_id; TriMesh::VertexIter s_it = mesh.vertices_begin() + s_id; - const rxmesh::Vector<3, T> p( + const rxmesh::vec3 p( mesh.point(*p_it)[0], mesh.point(*p_it)[1], mesh.point(*p_it)[2]); - const rxmesh::Vector<3, T> r( + const rxmesh::vec3 r( mesh.point(*r_it)[0], mesh.point(*r_it)[1], mesh.point(*r_it)[2]); - const rxmesh::Vector<3, T> q( + const rxmesh::vec3 q( mesh.point(*q_it)[0], mesh.point(*q_it)[1], mesh.point(*q_it)[2]); - const rxmesh::Vector<3, T> s( + const rxmesh::vec3 s( mesh.point(*s_it)[0], mesh.point(*s_it)[1], mesh.point(*s_it)[2]); - return edge_cotan_weight(p, r, q, s); + return rxmesh::edge_cotan_weight(p, r, q, s); } @@ -149,8 +150,8 @@ void mcf_matvec(TriMesh& mesh, TriMesh::VertexIter p_iter = mesh.vertices_begin() + p_id; // Off-diagonal entries - rxmesh::Vector<3, T> x(T(0)); - T sum_e_weight(0); + rxmesh::vec3 x(T(0)); + T sum_e_weight(0); // vertex weight T v_weight(0); @@ -440,7 +441,7 @@ void mcf_openmesh(const int num_omp_threads, // write output - //#pragma omp parallel for + // #pragma omp parallel for // for (int v_id = 0; v_id < int(input_mesh.n_vertices()); ++v_id) { // TriMesh::VertexIter v_iter = input_mesh.vertices_begin() + v_id; // input_mesh.point(*v_iter)[0] = smoothed_coord[v_id][0]; diff --git a/apps/MCF/mcf_rxmesh.h b/apps/MCF/mcf_rxmesh.h index 429c3348..138b5c13 100644 --- a/apps/MCF/mcf_rxmesh.h +++ b/apps/MCF/mcf_rxmesh.h @@ -7,7 +7,7 @@ #include "rxmesh/rxmesh_static.h" #include "rxmesh/util/report.h" #include "rxmesh/util/timer.h" -#include "rxmesh/util/vector.h" + #include "mcf_sparse_matrix.cuh" diff --git a/apps/MCF/mcf_rxmesh_kernel.cuh b/apps/MCF/mcf_rxmesh_kernel.cuh index 7d5b31e7..990f0277 100644 --- a/apps/MCF/mcf_rxmesh_kernel.cuh +++ b/apps/MCF/mcf_rxmesh_kernel.cuh @@ -2,6 +2,7 @@ #include "rxmesh/attribute.h" 
#include "rxmesh/context.h" +#include "rxmesh/geometry_util.cuh" #include "rxmesh/query.cuh" /** diff --git a/apps/MCF/mcf_sparse_matrix.cuh b/apps/MCF/mcf_sparse_matrix.cuh index c2d56c9b..bdbdbd0a 100644 --- a/apps/MCF/mcf_sparse_matrix.cuh +++ b/apps/MCF/mcf_sparse_matrix.cuh @@ -1,5 +1,4 @@ #pragma once -#include "mcf_util.h" #include "rxmesh/attribute.h" #include "rxmesh/matrix/dense_matrix.cuh" #include "rxmesh/matrix/sparse_matrix.cuh" diff --git a/apps/Remesh/remesh_kernels.cuh b/apps/Remesh/remesh_kernels.cuh index fa4a6679..8576e089 100644 --- a/apps/Remesh/remesh_kernels.cuh +++ b/apps/Remesh/remesh_kernels.cuh @@ -3,14 +3,9 @@ #include "link_condition.cuh" -#define GLM_ENABLE_EXPERIMENTAL -#include -#include - #include "rxmesh/kernels/debug.cuh" -template -using Vec3 = glm::vec<3, T, glm::defaultp>; + template __global__ static void stats_kernel(const rxmesh::Context context, @@ -261,12 +256,14 @@ __global__ static void __launch_bounds__(blockThreads) // 3 | 1 // \ | / // 2 - assert(iter.size() == 4); + if (edge_status(eh) == UNSEEN) { const VertexIterator iter = query.template get_iterator(eh.local_id()); + assert(iter.size() == 4); + const VertexHandle v0 = iter[0]; const VertexHandle v1 = iter[2]; From 3502822f6ee4515a8019827f96c721ad261e6cc9 Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 3 Jun 2024 20:15:23 -0500 Subject: [PATCH 07/96] fix remesh --- apps/Remesh/remesh_kernels.cuh | 47 +++++++++++++++++----------------- apps/Remesh/remesh_rxmesh.cuh | 4 +-- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/apps/Remesh/remesh_kernels.cuh b/apps/Remesh/remesh_kernels.cuh index 8576e089..bd05dab9 100644 --- a/apps/Remesh/remesh_kernels.cuh +++ b/apps/Remesh/remesh_kernels.cuh @@ -6,7 +6,6 @@ #include "rxmesh/kernels/debug.cuh" - template __global__ static void stats_kernel(const rxmesh::Context context, const rxmesh::VertexAttribute coords, @@ -20,8 +19,8 @@ __global__ static void stats_kernel(const rxmesh::Context context, 
ShmemAllocator shrd_alloc; auto compute_edge_len = [&](const EdgeHandle eh, const VertexIterator& ev) { - const Vec3 v0(coords(ev[0], 0), coords(ev[0], 1), coords(ev[0], 2)); - const Vec3 v1(coords(ev[1], 0), coords(ev[1], 1), coords(ev[1], 2)); + const vec3 v0(coords(ev[0], 0), coords(ev[0], 1), coords(ev[0], 2)); + const vec3 v1(coords(ev[1], 0), coords(ev[1], 1), coords(ev[1], 2)); T len = glm::distance(v0, v1); @@ -94,8 +93,8 @@ __global__ static void __launch_bounds__(blockThreads) edge_status(eh) = OKAY; return; } - const Vec3 pa(coords(va, 0), coords(va, 1), coords(va, 2)); - const Vec3 pb(coords(vb, 0), coords(vb, 1), coords(vb, 2)); + const vec3 pa(coords(va, 0), coords(va, 1), coords(va, 2)); + const vec3 pb(coords(vb, 0), coords(vb, 1), coords(vb, 2)); const T edge_len = glm::distance2(pa, pb); @@ -278,8 +277,8 @@ __global__ static void __launch_bounds__(blockThreads) v2 == v3) { return; } - const Vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const Vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); const T edge_len_sq = glm::distance2(p0, p1); if (edge_len_sq < low_edge_len_sq) { @@ -323,10 +322,10 @@ __global__ static void __launch_bounds__(blockThreads) cavity.get_vertices(src, v0, v1); - const Vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const Vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); - const Vec3 new_p((p0[0] + p1[0]) * T(0.5), + const vec3 new_p((p0[0] + p1[0]) * T(0.5), (p0[1] + p1[1]) * T(0.5), (p0[2] + p1[2]) * T(0.5)); @@ -338,7 +337,7 @@ __global__ static void __launch_bounds__(blockThreads) const VertexHandle vvv = cavity.get_cavity_vertex(c, i); - const Vec3 vp( + const vec3 vp( coords(vvv, 0), coords(vvv, 1), coords(vvv, 2)); const T edge_len_sq = 
glm::distance2(vp, new_p); @@ -482,8 +481,8 @@ __global__ static void __launch_bounds__(blockThreads) const VertexHandle v0(iter[0]), v1(iter[1]); - const Vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const Vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); const T edge_len_sq = glm::distance2(p0, p1); if (edge_len_sq < low_edge_len_sq) { @@ -572,10 +571,10 @@ __global__ static void __launch_bounds__(blockThreads) cavity.get_vertices(src, v0, v1); - const Vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const Vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); - const Vec3 new_p((p0[0] + p1[0]) * T(0.5), + const vec3 new_p((p0[0] + p1[0]) * T(0.5), (p0[1] + p1[1]) * T(0.5), (p0[2] + p1[2]) * T(0.5)); @@ -585,7 +584,7 @@ __global__ static void __launch_bounds__(blockThreads) for (uint16_t i = 0; i < size; ++i) { const VertexHandle vvv = cavity.get_cavity_vertex(c, i); - const Vec3 vp( + const vec3 vp( coords(vvv, 0), coords(vvv, 1), coords(vvv, 2)); const T edge_len_sq = glm::distance2(vp, new_p); @@ -1029,7 +1028,7 @@ __global__ static void __launch_bounds__(blockThreads) return; } - const Vec3 v(coords(v_id, 0), coords(v_id, 1), coords(v_id, 2)); + const vec3 v(coords(v_id, 0), coords(v_id, 1), coords(v_id, 2)); // compute both vertex normal and the new position // the new position is the average of the one-ring @@ -1040,10 +1039,10 @@ __global__ static void __launch_bounds__(blockThreads) // this is the last vertex in the one-ring (before r_id) VertexHandle q_id = iter.back(); - Vec3 q(coords(q_id, 0), coords(q_id, 1), coords(q_id, 2)); + vec3 q(coords(q_id, 0), coords(q_id, 1), coords(q_id, 2)); - Vec3 new_v(0.0, 0.0, 0.0); - Vec3 v_normal(0.0, 0.0, 0.0); + vec3 new_v(0.0, 0.0, 0.0); + vec3 
v_normal(0.0, 0.0, 0.0); T w = 0.0; @@ -1051,9 +1050,9 @@ __global__ static void __launch_bounds__(blockThreads) // the current one ring vertex const VertexHandle r_id = iter[i]; - const Vec3 r(coords(r_id, 0), coords(r_id, 1), coords(r_id, 2)); + const vec3 r(coords(r_id, 0), coords(r_id, 1), coords(r_id, 2)); - Vec3 c = glm::cross(q - v, r - v); + vec3 c = glm::cross(q - v, r - v); const T area = glm::length(c) / T(2.0); w += area; @@ -1062,7 +1061,7 @@ __global__ static void __launch_bounds__(blockThreads) c = glm::normalize(c); } - const Vec3 n = c * area; + const vec3 n = c * area; v_normal += n; diff --git a/apps/Remesh/remesh_rxmesh.cuh b/apps/Remesh/remesh_rxmesh.cuh index 553e3f26..292331e8 100644 --- a/apps/Remesh/remesh_rxmesh.cuh +++ b/apps/Remesh/remesh_rxmesh.cuh @@ -1,10 +1,8 @@ #include #include "rxmesh/rxmesh_dynamic.h" - -#include "rxmesh/util/util.h" - #include "rxmesh/util/report.h" +#include "rxmesh/util/util.h" int ps_iddd = 0; From 87d18652c8234569691eeb9ee10a0557df792c3d Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 3 Jun 2024 20:21:12 -0500 Subject: [PATCH 08/96] fix sec --- apps/ShortestEdgeCollapse/sec_kernels.cuh | 12 ++++++------ apps/ShortestEdgeCollapse/sec_rxmesh.cuh | 9 --------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/apps/ShortestEdgeCollapse/sec_kernels.cuh b/apps/ShortestEdgeCollapse/sec_kernels.cuh index b93154a3..d6f15ad9 100644 --- a/apps/ShortestEdgeCollapse/sec_kernels.cuh +++ b/apps/ShortestEdgeCollapse/sec_kernels.cuh @@ -48,8 +48,8 @@ __global__ static void sec(rxmesh::Context context, const VertexHandle v0 = iter[0]; const VertexHandle v1 = iter[1]; - const Vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const Vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); T len2 = logf(glm::distance2(p0, p1)); @@ -170,8 +170,8 @@ __global__ static void compute_min_max_cost( 
const VertexHandle v0 = iter[0]; const VertexHandle v1 = iter[1]; - const Vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const Vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); T len2 = logf(glm::distance2(p0, p1)); @@ -198,8 +198,8 @@ __global__ static void populate_histogram( const VertexHandle v0 = iter[0]; const VertexHandle v1 = iter[1]; - const Vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const Vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); T len2 = logf(glm::distance2(p0, p1)); diff --git a/apps/ShortestEdgeCollapse/sec_rxmesh.cuh b/apps/ShortestEdgeCollapse/sec_rxmesh.cuh index 6bb5368c..a241797c 100644 --- a/apps/ShortestEdgeCollapse/sec_rxmesh.cuh +++ b/apps/ShortestEdgeCollapse/sec_rxmesh.cuh @@ -1,19 +1,10 @@ #pragma once -#define GLM_ENABLE_EXPERIMENTAL -#include -#include - - #include "rxmesh/query.cuh" #include "rxmesh/rxmesh_dynamic.h" -template -using Vec3 = glm::vec<3, T, glm::defaultp>; - #include "histogram.cuh" #include "sec_kernels.cuh" - #include "rxmesh/util/report.h" inline void sec_rxmesh(rxmesh::RXMeshDynamic& rx, From 0800ca47e8c8fe5144803a47b8126c0eaca1b9ce Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 3 Jun 2024 20:42:49 -0500 Subject: [PATCH 09/96] fix surface tracking --- apps/SurfaceTracking/collapser.cuh | 37 +++++++---------- apps/SurfaceTracking/flipper.cuh | 33 ++++++--------- apps/SurfaceTracking/noise.h | 42 +++++++++---------- apps/SurfaceTracking/smoother.cuh | 24 ++++------- apps/SurfaceTracking/splitter.cuh | 17 +++----- apps/SurfaceTracking/tracking.cu | 2 +- apps/SurfaceTracking/tracking_kernels.cuh | 50 ++++++++++------------- apps/SurfaceTracking/tracking_rxmesh.cuh | 6 --- include/rxmesh/geometry_factory.h | 4 +- 9 files changed, 86 
insertions(+), 129 deletions(-) diff --git a/apps/SurfaceTracking/collapser.cuh b/apps/SurfaceTracking/collapser.cuh index e63e2379..1589888a 100644 --- a/apps/SurfaceTracking/collapser.cuh +++ b/apps/SurfaceTracking/collapser.cuh @@ -1,12 +1,5 @@ #pragma once -#define GLM_ENABLE_EXPERIMENTAL -#include -#include - -template -using Vec3 = glm::vec<3, T, glm::defaultp>; - #include #include "rxmesh/cavity_manager.cuh" @@ -45,13 +38,13 @@ __global__ static void __launch_bounds__(blockThreads) auto new_vertex_position = [&](VertexHandle& v_to_keep, VertexHandle& v_to_delete) { - Vec3 new_p; + vec3 new_p; - const Vec3 p_keep(position(v_to_keep, 0), + const vec3 p_keep(position(v_to_keep, 0), position(v_to_keep, 1), position(v_to_keep, 2)); - const Vec3 p_delete(position(v_to_delete, 0), + const vec3 p_delete(position(v_to_delete, 0), position(v_to_delete, 1), position(v_to_delete, 2)); @@ -114,10 +107,10 @@ __global__ static void __launch_bounds__(blockThreads) is_vertex_bd(ah) == 0 && is_vertex_bd(bh) == 0) { // vertices position - const Vec3 va(position(ah, 0), position(ah, 1), position(ah, 2)); - const Vec3 vb(position(bh, 0), position(bh, 1), position(bh, 2)); - const Vec3 vc(position(ch, 0), position(ch, 1), position(ch, 2)); - const Vec3 vd(position(dh, 0), position(dh, 1), position(dh, 2)); + const vec3 va(position(ah, 0), position(ah, 1), position(ah, 2)); + const vec3 vb(position(bh, 0), position(bh, 1), position(bh, 2)); + const vec3 vc(position(ch, 0), position(ch, 1), position(ch, 2)); + const vec3 vd(position(dh, 0), position(dh, 1), position(dh, 2)); bool should_it = true; @@ -195,10 +188,10 @@ __global__ static void __launch_bounds__(blockThreads) cavity.get_vertices(src, v0, v1); // decide on new vertex position - Vec3 new_p = new_vertex_position(v0, v1); + vec3 new_p = new_vertex_position(v0, v1); - const Vec3 p0(position(v0, 0), position(v0, 1), position(v0, 2)); - const Vec3 p1(position(v1, 0), position(v1, 1), position(v1, 2)); + const vec3 
p0(position(v0, 0), position(v0, 1), position(v0, 2)); + const vec3 p1(position(v1, 0), position(v1, 1), position(v1, 2)); // check if the new triangles will be bad i.e., will have normal // inversion, will have tiny area, will have bad angles @@ -214,16 +207,16 @@ __global__ static void __launch_bounds__(blockThreads) const VertexHandle vi = cavity.get_cavity_vertex(c, i); const VertexHandle vj = cavity.get_cavity_vertex(c, j); - const Vec3 pi( + const vec3 pi( position(vi, 0), position(vi, 1), position(vi, 2)); - const Vec3 pj( + const vec3 pj( position(vj, 0), position(vj, 1), position(vj, 2)); // the new triangle will be pi-pj-new_p - const Vec3 n_new = tri_normal(pi, pj, new_p); - const Vec3 n_0 = tri_normal(pi, pj, p0); - const Vec3 n_1 = tri_normal(pi, pj, p1); + const vec3 n_new = tri_normal(pi, pj, new_p); + const vec3 n_0 = tri_normal(pi, pj, p0); + const vec3 n_1 = tri_normal(pi, pj, p1); const T area_new = tri_area(pi, pj, new_p); diff --git a/apps/SurfaceTracking/flipper.cuh b/apps/SurfaceTracking/flipper.cuh index c8c92928..560b2ed9 100644 --- a/apps/SurfaceTracking/flipper.cuh +++ b/apps/SurfaceTracking/flipper.cuh @@ -1,12 +1,5 @@ #pragma once -#define GLM_ENABLE_EXPERIMENTAL -#include -#include - -template -using Vec3 = glm::vec<3, T, glm::defaultp>; - #include #include "rxmesh/cavity_manager.cuh" @@ -39,11 +32,11 @@ __global__ static void __launch_bounds__(blockThreads) } - const Vec3 v(position(vh, 0), position(vh, 1), position(vh, 2)); + const vec3 v(position(vh, 0), position(vh, 1), position(vh, 2)); VertexHandle qh = iter.back(); - Vec3 q(position(qh, 0), position(qh, 1), position(qh, 2)); + vec3 q(position(qh, 0), position(qh, 1), position(qh, 2)); Eigen::Matrix A; A << 0, 0, 0, 0, 0, 0, 0, 0, 0; @@ -54,14 +47,14 @@ __global__ static void __launch_bounds__(blockThreads) const VertexHandle rh = iter[i]; - const Vec3 r(position(rh, 0), position(rh, 1), position(rh, 2)); + const vec3 r(position(rh, 0), position(rh, 1), position(rh, 2)); // 
triangle normal - const Vec3 c = glm::cross(q - v, r - v); + const vec3 c = glm::cross(q - v, r - v); assert(glm::length(c) > std::numeric_limits::min()); - const Vec3 n = glm::normalize(c); + const vec3 n = glm::normalize(c); // triangle area const T area = T(0.5) * glm::length(c); @@ -185,16 +178,16 @@ __global__ static void __launch_bounds__(blockThreads) } // vertices position - const Vec3 va( + const vec3 va( position(ah, 0), position(ah, 1), position(ah, 2)); - const Vec3 vb( + const vec3 vb( position(bh, 0), position(bh, 1), position(bh, 2)); - const Vec3 vc( + const vec3 vc( position(ch, 0), position(ch, 1), position(ch, 2)); - const Vec3 vd( + const vec3 vd( position(dh, 0), position(dh, 1), position(dh, 2)); // change in length i.e., delaunay check @@ -222,12 +215,12 @@ __global__ static void __launch_bounds__(blockThreads) // they agree after flipping if (flip_it) { // old triangles normals - const Vec3 n0 = tri_normal(va, vb, vc); - const Vec3 n1 = tri_normal(va, vd, vb); + const vec3 n0 = tri_normal(va, vb, vc); + const vec3 n1 = tri_normal(va, vd, vb); // new triangles normals - const Vec3 n2 = tri_normal(vc, vd, vb); - const Vec3 n3 = tri_normal(vc, va, vd); + const vec3 n2 = tri_normal(vc, vd, vb); + const vec3 n3 = tri_normal(vc, va, vd); if (glm::dot(n0, n1) > T(0)) { if (glm::dot(n2, n3) < T(0)) { diff --git a/apps/SurfaceTracking/noise.h b/apps/SurfaceTracking/noise.h index ce0cb613..2a392142 100644 --- a/apps/SurfaceTracking/noise.h +++ b/apps/SurfaceTracking/noise.h @@ -1,11 +1,6 @@ #pragma once -#define GLM_ENABLE_EXPERIMENTAL -#include -#include - -template -using Vec3 = glm::vec<3, T, glm::defaultp>; +#include "rxmesh/types.h" template constexpr __host__ __device__ inline S lerp(const S& value0, @@ -73,9 +68,9 @@ constexpr inline T randhash(unsigned int seed, T a, T b) template -Vec3 sample_sphere(unsigned int& seed) +rxmesh::vec3 sample_sphere(unsigned int& seed) { - Vec3 v; + rxmesh::vec3 v; T m2; @@ -109,10 +104,10 @@ struct FlowNoise3 } 
using namespace rxmesh; - CUDA_ERROR(cudaMalloc((void**)&d_basis, n * sizeof(Vec3))); + CUDA_ERROR(cudaMalloc((void**)&d_basis, n * sizeof(vec3))); CUDA_ERROR(cudaMalloc((void**)&d_perm, n * sizeof(int))); CUDA_ERROR(cudaMemcpy( - d_basis, h_basis, n * sizeof(Vec3), cudaMemcpyHostToDevice)); + d_basis, h_basis, n * sizeof(vec3), cudaMemcpyHostToDevice)); CUDA_ERROR(cudaMemcpy( d_perm, h_perm, n * sizeof(int), cudaMemcpyHostToDevice)); } @@ -135,14 +130,14 @@ struct FlowNoise3 int j = (int)floory; int k = (int)floorz; - const Vec3& n000 = basis(hash_index(i, j, k)); - const Vec3& n100 = basis(hash_index(i + 1, j, k)); - const Vec3& n010 = basis(hash_index(i, j + 1, k)); - const Vec3& n110 = basis(hash_index(i + 1, j + 1, k)); - const Vec3& n001 = basis(hash_index(i, j, k + 1)); - const Vec3& n101 = basis(hash_index(i + 1, j, k + 1)); - const Vec3& n011 = basis(hash_index(i, j + 1, k + 1)); - const Vec3& n111 = basis(hash_index(i + 1, j + 1, k + 1)); + const rxmesh::vec3& n000 = basis(hash_index(i, j, k)); + const rxmesh::vec3& n100 = basis(hash_index(i + 1, j, k)); + const rxmesh::vec3& n010 = basis(hash_index(i, j + 1, k)); + const rxmesh::vec3& n110 = basis(hash_index(i + 1, j + 1, k)); + const rxmesh::vec3& n001 = basis(hash_index(i, j, k + 1)); + const rxmesh::vec3& n101 = basis(hash_index(i + 1, j, k + 1)); + const rxmesh::vec3& n011 = basis(hash_index(i, j + 1, k + 1)); + const rxmesh::vec3& n111 = basis(hash_index(i + 1, j + 1, k + 1)); T fx = x - floorx, fy = y - floory, fz = z - floorz; T sx = fx * fx * fx * (10 - fx * (15 - fx * 6)), @@ -175,7 +170,8 @@ struct FlowNoise3 return perm((perm((perm(i % n) + j) % n) + k) % n); } - constexpr __device__ __host__ const Vec3& basis(unsigned int h) const + constexpr __device__ __host__ const rxmesh::vec3& basis( + unsigned int h) const { assert(h < n); #ifdef __CUDA_ARCH__ @@ -195,8 +191,8 @@ struct FlowNoise3 #endif } - Vec3 h_basis[n]; - Vec3* d_basis; - int h_perm[n]; - int* d_perm; + rxmesh::vec3 h_basis[n]; + 
rxmesh::vec3* d_basis; + int h_perm[n]; + int* d_perm; }; diff --git a/apps/SurfaceTracking/smoother.cuh b/apps/SurfaceTracking/smoother.cuh index f4cc011a..b717c974 100644 --- a/apps/SurfaceTracking/smoother.cuh +++ b/apps/SurfaceTracking/smoother.cuh @@ -1,11 +1,5 @@ #pragma once -#define GLM_ENABLE_EXPERIMENTAL -#include -#include - -template -using Vec3 = glm::vec<3, T, glm::defaultp>; #include @@ -25,7 +19,7 @@ __global__ static void __launch_bounds__(blockThreads) auto block = cooperative_groups::this_thread_block(); auto smooth = [&](VertexHandle vh, VertexIterator& iter) { - const Vec3 v(current_position(vh, 0), + const vec3 v(current_position(vh, 0), current_position(vh, 1), current_position(vh, 2)); @@ -38,7 +32,7 @@ __global__ static void __launch_bounds__(blockThreads) VertexHandle qh = iter.back(); - Vec3 q(current_position(qh, 0), + vec3 q(current_position(qh, 0), current_position(qh, 1), current_position(qh, 2)); @@ -52,16 +46,16 @@ __global__ static void __launch_bounds__(blockThreads) const VertexHandle rh = iter[i]; - const Vec3 r(current_position(rh, 0), + const vec3 r(current_position(rh, 0), current_position(rh, 1), current_position(rh, 2)); // triangle normal - const Vec3 c = glm::cross(q - v, r - v); + const vec3 c = glm::cross(q - v, r - v); assert(glm::length(c) >= std::numeric_limits::min()); - const Vec3 n = glm::normalize(c); + const vec3 n = glm::normalize(c); // triangle area const T area = T(0.5) * glm::length(c); @@ -115,7 +109,7 @@ __global__ static void __launch_bounds__(blockThreads) // displacement VertexHandle ph = iter.back(); - Vec3 p(current_position(ph, 0), + vec3 p(current_position(ph, 0), current_position(ph, 1), current_position(ph, 2)); @@ -123,12 +117,12 @@ __global__ static void __launch_bounds__(blockThreads) const VertexHandle rh = iter[i]; - const Vec3 r(current_position(rh, 0), + const vec3 r(current_position(rh, 0), current_position(rh, 1), current_position(rh, 2)); // triangle normal - const Vec3 c = glm::cross(p 
- v, r - v); + const vec3 c = glm::cross(p - v, r - v); assert(glm::length(c) >= std::numeric_limits::min()); @@ -138,7 +132,7 @@ __global__ static void __launch_bounds__(blockThreads) // centriod constexpr T third = T(1) / T(3); - Vec3 center = (third * (v + p + r)) - v; + vec3 center = (third * (v + p + r)) - v; sum_areas += area; diff --git a/apps/SurfaceTracking/splitter.cuh b/apps/SurfaceTracking/splitter.cuh index 20bfdc4c..49805781 100644 --- a/apps/SurfaceTracking/splitter.cuh +++ b/apps/SurfaceTracking/splitter.cuh @@ -1,12 +1,5 @@ #pragma once -#define GLM_ENABLE_EXPERIMENTAL -#include -#include - -template -using Vec3 = glm::vec<3, T, glm::defaultp>; - #include "rxmesh/cavity_manager.cuh" #include "rxmesh/query.cuh" @@ -87,16 +80,16 @@ __global__ static void __launch_bounds__(blockThreads) } // vertices position - const Vec3 va( + const vec3 va( position(ah, 0), position(ah, 1), position(ah, 2)); - const Vec3 vb( + const vec3 vb( position(bh, 0), position(bh, 1), position(bh, 2)); - const Vec3 vc( + const vec3 vc( position(ch, 0), position(ch, 1), position(ch, 2)); - const Vec3 vd( + const vec3 vd( position(dh, 0), position(dh, 1), position(dh, 2)); @@ -131,7 +124,7 @@ __global__ static void __launch_bounds__(blockThreads) // Check angles of new triangles if (split_it) { // mid point (new) vertex - const Vec3 ve = T(0.5) * (va + vb); + const vec3 ve = T(0.5) * (va + vb); // current min and max angles T cur_min1, cur_min2, cur_max1, cur_max2; diff --git a/apps/SurfaceTracking/tracking.cu b/apps/SurfaceTracking/tracking.cu index 95770c98..0242df0b 100644 --- a/apps/SurfaceTracking/tracking.cu +++ b/apps/SurfaceTracking/tracking.cu @@ -46,7 +46,7 @@ TEST(Apps, SurfaceTracking) std::vector> fv; - const Vector<3, float> lower_corner(-3.0, 0.0, -3.0); + const vec3 lower_corner(-3.0, 0.0, -3.0); Arg.plane_name = "plane" + std::to_string(Arg.n) + "x" + std::to_string(Arg.n); diff --git a/apps/SurfaceTracking/tracking_kernels.cuh 
b/apps/SurfaceTracking/tracking_kernels.cuh index be6eab45..1c9d6141 100644 --- a/apps/SurfaceTracking/tracking_kernels.cuh +++ b/apps/SurfaceTracking/tracking_kernels.cuh @@ -1,12 +1,6 @@ #pragma once #include "rxmesh/rxmesh_dynamic.h" -#define GLM_ENABLE_EXPERIMENTAL -#include -#include -template -using Vec3 = glm::vec<3, T, glm::defaultp>; - #include "rxmesh/query.cuh" template @@ -17,29 +11,29 @@ noise_gen(const FlowNoise3& noise, T x, T y, T z) } template -__inline__ __device__ Vec3 potential(const FlowNoise3& noise, - T x, - T y, - T z) +__inline__ __device__ rxmesh::vec3 potential(const FlowNoise3& noise, + T x, + T y, + T z) { constexpr T height_factor = 0.5; - const Vec3 centre(0.0, 1.0, 0.0); - const T radius = 4.0; + const rxmesh::vec3 centre(0.0, 1.0, 0.0); + const T radius = 4.0; T sx = x / noise.noise_lengthscale; T sy = y / noise.noise_lengthscale; T sz = z / noise.noise_lengthscale; - Vec3 psi_i(0.f, 0.f, noise_gen(noise, sx, sy, sz)); + rxmesh::vec3 psi_i(0.f, 0.f, noise_gen(noise, sx, sy, sz)); - T dist = glm::length(Vec3(x, y, z) - centre); + T dist = glm::length(rxmesh::vec3(x, y, z) - centre); T scale = std::max((radius - dist) / radius, T(0.0)); psi_i *= scale; - Vec3 psi(0, 0, 0); + rxmesh::vec3 psi(0, 0, 0); psi += height_factor * noise.noise_gain * psi_i; @@ -47,9 +41,9 @@ __inline__ __device__ Vec3 potential(const FlowNoise3& noise, } template -__inline__ __device__ void get_velocity(const FlowNoise3& noise, - const Vec3& x, - Vec3& v) +__inline__ __device__ void get_velocity(const FlowNoise3& noise, + const rxmesh::vec3& x, + rxmesh::vec3& v) { const T delta_x = noise.delta_x; @@ -80,24 +74,24 @@ void curl_noise_predicate_new_position(rxmesh::RXMeshDynamic& rx, using namespace rxmesh; rx.for_each_vertex(DEVICE, [=] __device__(const VertexHandle vh) { - const Vec3 p(position(vh, 0), position(vh, 1), position(vh, 2)); + const vec3 p(position(vh, 0), position(vh, 1), position(vh, 2)); - Vec3 v; + vec3 v; get_velocity(noise, p, v); - Vec3 k1 
= adaptive_dt * v; + vec3 k1 = adaptive_dt * v; get_velocity(noise, p + T(0.5) * k1, v); - Vec3 k2 = adaptive_dt * v; + vec3 k2 = adaptive_dt * v; get_velocity(noise, p + T(0.5) * k2, v); - Vec3 k3 = adaptive_dt * v; + vec3 k3 = adaptive_dt * v; get_velocity(noise, p + T(0.5) * k3, v); - Vec3 k4 = adaptive_dt * v; + vec3 k4 = adaptive_dt * v; - const Vec3 new_p = + const vec3 new_p = p + T(1.0) / T(6.0) * (k1 + k4) + T(1.0) / T(3.0) * (k2 + k3); position(vh, 0) = new_p[0]; @@ -117,10 +111,10 @@ __global__ static void __launch_bounds__(blockThreads) auto block = cooperative_groups::this_thread_block(); auto len = [&](EdgeHandle eh, VertexIterator& iter) { - const Vec3 v0( + const vec3 v0( position(iter[0], 0), position(iter[0], 1), position(iter[0], 2)); - const Vec3 v1( + const vec3 v1( position(iter[1], 0), position(iter[1], 1), position(iter[1], 2)); ::atomicAdd(d_sum_edge_len, glm::distance(v0, v1)); @@ -164,7 +158,7 @@ __global__ static void __launch_bounds__(blockThreads) is_vertex_bd(iter[1]) = 1; } }; - + query.dispatch(block, shrd_alloc, bd_vertices); } diff --git a/apps/SurfaceTracking/tracking_rxmesh.cuh b/apps/SurfaceTracking/tracking_rxmesh.cuh index 6673b5bb..75e7f218 100644 --- a/apps/SurfaceTracking/tracking_rxmesh.cuh +++ b/apps/SurfaceTracking/tracking_rxmesh.cuh @@ -2,18 +2,12 @@ #define G_EIGENVALUE_RANK_RATIO 0.03 -#define GLM_ENABLE_EXPERIMENTAL -#include -#include - #include "frame_stepper.h" #include "rxmesh/rxmesh_dynamic.h" #include "simulation.h" #include "rxmesh/util/report.h" -template -using Vec3 = glm::vec<3, T, glm::defaultp>; using EdgeStatus = int8_t; enum : EdgeStatus diff --git a/include/rxmesh/geometry_factory.h b/include/rxmesh/geometry_factory.h index 5f38edc2..6e5fb3e6 100644 --- a/include/rxmesh/geometry_factory.h +++ b/include/rxmesh/geometry_factory.h @@ -1,8 +1,8 @@ #pragma once #include #include +#include "rxmesh/types.h" #include "rxmesh/util/macros.h" -#include "rxmesh/util/vector.h" namespace rxmesh { @@ -15,7 +15,7 @@ 
void create_plane(std::vector>& verts, uint32_t nx, uint32_t ny, T dx = 1.0, - const Vector<3, T> low_corner = {0, 0, 0}) + const vec3 low_corner = {0, 0, 0}) { verts.clear(); tris.clear(); From 291f0c79936379d920372b003e3953a426e4fee3 Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 3 Jun 2024 20:46:02 -0500 Subject: [PATCH 10/96] fix vertex normal --- apps/VertexNormal/vertex_normal_kernel.cuh | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/apps/VertexNormal/vertex_normal_kernel.cuh b/apps/VertexNormal/vertex_normal_kernel.cuh index 28b45cb8..01bf77c1 100644 --- a/apps/VertexNormal/vertex_normal_kernel.cuh +++ b/apps/VertexNormal/vertex_normal_kernel.cuh @@ -3,7 +3,7 @@ #include "rxmesh/attribute.h" #include "rxmesh/context.h" #include "rxmesh/query.cuh" -#include "rxmesh/util/vector.h" + /** * vertex_normal() */ @@ -15,15 +15,17 @@ __global__ static void compute_vertex_normal(const rxmesh::Context context, using namespace rxmesh; auto vn_lambda = [&](FaceHandle face_id, VertexIterator& fv) { // get the face's three vertices coordinates - Vector<3, T> c0(coords(fv[0], 0), coords(fv[0], 1), coords(fv[0], 2)); - Vector<3, T> c1(coords(fv[1], 0), coords(fv[1], 1), coords(fv[1], 2)); - Vector<3, T> c2(coords(fv[2], 0), coords(fv[2], 1), coords(fv[2], 2)); + vec3 c0(coords(fv[0], 0), coords(fv[0], 1), coords(fv[0], 2)); + vec3 c1(coords(fv[1], 0), coords(fv[1], 1), coords(fv[1], 2)); + vec3 c2(coords(fv[2], 0), coords(fv[2], 1), coords(fv[2], 2)); // compute the face normal - Vector<3, T> n = cross(c1 - c0, c2 - c0); + vec3 n = cross(c1 - c0, c2 - c0); // the three edges length - Vector<3, T> l(dist2(c0, c1), dist2(c1, c2), dist2(c2, c0)); + vec3 l(glm::distance2(c0, c1), + glm::distance2(c1, c2), + glm::distance2(c2, c0)); // add the face's normal to its vertices for (uint32_t v = 0; v < 3; ++v) { // for every vertex in this face From 178b7af383610edd267a9253f787ed8c7ef5c86e Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 3 Jun 2024 
21:22:37 -0500 Subject: [PATCH 11/96] fix xpbd --- apps/Delaunay/delaunay_rxmesh.cuh | 6 -- apps/Remesh/remesh_rxmesh.cuh | 7 -- apps/ShortestEdgeCollapse/sec_rxmesh.cuh | 7 -- apps/SurfaceTracking/tracking_rxmesh.cuh | 3 - apps/XPBD/remesh.cuh | 82 ++++++++++-------- apps/XPBD/svd.cuh | 14 +-- apps/XPBD/xpbd.cu | 105 ++++++++++++----------- include/rxmesh/rxmesh_static.h | 6 +- 8 files changed, 104 insertions(+), 126 deletions(-) diff --git a/apps/Delaunay/delaunay_rxmesh.cuh b/apps/Delaunay/delaunay_rxmesh.cuh index ae1376d2..113691d7 100644 --- a/apps/Delaunay/delaunay_rxmesh.cuh +++ b/apps/Delaunay/delaunay_rxmesh.cuh @@ -269,12 +269,6 @@ inline void delaunay_rxmesh(rxmesh::RXMeshDynamic& rx, bool with_verify = true) const uint32_t num_edges = rx.get_num_edges(); const uint32_t num_faces = rx.get_num_faces(); -#if USE_POLYSCOPE - rx.render_vertex_patch(); - rx.render_edge_patch(); - rx.render_face_patch(); - // polyscope::show(); -#endif MCFData mcf_data_before = mcf_rxmesh_cg(rx, false); report.add_member("mcf_before_time", mcf_data_before.total_time); diff --git a/apps/Remesh/remesh_rxmesh.cuh b/apps/Remesh/remesh_rxmesh.cuh index 292331e8..d5fb238a 100644 --- a/apps/Remesh/remesh_rxmesh.cuh +++ b/apps/Remesh/remesh_rxmesh.cuh @@ -705,13 +705,6 @@ inline void remesh_rxmesh(rxmesh::RXMeshDynamic& rx) report.model_data(Arg.obj_file_name + "_before", rx, "model_before"); report.add_member("method", std::string("RXMesh")); -#if USE_POLYSCOPE - rx.render_vertex_patch(); - rx.render_edge_patch(); - rx.render_face_patch(); - rx.get_polyscope_mesh(); - // polyscope::show(); -#endif auto coords = rx.get_input_vertex_coordinates(); auto new_coords = rx.add_vertex_attribute("newCoords", 3); diff --git a/apps/ShortestEdgeCollapse/sec_rxmesh.cuh b/apps/ShortestEdgeCollapse/sec_rxmesh.cuh index a241797c..6a02cdc7 100644 --- a/apps/ShortestEdgeCollapse/sec_rxmesh.cuh +++ b/apps/ShortestEdgeCollapse/sec_rxmesh.cuh @@ -47,13 +47,6 @@ inline void 
sec_rxmesh(rxmesh::RXMeshDynamic& rx, uint32_t max_num_registers_per_thread = 0; uint32_t max_num_blocks = 0; -#if USE_POLYSCOPE - rx.render_vertex_patch(); - rx.render_edge_patch(); - rx.render_face_patch(); - // polyscope::show(); -#endif - bool validate = false; int num_passes = 0; diff --git a/apps/SurfaceTracking/tracking_rxmesh.cuh b/apps/SurfaceTracking/tracking_rxmesh.cuh index 75e7f218..b708e8af 100644 --- a/apps/SurfaceTracking/tracking_rxmesh.cuh +++ b/apps/SurfaceTracking/tracking_rxmesh.cuh @@ -53,9 +53,6 @@ void update_polyscope(rxmesh::RXMeshDynamic& rx, ps_mesh->setEdgeWidth(1.0); ps_mesh->setEnabled(true); - rx.render_vertex_patch(); - rx.render_edge_patch(); - rx.render_face_patch(); polyscope::show(); ps_mesh->setEnabled(false); #endif diff --git a/apps/XPBD/remesh.cuh b/apps/XPBD/remesh.cuh index db1c3531..e841ed99 100644 --- a/apps/XPBD/remesh.cuh +++ b/apps/XPBD/remesh.cuh @@ -1,22 +1,21 @@ #pragma once #include "rxmesh/query.cuh" -#include "rxmesh/util/vector.h" #include "svd.cuh" template -__inline__ __device__ vec3 normal(const vec3& x0, - const vec3& x1, - const vec3& x2) +__inline__ __device__ rxmesh::vec3 normal(const vec3& x0, + const vec3& x1, + const vec3& x2) { return glm::normalize(glm::cross(x1 - x0, x2 - x0)); } template -__inline__ __device__ mat3x3 local_base(const vec3& n) +__inline__ __device__ rxmesh::mat3x3 local_base(const vec3& n) { - vec3 u = + rxmesh::vec3 u = (glm::dot(n, vec3(1, 0, 0)) > glm::dot(n, vec3(0, 0, 1))) ? 
vec3(0, 0, 1) : vec3(1, 0, 0); @@ -30,25 +29,27 @@ __inline__ __device__ mat3x3 local_base(const vec3& n) template -__inline__ __device__ vec2 perp(const vec2& u) +__inline__ __device__ rxmesh::vec2 perp(const vec2& u) { return vec2(-u[1], u[0]); } template -__inline__ __device__ mat2x2 perp(const mat2x2& A) +__inline__ __device__ rxmesh::mat2x2 perp(const mat2x2& A) { - return mat2x2(vec2(A(1, 1), -A(1, 0)), vec2(-A(0, 1), A(0, 0))); + return rxmesh::mat2x2(vec2(A(1, 1), -A(1, 0)), + vec2(-A(0, 1), A(0, 0))); } template -__inline__ __device__ mat2x2 projected_curvature(const vec3& m0, - const vec3& m1, - const vec3& m2, - const mat2x3& base, - const T& area) +__inline__ __device__ rxmesh::mat2x2 projected_curvature( + const rxmesh::vec3& m0, + const rxmesh::vec3& m1, + const rxmesh::vec3& m2, + const rxmesh::mat2x3& base, + const T& area) { - mat2x2 S; + rxmesh::mat2x2 S; for (int e = 0; e < 3; e++) { vec2 e_mat; @@ -82,43 +83,45 @@ __inline__ __device__ mat3x3 derivative(const vec3& w0, const vec3& dz, const mat3x3& invDm) { - return mat3x3(w1 - w0, w2 - w0, dz) * invDm; + return rxmesh::mat3x3(w1 - w0, w2 - w0, dz) * invDm; } template -__inline__ __device__ mat3x3 deformation_gradient(const vec3& v0, - const vec3& v1, - const vec3& v2, - const vec3& n, - const mat3x3& invDm, - const mat3x3& Sp_str) +__inline__ __device__ rxmesh::mat3x3 deformation_gradient( + const rxmesh::vec3& v0, + const rxmesh::vec3& v1, + const rxmesh::vec3& v2, + const rxmesh::vec3& n, + const rxmesh::mat3x3& invDm, + const rxmesh::mat3x3& Sp_str) { return derivative(v0, v1, v2, n, invDm) * Sp_str; } template -__inline__ __device__ mat2x2 compression_metric(const vec3& w0, - const vec3& w1, - const vec3& w2, - const vec3& n, - const mat3x3& invDm, - const mat3x3& Sp_str, - const mat3x3& S2, - const mat3x2& UV, - const T c) +__inline__ __device__ rxmesh::mat2x2 compression_metric( + const rxmesh::vec3& w0, + const rxmesh::vec3& w1, + const rxmesh::vec3& w2, + const rxmesh::vec3& n, + const 
rxmesh::mat3x3& invDm, + const rxmesh::mat3x3& Sp_str, + const rxmesh::mat3x3& S2, + const rxmesh::mat3x2& UV, + const T c) { - mat3x3 F = deformation_gradient(w0, w1, w2, n, invDm, Sp_str); - mat3x3 G = glm::transpose(F) * F - mat3x3(1); - mat2x2 e = glm::transpose(UV) * G * UV; - mat2x2 e2 = glm::transpose(UV) * glm::transpose(G) * G * UV; - mat2x2 Sw2 = glm::transpose(UV) * S2 * UV; - mat2x2 D = e2 - 4.0 * c * c * perp(Sw2); + rxmesh::mat3x3 F = deformation_gradient(w0, w1, w2, n, invDm, Sp_str); + rxmesh::mat3x3 G = glm::transpose(F) * F - mat3x3(1); + rxmesh::mat2x2 e = glm::transpose(UV) * G * UV; + rxmesh::mat2x2 e2 = glm::transpose(UV) * glm::transpose(G) * G * UV; + rxmesh::mat2x2 Sw2 = glm::transpose(UV) * S2 * UV; + rxmesh::mat2x2 D = e2 - 4.0 * c * c * perp(Sw2); // TODO // https://github.com/taichi-dev/taichi/blob/master/python/taichi/_funcs.py // return get_positive(-e + sqrt(D)) / (2.0 * sq(c)); - return mat2x2(0); + return rxmesh::mat2x2(0); } template @@ -134,6 +137,7 @@ void __global__ compute_face_sizing( const T refine_velocity, const T refine_compression) { + using namespace rxmesh; auto calc_sizing = [&](const FaceHandle& fh, const VertexIterator& iter) { const VertexHandle v0 = iter[0]; @@ -247,6 +251,8 @@ void __global__ compute_vertex_normal(const Context context, VertexAttribute w_coord, VertexAttribute v_normal) { + using namespace rxmesh; + // TODO make sure to init v_normal with zero auto calc_vn = [&](const FaceHandle& f, const VertexIterator& v) { diff --git a/apps/XPBD/svd.cuh b/apps/XPBD/svd.cuh index a7138d3c..8a9bd6f0 100644 --- a/apps/XPBD/svd.cuh +++ b/apps/XPBD/svd.cuh @@ -1,19 +1,11 @@ #pragma once -#define GLM_ENABLE_EXPERIMENTAL -#include -#include - -template -using vec3 = glm::vec<3, T, glm::defaultp>; - -template -using mat3x3 = glm::mat<3, 3, T, glm::defaultp>; +#include "rxmesh/types.h" template struct SVD { - mat3x3 U, S, V; + rxmesh::mat3x3 U, S, V; }; @@ -663,7 +655,7 @@ __host__ __device__ __forceinline__ void svd( 
// input A template __host__ __device__ __forceinline__ SVD singular_value_decomposition( - const mat3x3& mat) + const rxmesh::mat3x3& mat) { SVD ret; diff --git a/apps/XPBD/xpbd.cu b/apps/XPBD/xpbd.cu index c29d50df..7365e28f 100644 --- a/apps/XPBD/xpbd.cu +++ b/apps/XPBD/xpbd.cu @@ -15,10 +15,10 @@ void __global__ init_edges(const Context context, auto v0 = iter[0]; auto v1 = iter[1]; - const Vector3f x0(x(v0, 0), x(v0, 1), x(v0, 2)); - const Vector3f x1(x(v1, 0), x(v1, 1), x(v1, 2)); + const glm::fvec3 x0(x(v0, 0), x(v0, 1), x(v0, 2)); + const glm::fvec3 x1(x(v1, 0), x(v1, 1), x(v1, 2)); - rest_len(eh, 0) = (x0 - x1).norm(); + rest_len(eh, 0) = glm::length(x0 - x1); }; auto block = cooperative_groups::this_thread_block(); @@ -45,18 +45,18 @@ void __global__ solve_stretch(const Context context, auto v0 = iter[0]; auto v1 = iter[1]; - const Vector3f x0(new_x(v0, 0), new_x(v0, 1), new_x(v0, 2)); - const Vector3f x1(new_x(v1, 0), new_x(v1, 1), new_x(v1, 2)); + const glm::fvec3 x0(new_x(v0, 0), new_x(v0, 1), new_x(v0, 2)); + const glm::fvec3 x1(new_x(v1, 0), new_x(v1, 1), new_x(v1, 2)); const float w1(invM(v0, 0)), w2(invM(v1, 0)); if (w1 + w2 > 0.f) { - Vector3f n = x0 - x1; - const float d = n.norm(); - Vector3f dpp(0.f, 0.f, 0.f); + glm::fvec3 n = x0 - x1; + const float d = glm::length(n); + glm::fvec3 dpp(0.f, 0.f, 0.f); const float constraint = (d - rest_len(eh, 0)); - n.normalize(); + n = glm::normalize(n); if (XPBD) { const float compliance = stretch_compliance / dt2; @@ -118,40 +118,40 @@ void __global__ solve_bending(const Context context, const float w1(invM(v1, 0)), w2(invM(v2, 0)), w3(invM(v3, 0)), w4(invM(v4, 0)); if (w1 + w2 + w3 + w4 > 0.f) { - Vector3f p2(new_x(v2, 0) - new_x(v1, 0), - new_x(v2, 1) - new_x(v1, 1), - new_x(v2, 2) - new_x(v1, 2)); - Vector3f p3(new_x(v3, 0) - new_x(v1, 0), - new_x(v3, 1) - new_x(v1, 1), - new_x(v3, 2) - new_x(v1, 2)); - Vector3f p4(new_x(v4, 0) - new_x(v1, 0), - new_x(v4, 1) - new_x(v1, 1), - new_x(v4, 2) - new_x(v1, 
2)); - - float l23 = cross(p2, p3).norm(); - float l24 = cross(p2, p4).norm(); + glm::fvec3 p2(new_x(v2, 0) - new_x(v1, 0), + new_x(v2, 1) - new_x(v1, 1), + new_x(v2, 2) - new_x(v1, 2)); + glm::fvec3 p3(new_x(v3, 0) - new_x(v1, 0), + new_x(v3, 1) - new_x(v1, 1), + new_x(v3, 2) - new_x(v1, 2)); + glm::fvec3 p4(new_x(v4, 0) - new_x(v1, 0), + new_x(v4, 1) - new_x(v1, 1), + new_x(v4, 2) - new_x(v1, 2)); + + float l23 = glm::length(glm::cross(p2, p3)); + float l24 = glm::length(glm::cross(p2, p4)); if (l23 < 1e-8) { l23 = 1.f; } if (l24 < 1e-8) { l24 = 1.f; } - Vector3f n1 = cross(p2, p3); + glm::fvec3 n1 = glm::cross(p2, p3); n1 /= l23; - Vector3f n2 = cross(p2, p4); + glm::fvec3 n2 = glm::cross(p2, p4); n2 /= l24; // clamp(dot(n1, n2), -1., 1.) float d = std::max(1.f, std::min(dot(n1, n2), -1.f)); - Vector3f q3 = (cross(p2, n2) + cross(n1, p2) * d) / l23; - Vector3f q4 = (cross(p2, n1) + cross(n2, p2) * d) / l24; - Vector3f q2 = -(cross(p3, n2) + cross(n1, p3) * d) / l23 - - (cross(p4, n1) + cross(n2, p4) * d) / l24; - Vector3f q1 = -q2 - q3 - q4; + glm::fvec3 q3 = (cross(p2, n2) + cross(n1, p2) * d) / l23; + glm::fvec3 q4 = (cross(p2, n1) + cross(n2, p2) * d) / l24; + glm::fvec3 q2 = -(cross(p3, n2) + cross(n1, p3) * d) / l23 - + (cross(p4, n1) + cross(n2, p4) * d) / l24; + glm::fvec3 q1 = -q2 - q3 - q4; - float sum_wq = w1 * q1.norm2() + w2 * q2.norm2() + - w3 * q3.norm2() + w4 * q4.norm2(); + float sum_wq = w1 * glm::length2(q1) + w2 * glm::length2(q2) + + w3 * glm::length2(q3) + w4 * glm::length2(q4); float constraint = acos(d) - acos(-1.); if (XPBD) { @@ -203,22 +203,22 @@ int main(int argc, char** argv) constexpr uint32_t blockThreads = 256; // XPBD paramters - const float frame_dt = 1e-2; - const float dt = 5e-4; - const Vector3f gravity(0.f, 0.f, -15.0f); - const uint32_t rest_iter = 5; - const float stretch_relaxation = 0.3; - const float bending_relaxation = 0.2; - const float stretch_compliance = 1e-7; - const float bending_compliance = 1e-6; - const float 
mass = 1.0; - const bool XPBD = false; + const float frame_dt = 1e-2; + const float dt = 5e-4; + const glm::fvec3 gravity(0.f, 0.f, -15.0f); + const uint32_t rest_iter = 5; + const float stretch_relaxation = 0.3; + const float bending_relaxation = 0.2; + const float stretch_compliance = 1e-7; + const float bending_compliance = 1e-6; + const float mass = 1.0; + const bool XPBD = false; // fixtures paramters - const Vector4f fixure_spheres[4] = {{0.f, 1.f, 0.f, 0.004}, - {1.f, 1.f, 0.f, 0.004}, - {0.f, 0.f, 0.f, 0.004}, - {1.f, 0.f, 0.f, 0.004}}; + const glm::fvec4 fixure_spheres[4] = {{0.f, 1.f, 0.f, 0.004}, + {1.f, 1.f, 0.f, 0.004}, + {0.f, 0.f, 0.f, 0.004}, + {1.f, 0.f, 0.f, 0.004}}; // mesh data auto x = rx.get_input_vertex_coordinates(); @@ -238,12 +238,16 @@ int main(int argc, char** argv) [mass, fixure_spheres, invM = *invM, x = *x] __device__( VertexHandle vh) { invM(vh, 0) = mass; - Vector4f v(x(vh, 0), x(vh, 1), x(vh, 2), 0.f); - float eps = std::numeric_limits::epsilon(); - if ((v - fixure_spheres[0]).norm2() - fixure_spheres[0][3] < eps || - (v - fixure_spheres[1]).norm2() - fixure_spheres[1][3] < eps || - (v - fixure_spheres[2]).norm2() - fixure_spheres[2][3] < eps || - (v - fixure_spheres[3]).norm2() - fixure_spheres[3][3] < eps) { + glm::fvec4 v(x(vh, 0), x(vh, 1), x(vh, 2), 0.f); + float eps = std::numeric_limits::epsilon(); + if (glm::length2(v - fixure_spheres[0]) - fixure_spheres[0][3] < + eps || + glm::length2(v - fixure_spheres[1]) - fixure_spheres[1][3] < + eps || + glm::length2(v - fixure_spheres[2]) - fixure_spheres[2][3] < + eps || + glm::length2(v - fixure_spheres[3]) - fixure_spheres[3][3] < + eps) { invM(vh, 0) = 0; } }); @@ -397,5 +401,4 @@ int main(int argc, char** argv) if (test) { RXMESH_INFO("mean= {}, mean2= {}", mean, mean2); } - } \ No newline at end of file diff --git a/include/rxmesh/rxmesh_static.h b/include/rxmesh/rxmesh_static.h index 7f694a05..0c5fca49 100644 --- a/include/rxmesh/rxmesh_static.h +++ 
b/include/rxmesh/rxmesh_static.h @@ -1029,7 +1029,7 @@ class RXMeshStatic : public RXMesh * @param lower bounding box lower corner * @param upper bounding box upper corner */ - void scale(glm::vec3 lower, glm::vec3 upper) + void scale(glm::fvec3 lower, glm::fvec3 upper) { if (lower[0] > upper[0] || lower[1] > upper[1] || lower[2] > upper[2]) { RXMESH_ERROR( @@ -1044,7 +1044,7 @@ class RXMeshStatic : public RXMesh return; } - glm::vec3 bb_lower, bb_upper; + glm::vec3 bb_lower(0), bb_upper(0); bounding_box(bb_lower, bb_upper); @@ -1075,7 +1075,7 @@ class RXMeshStatic : public RXMesh * @param lower * @param upper */ - void bounding_box(glm::vec3 lower, glm::vec3 upper) + void bounding_box(glm::vec3& lower, glm::vec3& upper) { lower[0] = std::numeric_limits::max(); lower[1] = std::numeric_limits::max(); From 9482f332e0036e4f00f321fc4a8b0e4bb07881ac Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 6 Jun 2024 19:42:03 -0500 Subject: [PATCH 12/96] cleanup xpbd --- apps/XPBD/CMakeLists.txt | 4 +- apps/XPBD/remesh.cuh | 281 --- apps/XPBD/svd.cuh | 704 ------ apps/XPBD/xpbd.cu | 6 +- input/cloth.obj | 5165 +++++++++++++++++++------------------- 5 files changed, 2584 insertions(+), 3576 deletions(-) delete mode 100644 apps/XPBD/remesh.cuh delete mode 100644 apps/XPBD/svd.cuh diff --git a/apps/XPBD/CMakeLists.txt b/apps/XPBD/CMakeLists.txt index 067f9a02..35e46119 100644 --- a/apps/XPBD/CMakeLists.txt +++ b/apps/XPBD/CMakeLists.txt @@ -1,9 +1,7 @@ add_executable(XPBD) set(SOURCE_LIST - xpbd.cu - remesh.cuh - svd.cuh + xpbd.cu ) target_sources(XPBD diff --git a/apps/XPBD/remesh.cuh b/apps/XPBD/remesh.cuh deleted file mode 100644 index e841ed99..00000000 --- a/apps/XPBD/remesh.cuh +++ /dev/null @@ -1,281 +0,0 @@ -#pragma once - -#include "rxmesh/query.cuh" -#include "svd.cuh" - - -template -__inline__ __device__ rxmesh::vec3 normal(const vec3& x0, - const vec3& x1, - const vec3& x2) -{ - return glm::normalize(glm::cross(x1 - x0, x2 - x0)); -} - -template -__inline__ __device__ 
rxmesh::mat3x3 local_base(const vec3& n) -{ - rxmesh::vec3 u = - (glm::dot(n, vec3(1, 0, 0)) > glm::dot(n, vec3(0, 0, 1))) ? - vec3(0, 0, 1) : - vec3(1, 0, 0); - - u = glm::normalize(u - glm::dot(u, n) * n); - - vec3 v = glm::cross(n, u); - - return mat3x3(u, v, n); -} - - -template -__inline__ __device__ rxmesh::vec2 perp(const vec2& u) -{ - return vec2(-u[1], u[0]); -} - -template -__inline__ __device__ rxmesh::mat2x2 perp(const mat2x2& A) -{ - return rxmesh::mat2x2(vec2(A(1, 1), -A(1, 0)), - vec2(-A(0, 1), A(0, 0))); -} - -template -__inline__ __device__ rxmesh::mat2x2 projected_curvature( - const rxmesh::vec3& m0, - const rxmesh::vec3& m1, - const rxmesh::vec3& m2, - const rxmesh::mat2x3& base, - const T& area) -{ - rxmesh::mat2x2 S; - - for (int e = 0; e < 3; e++) { - vec2 e_mat; - if (e == 0) { - e_mat = base * (m2 - m1); - } else if (e == 1) { - e_mat = base * (m0 - m2); - } else if (e == 2) { - e_mat = base * (m1 - m0); - } - - vec2 t_mat = perp(glm::normalize(e_mat)); - - // TODO compute dihedral angle using world coordinates - // - // double theta = dihedral_angle(face->adje[e]); - T theta = 0; - - - S -= - 0.5f * theta * glm::length(e_mat) * glm::outerProduct(t_mat, t_mat); - } - S /= area; - return S; -} - -template -__inline__ __device__ mat3x3 derivative(const vec3& w0, - const vec3& w1, - const vec3& w2, - const vec3& dz, - const mat3x3& invDm) -{ - return rxmesh::mat3x3(w1 - w0, w2 - w0, dz) * invDm; -} - - -template -__inline__ __device__ rxmesh::mat3x3 deformation_gradient( - const rxmesh::vec3& v0, - const rxmesh::vec3& v1, - const rxmesh::vec3& v2, - const rxmesh::vec3& n, - const rxmesh::mat3x3& invDm, - const rxmesh::mat3x3& Sp_str) -{ - return derivative(v0, v1, v2, n, invDm) * Sp_str; -} - -template -__inline__ __device__ rxmesh::mat2x2 compression_metric( - const rxmesh::vec3& w0, - const rxmesh::vec3& w1, - const rxmesh::vec3& w2, - const rxmesh::vec3& n, - const rxmesh::mat3x3& invDm, - const rxmesh::mat3x3& Sp_str, - const 
rxmesh::mat3x3& S2, - const rxmesh::mat3x2& UV, - const T c) -{ - rxmesh::mat3x3 F = deformation_gradient(w0, w1, w2, n, invDm, Sp_str); - rxmesh::mat3x3 G = glm::transpose(F) * F - mat3x3(1); - rxmesh::mat2x2 e = glm::transpose(UV) * G * UV; - rxmesh::mat2x2 e2 = glm::transpose(UV) * glm::transpose(G) * G * UV; - rxmesh::mat2x2 Sw2 = glm::transpose(UV) * S2 * UV; - rxmesh::mat2x2 D = e2 - 4.0 * c * c * perp(Sw2); - - // TODO - // https://github.com/taichi-dev/taichi/blob/master/python/taichi/_funcs.py - // return get_positive(-e + sqrt(D)) / (2.0 * sq(c)); - return rxmesh::mat2x2(0); -} - -template -void __global__ compute_face_sizing( - const Context context, - const VertexAttribute w_coord, - const VertexAttribute m_coord, - const VertexAttribute v_normal, // using world coordinates - FaceAttribute face_sizing, - const T remeshing_size_min, - const T material_density, - const T refine_angle, - const T refine_velocity, - const T refine_compression) -{ - using namespace rxmesh; - - auto calc_sizing = [&](const FaceHandle& fh, const VertexIterator& iter) { - const VertexHandle v0 = iter[0]; - const VertexHandle v1 = iter[1]; - const VertexHandle v2 = iter[2]; - - // material space - const vec3 m0(m_coord(v0, 0), m_coord(v0, 1), m_coord(v0, 2)); - const vec3 m1(m_coord(v1, 0), m_coord(v1, 1), m_coord(v1, 2)); - const vec3 m2(m_coord(v2, 0), m_coord(v2, 1), m_coord(v2, 2)); - - // world space - const vec3 w0(w_coord(v0, 0), w_coord(v0, 1), w_coord(v0, 2)); - const vec3 w1(w_coord(v1, 0), w_coord(v1, 1), w_coord(v1, 2)); - const vec3 w2(w_coord(v2, 0), w_coord(v2, 1), w_coord(v2, 2)); - - // project to in-plane 2D - - // TODO plastic stretching - const mat3x3 Sp_str; - - // local normal - const vec3 fn_m = normal(m0, m1, m2); - const vec3 fn_w = normal(w0, w1, w2); - - // local base - const mat3x3 base = local_base(fn_m); - - const mat3x2 UV(base.col(0), base.col(1)); - const mat2x3 UVt = glm::transpose(UV); - - - // compute_ms_data - mat3x3 invDm; // finite element 
matrix - - const vec3 d0 = m1 - m0; - const vec3 d1 = m2 - m0; - const vec3 d2 = glm::cross(d0, d1); - - // face area - const T area = 0.5 * glm::length(d2); - - // mass - const T mass = area * material_density; - - - if (area < std::numeric_limits::epsilon()) { - invDm = mat3x3(0); - } else { - const mat3x3 Dm3(d0, d1, d2 / (2.f * area)); - invDm = glm::inverse(Dm3); - - // clamp - const T clamp = 1000.f / remeshing_size_min; - SVD svd = singular_value_decomposition(invDm); - for (int i = 0; i < 3; i++) { - if (svd.S[i][i] > clamp) { - svd.S[i][i] = clamp; - } - } - invDm = svd.U * svd.S * glm::transpose(svd.V); - } - - - const mat2x2 sw1 = projected_curvature(m0, m1, m2, UVt, area); - - const vec3 n0(v_normal(v0, 0), v_normal(v0, 1), v_normal(v0, 2)); - const vec3 n1(v_normal(v1, 0), v_normal(v1, 1), v_normal(v1, 2)); - const vec3 n2(v_normal(v2, 0), v_normal(v2, 1), v_normal(v2, 2)); - - const mat3x3 sw2 = derivative(n0, n1, n2, vec3(0), invDm); - - const mat2x2 Mcurvw1 = - (glm::transpose(sw1) * sw1) / (refine_angle * refine_angle); - - const mat2x2 Mcurvw2 = UVt * (glm::transpose(sw2) * sw2) * UV / - (refine_angle * refine_angle); - - const mat3x3 V = derivative(w0, w1, w2, vec3(0), invDm); - - const mat2x2 Mvel = UVt * (glm::transpose(V) * V) * UV / - (refine_velocity * refine_velocity); - - const mat2x2 Mcomp = compression_metric(w0, - w1, - w2, - fn_w, - invDm, - Sp_str, - glm::transpose(sw2) * sw2, - UV, - refine_compression); - - const mat2x2 Mobs = mat2x2(0); - - const mat2x2 Mfrac = mat2x2(0); - - mat3x3 f_sizing; - }; - - auto block = cooperative_groups::this_thread_block(); - - Query query(context); - ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, calc_sizing); -} - - -template -void __global__ compute_vertex_normal(const Context context, - VertexAttribute w_coord, - VertexAttribute v_normal) -{ - using namespace rxmesh; - - // TODO make sure to init v_normal with zero - - auto calc_vn = [&](const FaceHandle& f, const VertexIterator& 
v) { - const vec3 w0(w_coord(v[0], 0), w_coord(v[0], 1), w_coord(v[0], 2)); - const vec3 w1(w_coord(v[1], 0), w_coord(v[1], 1), w_coord(v[1], 2)); - const vec3 w2(w_coord(v[2], 0), w_coord(v[2], 1), w_coord(v[2], 2)); - - const vec3 e1 = w1 - w0; - const vec3 e2 = w2 - w0; - - vec3 n = - glm::cross(e1, e2) / (2 * glm::length2(e1) * glm::length2(e2)); - - for (int i = 0; i < 3; ++i) { // v - for (int j = 0; j < 3; ++j) { - ::atomicAdd(&w_coord(v[i], j), n[j]); - } - } - }; - - auto block = cooperative_groups::this_thread_block(); - - Query query(context); - ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, calc_vn); -} diff --git a/apps/XPBD/svd.cuh b/apps/XPBD/svd.cuh deleted file mode 100644 index 8a9bd6f0..00000000 --- a/apps/XPBD/svd.cuh +++ /dev/null @@ -1,704 +0,0 @@ -#pragma once - -#include "rxmesh/types.h" - -template -struct SVD -{ - rxmesh::mat3x3 U, S, V; -}; - - -/************************************************************************** -** https://github.com/ericjang/svd3/tree/master/svd3_cuda -** svd3 -** -** Quick singular value decomposition as described by: -** A. McAdams, A. Selle, R. Tamstorf, J. Teran and E. Sifakis, -** Computing the Singular Value Decomposition of 3x3 matrices -** with minimal branching and elementary floating point operations, -** University of Wisconsin - Madison technical report TR1690, May 2011 -** -** OPTIMIZED GPU VERSION -** Implementation by: Eric Jang -** -** 13 Apr 2014 -** -**************************************************************************/ - -#define _gamma 5.828427124 // FOUR_GAMMA_SQUARED = sqrt(8)+3; -#define _cstar 0.923879532 // cos(pi/8) -#define _sstar 0.3826834323 // sin(p/8) -#define EPSILON 1e-6 - -namespace detail { - -// CUDA's rsqrt seems to be faster than the inlined approximation? 
-__host__ __device__ __forceinline__ float accurateSqrt(float x) -{ - return x * rsqrt(x); -} - -template -__host__ __device__ __forceinline__ void condSwap(const bool c, T& X, T& Y) -{ - // used in step 2 - const T Z = X; - X = c ? Y : X; - Y = c ? Z : Y; -} - -template -__host__ __device__ __forceinline__ void condNegSwap(const bool c, T& X, T& Y) -{ - // used in step 2 and 3 - const T Z = -X; - X = c ? Y : X; - Y = c ? Z : Y; -} - -// matrix multiplication M = A * B -template -__host__ __device__ __forceinline__ void multAB(const T a11, - const T a12, - const T a13, - const T a21, - const T a22, - const T a23, - const T a31, - const T a32, - const T a33, - // - const T b11, - const T b12, - const T b13, - const T b21, - const T b22, - const T b23, - const T b31, - const T b32, - const T b33, - // - T& m11, - T& m12, - T& m13, - T& m21, - T& m22, - T& m23, - T& m31, - T& m32, - T& m33) -{ - - m11 = a11 * b11 + a12 * b21 + a13 * b31; - m12 = a11 * b12 + a12 * b22 + a13 * b32; - m13 = a11 * b13 + a12 * b23 + a13 * b33; - m21 = a21 * b11 + a22 * b21 + a23 * b31; - m22 = a21 * b12 + a22 * b22 + a23 * b32; - m23 = a21 * b13 + a22 * b23 + a23 * b33; - m31 = a31 * b11 + a32 * b21 + a33 * b31; - m32 = a31 * b12 + a32 * b22 + a33 * b32; - m33 = a31 * b13 + a32 * b23 + a33 * b33; -} - -// matrix multiplication M = Transpose[A] * B -template -__host__ __device__ __forceinline__ void multAtB(const T a11, - const T a12, - const T a13, - const T a21, - const T a22, - const T a23, - const T a31, - const T a32, - const T a33, - // - const T b11, - const T b12, - const T b13, - const T b21, - const T b22, - const T b23, - const T b31, - const T b32, - const T b33, - // - T& m11, - T& m12, - T& m13, - T& m21, - T& m22, - T& m23, - T& m31, - T& m32, - T& m33) -{ - m11 = a11 * b11 + a21 * b21 + a31 * b31; - m12 = a11 * b12 + a21 * b22 + a31 * b32; - m13 = a11 * b13 + a21 * b23 + a31 * b33; - m21 = a12 * b11 + a22 * b21 + a32 * b31; - m22 = a12 * b12 + a22 * b22 + a32 * b32; - m23 = 
a12 * b13 + a22 * b23 + a32 * b33; - m31 = a13 * b11 + a23 * b21 + a33 * b31; - m32 = a13 * b12 + a23 * b22 + a33 * b32; - m33 = a13 * b13 + a23 * b23 + a33 * b33; -} - -template -__host__ __device__ __forceinline__ void quatToMat3(const T* qV, - T& m11, - T& m12, - T& m13, - T& m21, - T& m22, - T& m23, - T& m31, - T& m32, - T& m33) -{ - const T w = qV[3]; - const T x = qV[0]; - const T y = qV[1]; - const T z = qV[2]; - - const T qxx = x * x; - const T qyy = y * y; - const T qzz = z * z; - const T qxz = x * z; - const T qxy = x * y; - const T qyz = y * z; - const T qwx = w * x; - const T qwy = w * y; - const T qwz = w * z; - - m11 = 1 - 2 * (qyy + qzz); - m12 = 2 * (qxy - qwz); - m13 = 2 * (qxz + qwy); - m21 = 2 * (qxy + qwz); - m22 = 1 - 2 * (qxx + qzz); - m23 = 2 * (qyz - qwx); - m31 = 2 * (qxz - qwy); - m32 = 2 * (qyz + qwx); - m33 = 1 - 2 * (qxx + qyy); -} - - -/** - * @brief Given givens angle computed by approximateGivensAngles, compute the - * corresponding rotation quaternion. - */ -template -__host__ __device__ __forceinline__ void -approximateGivensQuaternion(const T a11, const T a12, const T a22, T& ch, T& sh) -{ - - ch = 2 * (a11 - a22); - sh = a12; - bool b = _gamma * sh * sh < ch * ch; - float w = rsqrt(ch * ch + sh * sh); - ch = b ? w * ch : _cstar; - sh = b ? 
w * sh : _sstar; -} - -template -__host__ __device__ __forceinline__ void jacobiConjugation(const int x, - const int y, - const int z, - T& s11, - T& s21, - T& s22, - T& s31, - T& s32, - T& s33, - T* qV) -{ - T ch, sh; - approximateGivensQuaternion(s11, s21, s22, ch, sh); - - T scale = ch * ch + sh * sh; - T a = (ch * ch - sh * sh) / scale; - T b = (2 * sh * ch) / scale; - - // make temp copy of S - T _s11 = s11; - T _s21 = s21; - T _s22 = s22; - T _s31 = s31; - T _s32 = s32; - T _s33 = s33; - - // perform conjugation S = Q'*S*Q - // Q already implicitly solved from a, b - s11 = a * (a * _s11 + b * _s21) + b * (a * _s21 + b * _s22); - s21 = a * (-b * _s11 + a * _s21) + b * (-b * _s21 + a * _s22); - s22 = -b * (-b * _s11 + a * _s21) + a * (-b * _s21 + a * _s22); - s31 = a * _s31 + b * _s32; - s32 = -b * _s31 + a * _s32; - s33 = _s33; - - // update cumulative rotation qV - T tmp[3]; - tmp[0] = qV[0] * sh; - tmp[1] = qV[1] * sh; - tmp[2] = qV[2] * sh; - sh *= qV[3]; - - qV[0] *= ch; - qV[1] *= ch; - qV[2] *= ch; - qV[3] *= ch; - - // (x,y,z) corresponds to ((0,1,2),(1,2,0),(2,0,1)) - // for (p,q) = ((0,1),(1,2),(0,2)) - qV[z] += sh; - qV[3] -= tmp[z]; // w - qV[x] += tmp[y]; - qV[y] -= tmp[x]; - - // re-arrange matrix for next iteration - _s11 = s22; - _s21 = s32; - _s22 = s33; - _s31 = s21; - _s32 = s31; - _s33 = s11; - s11 = _s11; - s21 = _s21; - s22 = _s22; - s31 = _s31; - s32 = _s32; - s33 = _s33; -} - -template -__host__ __device__ __forceinline__ T dist2(const T x, const T y, const T z) -{ - return x * x + y * y + z * z; -} - -/** - * @brief finds transformation that diagonalizes a symmetric matrix - */ -template -__host__ __device__ __forceinline__ void -jacobiEigenanlysis( // symmetric matrix - T& s11, - T& s21, - T& s22, - T& s31, - T& s32, - T& s33, - // quaternion representation of V - T* qV) -{ - qV[3] = 1; - qV[0] = 0; - qV[1] = 0; - qV[2] = 0; // follow same indexing convention as GLM - for (int i = 0; i < 4; i++) { - // we wish to eliminate the maximum 
off-diagonal element - // on every iteration, but cycling over all 3 possible rotations - // in fixed order (p,q) = (1,2) , (2,3), (1,3) still retains - // asymptotic convergence - jacobiConjugation( - 0, 1, 2, s11, s21, s22, s31, s32, s33, qV); // p,q = 0,1 - jacobiConjugation( - 1, 2, 0, s11, s21, s22, s31, s32, s33, qV); // p,q = 1,2 - jacobiConjugation( - 2, 0, 1, s11, s21, s22, s31, s32, s33, qV); // p,q = 0,2 - } -} - -template -__host__ __device__ __forceinline__ void -sortSingularValues( // matrix that we want to decompose - T& b11, - T& b12, - T& b13, - T& b21, - T& b22, - T& b23, - T& b31, - T& b32, - T& b33, - // sort V simultaneously - T& v11, - T& v12, - T& v13, - T& v21, - T& v22, - T& v23, - T& v31, - T& v32, - T& v33) -{ - T rho1 = dist2(b11, b21, b31); - T rho2 = dist2(b12, b22, b32); - T rho3 = dist2(b13, b23, b33); - bool c; - c = rho1 < rho2; - condNegSwap(c, b11, b12); - condNegSwap(c, v11, v12); - condNegSwap(c, b21, b22); - condNegSwap(c, v21, v22); - condNegSwap(c, b31, b32); - condNegSwap(c, v31, v32); - condSwap(c, rho1, rho2); - c = rho1 < rho3; - condNegSwap(c, b11, b13); - condNegSwap(c, v11, v13); - condNegSwap(c, b21, b23); - condNegSwap(c, v21, v23); - condNegSwap(c, b31, b33); - condNegSwap(c, v31, v33); - condSwap(c, rho1, rho3); - c = rho2 < rho3; - condNegSwap(c, b12, b13); - condNegSwap(c, v12, v13); - condNegSwap(c, b22, b23); - condNegSwap(c, v22, v23); - condNegSwap(c, b32, b33); - condNegSwap(c, v32, v33); -} - -template -__host__ __device__ __forceinline__ void QRGivensQuaternion(const T a1, - const T a2, - T& ch, - T& sh) -{ - // a1 = pivot point on diagonal - // a2 = lower triangular entry we want to annihilate - - constexpr T eps = std::numeric_limits::epsilon(); - - T rho = accurateSqrt(a1 * a1 + a2 * a2); - - sh = rho > eps ? 
a2 : 0; - ch = fabs(a1) + fmax(rho, eps); - bool b = a1 < 0; - condSwap(b, sh, ch); - T w = rsqrt(ch * ch + sh * sh); - ch *= w; - sh *= w; -} - -template -__host__ __device__ __forceinline__ void -QRDecomposition( // matrix that we want to decompose - const T b11, - const T b12, - const T b13, - const T b21, - const T b22, - const T b23, - const T b31, - const T b32, - const T b33, - // output Q - T& q11, - T& q12, - T& q13, - T& q21, - T& q22, - T& q23, - T& q31, - T& q32, - T& q33, - // output R - T& r11, - T& r12, - T& r13, - T& r21, - T& r22, - T& r23, - T& r31, - T& r32, - T& r33) -{ - T ch1, sh1, ch2, sh2, ch3, sh3; - T a, b; - - // first givens rotation (ch,0,0,sh) - QRGivensQuaternion(b11, b21, ch1, sh1); - a = 1 - 2 * sh1 * sh1; - b = 2 * ch1 * sh1; - // apply B = Q' * B - r11 = a * b11 + b * b21; - r12 = a * b12 + b * b22; - r13 = a * b13 + b * b23; - r21 = -b * b11 + a * b21; - r22 = -b * b12 + a * b22; - r23 = -b * b13 + a * b23; - r31 = b31; - r32 = b32; - r33 = b33; - - // second givens rotation (ch,0,-sh,0) - QRGivensQuaternion(r11, r31, ch2, sh2); - a = 1 - 2 * sh2 * sh2; - b = 2 * ch2 * sh2; - // apply B = Q' * B; - b11 = a * r11 + b * r31; - b12 = a * r12 + b * r32; - b13 = a * r13 + b * r33; - b21 = r21; - b22 = r22; - b23 = r23; - b31 = -b * r11 + a * r31; - b32 = -b * r12 + a * r32; - b33 = -b * r13 + a * r33; - - // third givens rotation (ch,sh,0,0) - QRGivensQuaternion(b22, b32, ch3, sh3); - a = 1 - 2 * sh3 * sh3; - b = 2 * ch3 * sh3; - // R is now set to desired value - r11 = b11; - r12 = b12; - r13 = b13; - r21 = a * b21 + b * b31; - r22 = a * b22 + b * b32; - r23 = a * b23 + b * b33; - r31 = -b * b21 + a * b31; - r32 = -b * b22 + a * b32; - r33 = -b * b23 + a * b33; - - // construct the cumulative rotation Q=Q1 * Q2 * Q3 - // the number of floating point operations for three quaternion - // multiplications is more or less comparable to the explicit form of the - // joined matrix. certainly more memory-efficient! 
- T sh12 = sh1 * sh1; - T sh22 = sh2 * sh2; - T sh32 = sh3 * sh3; - - q11 = (-1 + 2 * sh12) * (-1 + 2 * sh22); - q12 = 4 * ch2 * ch3 * (-1 + 2 * sh12) * sh2 * sh3 + - 2 * ch1 * sh1 * (-1 + 2 * sh32); - q13 = 4 * ch1 * ch3 * sh1 * sh3 - - 2 * ch2 * (-1 + 2 * sh12) * sh2 * (-1 + 2 * sh32); - - q21 = 2 * ch1 * sh1 * (1 - 2 * sh22); - q22 = -8 * ch1 * ch2 * ch3 * sh1 * sh2 * sh3 + - (-1 + 2 * sh12) * (-1 + 2 * sh32); - q23 = -2 * ch3 * sh3 + - 4 * sh1 * (ch3 * sh1 * sh3 + ch1 * ch2 * sh2 * (-1 + 2 * sh32)); - - q31 = 2 * ch2 * sh2; - q32 = 2 * ch3 * (1 - 2 * sh22) * sh3; - q33 = (-1 + 2 * sh22) * (-1 + 2 * sh32); -} - - -template -__host__ __device__ __forceinline__ void svd( // input A - const T a11, - const T a12, - const T a13, - const T a21, - const T a22, - const T a23, - const T a31, - const T a32, - const T a33, - // output U - T& u11, - T& u12, - T& u13, - T& u21, - T& u22, - T& u23, - T& u31, - T& u32, - T& u33, - // output S - T& s11, - T& s12, - T& s13, - T& s21, - T& s22, - T& s23, - T& s31, - T& s32, - T& s33, - // output V - T& v11, - T& v12, - T& v13, - T& v21, - T& v22, - T& v23, - T& v31, - T& v32, - T& v33) -{ - // normal equations matrix - T ATA11, ATA12, ATA13; - T ATA21, ATA22, ATA23; - T ATA31, ATA32, ATA33; - - multAtB(a11, - a12, - a13, - a21, - a22, - a23, - a31, - a32, - a33, - a11, - a12, - a13, - a21, - a22, - a23, - a31, - a32, - a33, - ATA11, - ATA12, - ATA13, - ATA21, - ATA22, - ATA23, - ATA31, - ATA32, - ATA33); - - // symmetric eigenalysis - T qV[4]; - jacobiEigenanlysis(ATA11, ATA21, ATA22, ATA31, ATA32, ATA33, qV); - quatToMat3(qV, v11, v12, v13, v21, v22, v23, v31, v32, v33); - - T b11, b12, b13; - T b21, b22, b23; - T b31, b32, b33; - multAB(a11, - a12, - a13, - a21, - a22, - a23, - a31, - a32, - a33, - v11, - v12, - v13, - v21, - v22, - v23, - v31, - v32, - v33, - b11, - b12, - b13, - b21, - b22, - b23, - b31, - b32, - b33); - - // sort singular values and find V - sortSingularValues(b11, - b12, - b13, - b21, - b22, - b23, - b31, - 
b32, - b33, - v11, - v12, - v13, - v21, - v22, - v23, - v31, - v32, - v33); - - // QR decomposition - QRDecomposition(b11, - b12, - b13, - b21, - b22, - b23, - b31, - b32, - b33, - u11, - u12, - u13, - u21, - u22, - u23, - u31, - u32, - u33, - s11, - s12, - s13, - s21, - s22, - s23, - s31, - s32, - s33); -} -} // namespace detail - - -template -__host__ __device__ __forceinline__ SVD singular_value_decomposition( - const rxmesh::mat3x3& mat) -{ - - SVD ret; - - svd(mat[0][0], - mat[0][1], - mat[0][2], - mat[1][0], - mat[1][1], - mat[1][2], - mat[2][0], - mat[2][1], - mat[2][2], - // output U - ret.U[0][0], - ret.U[0][1], - ret.U[0][2], - ret.U[1][0], - ret.U[1][1], - ret.U[1][2], - ret.U[2][0], - ret.U[2][1], - ret.U[2][2], - // output S - ret.S[0][0], - ret.S[0][1], - ret.S[0][2], - ret.S[1][0], - ret.S[1][1], - ret.S[1][2], - ret.S[2][0], - ret.S[2][1], - ret.S[2][2], - // output V - ret.V[0][0], - ret.V[0][1], - ret.V[0][2], - ret.V[1][0], - ret.V[1][1], - ret.V[1][2], - ret.V[2][0], - ret.V[2][1], - ret.V[2][2]); - - return ret; -} \ No newline at end of file diff --git a/apps/XPBD/xpbd.cu b/apps/XPBD/xpbd.cu index 7365e28f..9e7af048 100644 --- a/apps/XPBD/xpbd.cu +++ b/apps/XPBD/xpbd.cu @@ -100,10 +100,7 @@ void __global__ solve_bending(const Context context, const float bending_compliance, const float bending_relaxation, const float dt2) -{ - /* - * - */ +{ auto solve = [&](const EdgeHandle& eh, const VertexIterator& iter) { // iter[0] and iter[2] are the edge two vertices // iter[1] and iter[3] are the two opposite vertices @@ -231,7 +228,6 @@ int main(int argc, char** argv) auto la_s = rx.add_edge_attribute("la_s", 1); auto la_b = rx.add_edge_attribute("la_b", 1); - // initialize rx.for_each_vertex( DEVICE, diff --git a/input/cloth.obj b/input/cloth.obj index cd39dee1..5800c103 100644 --- a/input/cloth.obj +++ b/input/cloth.obj @@ -1,2583 +1,2582 @@ -#taken from https://github.com/taichi-dev/meshtaichi/tree/main/xpbd_cloth -v 0.25 0.25 0.75 -v 0.25 0.75 
0.75 -v 0.2672413793103448 0.25 0.75 -v 0.2672413793103448 0.75 0.75 -v 0.28448275862068967 0.25 0.75 -v 0.28448275862068967 0.75 0.75 -v 0.3017241379310345 0.25 0.75 -v 0.3017241379310345 0.75 0.75 -v 0.31896551724137934 0.25 0.75 -v 0.31896551724137934 0.75 0.75 -v 0.33620689655172414 0.25 0.75 -v 0.33620689655172414 0.75 0.75 -v 0.35344827586206895 0.25 0.75 -v 0.35344827586206895 0.75 0.75 -v 0.3706896551724138 0.25 0.75 -v 0.3706896551724138 0.75 0.75 -v 0.3879310344827586 0.25 0.75 -v 0.3879310344827586 0.75 0.75 -v 0.4051724137931034 0.25 0.75 -v 0.4051724137931034 0.75 0.75 -v 0.4224137931034483 0.25 0.75 -v 0.4224137931034483 0.75 0.75 -v 0.4396551724137931 0.25 0.75 -v 0.4396551724137931 0.75 0.75 -v 0.4568965517241379 0.25 0.75 -v 0.4568965517241379 0.75 0.75 -v 0.47413793103448276 0.25 0.75 -v 0.47413793103448276 0.75 0.75 -v 0.49137931034482757 0.25 0.75 -v 0.49137931034482757 0.75 0.75 -v 0.5086206896551724 0.25 0.75 -v 0.5086206896551724 0.75 0.75 -v 0.5258620689655172 0.25 0.75 -v 0.5258620689655172 0.75 0.75 -v 0.5431034482758621 0.25 0.75 -v 0.5431034482758621 0.75 0.75 -v 0.5603448275862069 0.25 0.75 -v 0.5603448275862069 0.75 0.75 -v 0.5775862068965517 0.25 0.75 -v 0.5775862068965517 0.75 0.75 -v 0.5948275862068966 0.25 0.75 -v 0.5948275862068966 0.75 0.75 -v 0.6120689655172413 0.25 0.75 -v 0.6120689655172413 0.75 0.75 -v 0.6293103448275862 0.25 0.75 -v 0.6293103448275862 0.75 0.75 -v 0.646551724137931 0.25 0.75 -v 0.646551724137931 0.75 0.75 -v 0.6637931034482758 0.25 0.75 -v 0.6637931034482758 0.75 0.75 -v 0.6810344827586207 0.25 0.75 -v 0.6810344827586207 0.75 0.75 -v 0.6982758620689655 0.25 0.75 -v 0.6982758620689655 0.75 0.75 -v 0.7155172413793103 0.25 0.75 -v 0.7155172413793103 0.75 0.75 -v 0.7327586206896551 0.25 0.75 -v 0.7327586206896551 0.75 0.75 -v 0.75 0.25 0.75 -v 0.75 0.75 0.75 -v 0.25 0.2672413793103448 0.75 -v 0.75 0.2672413793103448 0.75 -v 0.25 0.28448275862068967 0.75 -v 0.75 0.28448275862068967 0.75 -v 0.25 0.3017241379310345 
0.75 -v 0.75 0.3017241379310345 0.75 -v 0.25 0.31896551724137934 0.75 -v 0.75 0.31896551724137934 0.75 -v 0.25 0.33620689655172414 0.75 -v 0.75 0.33620689655172414 0.75 -v 0.25 0.35344827586206895 0.75 -v 0.75 0.35344827586206895 0.75 -v 0.25 0.3706896551724138 0.75 -v 0.75 0.3706896551724138 0.75 -v 0.25 0.3879310344827586 0.75 -v 0.75 0.3879310344827586 0.75 -v 0.25 0.4051724137931034 0.75 -v 0.75 0.4051724137931034 0.75 -v 0.25 0.4224137931034483 0.75 -v 0.75 0.4224137931034483 0.75 -v 0.25 0.4396551724137931 0.75 -v 0.75 0.4396551724137931 0.75 -v 0.25 0.4568965517241379 0.75 -v 0.75 0.4568965517241379 0.75 -v 0.25 0.47413793103448276 0.75 -v 0.75 0.47413793103448276 0.75 -v 0.25 0.49137931034482757 0.75 -v 0.75 0.49137931034482757 0.75 -v 0.25 0.5086206896551724 0.75 -v 0.75 0.5086206896551724 0.75 -v 0.25 0.5258620689655172 0.75 -v 0.75 0.5258620689655172 0.75 -v 0.25 0.5431034482758621 0.75 -v 0.75 0.5431034482758621 0.75 -v 0.25 0.5603448275862069 0.75 -v 0.75 0.5603448275862069 0.75 -v 0.25 0.5775862068965517 0.75 -v 0.75 0.5775862068965517 0.75 -v 0.25 0.5948275862068966 0.75 -v 0.75 0.5948275862068966 0.75 -v 0.25 0.6120689655172413 0.75 -v 0.75 0.6120689655172413 0.75 -v 0.25 0.6293103448275862 0.75 -v 0.75 0.6293103448275862 0.75 -v 0.25 0.646551724137931 0.75 -v 0.75 0.646551724137931 0.75 -v 0.25 0.6637931034482758 0.75 -v 0.75 0.6637931034482758 0.75 -v 0.25 0.6810344827586207 0.75 -v 0.75 0.6810344827586207 0.75 -v 0.25 0.6982758620689655 0.75 -v 0.75 0.6982758620689655 0.75 -v 0.25 0.7155172413793103 0.75 -v 0.75 0.7155172413793103 0.75 -v 0.25 0.7327586206896551 0.75 -v 0.75 0.7327586206896551 0.75 -v 0.5244067519636624 0.6075946831862098 0.75 -v 0.551381688035822 0.5224415914984484 0.75 -v 0.46182739966945235 0.5729470565333281 0.75 -v 0.46879360563134626 0.6958865003910399 0.75 -v 0.7318313802505146 0.44172075941288885 0.75 -v 0.6458625190413323 0.5144474598764522 0.75 -v 0.5340222805469661 0.7127983191463305 0.75 -v 0.28551802909894347 
0.29356464985077035 0.75 -v 0.26010919872016286 0.666309922773969 0.75 -v 0.6390783754749252 0.6850060741234096 0.75 -v 0.739309171116382 0.6495792821083618 0.75 -v 0.4807396811264659 0.6402645881432277 0.75 -v 0.3091372129344666 0.569960510663762 0.75 -v 0.3216766437045232 0.722334458524792 0.75 -v 0.5109241608750359 0.4573309699952618 0.75 -v 0.3822778060523135 0.6371168447171083 0.75 -v 0.4780751661082743 0.5342169744343243 0.75 -v 0.25939490021817757 0.5588177485379385 0.75 -v 0.5560478613612108 0.5584669984373785 0.75 -v 0.7218740392573121 0.5909101495517417 0.75 -v 0.429753950286893 0.4685159768996707 0.75 -v 0.5988155979636325 0.2801127358146349 0.75 -v 0.5833833577228338 0.5853189348090797 0.75 -v 0.35519128053692045 0.31446314882742665 0.75 -v 0.40771417546209193 0.4318553854713113 0.75 -v 0.5350983852089398 0.4693007567311602 0.75 -v 0.7441869190296131 0.30102240537401403 0.75 -v 0.35443837804741735 0.33065475894249813 0.75 -v 0.5765541627326992 0.37664580126989106 0.75 -v 0.48315538642815314 0.37221279600080137 0.75 -v 0.32948479182275986 0.30518757058215257 0.75 -v 0.5781647947326367 0.3190914756743069 0.75 -v 0.34829118084002675 0.43436258533048205 0.75 -v 0.6604966149239675 0.29855063789653064 0.75 -v 0.668972453749402 0.29804920394698153 0.75 -v 0.7382297325066979 0.4843256008238508 0.75 -v 0.7383805440951685 0.552422759872523 0.75 -v 0.6196317896991508 0.26959389612716034 0.75 -v 0.3914034812882048 0.31009828060658445 0.75 -v 0.39807009876107247 0.309363859477122 0.75 -v 0.408991589696988 0.457131497257335 0.75 -v 0.2820737481743922 0.5962360596850099 0.75 -v 0.5333007271032876 0.3826947454697227 0.75 -v 0.5116240267333498 0.29697025537922084 0.75 -v 0.5379732477780896 0.714648098788107 0.75 -v 0.40928447622566183 0.5837051899818408 0.75 -v 0.3158989312021961 0.6081636020592828 0.75 -v 0.39470304647360055 0.3415956810035584 0.75 -v 0.5432564674050415 0.2600537730937468 0.75 -v 0.6644700146086815 0.25234773809627353 0.75 -v 0.588908268398115 
0.3850039865960824 0.75 -v 0.6175970110612974 0.7310942725587191 0.75 -v 0.374376571759979 0.5380786672089184 0.75 -v 0.5460209656359195 0.5361259528954367 0.75 -v 0.36154081632030916 0.7263745057584925 0.75 -v 0.4735626893088137 0.673204336235564 0.75 -v 0.5997396376587522 0.39871847542756683 0.75 -v 0.6568989098512386 0.4482528704234923 0.75 -v 0.6905515985555808 0.5406364363179293 0.75 -v 0.6908676809274263 0.596265795038883 0.75 -v 0.6126271399098202 0.5006621909633511 0.75 -v 0.7280418173616119 0.5719950996148186 0.75 -v 0.46192752427908984 0.5531966070639622 0.75 -v 0.25959659915466676 0.40078740833727466 0.75 -v 0.5800867687463425 0.39503880360522203 0.75 -v 0.5590077144994208 0.46438435047288307 0.75 -v 0.3177370321112251 0.3991411629780154 0.75 -v 0.5349824553506324 0.5454363806240866 0.75 -v 0.5371626244247893 0.5766004099285669 0.75 -v 0.5760516350008444 0.465709217716987 0.75 -v 0.6982732979255315 0.43378093502394827 0.75 -v 0.4679324626328134 0.6959616775078361 0.75 -v 0.6530969945230429 0.6019442917701832 0.75 -v 0.30011344365615056 0.7097413068723368 0.75 -v 0.6071206497745557 0.7494235032839333 0.75 -v 0.3247241523289969 0.6840630286841072 0.75 -v 0.3312464673381874 0.5577797821419221 0.75 -v 0.31190999142472076 0.6740041146611172 0.75 -v 0.6536594793625053 0.5345503693072966 0.75 -v 0.45359164861299983 0.284583497727569 0.75 -v 0.5987143865722818 0.4767713413390344 0.75 -v 0.611027799735174 0.6831911629643146 0.75 -v 0.7377607525014429 0.6779016711963055 0.75 -v 0.255857042092501 0.42998903223918195 0.75 -v 0.614995281212029 0.33581483863072026 0.75 -v 0.5105183031020646 0.2771689941696268 0.75 -v 0.34999826244820004 0.259260897230307 0.75 -v 0.6468488516787103 0.36196234403019006 0.75 -v 0.42267584034845135 0.7140406467327954 0.75 -v 0.6022072009617664 0.2659194647656539 0.75 -v 0.3323470782489564 0.5607392007498817 0.75 -v 0.5386142943020837 0.3689464106872543 0.75 -v 0.7171069989623968 0.556982977982948 0.75 -v 0.5178164015124791 
0.5449549881772855 0.75 -v 0.6150610147583848 0.40597249773980093 0.75 -v 0.44911053110804594 0.3549218744875611 0.75 -v 0.3430965029401681 0.7221861949919668 0.75 -v 0.6197753975246438 0.49522940430878354 0.75 -v 0.3637073139866616 0.37717824088519647 0.75 -v 0.2790145801619378 0.4672083127790604 0.75 -v 0.4058979409970513 0.5981717444077297 0.75 -v 0.43887591964624045 0.3398018387798174 0.75 -v 0.2623393641956656 0.2836248157316243 0.75 -v 0.5896963867492837 0.47684842227802265 0.75 -v 0.5182896055543611 0.6983356465201711 0.75 -v 0.7451694736983522 0.35844849219923697 0.75 -v 0.5815391015500504 0.3816611883685753 0.75 -v 0.26032549973286434 0.6291893269180707 0.75 -v 0.4100085754112339 0.441731947085949 0.75 -v 0.5441585567768028 0.6655242276180953 0.75 -v 0.5644909217955744 0.6863253277236976 0.75 -v 0.3867710174078179 0.6490234169562819 0.75 -v 0.3428179721529761 0.7263958284859723 0.75 -v 0.5937441381939077 0.3577538385567792 0.75 -v 0.7236852952444621 0.6154279033850789 0.75 -v 0.3769708212975129 0.356655988683741 0.75 -v 0.5091003569653316 0.2628313590272658 0.75 -v 0.3537350377205547 0.46234273437575313 0.75 -v 0.4370849901671128 0.48178771218240535 0.75 -v 0.38881435314736595 0.5433921732290844 0.75 -v 0.6819278029616157 0.30876592798101654 0.75 -v 0.508689553577057 0.31603405317257666 0.75 -v 0.6084298405962969 0.4480298514036469 0.75 -v 0.5327106559292545 0.3416399181070393 0.75 -v 0.3224238796716886 0.4940281403244773 0.75 -v 0.4278063689249778 0.7202159726264066 0.75 -v 0.6326626269034826 0.6243318099252737 0.75 -v 0.7018598698729668 0.2917112177210093 0.75 -v 0.5260962349612033 0.5422380344778844 0.75 -v 0.7309681892736145 0.3960737633962744 0.75 -v 0.3704143899577234 0.3001469711327489 0.75 -v 0.2582148147957371 0.7147646583960953 0.75 -v 0.584958273295455 0.6425764560115689 0.75 -v 0.39086505287697454 0.5432050830931634 0.75 -v 0.28197763306049056 0.49281379796731145 0.75 -v 0.7387475698722235 0.6882526226582955 0.75 -v 0.4190794759184228 
0.7307850772707493 0.75 -v 0.36585081323560226 0.7246594112078407 0.75 -v 0.7206888523532493 0.6496012936761959 0.75 -v 0.5652239684333955 0.6871439833124735 0.75 -v 0.39651014225389836 0.674471777656459 0.75 -v 0.5589383459587619 0.25661842887944974 0.75 -v 0.4236167589661098 0.3240704304740825 0.75 -v 0.7409146949091265 0.48918515351999403 0.75 -v 0.49869568274933135 0.5697362581993618 0.75 -v 0.43429230306480876 0.31845013584279946 0.75 -v 0.6610588665971228 0.344923955951379 0.75 -v 0.505659491273228 0.36215851448736963 0.75 -v 0.298922242247017 0.6810957587108417 0.75 -v 0.7364597445115652 0.7304173290315001 0.75 -v 0.7032777496105895 0.6370236663493194 0.75 -v 0.41657257601432096 0.2905506949939984 0.75 -v 0.45362058570690367 0.36611707108547137 0.75 -v 0.3162438173789915 0.27671359089341263 0.75 -v 0.6127971821052893 0.2557137293125155 0.75 -v 0.6352903742513881 0.3234733227001875 0.75 -v 0.2897610412933779 0.2948015171193027 0.75 -v 0.5860239036769572 0.3726836049264224 0.75 -v 0.4602697333400492 0.5286843956619585 0.75 -v 0.6802755869143968 0.6135221313556641 0.75 -v 0.3851639526193573 0.3157413996455638 0.75 -v 0.27768716021059897 0.40079931724047124 0.75 -v 0.3810590746198391 0.4780702834002398 0.75 -v 0.5916406677738402 0.5978127228194285 0.75 -v 0.3917594232910833 0.43996347795006024 0.75 -v 0.3405754808684515 0.6442727561532593 0.75 -v 0.27842403821662015 0.5984986208624936 0.75 -v 0.6393476979705517 0.6387037809243765 0.75 -v 0.37971128217267747 0.4369065689662807 0.75 -v 0.5437998175981945 0.3864109512122335 0.75 -v 0.43542639960894436 0.3485271400928198 0.75 -v 0.4799279418780037 0.27230615062705704 0.75 -v 0.6498979422853091 0.28847822349331637 0.75 -v 0.509417574415763 0.40340504977259806 0.75 -v 0.5387714744156877 0.7297166704167126 0.75 -v 0.5727851222280019 0.26768121787774546 0.75 -v 0.4652012197540306 0.5050084261591251 0.75 -v 0.5180887473517259 0.5906962553019189 0.75 -v 0.38879804886588304 0.3144302827331601 0.75 -v 0.44633783827354717 
0.7282028613979744 0.75 -v 0.34356544587542237 0.7019919774641186 0.75 -v 0.5219029750386632 0.4784557108228829 0.75 -v 0.6910207051149448 0.47930198088429293 0.75 -v 0.6120838183057716 0.449512660851551 0.75 -v 0.7020221964504789 0.5950125100956136 0.75 -v 0.5998110271252584 0.41386020077855945 0.75 -v 0.6283893213684446 0.5680305277235707 0.75 -v 0.3700101366898548 0.3302694112426282 0.75 -v 0.6481957372586659 0.7295833015176112 0.75 -v 0.4790694136300214 0.5454920826618425 0.75 -v 0.6788613220967773 0.47861172667692853 0.75 -v 0.725937238416368 0.5378755810224363 0.75 -v 0.6603835603506575 0.7044218592063691 0.75 -v 0.6577619093842844 0.32970723172447797 0.75 -v 0.5644492195308501 0.44921712930983854 0.75 -v 0.28135647601167285 0.462016125944921 0.75 -v 0.37934203344470385 0.6745191542142555 0.75 -v 0.2666523132733481 0.7294913609317368 0.75 -v 0.4276844242359648 0.42835344520127144 0.75 -v 0.25816425134185395 0.34261616261809197 0.75 -v 0.45062975040180436 0.7146457086513569 0.75 -v 0.29980746511063566 0.7226507667395398 0.75 -v 0.6847442652733161 0.4770811984537759 0.75 -v 0.41335044088413003 0.3663720646395284 0.75 -v 0.5572323532384371 0.2665372957377528 0.75 -v 0.2578030322234141 0.4643978612491189 0.75 -v 0.2840370369873601 0.37597049412304645 0.75 -v 0.3605804576730419 0.37659559686142596 0.75 -v 0.3155276156076289 0.25601811144882713 0.75 -v 0.30774214856937404 0.5592401297563739 0.75 -v 0.7371281064090252 0.745172500780447 0.75 -v 0.4545270476865308 0.3314772130233027 0.75 -v 0.5693808786832646 0.49515267327436857 0.75 -v 0.7447048886422157 0.2826521035758901 0.75 -v 0.6416172191569065 0.39419924866574696 0.75 -v 0.370709310038287 0.581252285766338 0.75 -v 0.37303159249548223 0.5829295587795938 0.75 -v 0.5086542586011444 0.46204449421792465 0.75 -v 0.5273439043307095 0.39352575995981487 0.75 -v 0.6032873531364895 0.457428434666782 0.75 -v 0.43027278024294613 0.6643284572778689 0.75 -v 0.712483455976596 0.27300365544364846 0.75 -v 0.36631349641488276 
0.4242596847462816 0.75 -v 0.6574832396851237 0.7427457138216488 0.75 -v 0.7344858523351758 0.7024741727749635 0.75 -v 0.39827813253201494 0.746005621707237 0.75 -v 0.3747100205282256 0.3029530774411161 0.75 -v 0.725476305527697 0.36671012773404815 0.75 -v 0.5948841325388752 0.27917817949029433 0.75 -v 0.615354549563738 0.6908601061669198 0.75 -v 0.38621844773298125 0.43952844803871427 0.75 -v 0.43714809166045804 0.6243941287700665 0.75 -v 0.3689036212695194 0.3359265495238215 0.75 -v 0.47464582434386904 0.40223420368865975 0.75 -v 0.6695945611293261 0.3688709130078194 0.75 -v 0.5011947287446308 0.7212917998489652 0.75 -v 0.5669988488723303 0.6836447027312325 0.75 -v 0.7201048446773837 0.6253824309431759 0.75 -v 0.5997875301123756 0.7339827833021135 0.75 -v 0.7472003948238397 0.4759108413348798 0.75 -v 0.2854348890921042 0.3963970157202594 0.75 -v 0.32617735284386523 0.4587431873980059 0.75 -v 0.315644664236628 0.5520589020104409 0.75 -v 0.44140402957892705 0.6976929421441049 0.75 -v 0.7338973358992509 0.5234424508347111 0.75 -v 0.3874117849337983 0.5461152093809184 0.75 -v 0.6983805791122049 0.45336667291787414 0.75 -v 0.5260391383459855 0.38582638380307294 0.75 -v 0.4777220747250135 0.4508567676897993 0.75 -v 0.3742067325414855 0.5029331919126542 0.75 -v 0.4051904129899057 0.43651743194037373 0.75 -v 0.5124852211271321 0.6252975114644937 0.75 -v 0.41675373289563766 0.7120793833103818 0.75 -v 0.6811592734179512 0.27434514798776427 0.75 -v 0.3768212621284114 0.47306775632960096 0.75 -v 0.30231394437123704 0.42423799451674854 0.75 -v 0.6200487628088412 0.5902572405714129 0.75 -v 0.5611922142830024 0.6052642013611729 0.75 -v 0.3524618434798509 0.42084905743236606 0.75 -v 0.5881212411387314 0.6896173815156635 0.75 -v 0.5218390269140476 0.3913498254727683 0.75 -v 0.2651176290029913 0.6051684144871067 0.75 -v 0.2539420517542202 0.43633953491049776 0.75 -v 0.5152686072813909 0.7110557308835965 0.75 -v 0.29474727251645005 0.45297116098414186 0.75 -v 0.2621565998550789 
0.42130549217079516 0.75 -v 0.5611155294198975 0.3895339741142992 0.75 -v 0.35487497482781755 0.3078516166635468 0.75 -v 0.5385701220101708 0.597635002952343 0.75 -v 0.5859785702979112 0.7244305103602537 0.75 -v 0.25135160694675135 0.573598326947018 0.75 -v 0.5501961185488198 0.5443698049851441 0.75 -v 0.7313851599201212 0.25843583668501985 0.75 -v 0.598241215350725 0.6568393248509317 0.75 -v 0.5049035983107921 0.4169824347984046 0.75 -v 0.6454200816137025 0.2986214628162123 0.75 -v 0.47101781886496263 0.5099761872854192 0.75 -v 0.5969782054672738 0.29544286601620473 0.75 -v 0.3638797507689305 0.4551507813450628 0.75 -v 0.5616473365100654 0.6934803906087088 0.75 -v 0.5594130841206882 0.3167307354674672 0.75 -v 0.7402900663936411 0.6858928673777465 0.75 -v 0.5013603805726621 0.7111739908983166 0.75 -v 0.5206903968785679 0.7116530339445816 0.75 -v 0.6649486843016716 0.7341432051471486 0.75 -v 0.7098914053890792 0.26801690871428474 0.75 -v 0.33738600208047 0.4445673385505929 0.75 -v 0.7260713486477104 0.4000144597379648 0.75 -v 0.3302338219438005 0.69315233304328 0.75 -v 0.47319720774160146 0.703937797177163 0.75 -v 0.33011523316007163 0.5805587557540497 0.75 -v 0.4701318764147459 0.2882433845151427 0.75 -v 0.5982315723262503 0.37369937776957685 0.75 -v 0.26980776128975886 0.27997214912478663 0.75 -v 0.28053926853339367 0.7038664787425197 0.75 -v 0.6199419589145505 0.6990311786068675 0.75 -v 0.5862911556482606 0.5144699645154416 0.75 -v 0.40222318217368913 0.7489811256643367 0.75 -v 0.4310945294696947 0.48532447460695477 0.75 -v 0.4391225874617309 0.7397634646677294 0.75 -v 0.3373291926975029 0.41399400045403983 0.75 -v 0.5901743330075008 0.2816038091693153 0.75 -v 0.5536246870057705 0.48882325143820804 0.75 -v 0.39199998838105055 0.369206640462029 0.75 -v 0.5072563716493783 0.43396379026852067 0.75 -v 0.4782599456313277 0.41873869088211996 0.75 -v 0.7352468467979888 0.316719715872802 0.75 -v 0.2984019765891871 0.42169586439545803 0.75 -v 0.5455134504352457 
0.5795882359250142 0.75 -v 0.448628373584021 0.7496389969610855 0.75 -v 0.4259464980965213 0.6107033339799762 0.75 -v 0.5687913472653965 0.6565269316237303 0.75 -v 0.73811283172691 0.6948968282227701 0.75 -v 0.6322809871788543 0.5991242389091453 0.75 -v 0.4177490848379498 0.3238427891033537 0.75 -v 0.2813180015299049 0.3709508521007424 0.75 -v 0.4661407405906493 0.5109981368149912 0.75 -v 0.6365417770274358 0.7293704615282965 0.75 -v 0.3086602401924055 0.3035020700968958 0.75 -v 0.5448473615067754 0.6226990369736465 0.75 -v 0.6740751901734925 0.7179160401083943 0.75 -v 0.741713121130321 0.44990084611226294 0.75 -v 0.44016759176378656 0.3239043383486362 0.75 -v 0.5924672193417797 0.5783809792204185 0.75 -v 0.6810312979256037 0.2986289973938203 0.75 -v 0.4988884539126709 0.5405409648360315 0.75 -v 0.3707785200199592 0.33451270306458064 0.75 -v 0.6797904182098107 0.2792674611177794 0.75 -v 0.48531045195903644 0.30791700065044264 0.75 -v 0.4785293806656837 0.7399811631711546 0.75 -v 0.4618531767277364 0.6785624587522836 0.75 -v 0.30865778209159694 0.38562603838093207 0.75 -v 0.45189637033366725 0.44990607000466537 0.75 -v 0.5856917393350766 0.42235906368775383 0.75 -v 0.6068834342050082 0.5695934496126962 0.75 -v 0.44958057262738654 0.46588006382715963 0.75 -v 0.5572638499051603 0.2850210950723223 0.75 -v 0.6612033691778452 0.5767105805568185 0.75 -v 0.6131712322089176 0.5184615005411952 0.75 -v 0.30523855549587237 0.45251780664847496 0.75 -v 0.45268679142427803 0.41052149502160845 0.75 -v 0.2649751624523747 0.6186271212982386 0.75 -v 0.30489222903125035 0.5531540665225425 0.75 -v 0.6016087482336079 0.5673931614668474 0.75 -v 0.7295711259889874 0.3016490775425693 0.75 -v 0.6835835795525995 0.2645951174244566 0.75 -v 0.517458427463542 0.4521218089696294 0.75 -v 0.5120919301968792 0.4325499385300049 0.75 -v 0.345283457470034 0.2595614487243449 0.75 -v 0.5090749068955871 0.6713884313424212 0.75 -v 0.4366079778723971 0.3614319090074901 0.75 -v 0.29026600173592204 
0.2926554615593517 0.75 -v 0.3606982231463861 0.3000070304607776 0.75 -v 0.382519849182241 0.28307473105847736 0.75 -v 0.2828024336049628 0.6781380898113909 0.75 -v 0.3310601303544166 0.529841202911724 0.75 -v 0.6367277722245153 0.4782047826695333 0.75 -v 0.32668443892967286 0.34979807106005717 0.75 -v 0.46649210314059036 0.5141170445892679 0.75 -v 0.42472014602426744 0.6407398001173307 0.75 -v 0.6255108244281993 0.713605903686559 0.75 -v 0.26447627451348027 0.6978456456051016 0.75 -v 0.4462843942310755 0.689186247689997 0.75 -v 0.5953923880782648 0.7436743785369841 0.75 -v 0.629641225858334 0.4322723129983933 0.75 -v 0.500531586417376 0.4381945775971756 0.75 -v 0.43245591801061906 0.3804522496905299 0.75 -v 0.4979851476867348 0.5908699725346807 0.75 -v 0.38867013565262176 0.51218990553861 0.75 -v 0.3086901470852786 0.32992264342709565 0.75 -v 0.2734031773560944 0.7353657213853164 0.75 -v 0.25193017575513055 0.33928998402882815 0.75 -v 0.5564333765584961 0.29068479942665265 0.75 -v 0.6909482515484162 0.609810078921144 0.75 -v 0.7331949857189467 0.5038177736203824 0.75 -v 0.4002018415792436 0.5247502863976357 0.75 -v 0.7154093586489866 0.5103807186209303 0.75 -v 0.3836035159311593 0.6886993945870599 0.75 -v 0.43595937425623066 0.25069167499949957 0.75 -v 0.37384251124615797 0.4091167545885312 0.75 -v 0.679388734115951 0.4792515835332225 0.75 -v 0.4722936439056504 0.4180511331999437 0.75 -v 0.6903390615235399 0.7225133884701957 0.75 -v 0.7459451645773147 0.4383706334804934 0.75 -v 0.7330737228135857 0.6459397848154507 0.75 -v 0.5878445738221334 0.3724447397100497 0.75 -v 0.3582286304721049 0.33302391226062283 0.75 -v 0.7113783051126827 0.39703833119158305 0.75 -v 0.4765471227244392 0.49697891699361174 0.75 -v 0.6390857977251271 0.672117480776512 0.75 -v 0.3195363505743064 0.46345218010553685 0.75 -v 0.6714274439177286 0.6590166528779192 0.75 -v 0.3012068792262082 0.3281916744339815 0.75 -v 0.4020993457997039 0.28767953454167017 0.75 -v 0.46233150142029644 
0.30380885257479096 0.75 -v 0.5341087968349225 0.37327846990557806 0.75 -v 0.5482165326748114 0.3087628214518188 0.75 -v 0.7379419342092667 0.7162806019286703 0.75 -v 0.4458984692823329 0.3710892970630427 0.75 -v 0.3751991064267864 0.491696767601196 0.75 -v 0.2699964009503585 0.5698525530375635 0.75 -v 0.4541514541698724 0.43870328629444366 0.75 -v 0.6546824857445992 0.604517730091645 0.75 -v 0.7271669076963461 0.42596812024784536 0.75 -v 0.6987713823247028 0.6349835931250445 0.75 -v 0.4287123257973565 0.5608327182266288 0.75 -v 0.3942849788258478 0.6871999585374211 0.75 -v 0.3062136586061556 0.3562171806470205 0.75 -v 0.34151664603996057 0.4515130012021443 0.75 -v 0.6226164800160645 0.5134537245260902 0.75 -v 0.4938381617691038 0.2502729824484978 0.75 -v 0.46270086267752736 0.2817768874180792 0.75 -v 0.3541266260607419 0.7161969694802472 0.75 -v 0.3576991021716191 0.6791688193171312 0.75 -v 0.6514466857806671 0.3295731184711214 0.75 -v 0.5528559786351395 0.30783093595250666 0.75 -v 0.6139440791847557 0.5687311386861034 0.75 -v 0.6559692808455097 0.48969227469459403 0.75 -v 0.7074315439166914 0.27467447339421985 0.75 -v 0.39644428251350733 0.6075262987325836 0.75 -v 0.45905460587400043 0.3364756771355782 0.75 -v 0.303605372714273 0.6586695557308107 0.75 -v 0.4865714892328221 0.6911418359595537 0.75 -v 0.616644567158363 0.4548631028153718 0.75 -v 0.43675550707784183 0.5078191733256259 0.75 -v 0.6945299765948643 0.6186392898570839 0.75 -v 0.2525764821345116 0.5970789256845628 0.75 -v 0.7097537034529103 0.6052278797522458 0.75 -v 0.33850289078374796 0.49175906371372935 0.75 -v 0.3201580089617097 0.42949763916981604 0.75 -v 0.7185585209702589 0.7116526537793542 0.75 -v 0.39141842608804145 0.4198155220830996 0.75 -v 0.550106434065647 0.73159864763019 0.75 -v 0.3239006670326952 0.37845832184333456 0.75 -v 0.6867784136453857 0.49594611585417225 0.75 -v 0.6994805461135158 0.34275894876158813 0.75 -v 0.5163342937356803 0.4131348163246862 0.75 -v 0.408271279946238 
0.47343848197309957 0.75 -v 0.4665387245506342 0.4286734398389772 0.75 -v 0.7074853851578093 0.6158720927164464 0.75 -v 0.6137734956657648 0.3949567247959777 0.75 -v 0.5388547121584202 0.639589716650917 0.75 -v 0.6477951842716065 0.42226523037715613 0.75 -v 0.635436378284324 0.6179469484038664 0.75 -v 0.32075324281095013 0.6829727342832386 0.75 -v 0.4706607350902054 0.49320522444433273 0.75 -v 0.47418458944899866 0.5339230007387538 0.75 -v 0.5605846236835273 0.4990897828814717 0.75 -v 0.6833942716295478 0.5638673780976422 0.75 -v 0.45071397465276 0.4583458784543555 0.75 -v 0.6554193075644756 0.42409597137326005 0.75 -v 0.3557273978912068 0.2796915940028946 0.75 -v 0.6880134239602871 0.709273225595175 0.75 -v 0.310060091081738 0.41723687074805743 0.75 -v 0.33768603475762193 0.30794923441293987 0.75 -v 0.6999333715000151 0.27843862957267773 0.75 -v 0.7402428317345033 0.2982254303486921 0.75 -v 0.6817353245967929 0.5332530534945814 0.75 -v 0.4339587439089367 0.42117118831257894 0.75 -v 0.6286820716188544 0.4072866475021436 0.75 -v 0.5786594583085709 0.50866304175804 0.75 -v 0.49248282257903525 0.7005810853245809 0.75 -v 0.5273225293101298 0.6634308015243475 0.75 -v 0.6127867670507448 0.2692786230294992 0.75 -v 0.6365550262527095 0.35843512504552033 0.75 -v 0.7015748234257857 0.271462095304416 0.75 -v 0.4165360172371594 0.299866473617377 0.75 -v 0.4877945585424219 0.6600112179348758 0.75 -v 0.39909367983153204 0.3254674486555208 0.75 -v 0.4151335178484496 0.6569400709603179 0.75 -v 0.32019197889967344 0.3636812245387509 0.75 -v 0.28442598224668697 0.6028550219948039 0.75 -v 0.4476166217681684 0.405419988571658 0.75 -v 0.6093131951705759 0.4179887711701276 0.75 -v 0.613885636607209 0.6575996976571568 0.75 -v 0.3588314217288692 0.7369093484229916 0.75 -v 0.3311789739563334 0.3954204533283713 0.75 -v 0.3398976454167708 0.42275282817816723 0.75 -v 0.4900304439103778 0.5110879345010595 0.75 -v 0.676803021148136 0.6947239544079333 0.75 -v 0.3600519303904532 
0.5614470160928762 0.75 -v 0.3057480286493578 0.47948493009103416 0.75 -v 0.41116676902330906 0.4082503727268032 0.75 -v 0.4912921208560505 0.6149138177646338 0.75 -v 0.28459132939619475 0.6895866688437022 0.75 -v 0.6174068873152805 0.33824969444453007 0.75 -v 0.7195804545436852 0.5031561120116943 0.75 -v 0.7499042890584826 0.3486297371503688 0.75 -v 0.5174540991916007 0.3951240212799642 0.75 -v 0.4020867786946233 0.545532690416995 0.75 -v 0.7108595334354166 0.6526319277895876 0.75 -v 0.6119706992506629 0.5295868910508511 0.75 -v 0.7111492518337239 0.49618070334544206 0.75 -v 0.6869160891673591 0.6669908219383158 0.75 -v 0.3569176734007933 0.6356127314829552 0.75 -v 0.2560855784716294 0.41141476877404376 0.75 -v 0.3647837223459749 0.5034314792436567 0.75 -v 0.6184265808403928 0.29883818372386206 0.75 -v 0.5074611009683843 0.7192060108471928 0.75 -v 0.36432327549147275 0.5885705720557121 0.75 -v 0.5464401353905788 0.25503184782804666 0.75 -v 0.4879130979286114 0.6043851954705244 0.75 -v 0.2719877160120453 0.6897607415118651 0.75 -v 0.5100407083178365 0.26533052416285896 0.75 -v 0.3622068059604201 0.7268378482136565 0.75 -v 0.5411598665260277 0.30373628388440227 0.75 -v 0.39377225114028075 0.4783518129302421 0.75 -v 0.26047503463386523 0.4558077568068794 0.75 -v 0.49472931771734907 0.3718389376406161 0.75 -v 0.5443195001459933 0.6266200597960885 0.75 -v 0.36791711205281774 0.5602499501399938 0.75 -v 0.5698111214818709 0.7242701505644205 0.75 -v 0.6391380836482219 0.674172634895003 0.75 -v 0.49520995421813757 0.3426742934896914 0.75 -v 0.7479076464798176 0.3146778805193498 0.75 -v 0.4857286596771327 0.2840465496210533 0.75 -v 0.7219254286754098 0.7324624704242197 0.75 -v 0.6096945310019868 0.42499642181508446 0.75 -v 0.37719120055881217 0.3826516622685237 0.75 -v 0.31364701271003737 0.5129044765383615 0.75 -v 0.32090863785073875 0.40836533327672814 0.75 -v 0.5633532379795525 0.6137718047953749 0.75 -v 0.2621363523110306 0.46505799218836996 0.75 -v 0.5760622974210695 
0.6766229880792565 0.75 -v 0.4876623911060412 0.7346029358588426 0.75 -v 0.38281627377072364 0.25675435331335944 0.75 -v 0.4918764323508681 0.378056897510009 0.75 -v 0.6618588360115847 0.36638633609055604 0.75 -v 0.40531460914565315 0.6456137155095562 0.75 -v 0.60757162601268 0.5290256183286313 0.75 -v 0.6024740309613797 0.45931843178501325 0.75 -v 0.2526550238073339 0.2556775642560963 0.75 -v 0.5056108937868433 0.2916454898594996 0.75 -v 0.2755377400846284 0.7327583195687366 0.75 -v 0.6795013198290293 0.32601361360478476 0.75 -v 0.25033210929510225 0.7208338976948783 0.75 -v 0.38916264916005716 0.34294880142774875 0.75 -v 0.5957540539157676 0.3044518694206631 0.75 -v 0.38232479900140015 0.7375473401060233 0.75 -v 0.5697313872370051 0.5103388957412768 0.75 -v 0.44895930741746887 0.6372504774421011 0.75 -v 0.32047873826184686 0.7336689010184911 0.75 -v 0.6805615040328934 0.5588284912846216 0.75 -v 0.2714530952012791 0.6004278247238688 0.75 -v 0.7066421704427404 0.512288533739313 0.75 -v 0.4271124109136314 0.3101386724897348 0.75 -v 0.6274505520687841 0.6925109256059031 0.75 -v 0.3001258720379243 0.629492277376171 0.75 -v 0.25853024312954875 0.7335274590386098 0.75 -v 0.5575290103175481 0.5262195294958165 0.75 -v 0.3979749167944639 0.7146458357848917 0.75 -v 0.3829528136683955 0.6640733066083475 0.75 -v 0.742554339668566 0.6416983227574029 0.75 -v 0.5094949601932159 0.28303713192583596 0.75 -v 0.4862068945855365 0.4691279734858518 0.75 -v 0.3513980205934976 0.4617938183571522 0.75 -v 0.42887894204016935 0.33184213057582423 0.75 -v 0.4706870716662431 0.3813999781622445 0.75 -v 0.5110312103411322 0.2675800298580971 0.75 -v 0.703115709893642 0.6581821527599281 0.75 -v 0.5262906662552522 0.6759042913786215 0.75 -v 0.7311975369054002 0.3052611470263303 0.75 -v 0.5654159042049018 0.7489970004678292 0.75 -v 0.743944584666763 0.5516614961453723 0.75 -v 0.31401043522540834 0.5415964154952642 0.75 -v 0.25103231778721086 0.3494556673334298 0.75 -v 0.72806157977998 
0.41522028630142194 0.75 -v 0.5691950528884743 0.39042974731612134 0.75 -v 0.7239109435557948 0.6142793649747433 0.75 -v 0.4148255787907063 0.6458807105901854 0.75 -v 0.3040827622381688 0.4461594700327297 0.75 -v 0.36060906386579616 0.5918632236380231 0.75 -v 0.3012231408859652 0.44851291614133704 0.75 -v 0.3883248651045948 0.5031714596619021 0.75 -v 0.42494884025193946 0.6032052888335055 0.75 -v 0.26228851215348303 0.5669934606529538 0.75 -v 0.36528564484691 0.38435451437842466 0.75 -v 0.6501278017917618 0.7277841969902104 0.75 -v 0.4082751050135433 0.6634026351845379 0.75 -v 0.30199541898561655 0.5669908265036916 0.75 -v 0.6255161497674594 0.32798896401722827 0.75 -v 0.4630011938347803 0.6963535821244908 0.75 -v 0.3017892317241861 0.2590481791007376 0.75 -v 0.5452926895239669 0.46776577026546673 0.75 -v 0.6493446244419514 0.7117277691020534 0.75 -v 0.39957682227190805 0.44420205858455264 0.75 -v 0.4931360431646574 0.5440757302176497 0.75 -v 0.7419269148408083 0.5986651254150257 0.75 -v 0.4447742536727844 0.38188384322523106 0.75 -v 0.722312859211062 0.3177742165542532 0.75 -v 0.610132926252379 0.7126975126462203 0.75 -v 0.5823327932672193 0.4615272200830606 0.75 -v 0.349495469962742 0.4337376611259636 0.75 -v 0.603435904710061 0.5747671120876484 0.75 -v 0.713988083299179 0.6834304568296874 0.75 -v 0.6580753761331841 0.7057254376753812 0.75 -v 0.3881685763685247 0.43476177006982475 0.75 -v 0.4399469518930087 0.5302252943548902 0.75 -v 0.5841091147713855 0.39335834152019716 0.75 -v 0.2597312336546502 0.44961119182895465 0.75 -v 0.40426397977720024 0.721092359509024 0.75 -v 0.6941325202720218 0.6801553391717098 0.75 -v 0.5764998804726216 0.42214458234946944 0.75 -v 0.5244246336996523 0.6576125203581864 0.75 -v 0.2993051843614055 0.6505374401276524 0.75 -v 0.27058989566406083 0.6582105156071778 0.75 -v 0.6537819020799367 0.2755036544143779 0.75 -v 0.5635803557309291 0.5012265371564486 0.75 -v 0.33490975158585734 0.3241894688386878 0.75 -v 0.6366295630335116 
0.5338463744184221 0.75 -v 0.7414995674582663 0.741123888644172 0.75 -v 0.7463334967321331 0.30930775918288295 0.75 -v 0.7191280684819812 0.3722848045102814 0.75 -v 0.479106129879306 0.628703277910863 0.75 -v 0.351810466038616 0.533155802777165 0.75 -v 0.34290837411299785 0.30236805334064665 0.75 -v 0.30827930621479926 0.4288195174241094 0.75 -v 0.25232741841859174 0.46242696060084476 0.75 -v 0.5820985525405745 0.45084409250199436 0.75 -v 0.29289730025991756 0.2813444310118203 0.75 -v 0.38905825638243086 0.3346563452718004 0.75 -v 0.7325474866089847 0.3256151124870425 0.75 -v 0.6527312187024983 0.5430539708705724 0.75 -v 0.5346434599921472 0.5060403579618923 0.75 -v 0.7358815380569755 0.4319223875458189 0.75 -v 0.6439578754769382 0.5276470537334799 0.75 -v 0.4478168338128631 0.7277329666528904 0.75 -v 0.5491579846823436 0.3094584710730068 0.75 -v 0.4587696004984283 0.6407908638408037 0.75 -v 0.5968735116196069 0.7081701649182871 0.75 -v 0.3796886920707601 0.6290968581769119 0.75 -v 0.47993760369726046 0.5368048734834077 0.75 -v 0.7275233405031415 0.7396431599945887 0.75 -v 0.6807954819627817 0.4295485438651481 0.75 -v 0.69385041786366 0.5693045888207064 0.75 -v 0.46499839023535655 0.26787134125024187 0.75 -v 0.635064062095791 0.5010527903620565 0.75 -v 0.6430942497325304 0.624011399671635 0.75 -v 0.646783684024176 0.4003255793483834 0.75 -v 0.6503992995338155 0.5244231642330837 0.75 -v 0.4866631002245716 0.5875629569103784 0.75 -v 0.26067934146917415 0.3011584079437971 0.75 -v 0.396088682583761 0.7414950549789199 0.75 -v 0.3198728894324775 0.41529815037960516 0.75 -v 0.2755265323459618 0.41563444014636314 0.75 -v 0.4101631432554158 0.7234035854722611 0.75 -v 0.6725770434978602 0.44138210962138574 0.75 -v 0.26238452901962434 0.6655155569976892 0.75 -v 0.5802680885722701 0.32618224182565486 0.75 -v 0.7480356355050937 0.3001167187120615 0.75 -v 0.6835572707968314 0.3971330820648445 0.75 -v 0.4676767331625785 0.6477282633881574 0.75 -v 0.5887541779970051 
0.7189321872220196 0.75 -v 0.5605701627301916 0.2989050807395264 0.75 -v 0.6921801816209752 0.6345777624942955 0.75 -v 0.605935225446661 0.2768667736768677 0.75 -v 0.448111372330872 0.3337179097297696 0.75 -v 0.6609519542039345 0.6002643114089314 0.75 -v 0.6915387986595676 0.733287553462082 0.75 -v 0.6373738070962494 0.7471165416103811 0.75 -v 0.5573849430721494 0.2685648019457239 0.75 -v 0.25712575757514794 0.42105193760189435 0.75 -v 0.6617358595337686 0.6830673531623654 0.75 -v 0.7304062644113389 0.2825607342744092 0.75 -v 0.27228555549383204 0.7066417981835169 0.75 -v 0.4025233491814308 0.5289937003048164 0.75 -v 0.741222441500355 0.4502242673679617 0.75 -v 0.5829356991549199 0.45043978181599265 0.75 -v 0.6340973322313899 0.5138573627864329 0.75 -v 0.36876156899317486 0.3856530506292331 0.75 -v 0.3790296062692036 0.5161601641292803 0.75 -v 0.6015945080078322 0.724639950021571 0.75 -v 0.59704368754149 0.6405964219811953 0.75 -v 0.3344630579322828 0.43703131250366284 0.75 -v 0.45689010992913304 0.5931901150002366 0.75 -v 0.3979459881884368 0.40164596070983305 0.75 -v 0.42794457732043406 0.6551510407716161 0.75 -v 0.5387950450216743 0.28763863991636723 0.75 -v 0.2891230498452344 0.43564347214124965 0.75 -v 0.6332955253030792 0.5943417132393403 0.75 -v 0.6039911773255933 0.6336050330270484 0.75 -v 0.3935763564431647 0.524128140971044 0.75 -v 0.5216763201583847 0.6198162505899747 0.75 -v 0.7284352845528232 0.38899497239773295 0.75 -v 0.6466408363785241 0.5799852742616312 0.75 -v 0.5401189354317301 0.637439889673701 0.75 -v 0.7220162332140496 0.2683457089347065 0.75 -v 0.32370005125661033 0.6281436157505579 0.75 -v 0.2918956764550423 0.5080618503705465 0.75 -v 0.3599303883208035 0.38714785192137574 0.75 -v 0.6009202414722081 0.2650963866343558 0.75 -v 0.6866597139509518 0.472239477676135 0.75 -v 0.5011966469535505 0.520023981846846 0.75 -v 0.5727721472897469 0.42242829331137405 0.75 -v 0.30055374528174567 0.4091894683330883 0.75 -v 0.33407105901141015 
0.528066589725237 0.75 -v 0.40901431548019324 0.7290335889967174 0.75 -v 0.7328671389901403 0.5600629402841207 0.75 -v 0.5587486338336429 0.7426892822835529 0.75 -v 0.693641575617593 0.63253497457907 0.75 -v 0.4067953058713506 0.4327695140590382 0.75 -v 0.3506333828804327 0.4935740634586873 0.75 -v 0.7451842610697899 0.7060754765057944 0.75 -v 0.30917471700973453 0.2625951446452531 0.75 -v 0.6993188342041331 0.5185850639743537 0.75 -v 0.3500949441042281 0.5868266347923177 0.75 -v 0.57211158896491 0.3110428034182336 0.75 -v 0.37980011674880976 0.280038982156629 0.75 -v 0.354930236935071 0.31615283742653144 0.75 -v 0.34661814623727805 0.5927335729565585 0.75 -v 0.27474987213053387 0.30092730762410824 0.75 -v 0.3170868190391438 0.408270560479067 0.75 -v 0.39937515542597046 0.3775318926583197 0.75 -v 0.6252683267162081 0.7490113938790504 0.75 -v 0.5169889618143979 0.722101358956971 0.75 -v 0.44830505606303234 0.3033412234013567 0.75 -v 0.45438691473491866 0.39806388670734666 0.75 -v 0.49670348114458907 0.5785218384582116 0.75 -v 0.4805251095762041 0.7175802560389128 0.75 -v 0.6923824111496899 0.6009887975830985 0.75 -v 0.49484245618227457 0.31584364090722844 0.75 -v 0.4485068333888167 0.6022007697005869 0.75 -v 0.3924427602768656 0.3019940387717102 0.75 -v 0.7039492287553141 0.6045254049077723 0.75 -v 0.5576382133175486 0.6462494528044582 0.75 -v 0.6678230187917378 0.49172949908551167 0.75 -v 0.69059412570472 0.7082095053505527 0.75 -v 0.3857755477123907 0.5537726798953971 0.75 -v 0.5132920144102346 0.5189728895759512 0.75 -v 0.718831546982281 0.4025943513465145 0.75 -v 0.7417169891172881 0.701065607402116 0.75 -v 0.4793614443067513 0.6587266318090831 0.75 -v 0.6345234971600703 0.5889474848092606 0.75 -v 0.40991694469157713 0.34822549591047697 0.75 -v 0.5857638483828791 0.6714866482005422 0.75 -v 0.25812639433868484 0.571401687654256 0.75 -v 0.4714365123111062 0.6990438775634917 0.75 -v 0.4107364654271026 0.4870924061318109 0.75 -v 0.5073835520175931 0.3202197606855265 
0.75 -v 0.6064461513494532 0.6652381725604798 0.75 -v 0.2789546384481301 0.39569441026845137 0.75 -v 0.26902234076752896 0.7282720523441212 0.75 -v 0.5835844103519118 0.7321002097273908 0.75 -v 0.5157471391656538 0.6510342619478069 0.75 -v 0.43720699193987 0.42690951628632046 0.75 -v 0.43913390855256634 0.5789310668632142 0.75 -v 0.4297265755108261 0.7001837258021129 0.75 -v 0.7416374325250776 0.26521325766850296 0.75 -v 0.34681164520124824 0.306124996293693 0.75 -v 0.27118202354572163 0.36387049667364324 0.75 -v 0.4733966601845406 0.6684951826443419 0.75 -v 0.36091201525499805 0.49697262783110063 0.75 -v 0.7148093697233773 0.5836073533933375 0.75 -v 0.6490395098779987 0.5254969850927431 0.75 -v 0.740233229420315 0.544331077340737 0.75 -v 0.27275535698129577 0.3489913999698974 0.75 -v 0.452386814478574 0.5506385862868881 0.75 -v 0.63596543370448 0.45654306295194313 0.75 -v 0.6050291525078444 0.6449347515545625 0.75 -v 0.4086300986249914 0.7396351200179092 0.75 -v 0.5748282476179178 0.6904990303930656 0.75 -v 0.5279688444043034 0.6208015535761618 0.75 -v 0.6352720308081827 0.7041241896172895 0.75 -v 0.32517487843568516 0.5291417120846242 0.75 -v 0.46418925655292814 0.7115795105868701 0.75 -v 0.3025473471244996 0.7412869443404564 0.75 -v 0.6877256620466481 0.28691314078212427 0.75 -v 0.4954831927702372 0.608779750106263 0.75 -v 0.6190757728455283 0.7032470626189604 0.75 -v 0.6499327177911555 0.40546519017204474 0.75 -v 0.4992173929312262 0.6008928814679271 0.75 -v 0.3192184206821757 0.34699539880009866 0.75 -v 0.4905212240549363 0.39912289935275636 0.75 -v 0.6812796248920474 0.543138660760309 0.75 -v 0.42433259885918384 0.674416512802205 0.75 -v 0.6524392242771194 0.7491774385496348 0.75 -v 0.6736538441668674 0.45722826740283984 0.75 -v 0.31374944088499424 0.670320454100239 0.75 -v 0.279878958980197 0.4251356036724271 0.75 -v 0.7098689903856369 0.7303832367225503 0.75 -v 0.5702822967606092 0.5943241584918568 0.75 -v 0.27122724436276124 0.507240170544774 0.75 -v 
0.5234340894031706 0.42005037306205556 0.75 -v 0.28429841737694217 0.364453799862197 0.75 -f 595 133 809 -f 133 748 809 -f 475 563 133 -f 133 563 748 -f 748 442 809 -f 306 703 748 -f 748 703 442 -f 563 306 748 -f 595 393 133 -f 393 475 133 -f 442 845 809 -f 809 845 595 -f 393 433 475 -f 714 275 541 -f 475 275 563 -f 563 179 306 -f 671 505 595 -f 595 505 393 -f 393 293 433 -f 505 293 393 -f 541 275 475 -f 261 210 442 -f 442 210 845 -f 261 442 703 -f 671 562 505 -f 505 562 293 -f 275 179 563 -f 306 261 703 -f 210 245 845 -f 275 873 179 -f 714 873 275 -f 834 261 119 -f 119 261 306 -f 210 184 245 -f 541 433 293 -f 541 475 433 -f 245 740 845 -f 845 298 595 -f 185 184 210 -f 245 170 740 -f 235 541 293 -f 119 306 179 -f 261 185 210 -f 862 119 873 -f 873 119 179 -f 298 671 595 -f 562 235 293 -f 740 298 845 -f 388 170 184 -f 184 170 245 -f 119 757 834 -f 135 388 184 -f 170 118 740 -f 419 142 298 -f 185 261 294 -f 170 666 118 -f 298 334 671 -f 757 484 834 -f 452 562 671 -f 452 235 562 -f 388 666 170 -f 419 298 740 -f 235 415 541 -f 606 522 714 -f 782 714 541 -f 714 522 873 -f 119 791 757 -f 365 452 671 -f 235 137 415 -f 298 131 334 -f 334 365 671 -f 142 131 298 -f 294 261 834 -f 484 294 834 -f 419 740 564 -f 522 862 873 -f 618 887 484 -f 564 740 118 -f 482 365 334 -f 862 791 119 -f 484 887 294 -f 185 135 184 -f 388 135 666 -f 365 566 452 -f 452 137 235 -f 724 118 656 -f 724 564 118 -f 618 484 757 -f 142 463 131 -f 131 482 334 -f 656 118 666 -f 564 329 419 -f 385 425 185 -f 185 425 135 -f 385 185 294 -f 791 618 757 -f 182 700 419 -f 419 700 142 -f 142 700 463 -f 566 137 452 -f 492 782 541 -f 577 656 413 -f 724 329 564 -f 518 449 365 -f 365 449 566 -f 566 449 137 -f 618 884 887 -f 887 117 294 -f 600 884 618 -f 854 541 415 -f 691 838 862 -f 554 854 415 -f 656 329 724 -f 162 862 522 -f 862 838 791 -f 791 730 618 -f 421 482 463 -f 463 482 131 -f 854 492 541 -f 117 385 294 -f 425 897 135 -f 250 606 782 -f 782 606 714 -f 730 600 618 -f 799 117 368 -f 117 887 368 -f 413 656 666 -f 
656 577 329 -f 413 666 135 -f 555 518 365 -f 225 157 137 -f 137 554 415 -f 464 421 463 -f 422 555 365 -f 186 182 419 -f 700 311 463 -f 186 419 329 -f 485 798 492 -f 492 798 782 -f 157 554 137 -f 690 485 492 -f 899 464 463 -f 422 365 482 -f 844 162 606 -f 606 162 522 -f 422 482 391 -f 315 137 449 -f 646 413 135 -f 220 186 329 -f 798 250 782 -f 690 492 854 -f 798 236 250 -f 368 887 884 -f 117 878 385 -f 162 691 862 -f 838 730 791 -f 169 236 798 -f 250 362 606 -f 623 690 854 -f 169 362 236 -f 844 362 169 -f 236 362 250 -f 730 368 600 -f 600 368 884 -f 368 878 799 -f 799 878 117 -f 374 897 425 -f 861 449 518 -f 554 623 854 -f 657 730 838 -f 157 623 554 -f 220 329 577 -f 182 311 700 -f 217 427 691 -f 691 427 838 -f 787 798 485 -f 362 844 606 -f 162 217 691 -f 391 482 421 -f 457 861 518 -f 186 311 182 -f 464 391 421 -f 374 425 385 -f 422 498 555 -f 436 374 385 -f 553 391 464 -f 350 457 498 -f 366 787 690 -f 690 787 485 -f 536 217 333 -f 575 861 457 -f 311 899 463 -f 476 348 427 -f 427 348 838 -f 177 220 577 -f 186 735 311 -f 280 139 897 -f 897 139 135 -f 177 197 220 -f 626 436 878 -f 878 436 385 -f 374 638 897 -f 626 878 802 -f 220 708 186 -f 290 553 605 -f 575 315 861 -f 861 315 449 -f 371 279 623 -f 315 225 137 -f 787 169 798 -f 333 217 162 -f 810 899 311 -f 899 553 464 -f 350 498 422 -f 745 128 730 -f 730 128 368 -f 516 690 623 -f 516 366 690 -f 787 731 169 -f 802 878 558 -f 436 638 374 -f 626 638 436 -f 702 623 157 -f 457 518 555 -f 367 702 225 -f 457 555 498 -f 710 440 280 -f 139 440 135 -f 279 516 623 -f 476 657 348 -f 348 657 838 -f 460 646 135 -f 784 735 708 -f 708 735 186 -f 784 708 647 -f 657 745 730 -f 350 422 889 -f 367 225 817 -f 225 702 157 -f 647 708 220 -f 735 810 311 -f 177 577 413 -f 710 460 440 -f 440 460 135 -f 817 225 141 -f 225 315 141 -f 702 371 623 -f 279 371 516 -f 516 613 366 -f 347 371 702 -f 141 315 599 -f 367 281 702 -f 599 315 575 -f 605 553 335 -f 553 290 391 -f 889 422 391 -f 333 162 844 -f 217 536 427 -f 460 608 646 -f 646 455 413 -f 558 
878 368 -f 626 841 638 -f 337 768 745 -f 745 768 128 -f 860 558 368 -f 608 455 646 -f 868 613 516 -f 366 613 787 -f 597 627 169 -f 169 627 844 -f 860 368 128 -f 802 841 626 -f 280 440 139 -f 460 533 608 -f 280 897 638 -f 290 889 391 -f 833 589 457 -f 457 589 575 -f 833 457 350 -f 249 280 638 -f 455 177 413 -f 533 451 710 -f 710 451 460 -f 347 281 713 -f 713 281 367 -f 367 817 713 -f 627 333 844 -f 584 860 128 -f 371 868 516 -f 731 597 169 -f 818 868 371 -f 627 332 333 -f 333 616 536 -f 674 833 350 -f 548 817 141 -f 540 647 197 -f 197 647 220 -f 719 810 735 -f 476 427 645 -f 686 476 645 -f 657 337 745 -f 719 735 784 -f 335 553 899 -f 290 643 889 -f 889 674 350 -f 713 817 548 -f 281 347 702 -f 303 533 373 -f 451 533 460 -f 526 177 455 -f 731 787 613 -f 627 597 332 -f 300 336 540 -f 647 336 784 -f 867 848 768 -f 768 848 128 -f 599 575 589 -f 347 395 371 -f 526 214 177 -f 177 214 197 -f 526 455 608 -f 785 526 726 -f 848 584 128 -f 450 719 784 -f 286 335 899 -f 812 731 613 -f 364 335 159 -f 605 643 290 -f 335 377 605 -f 860 720 558 -f 558 841 802 -f 579 720 466 -f 822 616 332 -f 332 616 333 -f 688 616 822 -f 548 285 713 -f 713 285 347 -f 483 599 589 -f 226 841 558 -f 373 533 710 -f 234 818 371 -f 868 818 613 -f 239 450 784 -f 159 335 286 -f 645 427 536 -f 476 793 657 -f 285 395 347 -f 300 239 336 -f 336 239 784 -f 599 548 141 -f 672 234 395 -f 686 793 476 -f 848 867 584 -f 899 383 286 -f 335 364 377 -f 625 643 605 -f 466 720 860 -f 720 226 558 -f 466 860 584 -f 822 332 597 -f 616 688 536 -f 447 867 768 -f 643 674 889 -f 833 705 589 -f 540 336 647 -f 239 634 450 -f 473 540 197 -f 473 197 214 -f 726 526 608 -f 526 753 214 -f 747 645 536 -f 686 586 793 -f 373 710 280 -f 533 303 608 -f 625 605 377 -f 797 373 280 -f 539 466 584 -f 672 395 525 -f 395 234 371 -f 303 726 608 -f 122 753 785 -f 785 753 526 -f 899 810 383 -f 264 625 377 -f 447 172 867 -f 867 172 584 -f 383 810 684 -f 515 705 269 -f 674 705 833 -f 599 792 548 -f 688 747 536 -f 339 395 285 -f 234 672 818 -f 695 586 
645 -f 645 586 686 -f 793 337 657 -f 586 337 793 -f 207 822 597 -f 688 611 747 -f 207 597 193 -f 677 226 579 -f 579 226 720 -f 841 249 638 -f 428 249 841 -f 625 146 643 -f 643 146 674 -f 829 792 599 -f 548 339 285 -f 264 377 364 -f 672 545 818 -f 818 812 613 -f 810 719 181 -f 684 810 181 -f 286 512 159 -f 159 512 364 -f 719 450 181 -f 545 812 818 -f 611 132 747 -f 747 132 645 -f 226 428 841 -f 515 483 705 -f 705 483 589 -f 534 473 753 -f 753 473 214 -f 634 239 300 -f 870 742 195 -f 726 742 785 -f 742 726 195 -f 466 677 579 -f 851 249 428 -f 221 677 466 -f 337 447 768 -f 172 539 584 -f 634 300 481 -f 208 512 286 -f 407 826 822 -f 822 826 688 -f 481 300 540 -f 880 472 812 -f 812 472 731 -f 634 302 450 -f 742 122 785 -f 496 339 548 -f 672 358 545 -f 208 286 383 -f 789 797 249 -f 249 797 280 -f 373 849 303 -f 875 789 390 -f 302 181 450 -f 145 208 383 -f 269 705 674 -f 635 496 792 -f 269 674 146 -f 796 849 373 -f 193 597 731 -f 228 695 645 -f 586 891 337 -f 337 479 447 -f 132 228 645 -f 611 228 132 -f 472 193 731 -f 739 726 303 -f 742 870 122 -f 122 534 753 -f 430 796 373 -f 195 756 870 -f 634 590 302 -f 302 715 181 -f 481 590 634 -f 560 430 373 -f 801 303 849 -f 739 303 801 -f 870 756 122 -f 630 146 625 -f 212 269 537 -f 829 599 483 -f 792 496 548 -f 467 483 515 -f 257 891 695 -f 695 891 586 -f 173 715 302 -f 181 223 684 -f 174 874 473 -f 473 874 540 -f 636 880 812 -f 681 359 193 -f 512 264 364 -f 145 383 684 -f 512 208 264 -f 709 395 339 -f 149 525 395 -f 545 241 812 -f 801 849 189 -f 739 195 726 -f 842 534 122 -f 375 709 339 -f 120 172 447 -f 120 539 172 -f 594 149 709 -f 709 149 395 -f 789 875 797 -f 243 560 373 -f 390 789 249 -f 390 249 851 -f 359 207 193 -f 129 407 207 -f 207 407 822 -f 826 611 688 -f 189 796 430 -f 189 849 796 -f 211 302 590 -f 211 173 302 -f 715 223 181 -f 358 672 525 -f 507 241 545 -f 874 481 540 -f 891 479 337 -f 790 403 149 -f 149 403 525 -f 167 223 715 -f 496 375 339 -f 212 515 269 -f 212 467 515 -f 417 594 375 -f 842 122 756 -f 874 174 481 
-f 282 611 804 -f 228 668 695 -f 863 360 479 -f 428 226 353 -f 640 428 353 -f 243 373 797 -f 320 829 483 -f 188 120 447 -f 173 167 715 -f 409 167 173 -f 698 188 447 -f 274 145 223 -f 223 145 684 -f 240 630 264 -f 479 698 447 -f 576 211 590 -f 243 797 875 -f 537 269 146 -f 467 320 483 -f 611 668 228 -f 479 360 698 -f 681 193 472 -f 804 611 826 -f 681 472 880 -f 805 636 241 -f 241 636 812 -f 790 358 403 -f 403 358 525 -f 636 681 880 -f 698 853 188 -f 188 853 120 -f 120 853 539 -f 353 226 227 -f 287 320 467 -f 578 466 539 -f 284 243 875 -f 668 257 695 -f 406 578 539 -f 578 221 466 -f 640 851 428 -f 390 591 875 -f 594 790 149 -f 594 709 375 -f 227 226 396 -f 640 376 851 -f 396 226 677 -f 353 877 640 -f 863 479 891 -f 881 406 853 -f 853 406 539 -f 456 507 358 -f 358 507 545 -f 396 256 227 -f 227 256 353 -f 881 853 698 -f 578 399 221 -f 635 792 829 -f 806 375 496 -f 790 546 358 -f 420 635 829 -f 774 454 801 -f 801 454 739 -f 890 574 195 -f 574 842 756 -f 189 430 560 -f 576 557 211 -f 211 557 173 -f 167 274 223 -f 576 590 481 -f 502 274 167 -f 765 240 208 -f 519 189 560 -f 409 502 167 -f 240 264 208 -f 264 630 625 -f 208 145 765 -f 574 756 195 -f 534 174 473 -f 856 591 390 -f 243 754 560 -f 856 390 851 -f 320 420 829 -f 850 420 320 -f 287 467 212 -f 559 576 481 -f 557 409 173 -f 693 786 215 -f 635 786 496 -f 198 856 851 -f 459 326 359 -f 359 129 207 -f 459 359 681 -f 459 681 805 -f 284 754 243 -f 123 396 677 -f 256 877 353 -f 123 677 221 -f 218 287 212 -f 754 519 560 -f 189 774 801 -f 326 129 359 -f 523 863 891 -f 317 881 698 -f 523 891 257 -f 530 313 668 -f 668 313 257 -f 530 668 611 -f 893 174 534 -f 326 696 129 -f 805 681 636 -f 786 806 496 -f 215 786 635 -f 835 578 406 -f 835 399 578 -f 230 409 581 -f 230 145 274 -f 396 877 256 -f 376 877 746 -f 806 417 375 -f 594 546 790 -f 313 523 257 -f 456 598 507 -f 507 598 241 -f 693 806 786 -f 324 806 693 -f 399 380 221 -f 317 698 360 -f 831 400 380 -f 230 274 502 -f 276 774 519 -f 519 774 189 -f 890 195 739 -f 506 284 591 -f 
591 284 875 -f 754 276 519 -f 615 380 399 -f 380 400 221 -f 174 559 481 -f 581 409 557 -f 163 826 407 -f 313 494 523 -f 328 537 511 -f 630 537 146 -f 287 850 320 -f 459 696 326 -f 129 163 407 -f 517 696 459 -f 158 163 129 -f 773 218 212 -f 232 215 635 -f 877 376 640 -f 856 198 591 -f 746 877 769 -f 446 835 881 -f 881 835 406 -f 324 215 232 -f 637 760 417 -f 673 850 287 -f 760 594 417 -f 760 546 594 -f 831 123 400 -f 400 123 221 -f 242 317 863 -f 863 317 360 -f 890 739 659 -f 763 567 174 -f 659 739 454 -f 163 804 826 -f 530 494 313 -f 369 205 863 -f 530 611 282 -f 307 534 842 -f 174 567 559 -f 559 886 576 -f 328 773 537 -f 537 773 212 -f 218 673 287 -f 565 659 454 -f 890 175 574 -f 352 615 399 -f 232 635 420 -f 215 324 693 -f 497 307 842 -f 837 537 630 -f 291 161 123 -f 123 161 396 -f 551 497 842 -f 307 893 534 -f 687 456 358 -f 598 805 241 -f 641 352 835 -f 835 352 399 -f 615 831 380 -f 251 805 598 -f 808 893 307 -f 331 557 576 -f 409 230 502 -f 751 565 176 -f 659 175 890 -f 574 551 842 -f 565 454 176 -f 352 831 615 -f 546 687 358 -f 456 381 598 -f 733 687 546 -f 369 863 523 -f 317 743 881 -f 346 198 376 -f 376 198 851 -f 667 369 523 -f 821 551 574 -f 497 319 307 -f 593 417 806 -f 767 886 567 -f 567 886 559 -f 755 331 576 -f 886 755 576 -f 653 232 420 -f 324 593 806 -f 164 420 850 -f 673 218 439 -f 439 218 773 -f 795 689 687 -f 687 689 456 -f 192 530 282 -f 494 667 523 -f 299 319 497 -f 893 763 174 -f 750 763 187 -f 644 331 755 -f 565 175 659 -f 183 593 550 -f 593 637 417 -f 570 733 546 -f 570 546 760 -f 762 205 369 -f 253 242 205 -f 205 242 863 -f 352 32 831 -f 299 808 319 -f 319 808 307 -f 299 497 551 -f 238 855 240 -f 240 855 630 -f 328 439 773 -f 585 164 850 -f 550 593 324 -f 795 381 689 -f 689 381 456 -f 832 439 328 -f 673 431 850 -f 296 743 317 -f 183 828 637 -f 637 828 760 -f 733 795 687 -f 737 653 164 -f 164 653 420 -f 242 296 317 -f 446 641 835 -f 609 299 551 -f 363 763 893 -f 828 570 760 -f 216 251 598 -f 805 898 459 -f 855 837 630 -f 746 346 376 -f 198 
506 591 -f 176 454 774 -f 628 396 161 -f 622 238 240 -f 855 238 837 -f 446 881 743 -f 831 291 123 -f 254 667 494 -f 242 416 296 -f 276 754 508 -f 565 751 175 -f 175 821 574 -f 629 506 198 -f 431 259 662 -f 262 259 673 -f 34 291 831 -f 661 308 493 -f 570 372 733 -f 381 312 598 -f 424 372 570 -f 542 276 816 -f 593 183 637 -f 828 811 570 -f 550 324 587 -f 663 629 198 -f 508 754 284 -f 262 673 439 -f 259 431 673 -f 628 877 396 -f 346 663 198 -f 832 262 439 -f 717 762 369 -f 28 446 26 -f 813 759 876 -f 667 717 369 -f 585 737 164 -f 443 349 232 -f 363 893 808 -f 763 750 567 -f 204 581 331 -f 363 808 299 -f 276 176 774 -f 163 664 804 -f 804 538 282 -f 813 717 759 -f 588 664 163 -f 158 129 696 -f 426 743 296 -f 641 32 352 -f 549 628 161 -f 291 549 161 -f 36 549 291 -f 811 424 570 -f 372 795 733 -f 795 312 381 -f 649 445 837 -f 837 445 537 -f 639 216 312 -f 312 216 598 -f 813 253 762 -f 762 253 205 -f 87 898 251 -f 251 898 805 -f 490 836 276 -f 276 836 176 -f 176 301 751 -f 661 821 308 -f 412 346 885 -f 412 663 346 -f 542 490 276 -f 424 795 372 -f 283 158 517 -f 158 588 163 -f 295 585 156 -f 431 585 850 -f 503 324 232 -f 662 259 262 -f 885 346 746 -f 628 769 877 -f 386 769 628 -f 821 175 308 -f 821 661 551 -f 201 765 230 -f 230 765 145 -f 238 160 837 -f 581 557 331 -f 529 494 530 -f 717 813 762 -f 284 506 508 -f 765 397 240 -f 232 653 443 -f 183 811 828 -f 424 895 795 -f 879 126 663 -f 663 126 629 -f 448 811 183 -f 426 416 24 -f 253 416 242 -f 707 885 746 -f 412 879 663 -f 609 363 299 -f 859 386 628 -f 788 707 746 -f 517 158 696 -f 158 283 588 -f 722 721 664 -f 408 511 445 -f 445 511 537 -f 692 517 134 -f 755 886 767 -f 204 331 644 -f 832 662 262 -f 443 737 304 -f 443 653 737 -f 304 737 277 -f 532 744 397 -f 397 744 240 -f 788 746 769 -f 885 879 412 -f 532 513 744 -f 744 513 240 -f 654 148 765 -f 765 148 397 -f 493 609 661 -f 661 609 551 -f 301 176 836 -f 511 832 328 -f 840 836 490 -f 840 301 836 -f 28 30 446 -f 446 30 641 -f 679 859 628 -f 26 446 743 -f 30 32 641 -f 770 
532 397 -f 513 622 240 -f 508 506 779 -f 581 602 230 -f 458 664 588 -f 664 721 804 -f 779 629 126 -f 779 506 629 -f 587 448 550 -f 550 448 183 -f 761 895 424 -f 538 894 282 -f 532 622 513 -f 511 196 832 -f 767 567 750 -f 581 697 602 -f 426 296 416 -f 426 26 743 -f 697 201 602 -f 602 201 230 -f 443 304 349 -f 349 503 232 -f 277 737 585 -f 209 175 751 -f 121 187 363 -f 721 538 804 -f 815 628 549 -f 386 788 769 -f 561 192 894 -f 894 192 282 -f 24 416 22 -f 32 34 831 -f 405 297 530 -f 813 876 253 -f 152 363 609 -f 363 187 763 -f 36 815 549 -f 622 160 238 -f 759 717 655 -f 517 660 283 -f 283 660 588 -f 471 265 538 -f 148 823 397 -f 532 489 622 -f 654 823 148 -f 355 788 386 -f 477 879 885 -f 355 386 859 -f 134 517 459 -f 517 852 660 -f 544 840 556 -f 542 840 490 -f 869 209 751 -f 194 561 894 -f 192 405 530 -f 22 416 253 -f 426 24 26 -f 561 405 192 -f 761 424 811 -f 716 624 312 -f 216 639 251 -f 898 134 459 -f 538 194 894 -f 229 213 130 -f 265 194 538 -f 869 751 301 -f 93 134 898 -f 692 852 517 -f 508 771 276 -f 144 503 825 -f 304 503 349 -f 34 36 291 -f 815 679 628 -f 477 885 707 -f 267 521 676 -f 771 816 276 -f 491 603 493 -f 493 603 609 -f 156 431 583 -f 156 585 431 -f 474 587 324 -f 278 761 811 -f 295 277 585 -f 187 767 750 -f 168 477 707 -f 583 431 662 -f 295 155 277 -f 213 529 297 -f 297 529 530 -f 794 160 622 -f 670 649 160 -f 160 649 837 -f 268 583 662 -f 624 639 312 -f 544 301 840 -f 136 869 301 -f 529 254 494 -f 20 22 876 -f 876 22 253 -f 95 852 692 -f 95 692 134 -f 712 126 879 -f 712 779 126 -f 521 542 816 -f 351 644 755 -f 272 697 581 -f 655 717 667 -f 839 155 156 -f 156 155 295 -f 825 503 304 -f 556 840 542 -f 825 304 140 -f 503 474 324 -f 268 662 832 -f 36 38 815 -f 788 168 707 -f 480 355 859 -f 38 679 815 -f 543 378 660 -f 660 458 588 -f 489 770 453 -f 823 770 397 -f 654 765 201 -f 632 408 445 -f 521 816 771 -f 521 556 542 -f 544 136 301 -f 254 655 667 -f 759 342 876 -f 592 655 254 -f 521 771 676 -f 716 312 795 -f 87 89 898 -f 229 171 529 -f 529 171 254 -f 
213 297 130 -f 814 308 209 -f 209 308 175 -f 178 209 869 -f 171 621 254 -f 144 474 503 -f 357 811 448 -f 895 716 795 -f 624 322 639 -f 89 91 898 -f 87 251 639 -f 18 342 759 -f 91 93 898 -f 852 543 660 -f 357 323 857 -f 767 351 755 -f 272 581 531 -f 581 204 263 -f 357 448 323 -f 414 342 18 -f 414 876 342 -f 414 20 876 -f 18 759 655 -f 624 734 322 -f 85 87 639 -f 379 716 200 -f 85 639 322 -f 670 632 649 -f 649 632 445 -f 510 156 583 -f 93 95 134 -f 95 387 852 -f 101 458 378 -f 378 458 660 -f 38 40 679 -f 679 40 859 -f 489 532 770 -f 489 794 622 -f 196 511 408 -f 510 839 156 -f 155 343 277 -f 725 888 474 -f 521 267 556 -f 685 136 544 -f 610 771 508 -f 794 670 160 -f 528 408 288 -f 528 196 408 -f 685 544 556 -f 191 168 355 -f 355 168 788 -f 477 701 879 -f 734 85 322 -f 40 480 859 -f 418 770 823 -f 95 97 387 -f 387 97 852 -f 83 624 716 -f 83 734 624 -f 357 278 811 -f 81 83 716 -f 857 278 357 -f 596 610 779 -f 779 610 508 -f 288 408 632 -f 196 268 832 -f 136 178 869 -f 510 268 19 -f 171 592 621 -f 16 18 655 -f 130 405 190 -f 265 561 194 -f 697 654 201 -f 725 474 144 -f 474 888 587 -f 471 538 721 -f 308 361 493 -f 603 152 609 -f 871 361 308 -f 361 491 493 -f 187 504 767 -f 524 448 587 -f 382 895 761 -f 97 543 852 -f 458 224 664 -f 592 254 621 -f 414 18 20 -f 434 701 477 -f 153 96 680 -f 178 814 209 -f 304 277 343 -f 140 304 343 -f 382 761 612 -f 734 83 85 -f 486 524 888 -f 888 524 587 -f 701 712 879 -f 97 99 543 -f 101 224 458 -f 229 529 213 -f 229 592 171 -f 130 297 405 -f 778 382 612 -f 794 202 670 -f 670 202 632 -f 675 202 794 -f 437 309 712 -f 712 309 779 -f 343 155 839 -f 825 725 144 -f 40 42 480 -f 480 191 355 -f 168 434 477 -f 701 437 712 -f 247 343 470 -f 263 204 644 -f 697 614 654 -f 651 263 552 -f 351 263 644 -f 729 351 767 -f 418 453 770 -f 489 453 794 -f 394 823 654 -f 200 716 895 -f 101 378 543 -f 778 200 382 -f 382 200 895 -f 153 814 96 -f 814 153 308 -f 260 152 491 -f 814 178 98 -f 231 556 354 -f 231 685 556 -f 495 268 196 -f 268 510 583 -f 354 556 267 -f 
96 814 98 -f 354 267 255 -f 247 140 343 -f 865 825 140 -f 571 725 825 -f 620 288 632 -f 528 752 196 -f 432 323 900 -f 524 323 448 -f 612 761 180 -f 830 434 168 -f 676 771 610 -f 153 871 308 -f 718 676 610 -f 379 81 716 -f 79 81 379 -f 79 379 200 -f 260 491 88 -f 491 152 603 -f 761 278 180 -f 470 343 839 -f 99 101 543 -f 614 394 654 -f 437 596 309 -f 309 596 779 -f 14 16 592 -f 592 16 655 -f 42 191 480 -f 263 531 581 -f 272 614 697 -f 247 384 140 -f 90 491 361 -f 105 722 224 -f 224 722 664 -f 190 405 561 -f 722 471 721 -f 229 14 592 -f 711 607 676 -f 676 607 267 -f 102 685 231 -f 42 44 191 -f 191 44 168 -f 153 680 871 -f 871 92 361 -f 94 680 96 -f 121 363 783 -f 846 504 187 -f 263 310 531 -f 783 363 152 -f 900 323 524 -f 323 75 857 -f 843 718 596 -f 596 718 610 -f 27 752 288 -f 288 752 528 -f 620 632 202 -f 620 675 33 -f 165 675 794 -f 180 278 857 -f 778 79 200 -f 190 561 265 -f 384 865 140 -f 872 900 524 -f 247 469 384 -f 568 469 247 -f 470 839 510 -f 651 310 263 -f 531 392 272 -f 121 520 187 -f 612 79 778 -f 486 888 725 -f 75 180 857 -f 305 694 434 -f 434 694 701 -f 776 305 434 -f 77 180 75 -f 612 77 79 -f 101 103 224 -f 619 601 471 -f 233 620 33 -f 675 620 202 -f 469 865 384 -f 392 614 272 -f 777 794 453 -f 683 846 187 -f 552 263 351 -f 94 92 871 -f 260 356 152 -f 764 471 722 -f 471 601 265 -f 92 90 361 -f 680 94 871 -f 178 136 704 -f 136 685 704 -f 12 14 229 -f 44 830 168 -f 418 823 394 -f 620 233 288 -f 88 356 260 -f 741 683 520 -f 520 683 187 -f 741 520 121 -f 345 418 138 -f 356 783 152 -f 601 190 265 -f 130 658 229 -f 824 470 642 -f 732 571 865 -f 711 255 607 -f 607 255 267 -f 90 88 491 -f 732 865 469 -f 865 571 825 -f 704 685 102 -f 102 231 104 -f 84 438 783 -f 783 438 121 -f 500 741 121 -f 147 486 725 -f 292 777 453 -f 568 732 469 -f 571 147 725 -f 44 46 830 -f 830 776 434 -f 401 437 694 -f 105 764 722 -f 601 411 190 -f 75 323 432 -f 180 77 612 -f 866 432 900 -f 356 84 783 -f 438 500 121 -f 88 86 356 -f 704 98 178 -f 292 321 777 -f 777 165 794 -f 103 105 
224 -f 642 470 510 -f 470 824 247 -f 46 776 830 -f 694 437 701 -f 127 501 255 -f 138 418 394 -f 418 292 453 -f 138 394 772 -f 318 130 190 -f 318 658 130 -f 401 694 340 -f 107 125 764 -f 764 619 471 -f 772 394 614 -f 345 292 418 -f 80 683 741 -f 246 404 683 -f 683 404 846 -f 846 404 504 -f 872 866 900 -f 321 165 777 -f 617 165 321 -f 86 84 356 -f 807 292 345 -f 270 147 732 -f 732 147 571 -f 486 509 524 -f 102 100 704 -f 704 100 98 -f 435 509 486 -f 658 12 229 -f 82 500 438 -f 843 569 499 -f 437 569 596 -f 478 619 109 -f 125 619 764 -f 658 10 12 -f 499 569 437 -f 569 843 596 -f 255 501 354 -f 711 676 718 -f 411 318 190 -f 340 694 305 -f 619 411 601 -f 580 772 154 -f 138 206 345 -f 31 527 233 -f 233 527 288 -f 23 495 196 -f 866 75 432 -f 73 75 866 -f 78 246 683 -f 404 800 504 -f 150 392 531 -f 527 29 288 -f 29 27 288 -f 84 82 438 -f 500 80 741 -f 46 48 776 -f 776 48 305 -f 150 531 310 -f 33 675 165 -f 33 31 233 -f 527 31 29 -f 105 107 764 -f 619 781 411 -f 729 767 504 -f 843 711 718 -f 104 231 354 -f 344 800 74 -f 246 800 404 -f 48 340 305 -f 775 499 437 -f 258 617 321 -f 258 321 292 -f 27 25 752 -f 800 729 504 -f 37 258 39 -f 35 33 165 -f 35 165 617 -f 48 892 340 -f 775 437 401 -f 843 547 711 -f 316 872 827 -f 509 872 524 -f 824 568 247 -f 147 435 486 -f 15 568 824 -f 729 552 351 -f 23 196 752 -f 25 23 752 -f 127 669 501 -f 501 669 354 -f 37 35 617 -f 199 127 255 -f 800 344 729 -f 82 80 500 -f 882 658 318 -f 882 10 658 -f 106 104 669 -f 669 104 354 -f 478 781 619 -f 411 650 318 -f 23 21 495 -f 495 21 268 -f 50 401 340 -f 108 106 127 -f 109 619 125 -f 772 206 138 -f 258 37 617 -f 580 206 772 -f 206 807 345 -f 71 73 866 -f 39 258 292 -f 271 41 807 -f 41 39 292 -f 154 772 614 -f 289 614 392 -f 150 310 151 -f 151 310 237 -f 107 109 125 -f 270 435 147 -f 872 71 866 -f 76 800 246 -f 729 344 552 -f 237 310 651 -f 650 882 318 -f 8 882 6 -f 48 50 892 -f 892 50 340 -f 150 289 392 -f 289 154 614 -f 271 807 206 -f 21 19 268 -f 568 270 732 -f 80 78 683 -f 19 642 510 -f 199 255 
711 -f 127 106 669 -f 41 292 807 -f 271 580 154 -f 150 723 289 -f 237 651 706 -f 441 244 883 -f 52 775 401 -f 499 547 843 -f 650 411 781 -f 882 8 10 -f 248 781 478 -f 19 17 642 -f 642 15 824 -f 252 199 711 -f 682 71 872 -f 488 316 69 -f 109 111 478 -f 271 206 580 -f 271 154 45 -f 316 682 872 -f 78 76 246 -f 441 237 244 -f 110 108 199 -f 199 108 127 -f 50 52 401 -f 896 547 499 -f 723 154 289 -f 896 499 775 -f 237 441 151 -f 151 723 150 -f 706 552 738 -f 858 650 781 -f 111 248 478 -f 17 15 642 -f 827 872 509 -f 248 858 781 -f 429 252 711 -f 252 398 199 -f 444 723 151 -f 15 203 568 -f 273 509 435 -f 316 488 682 -f 682 69 71 -f 314 487 858 -f 858 487 650 -f 650 6 882 -f 13 465 203 -f 203 465 568 -f 45 43 271 -f 271 43 41 -f 248 314 858 -f 465 270 568 -f 341 711 547 -f 341 429 711 -f 252 112 398 -f 398 110 199 -f 54 896 775 -f 514 341 547 -f 4 6 487 -f 487 6 650 -f 76 74 800 -f 468 273 435 -f 67 69 316 -f 488 69 682 -f 112 252 429 -f 112 110 398 -f 736 468 435 -f 273 827 509 -f 248 652 314 -f 314 665 487 -f 111 113 248 -f 15 13 203 -f 465 325 270 -f 736 124 468 -f 74 222 344 -f 468 124 273 -f 736 435 270 -f 819 847 341 -f 52 54 775 -f 113 652 248 -f 633 547 896 -f 341 847 429 -f 47 45 723 -f 723 45 154 -f 444 151 441 -f 124 827 273 -f 652 665 314 -f 706 651 552 -f 883 444 441 -f 820 736 270 -f 219 758 827 -f 738 552 344 -f 883 370 444 -f 56 633 896 -f 74 72 222 -f 222 738 344 -f 13 11 465 -f 847 112 429 -f 819 112 847 -f 514 819 341 -f 633 514 547 -f 758 67 827 -f 827 67 316 -f 604 738 222 -f 572 370 883 -f 72 604 222 -f 2 4 665 -f 665 4 487 -f 244 237 706 -f 444 370 723 -f 461 244 706 -f 113 115 652 -f 652 115 665 -f 166 47 723 -f 325 820 270 -f 410 827 124 -f 410 124 736 -f 11 325 465 -f 820 699 736 -f 54 56 896 -f 633 266 514 -f 514 114 819 -f 56 749 633 -f 819 114 112 -f 749 266 633 -f 604 70 738 -f 738 423 706 -f 72 70 604 -f 11 9 325 -f 325 699 820 -f 68 423 738 -f 462 166 370 -f 266 114 514 -f 572 883 244 -f 370 166 723 -f 7 699 325 -f 535 572 244 -f 582 462 370 
-f 51 49 166 -f 166 49 47 -f 63 65 758 -f 758 65 67 -f 728 678 423 -f 423 678 706 -f 115 2 665 -f 5 410 736 -f 573 461 678 -f 678 461 706 -f 572 582 370 -f 63 219 61 -f 410 219 827 -f 535 582 572 -f 462 51 166 -f 9 7 325 -f 699 5 736 -f 749 727 266 -f 266 116 114 -f 56 58 749 -f 58 327 749 -f 338 535 244 -f 53 51 462 -f 70 68 738 -f 780 338 244 -f 60 727 327 -f 327 727 749 -f 68 631 423 -f 60 116 727 -f 727 116 266 -f 53 462 582 -f 631 728 423 -f 535 402 582 -f 143 728 66 -f 780 244 461 -f 61 219 410 -f 219 63 758 -f 143 573 678 -f 338 402 535 -f 803 402 338 -f 728 143 678 -f 573 780 461 -f 766 143 66 -f 7 5 699 -f 330 780 573 -f 66 728 631 -f 143 766 573 -f 68 66 631 -f 389 803 864 -f 58 60 327 -f 55 53 402 -f 402 53 582 -f 803 338 780 -f 330 573 64 -f 3 61 410 -f 5 3 410 -f 64 573 766 -f 803 55 402 -f 66 64 766 -f 389 55 803 -f 803 780 864 -f 3 648 61 -f 864 780 330 -f 62 864 330 -f 3 1 648 -f 648 1 61 -f 64 62 330 -f 864 57 389 -f 389 57 55 -f 59 57 864 -f 62 59 864 +v 0.310344755649566650390625 0 0 +v 0.3448275029659271240234375 0 0 +v 0.3793102800846099853515625 0 0 +v 0.4137929975986480712890625 0 0 +v 0.448275744915008544921875 0 0 +v 0.48275852203369140625 0 0 +v 0.5172412395477294921875 0 0 +v 0.55172407627105712890625 0 0 +v 0.58620679378509521484375 0 0 +v 0.62068951129913330078125 0 0 +v 0.655172288417816162109375 0 0 +v 0.689655005931854248046875 0 0 +v 0.724137723445892333984375 0 0 +v 0.521848201751708984375 0.4146618545055389404296875 0 +v 0.697631061077117919921875 0.06022547185420989990234375 0 +v 0.653108179569244384765625 0.253291547298431396484375 0 +v 0.466310679912567138671875 0.2444255352020263671875 0 +v 0.656329452991485595703125 0.1381829082965850830078125 0 +v 0.566601276397705078125 0.26538944244384765625 0 +v 0.5232479572296142578125 0.093940474092960357666015625 0 +v 0.5865128040313720703125 0.02010756172239780426025390625 0 +v 0.4071832001209259033203125 0.06916700303554534912109375 0 +v 0.52103650569915771484375 
0.0543379671871662139892578125 0 +v 0.704414188861846923828125 0.031838946044445037841796875 0 +v 0.57722842693328857421875 0.23789274692535400390625 0 +v 0.3982209861278533935546875 0.20984371006488800048828125 0 +v 0.3777517378330230712890625 0.17960365116596221923828125 0 +v 0.663078010082244873046875 0.263322293758392333984375 0 +v 0.687488138675689697265625 0.21550764143466949462890625 0 +v 0.5182006359100341796875 0.02566271461546421051025390625 0 +v 0.5173790454864501953125 0.1320680677890777587890625 0 +v 0.56542122364044189453125 0.18327976763248443603515625 0 +v 0.617876529693603515625 0.013236877508461475372314453125 0 +v 0.3472334444522857666015625 0.1481408178806304931640625 0 +v 0.3685845434665679931640625 0.1369002163410186767578125 0 +v 0.5113189220428466796875 0.22431695461273193359375 0 +v 0.3331450521945953369140625 0.081101395189762115478515625 0 +v 0.4072410762310028076171875 0.232234060764312744140625 0 +v 0.725594222545623779296875 0.011427459307014942169189453125 0 +v 0.5875995159149169921875 0.272821843624114990234375 0 +v 0.3708527386188507080078125 0.19705422222614288330078125 0 +v 0.459855735301971435546875 0.0446122772991657257080078125 0 +v 0.5188350677490234375 0.30681002140045166015625 0 +v 0.645570099353790283203125 0.035362415015697479248046875 0 +v 0.5438058376312255859375 0.4569113254547119140625 0 +v 0.628898203372955322265625 0.3984341919422149658203125 0 +v 0.61446464061737060546875 0.033074609935283660888671875 0 +v 0.4090540111064910888671875 0.16295440495014190673828125 0 +v 0.51730835437774658203125 0.4240889251232147216796875 0 +v 0.5546877384185791015625 0.2870514392852783203125 0 +v 0.689768135547637939453125 0.05835632979869842529296875 0 +v 0.449291527271270751953125 0.304468333721160888671875 0 +v 0.55207812786102294921875 0.271652698516845703125 0 +v 0.455444037914276123046875 0.4017134606838226318359375 0 +v 0.54367792606353759765625 0.2826995849609375 0 +v 0.62223088741302490234375 0.27906787395477294921875 0 +v 
0.509807109832763671875 0.3339647948741912841796875 0 +v 0.442035496234893798828125 0.519952297210693359375 0 +v 0.693956196308135986328125 0.090885736048221588134765625 0 +v 0.61882603168487548828125 0.1334614455699920654296875 0 +v 0.44026362895965576171875 0.07648675143718719482421875 0 +v 0.680348455905914306640625 0.06320761144161224365234375 0 +v 0.51451265811920166015625 0.3679274618625640869140625 0 +v 0.456519782543182373046875 0.3374772965908050537109375 0 +v 0.3354980647563934326171875 0.1476855576038360595703125 0 +v 0.3803351223468780517578125 0.1478086411952972412109375 0 +v 0.470620810985565185546875 0.1158339679241180419921875 0 +v 0.4037926495075225830078125 0.3998120725154876708984375 0 +v 0.3991610705852508544921875 0.4317600429058074951171875 0 +v 0.61452758312225341796875 0.07004217803478240966796875 0 +v 0.4053734838962554931640625 0.3210429251194000244140625 0 +v 0.53491675853729248046875 0.4042434990406036376953125 0 +v 0.524183750152587890625 0.3650997579097747802734375 0 +v 0.3732158839702606201171875 0.222863733768463134765625 0 +v 0.5010631084442138671875 0.3763890564441680908203125 0 +v 0.3649117648601531982421875 0.26090443134307861328125 0 +v 0.6128666400909423828125 0.081369556486606597900390625 0 +v 0.3719186484813690185546875 0.00138336385134607553482055664062 0 +v 0.444587171077728271484375 0.3361021578311920166015625 0 +v 0.45309412479400634765625 0.493957698345184326171875 0 +v 0.4246629178524017333984375 0.107617653906345367431640625 0 +v 0.5682175159454345703125 0.24655687808990478515625 0 +v 0.5964329242706298828125 0.1175256073474884033203125 0 +v 0.3917968571186065673828125 0.24217855930328369140625 0 +v 0.4083028137683868408203125 0.3774065077304840087890625 0 +v 0.487676203250885009765625 0.000545978429727256298065185546875 0 +v 0.4254016578197479248046875 0.06355373561382293701171875 0 +v 0.60571181774139404296875 0.1156618297100067138671875 0 +v 0.4181091487407684326171875 0.17295129597187042236328125 0 +v 
0.53266847133636474609375 0.3262695372104644775390625 0 +v 0.4330773651599884033203125 0.3573468029499053955078125 0 +v 0.44132137298583984375 0.486410319805145263671875 0 +v 0.3679173886775970458984375 0.3423422873020172119140625 0 +v 0.725573360919952392578125 0.038557223975658416748046875 0 +v 0.3330719172954559326171875 0.099732913076877593994140625 0 +v 0.3952331244945526123046875 0.3108398914337158203125 0 +v 0.480060756206512451171875 0.52217578887939453125 0 +v 0.5349080562591552734375 0.290247976779937744140625 0 +v 0.736852943897247314453125 0.097676374018192291259765625 0 +v 0.59288012981414794921875 0.010063705034554004669189453125 0 +v 0.5200812816619873046875 0.030661039054393768310546875 0 +v 0.58231961727142333984375 0.107472516596317291259765625 0 +v 0.489458501338958740234375 0.243677794933319091796875 0 +v 0.490419805049896240234375 0.18534852564334869384765625 0 +v 0.471457183361053466796875 0.06809310615062713623046875 0 +v 0.483752727508544921875 0.256113708019256591796875 0 +v 0.5112216472625732421875 0.083290971815586090087890625 0 +v 0.691507875919342041015625 0.108903743326663970947265625 0 +v 0.3542247116565704345703125 0.1202773153781890869140625 0 +v 0.5189898014068603515625 0.06607423722743988037109375 0 +v 0.47241365909576416015625 0.438255846500396728515625 0 +v 0.3577577769756317138671875 0.16368420422077178955078125 0 +v 0.441374003887176513671875 0.262799918651580810546875 0 +v 0.5220623016357421875 0.035160057246685028076171875 0 +v 0.638389885425567626953125 0.280859410762786865234375 0 +v 0.3895483911037445068359375 0.26376760005950927734375 0 +v 0.59831583499908447265625 0.1189168989658355712890625 0 +v 0.4299966990947723388671875 0.035742692649364471435546875 0 +v 0.660535991191864013671875 0.1523644626140594482421875 0 +v 0.6211402416229248046875 0.097810126841068267822265625 0 +v 0.711870253086090087890625 0.0537335164844989776611328125 0 +v 0.3962226808071136474609375 0.16743578016757965087890625 0 +v 
0.61476981639862060546875 0.037129573523998260498046875 0 +v 0.57758998870849609375 0.07527725398540496826171875 0 +v 0.701840341091156005859375 0.030192784965038299560546875 0 +v 0.645544111728668212890625 0.3448564708232879638671875 0 +v 0.644223034381866455078125 0.1220856010913848876953125 0 +v 0.3966100513935089111328125 0.106682397425174713134765625 0 +v 0.4087737500667572021484375 0.296127736568450927734375 0 +v 0.4896848201751708984375 0.1316872537136077880859375 0 +v 0.5147669315338134765625 0.1404394805431365966796875 0 +v 0.3744138777256011962890625 0.3538189232349395751953125 0 +v 0.481042325496673583984375 0.298245728015899658203125 0 +v 0.5468680858612060546875 0.3401006758213043212890625 0 +v 0.9999997615814208984375 0.48275852203369140625 0 +v 0.9999997615814208984375 0.5172412395477294921875 0 +v 0.9999997615814208984375 0.55172407627105712890625 0 +v 0.9999997615814208984375 0.58620679378509521484375 0 +v 0.9999997615814208984375 0.62068951129913330078125 0 +v 0.9999997615814208984375 0.655172288417816162109375 0 +v 0.9999997615814208984375 0.689655005931854248046875 0 +v 0.9999997615814208984375 0.724137723445892333984375 0 +v 0.9999997615814208984375 0.758620560169219970703125 0 +v 0.9999997615814208984375 0.793103277683258056640625 0 +v 0.9999997615814208984375 0.827585995197296142578125 0 +v 0.9999997615814208984375 0.862068831920623779296875 0 +v 0.9999997615814208984375 0.89655148983001708984375 0 +v 0.9999997615814208984375 0.93103420734405517578125 0 +v 0.791724860668182373046875 0.52889478206634521484375 0 +v 0.978618144989013671875 0.799158394336700439453125 0 +v 0.6120955944061279296875 0.6169338226318359375 0 +v 0.94374787807464599609375 0.681820094585418701171875 0 +v 0.666766583919525146484375 0.670637667179107666015625 0 +v 0.9767608642578125 0.60484540462493896484375 0 +v 0.88110291957855224609375 0.58127272129058837890625 0 +v 0.8817350864410400390625 0.692531406879425048828125 0 +v 0.95608341693878173828125 
0.643989980220794677734375 0 +v 0.806193768978118896484375 0.703888356685638427734375 0 +v 0.807318747043609619140625 0.5691006183624267578125 0 +v 0.9755213260650634765625 0.855803191661834716796875 0 +v 0.93421375751495361328125 0.61396586894989013671875 0 +v 0.94737040996551513671875 0.730855643749237060546875 0 +v 0.765325129032135009765625 0.748663485050201416015625 0 +v 0.9774949550628662109375 0.87650501728057861328125 0 +v 0.94137752056121826171875 0.799202382564544677734375 0 +v 0.90655529499053955078125 0.774047195911407470703125 0 +v 0.860550940036773681640625 0.727044045925140380859375 0 +v 0.683281123638153076171875 0.695625245571136474609375 0 +v 0.778695166110992431640625 0.777407348155975341796875 0 +v 0.90404415130615234375 0.690024793148040771484375 0 +v 0.756778419017791748046875 0.636060893535614013671875 0 +v 0.857722461223602294921875 0.457223355770111083984375 0 +v 0.951874256134033203125 0.5757510662078857421875 0 +v 0.9689714908599853515625 0.90494811534881591796875 0 +v 0.94020950794219970703125 0.750764667987823486328125 0 +v 0.9677944183349609375 0.5468847751617431640625 0 +v 0.740097343921661376953125 0.680514276027679443359375 0 +v 0.980579853057861328125 0.871785581111907958984375 0 +v 0.976225376129150390625 0.88979339599609375 0 +v 0.764561831951141357421875 0.698248326778411865234375 0 +v 0.684934318065643310546875 0.656761825084686279296875 0 +v 0.713766634464263916015625 0.639186680316925048828125 0 +v 0.822406589984893798828125 0.653420984745025634765625 0 +v 0.726342260837554931640625 0.536922931671142578125 0 +v 0.703217327594757080078125 0.634786188602447509765625 0 +v 0.8818962574005126953125 0.719619929790496826171875 0 +v 0.96638977527618408203125 0.50763547420501708984375 0 +v 0.93081843852996826171875 0.52076137065887451171875 0 +v 0.858777344226837158203125 0.458503067493438720703125 0 +v 0.9661471843719482421875 0.791879355907440185546875 0 +v 0.778171360492706298828125 0.844234764575958251953125 0 +v 
0.842854678630828857421875 0.818033158779144287109375 0 +v 0.97588360309600830078125 0.93256092071533203125 0 +v 0.809364736080169677734375 0.709035336971282958984375 0 +v 0.897542476654052734375 0.769967019557952880859375 0 +v 0.745232760906219482421875 0.52690732479095458984375 0 +v 0.727887928485870361328125 0.637462079524993896484375 0 +v 0.88905966281890869140625 0.737278401851654052734375 0 +v 0.91950714588165283203125 0.710455596446990966796875 0 +v 0.9371168613433837890625 0.923305034637451171875 0 +v 0.873556673526763916015625 0.4918920993804931640625 0 +v 0.91497051715850830078125 0.731743991374969482421875 0 +v 0.770872533321380615234375 0.735893666744232177734375 0 +v 0.866788327693939208984375 0.627734601497650146484375 0 +v 0.863470494747161865234375 0.56650602817535400390625 0 +v 0.727771103382110595703125 0.815199196338653564453125 0 +v 0.853605806827545166015625 0.889447689056396484375 0 +v 0.9217188358306884765625 0.805263698101043701171875 0 +v 0.723941266536712646484375 0.55917370319366455078125 0 +v 0.92229831218719482421875 0.4923613071441650390625 0 +v 0.873832046985626220703125 0.833981454372406005859375 0 +v 0.715143024921417236328125 0.55805110931396484375 0 +v 0.861122786998748779296875 0.61765682697296142578125 0 +v 0.9132840633392333984375 0.5245769023895263671875 0 +v 0.98510849475860595703125 0.783396422863006591796875 0 +v 0.9062311649322509765625 0.816364109516143798828125 0 +v 0.98788893222808837890625 0.60332286357879638671875 0 +v 0.94782161712646484375 0.728558599948883056640625 0 +v 0.9838535785675048828125 0.697330057621002197265625 0 +v 0.706871688365936279296875 0.649534046649932861328125 0 +v 0.9279758930206298828125 0.866860687732696533203125 0 +v 0.88826477527618408203125 0.860310494899749755859375 0 +v 0.773258984088897705078125 0.56769263744354248046875 0 +v 0.805462300777435302734375 0.58610785007476806640625 0 +v 0.787915527820587158203125 0.555294036865234375 0 +v 0.887700557708740234375 0.638608992099761962890625 0 
+v 0.786188304424285888671875 0.748022615909576416015625 0 +v 0.800798475742340087890625 0.54884624481201171875 0 +v 0.8843600749969482421875 0.769155323505401611328125 0 +v 0.821903765201568603515625 0.700528442859649658203125 0 +v 0.823471486568450927734375 0.866134583950042724609375 0 +v 0.768194496631622314453125 0.52771461009979248046875 0 +v 0.766590893268585205078125 0.688683211803436279296875 0 +v 0.707982122898101806640625 0.767209827899932861328125 0 +v 0.793281495571136474609375 0.659970343112945556640625 0 +v 0.965734004974365234375 0.62012577056884765625 0 +v 0.88728296756744384765625 0.765069782733917236328125 0 +v 0.99036824703216552734375 0.91215074062347412109375 0 +v 0.89863741397857666015625 0.53717005252838134765625 0 +v 0.88476455211639404296875 0.701977431774139404296875 0 +v 0.9078981876373291015625 0.709050595760345458984375 0 +v 0.835645854473114013671875 0.48345887660980224609375 0 +v 0.88118803501129150390625 0.9164187908172607421875 0 +v 0.98343372344970703125 0.90213096141815185546875 0 +v 0.769046843051910400390625 0.677894771099090576171875 0 +v 0.92961847782135009765625 0.667214572429656982421875 0 +v 0.798078835010528564453125 0.55099380016326904296875 0 +v 0.98046624660491943359375 0.58866202831268310546875 0 +v 0.710058152675628662109375 0.789869368076324462890625 0 +v 0.862559020519256591796875 0.5862772464752197265625 0 +v 0 0.9999997615814208984375 0 +v 0.034482769668102264404296875 0.9999997615814208984375 0 +v 0.06896547973155975341796875 0.9999997615814208984375 0 +v 0.103448249399662017822265625 0.9999997615814208984375 0 +v 0.1379310190677642822265625 0.9999997615814208984375 0 +v 0.17241372168064117431640625 0.9999997615814208984375 0 +v 0 0.655172288417816162109375 0 +v 0 0.689655005931854248046875 0 +v 0 0.724137723445892333984375 0 +v 0 0.758620560169219970703125 0 +v 0 0.793103277683258056640625 0 +v 0 0.827585995197296142578125 0 +v 0 0.862068831920623779296875 0 +v 0 0.89655148983001708984375 0 +v 0 
0.93103420734405517578125 0 +v 0 0.9655170440673828125 0 +v 0.02021836675703525543212890625 0.832619607448577880859375 0 +v 0.1182744204998016357421875 0.639920890331268310546875 0 +v 0.1433532536029815673828125 0.94466865062713623046875 0 +v 0.06414745748043060302734375 0.692471921443939208984375 0 +v 0.1317978203296661376953125 0.716327011585235595703125 0 +v 0.100226856768131256103515625 0.91948235034942626953125 0 +v 0.1494482457637786865234375 0.868125855922698974609375 0 +v 0.1238199770450592041015625 0.848008096218109130859375 0 +v 0.16469408571720123291015625 0.6214783191680908203125 0 +v 0.18619294464588165283203125 0.9443721771240234375 0 +v 0.02065097726881504058837890625 0.758378446102142333984375 0 +v 0.18563587963581085205078125 0.9527914524078369140625 0 +v 0.0164295993745326995849609375 0.92952907085418701171875 0 +v 0.097844459116458892822265625 0.862191379070281982421875 0 +v 0.18115092813968658447265625 0.788545310497283935546875 0 +v 0.05684803426265716552734375 0.696997106075286865234375 0 +v 0.18713082373142242431640625 0.90398371219635009765625 0 +v 0.033304624259471893310546875 0.9589824676513671875 0 +v 0.099614895880222320556640625 0.9453012943267822265625 0 +v 0.1154842674732208251953125 0.61848008632659912109375 0 +v 0.241418540477752685546875 0.662504374980926513671875 0 +v 0.24606311321258544921875 0.665858924388885498046875 0 +v 0.030235223472118377685546875 0.710336625576019287109375 0 +v 0.00270318915136158466339111328125 0.647196471691131591796875 0 +v 0.16046757996082305908203125 0.886304378509521484375 0 +v 0.16023041307926177978515625 0.661117374897003173828125 0 +v 0.06107853353023529052734375 0.9077327251434326171875 0 +v 0.029950313270092010498046875 0.737254083156585693359375 0 +v 0.06560485064983367919921875 0.856275975704193115234375 0 +v 0.028952531516551971435546875 0.89569103717803955078125 0 +v 0.0468063242733478546142578125 0.970731258392333984375 0 +v 0.0399927981197834014892578125 0.639704883098602294921875 0 +v 
0.21539814770221710205078125 0.858337461948394775390625 0 +v 0.107210732996463775634765625 0.817338883876800537109375 0 +v 0.0051529393531382083892822265625 0.694157660007476806640625 0 +v 0.1415064632892608642578125 0.865945279598236083984375 0 +v 0.06885193288326263427734375 0.705709874629974365234375 0 +v 0.220103800296783447265625 0.62289392948150634765625 0 +v 0.06918261945247650146484375 0.8791730403900146484375 0 +v 0.21383531391620635986328125 0.771225273609161376953125 0 +v 0.2286465167999267578125 0.677141010761260986328125 0 +v 0.0439754016697406768798828125 0.87952125072479248046875 0 +v 0.235834181308746337890625 0.62049973011016845703125 0 +v 0.0510754473507404327392578125 0.96551644802093505859375 0 +v 0.000664233986753970384597778320312 0.9416675567626953125 0 +v 0.1409574449062347412109375 0.96733760833740234375 0 +v 0.0429061539471149444580078125 0.700855433940887451171875 0 +v 0.100251711905002593994140625 0.758984386920928955078125 0 +v 0.0170604549348354339599609375 0.96705472469329833984375 0 +v 0.221218049526214599609375 0.683726251125335693359375 0 +v 0.103990830481052398681640625 0.633981525897979736328125 0 +v 0.098610319197177886962890625 0.801074683666229248046875 0 +v 0.0411797650158405303955078125 0.816420853137969970703125 0 +v 0.02476906217634677886962890625 0.831030905246734619140625 0 +v 0.0445710904896259307861328125 0.91328334808349609375 0 +v 0.1474000513553619384765625 0.756287038326263427734375 0 +v 0.20018984377384185791015625 0.673653066158294677734375 0 +v 0.19323624670505523681640625 0.685466945171356201171875 0 +v 0.0162528119981288909912109375 0.642803251743316650390625 0 +v 0.038044683635234832763671875 0.95654392242431640625 0 +v 0.105094648897647857666015625 0.98257362842559814453125 0 +v 0.1274988353252410888671875 0.840640723705291748046875 0 +v 0.758620560169219970703125 0 0 +v 0.793103277683258056640625 0 0 +v 0.827585995197296142578125 0 0 +v 0.862068831920623779296875 0 0 +v 0.89655148983001708984375 0 0 +v 
0.93103420734405517578125 0 0 +v 0.9655170440673828125 0 0 +v 0.9999997615814208984375 0 0 +v 0.9999997615814208984375 0.034482769668102264404296875 0 +v 0.9999997615814208984375 0.06896547973155975341796875 0 +v 0.9999997615814208984375 0.103448249399662017822265625 0 +v 0.9999997615814208984375 0.1379310190677642822265625 0 +v 0.9999997615814208984375 0.17241372168064117431640625 0 +v 0.98837363719940185546875 0.102044798433780670166015625 0 +v 0.820993006229400634765625 0.097101248800754547119140625 0 +v 0.837944686412811279296875 0.096098400652408599853515625 0 +v 0.739263355731964111328125 0.0391877777874469757080078125 0 +v 0.828939855098724365234375 0.004695474170148372650146484375 0 +v 0.863855421543121337890625 0.1175318062305450439453125 0 +v 0.90371954441070556640625 0.083422400057315826416015625 0 +v 0.770580589771270751953125 0.1469466388225555419921875 0 +v 0.799795687198638916015625 0.07695643603801727294921875 0 +v 0.815523684024810791015625 0.15941442549228668212890625 0 +v 0.98940956592559814453125 0.06530420482158660888671875 0 +v 0.92496669292449951171875 0.0460073240101337432861328125 0 +v 0.862318336963653564453125 0.0486903078854084014892578125 0 +v 0.96277010440826416015625 0.0168716870248317718505859375 0 +v 0.790839970111846923828125 0.097242929041385650634765625 0 +v 0.91978251934051513671875 0.036033801734447479248046875 0 +v 0.97049343585968017578125 0.1334393918514251708984375 0 +v 0.862062394618988037109375 0.097257949411869049072265625 0 +v 0.859580695629119873046875 0.05853490531444549560546875 0 +v 0.9591419696807861328125 0.103298105299472808837890625 0 +v 0.867166936397552490234375 0.029190234839916229248046875 0 +v 0.802893221378326416015625 0.15914620459079742431640625 0 +v 0.9148628711700439453125 0.0493489392101764678955078125 0 +v 0.8989608287811279296875 0.18551786243915557861328125 0 +v 0.89986646175384521484375 0.05687724053859710693359375 0 +v 0.980485439300537109375 0.096450842916965484619140625 0 +v 
0.9031493663787841796875 0.0429241545498371124267578125 0 +v 0.99980831146240234375 0.19725944101810455322265625 0 +v 0.9958150386810302734375 0.1293557584285736083984375 0 +v 0.859002411365509033203125 0.1520272195339202880859375 0 +v 0.96239483356475830078125 0.1105222404003143310546875 0 +v 0.94462549686431884765625 0.1355483829975128173828125 0 +v 0.807563602924346923828125 0.0510073192417621612548828125 0 +v 0.992666721343994140625 0.1186154782772064208984375 0 +v 0.96509468555450439453125 0.1512301862239837646484375 0 +v 0.99607098102569580078125 0.100233413279056549072265625 0 +v 0.9608123302459716796875 0.06512145698070526123046875 0 +v 0.944032192230224609375 0.036691419780254364013671875 0 +v 0.98327457904815673828125 0.030426494777202606201171875 0 +v 0.87545108795166015625 0.07382623851299285888671875 0 +v 0.20689649879932403564453125 0.9999997615814208984375 0 +v 0.241379261016845703125 0.9999997615814208984375 0 +v 0.275862038135528564453125 0.9999997615814208984375 0 +v 0.310344755649566650390625 0.9999997615814208984375 0 +v 0.3448275029659271240234375 0.9999997615814208984375 0 +v 0.3793102800846099853515625 0.9999997615814208984375 0 +v 0.4137929975986480712890625 0.9999997615814208984375 0 +v 0.448275744915008544921875 0.9999997615814208984375 0 +v 0.48275852203369140625 0.9999997615814208984375 0 +v 0.5172412395477294921875 0.9999997615814208984375 0 +v 0.4236547052860260009765625 0.645893990993499755859375 0 +v 0.437587082386016845703125 0.891772747039794921875 0 +v 0.461479246616363525390625 0.780528962612152099609375 0 +v 0.26455557346343994140625 0.774233520030975341796875 0 +v 0.456150233745574951171875 0.56843388080596923828125 0 +v 0.3185688555240631103515625 0.667410194873809814453125 0 +v 0.2230815887451171875 0.952748775482177734375 0 +v 0.447125256061553955078125 0.846408545970916748046875 0 +v 0.4238549768924713134765625 0.6063930988311767578125 0 +v 0.4358648359775543212890625 0.89192306995391845703125 0 +v 
0.3453516066074371337890625 0.928081035614013671875 0 +v 0.31179583072662353515625 0.696343362331390380859375 0 +v 0.536579132080078125 0.8966710567474365234375 0 +v 0.273541986942291259765625 0.798046648502349853515625 0 +v 0.3556126654148101806640625 0.94043171405792236328125 0 +v 0.3381588757038116455078125 0.96156990528106689453125 0 +v 0.231701552867889404296875 0.94931852817535400390625 0 +v 0.2930202484130859375 0.848943412303924560546875 0 +v 0.497391223907470703125 0.639472305774688720703125 0 +v 0.4205393493175506591796875 0.55736863613128662109375 0 +v 0.5361773967742919921875 0.681392371654510498046875 0 +v 0.3926756083965301513671875 0.95640552043914794921875 0 +v 0.4581387042999267578125 0.59098398685455322265625 0 +v 0.258683979511260986328125 0.849038183689117431640625 0 +v 0.4012593924999237060546875 0.92929112911224365234375 0 +v 0.3605454862117767333984375 0.828656733036041259765625 0 +v 0.2965562343597412109375 0.99201095104217529296875 0 +v 0.3742960989475250244140625 0.748788058757781982421875 0 +v 0.50238931179046630859375 0.9425833225250244140625 0 +v 0.3828079402446746826171875 0.89538562297821044921875 0 +v 0.274823486804962158203125 0.592230319976806640625 0 +v 0.5249702930450439453125 0.750594794750213623046875 0 +v 0.3335073888301849365234375 0.924158573150634765625 0 +v 0.530537128448486328125 0.9221112728118896484375 0 +v 0.5027205944061279296875 0.9223477840423583984375 0 +v 0.44639432430267333984375 0.9078752994537353515625 0 +v 0.304446280002593994140625 0.99796199798583984375 0 +v 0.3782450854778289794921875 0.97952663898468017578125 0 +v 0.3972566425800323486328125 0.99927771091461181640625 0 +v 0.3518929183483123779296875 0.721406519412994384765625 0 +v 0.4570586681365966796875 0.9799621105194091796875 0 +v 0.4237062633037567138671875 0.857124745845794677734375 0 +v 0.51814973354339599609375 0.842776715755462646484375 0 +v 0.4329841434955596923828125 0.528234004974365234375 0 +v 0.3494401872158050537109375 
0.781479418277740478515625 0 +v 0.3925686776638031005859375 0.87837231159210205078125 0 +v 0.495970189571380615234375 0.681739747524261474609375 0 +v 0.267206966876983642578125 0.87739861011505126953125 0 +v 0.3574245870113372802734375 0.62166535854339599609375 0 +v 0.288569927215576171875 0.874399721622467041015625 0 +v 0.20825321972370147705078125 0.93239367008209228515625 0 +v 0.29288852214813232421875 0.715052425861358642578125 0 +v 0.4731428623199462890625 0.8822834491729736328125 0 +v 0.3735109269618988037109375 0.51563823223114013671875 0 +v 0.448369085788726806640625 0.56784594058990478515625 0 +v 0.484965503215789794921875 0.9011619091033935546875 0 +v 0.475588977336883544921875 0.820022284984588623046875 0 +v 0.3302669227123260498046875 0.813879907131195068359375 0 +v 0.21766282618045806884765625 0.97381842136383056640625 0 +v 0.48258411884307861328125 0.729827463626861572265625 0 +v 0.30417346954345703125 0.59106528759002685546875 0 +v 0.51492214202880859375 0.93841183185577392578125 0 +v 0.475826084613800048828125 0.708770215511322021484375 0 +v 0.224413573741912841796875 0.9536755084991455078125 0 +v 0.475324690341949462890625 0.9692056179046630859375 0 +v 0.3106291294097900390625 0.791227281093597412109375 0 +v 0.26464951038360595703125 0.97509443759918212890625 0 +v 0.3979185521602630615234375 0.774500787258148193359375 0 +v 0.295949757099151611328125 0.9292914867401123046875 0 +v 0.265905559062957763671875 0.828146398067474365234375 0 +v 0.3296510875225067138671875 0.791761219501495361328125 0 +v 0.3498975932598114013671875 0.706410348415374755859375 0 +v 0.3165501058101654052734375 0.826805055141448974609375 0 +v 0.4260022938251495361328125 0.89270687103271484375 0 +v 0.48627197742462158203125 0.58815133571624755859375 0 +v 0.3798938095569610595703125 0.56045043468475341796875 0 +v 0.308527886867523193359375 0.9421844482421875 0 +v 0.4582121372222900390625 0.757406413555145263671875 0 +v 0.3956335484981536865234375 0.95546567440032958984375 0 +v 
0.4175390899181365966796875 0.781581580638885498046875 0 +v 0.259377300739288330078125 0.758193552494049072265625 0 +v 0.4598751068115234375 0.5736095905303955078125 0 +v 0.47332608699798583984375 0.675125777721405029296875 0 +v 0.29217731952667236328125 0.98298990726470947265625 0 +v 0.3203261792659759521484375 0.94680690765380859375 0 +v 0.4353533685207366943359375 0.795456349849700927734375 0 +v 0.4137801229953765869140625 0.686380088329315185546875 0 +v 0.3558890521526336669921875 0.810301959514617919921875 0 +v 0.3180285394191741943359375 0.9580669403076171875 0 +v 0.493406832218170166015625 0.657043516635894775390625 0 +v 0.461050093173980712890625 0.93516027927398681640625 0 +v 0.3970135748386383056640625 0.704401314258575439453125 0 +v 0.27155101299285888671875 0.60754525661468505859375 0 +v 0.458722770214080810546875 0.817453086376190185546875 0 +v 0.44287288188934326171875 0.89808750152587890625 0 +v 0.531494140625 0.802068293094635009765625 0 +v 0.3782677352428436279296875 0.657862007617950439453125 0 +v 0.3594530522823333740234375 0.900367259979248046875 0 +v 0.44679319858551025390625 0.836990177631378173828125 0 +v 0.4047735631465911865234375 0.6012771129608154296875 0 +v 0.3172601163387298583984375 0.97926998138427734375 0 +v 0.4283784329891204833984375 0.92315876483917236328125 0 +v 0.490966260433197021484375 0.717559278011322021484375 0 +v 0.49843466281890869140625 0.701785624027252197265625 0 +v 0.3486650884151458740234375 0.848832905292510986328125 0 +v 0.55172407627105712890625 0.9999997615814208984375 0 +v 0.58620679378509521484375 0.9999997615814208984375 0 +v 0.62068951129913330078125 0.9999997615814208984375 0 +v 0.655172288417816162109375 0.9999997615814208984375 0 +v 0.689655005931854248046875 0.9999997615814208984375 0 +v 0.724137723445892333984375 0.9999997615814208984375 0 +v 0.758620560169219970703125 0.9999997615814208984375 0 +v 0.793103277683258056640625 0.9999997615814208984375 0 +v 0.827585995197296142578125 
0.9999997615814208984375 0 +v 0.862068831920623779296875 0.9999997615814208984375 0 +v 0.89655148983001708984375 0.9999997615814208984375 0 +v 0.93103420734405517578125 0.9999997615814208984375 0 +v 0.9655170440673828125 0.9999997615814208984375 0 +v 0.9999997615814208984375 0.9999997615814208984375 0 +v 0.9999997615814208984375 0.9655170440673828125 0 +v 0.548813343048095703125 0.715189158916473388671875 0 +v 0.5680444240570068359375 0.92559635639190673828125 0 +v 0.778156578540802001953125 0.870011985301971435546875 0 +v 0.575946331024169921875 0.929296016693115234375 0 +v 0.735193789005279541015625 0.96218836307525634765625 0 +v 0.574325084686279296875 0.653200685977935791015625 0 +v 0.714241087436676025390625 0.9988467693328857421875 0 +v 0.722055375576019287109375 0.866382181644439697265625 0 +v 0.58831703662872314453125 0.831048309803009033203125 0 +v 0.628981649875640869140625 0.872650444507598876953125 0 +v 0.669916331768035888671875 0.785152733325958251953125 0 +v 0.630447804927825927734375 0.874287784099578857421875 0 +v 0.9729192256927490234375 0.96083438396453857421875 0 +v 0.577542781829833984375 0.959433078765869140625 0 +v 0.796391308307647705078125 0.95916640758514404296875 0 +v 0.820766985416412353515625 0.908843517303466796875 0 +v 0.97425591945648193359375 0.9903447628021240234375 0 +v 0.814966261386871337890625 0.98549115657806396484375 0 +v 0.730708897113800048828125 0.88171994686126708984375 0 +v 0.633997499942779541015625 0.867289245128631591796875 0 +v 0.699574887752532958984375 0.9679653644561767578125 0 +v 0.6223843097686767578125 0.710528194904327392578125 0 +v 0.676242291927337646484375 0.8792345523834228515625 0 +v 0.5771400928497314453125 0.695269882678985595703125 0 +v 0.671956956386566162109375 0.94886076450347900390625 0 +v 0.696482241153717041015625 0.813678443431854248046875 0 +v 0.6232945919036865234375 0.886960506439208984375 0 +v 0.5413806438446044921875 0.92330586910247802734375 0 +v 0.829897224903106689453125 
0.96828615665435791015625 0 +v 0.739883720874786376953125 0.89806210994720458984375 0 +v 0.591026782989501953125 0.659176290035247802734375 0 +v 0.637582480907440185546875 0.813053667545318603515625 0 +v 0.773083388805389404296875 0.958740711212158203125 0 +v 0.58969461917877197265625 0.745397865772247314453125 0 +v 0.848150193691253662109375 0.9358317852020263671875 0 +v 0.751021444797515869140625 0.9272115230560302734375 0 +v 0.690784633159637451171875 0.9873485565185546875 0 +v 0.8806779384613037109375 0.94502651691436767578125 0 +v 0.60021269321441650390625 0.96319711208343505859375 0 +v 0.57770931720733642578125 0.779179275035858154296875 0 +v 0.876026630401611328125 0.918546199798583984375 0 +v 0.55464494228363037109375 0.826861441135406494140625 0 +v 0.58863890171051025390625 0.753239929676055908203125 0 +v 0.639622032642364501953125 0.94854009151458740234375 0 +v 0.778276026248931884765625 0.848345100879669189453125 0 +v 0.94385063648223876953125 0.96492469310760498046875 0 +v 0.626706302165985107421875 0.727543413639068603515625 0 +v 0.652124464511871337890625 0.853245794773101806640625 0 +v 0.754900872707366943359375 0.88502156734466552734375 0 +v 0.55258119106292724609375 0.851808369159698486328125 0 +v 0.630831658840179443359375 0.9979937076568603515625 0 +v 0.800255477428436279296875 0.95556819438934326171875 0 +v 0.798689067363739013671875 0.92345535755157470703125 0 +v 0.720265686511993408203125 0.9253947734832763671875 0 +v 0.816150605678558349609375 0.9114506244659423828125 0 +v 0.5488491058349609375 0.815224826335906982421875 0 +v 0.98299884796142578125 0.9822475910186767578125 0 +v 0.693746864795684814453125 0.9163401126861572265625 0 +v 0.9550464153289794921875 0.97928607463836669921875 0 +v 0.677508175373077392578125 0.93786418437957763671875 0 +v 0.8830773830413818359375 0.9665749073028564453125 0 +v 0.774747431278228759765625 0.9942328929901123046875 0 +v 0.703188836574554443359375 0.94927966594696044921875 0 +v 0.694087207317352294921875 
0.781192719936370849609375 0 +v 0.54335248470306396484375 0.739632308483123779296875 0 +v 0.58023774623870849609375 0.774879634380340576171875 0 +v 0.6174972057342529296875 0.98537838459014892578125 0 +v 0.750536501407623291015625 0.998022556304931640625 0 +v 0.5339777469635009765625 0.944202423095703125 0 +v 0.615276336669921875 0.792498767375946044921875 0 +v 0.671527564525604248046875 0.842973172664642333984375 0 +v 0.712892115116119384765625 0.830476224422454833984375 0 +v 0.667168676853179931640625 0.96420013904571533203125 0 +v 0.649656355381011962890625 0.88099777698516845703125 0 +v 0.5559375286102294921875 0.741602957248687744140625 0 +v 0.770543873310089111328125 0.9082481861114501953125 0 +v 0.738151371479034423828125 0.9064939022064208984375 0 +v 0.804878294467926025390625 0.9983546733856201171875 0 +v 0.91973769664764404296875 0.96076619625091552734375 0 +v 0.640564382076263427734375 0.688648164272308349609375 0 +v 0 0.20689649879932403564453125 0 +v 0 0.241379261016845703125 0 +v 0 0.275862038135528564453125 0 +v 0 0.310344755649566650390625 0 +v 0 0.3448275029659271240234375 0 +v 0 0.3793102800846099853515625 0 +v 0 0.4137929975986480712890625 0 +v 0 0.448275744915008544921875 0 +v 0 0.48275852203369140625 0 +v 0 0.5172412395477294921875 0 +v 0 0.55172407627105712890625 0 +v 0 0.58620679378509521484375 0 +v 0 0.62068951129913330078125 0 +v 0.0187898240983486175537109375 0.61763536930084228515625 0 +v 0.19658233225345611572265625 0.3687250912189483642578125 0 +v 0.3179830610752105712890625 0.4142629206180572509765625 0 +v 0.248753070831298828125 0.57615721225738525390625 0 +v 0.0191931687295436859130859375 0.301574766635894775390625 0 +v 0.1354740560054779052734375 0.29828226566314697265625 0 +v 0.16249288618564605712890625 0.61555945873260498046875 0 +v 0.011714098043739795684814453125 0.3599779903888702392578125 0 +v 0.05802915990352630615234375 0.4344165623188018798828125 0 +v 0.20747001469135284423828125 0.4246853888034820556640625 0 +v 
0.2776286602020263671875 0.58678424358367919921875 0 +v 0.1448477208614349365234375 0.488056182861328125 0 +v 0.28173005580902099609375 0.586410045623779296875 0 +v 0.06395523250102996826171875 0.485627472400665283203125 0 +v 0.05537430942058563232421875 0.30159854888916015625 0 +v 0.2621181011199951171875 0.456140458583831787109375 0 +v 0.259422481060028076171875 0.3738130629062652587890625 0 +v 0.06271295249462127685546875 0.4240321815013885498046875 0 +v 0.0156060419976711273193359375 0.4287956058979034423828125 0 +v 0.06807403266429901123046875 0.251940906047821044921875 0 +v 0.23262691497802734375 0.3485192954540252685546875 0 +v 0.2724368572235107421875 0.3790567815303802490234375 0 +v 0.07086978852748870849609375 0.2927939891815185546875 0 +v 0.1523546874523162841796875 0.4174862802028656005859375 0 +v 0.1312892735004425048828125 0.6041176319122314453125 0 +v 0.248413383960723876953125 0.5058662891387939453125 0 +v 0.25364243984222412109375 0.44613540172576904296875 0 +v 0.104627884924411773681640625 0.3484759032726287841796875 0 +v 0.20492364466190338134765625 0.3416980206966400146484375 0 +v 0.00788408331573009490966796875 0.3726789653301239013671875 0 +v 0.089494504034519195556640625 0.4059422314167022705078125 0 +v 0.02431320585310459136962890625 0.3426108658313751220703125 0 +v 0.227759420871734619140625 0.4103014767169952392578125 0 +v 0.17477197945117950439453125 0.3891346156597137451171875 0 +v 0.17465831339359283447265625 0.3279879391193389892578125 0 +v 0.096803940832614898681640625 0.3433916270732879638671875 0 +v 0.06263600289821624755859375 0.2419016361236572265625 0 +v 0.1173155605792999267578125 0.27125203609466552734375 0 +v 0.1104770600795745849609375 0.4050355255603790283203125 0 +v 0.1097844541072845458984375 0.6063079833984375 0 +v 0.16212023794651031494140625 0.55968225002288818359375 0 +v 0.277340233325958251953125 0.524379730224609375 0 +v 0.300403594970703125 0.54950046539306640625 0 +v 0.1390726864337921142578125 
0.4269042909145355224609375 0 +v 0.250398159027099609375 0.4833934307098388671875 0 +v 0.1124272644519805908203125 0.21243430674076080322265625 0 +v 0.18303324282169342041015625 0.4030258953571319580078125 0 +v 0.17700572311878204345703125 0.48351800441741943359375 0 +v 0.1403159797191619873046875 0.3589951694011688232421875 0 +v 0.1201201379299163818359375 0.3344736397266387939453125 0 +v 0.16235788166522979736328125 0.2908408641815185546875 0 +v 0.17979522049427032470703125 0.3455055654048919677734375 0 +v 0.1114960014820098876953125 0.45896971225738525390625 0 +v 0.012171146459877490997314453125 0.3228294551372528076171875 0 +v 0.22956740856170654296875 0.5068628787994384765625 0 +v 0.287544429302215576171875 0.456703484058380126953125 0 +v 0.02095007337629795074462890625 0.4116154015064239501953125 0 +v 0.1272939741611480712890625 0.525808811187744140625 0 +v 0.1418172419071197509765625 0.3167305886745452880859375 0 +v 0.02427267469465732574462890625 0.4301159083843231201171875 0 +v 0.20279599726200103759765625 0.4235875308513641357421875 0 +v 0.1280208528041839599609375 0.58319270610809326171875 0 +v 0.108165480196475982666015625 0.3923188745975494384765625 0 +v 0.102446235716342926025390625 0.3970257341861724853515625 0 +v 0.276649653911590576171875 0.50634276866912841796875 0 +v 0.02457701601088047027587890625 0.633986771106719970703125 0 +v 0.2991535663604736328125 0.3884040415287017822265625 0 +v 0.19899089634418487548828125 0.3674752414226531982421875 0 +v 0.0194624625146389007568359375 0.3992222845554351806640625 0 +v 0.20362086594104766845703125 0.56631147861480712890625 0 +v 0.1165585815906524658203125 0.3576389253139495849609375 0 +v 0.004654823802411556243896484375 0.4248538315296173095703125 0 +v 0.1397457420825958251953125 0.3305962383747100830078125 0 +v 0.0510530360043048858642578125 0.3312688171863555908203125 0 +v 0.0142515264451503753662109375 0.3421038091182708740234375 0 +v 0.305046617984771728515625 0.55798733234405517578125 0 +v 
0.25805914402008056640625 0.5323202610015869140625 0 +v 0.16892607510089874267578125 0.3740625083446502685546875 0 +v 0.078246094286441802978515625 0.3712868392467498779296875 0 +v 0.28715264797210693359375 0.5482561588287353515625 0 +v 0.083791352808475494384765625 0.5161235332489013671875 0 +v 0.101107455790042877197265625 0.3183788359165191650390625 0 +v 0.16814209520816802978515625 0.5561330318450927734375 0 +v 0.20126672089099884033203125 0.487147986888885498046875 0 +v 0.1341736018657684326171875 0.3165410459041595458984375 0 +v 0.3214728534221649169921875 0.474184691905975341796875 0 +v 0.05790923535823822021484375 0.2913887500762939453125 0 +v 0.0423640497028827667236328125 0.227740943431854248046875 0 +v 0.221823990345001220703125 0.49394512176513671875 0 +v 0.0455106981098651885986328125 0.19798274338245391845703125 0 +v 0.1503497064113616943359375 0.558283329010009765625 0 +v 0.05975787341594696044921875 0.3502711355686187744140625 0 +v 0.0424544699490070343017578125 0.51448023319244384765625 0 +v 0.06859682500362396240234375 0.228907525539398193359375 0 +v 0 0 0 +v 0.034482769668102264404296875 0 0 +v 0.06896547973155975341796875 0 0 +v 0 0.034482769668102264404296875 0 +v 0 0.06896547973155975341796875 0 +v 0 0.103448249399662017822265625 0 +v 0 0.1379310190677642822265625 0 +v 0 0.17241372168064117431640625 0 +v 0.07103602588176727294921875 0.087129272520542144775390625 0 +v 0.3595078289508819580078125 0.4370318353176116943359375 0 +v 0.21038253605365753173828125 0.1289262473583221435546875 0 +v 0.3154282867908477783203125 0.3637106716632843017578125 0 +v 0.20887668430805206298828125 0.16130949556827545166015625 0 +v 0.15896953642368316650390625 0.1103751361370086669921875 0 +v 0.282806873321533203125 0.1201965510845184326171875 0 +v 0.296140134334564208984375 0.1187277138233184814453125 0 +v 0.2894060611724853515625 0.18319131433963775634765625 0 +v 0.227414548397064208984375 0.254356443881988525390625 0 +v 0.02467870153486728668212890625 
0.06724964082241058349609375 0 +v 0.3200170695781707763671875 0.3834638297557830810546875 0 +v 0.253941595554351806640625 0.21331192553043365478515625 0 +v 0.3741699159145355224609375 0.463575303554534912109375 0 +v 0.1324876248836517333984375 0.0534271486103534698486328125 0 +v 0.079522050917148590087890625 0.089602984488010406494140625 0 +v 0.270327866077423095703125 0.1314827501773834228515625 0 +v 0.28351879119873046875 0.3799268901348114013671875 0 +v 0.4304023683071136474609375 0.5100166797637939453125 0 +v 0.277596056461334228515625 0.1288605630397796630859375 0 +v 0.240020215511322021484375 0.16053880751132965087890625 0 +v 0.3553687632083892822265625 0.3567068278789520263671875 0 +v 0.0163285098969936370849609375 0.18523229658603668212890625 0 +v 0.3267007768154144287109375 0.23274409770965576171875 0 +v 0.221160829067230224609375 0.25319111347198486328125 0 +v 0.249419987201690673828125 0.105906106531620025634765625 0 +v 0.23780715465545654296875 0.17185308039188385009765625 0 +v 0.310380756855010986328125 0.3730347454547882080078125 0 +v 0.0396155007183551788330078125 0.05994425714015960693359375 0 +v 0.3621889650821685791015625 0.470648825168609619140625 0 +v 0.283999919891357421875 0.23841321468353271484375 0 +v 0.4322814047336578369140625 0.52199614048004150390625 0 +v 0.1173204481601715087890625 0.107004143297672271728515625 0 +v 0.24155700206756591796875 0.16902537643909454345703125 0 +v 0.080531992018222808837890625 0.085310913622379302978515625 0 +v 0.1533688604831695556640625 0.19959612190723419189453125 0 +v 0.1173802912235260009765625 0.15984524786472320556640625 0 +v 0.003860353492200374603271484375 0.17857994139194488525390625 0 +v 0.247684955596923828125 0.3182334005832672119140625 0 +v 0.21645720303058624267578125 0.16604776680469512939453125 0 +v 0.102413751184940338134765625 0.15638329088687896728515625 0 +v 0.28283679485321044921875 0.3396309316158294677734375 0 +v 0.1478013098239898681640625 0.256916582584381103515625 0 +v 
0.3165424764156341552734375 0.446876823902130126953125 0 +v 0.4014278352260589599609375 0.4166916906833648681640625 0 +v 0.17537201941013336181640625 0.1158984601497650146484375 0 +v 0.298187315464019775390625 0.1509348452091217041015625 0 +v 0.1403839290142059326171875 0.2273623943328857421875 0 +v 0.3223334252834320068359375 0.3165006339550018310546875 0 +v 0.254382312297821044921875 0.26530325412750244140625 0 +v 0.0053100571967661380767822265625 0.011355099268257617950439453125 0 +v 0.278325259685516357421875 0.18589754402637481689453125 0 +v 0.0020646448247134685516357421875 0.19891126453876495361328125 0 +v 0.230571210384368896484375 0.2687089443206787109375 0 +v 0.103578425943851470947265625 0.0180963836610317230224609375 0 +v 0.276337087154388427734375 0.3695234358310699462890625 0 +v 0.16981942951679229736328125 0.1483788788318634033203125 0 +v 0.18581672012805938720703125 0.104736067354679107666015625 0 +v 0.085794605314731597900390625 0.06268887221813201904296875 0 +v 0.2781164646148681640625 0.16931267082691192626953125 0 +v 0.02135866321623325347900390625 0.102316774427890777587890625 0 +v 0.23752307891845703125 0.27130603790283203125 0 +v 0.295891940593719482421875 0.303291857242584228515625 0 +v 0.219860732555389404296875 0.274295628070831298828125 0 +v 0.3135904967784881591796875 0.3655389249324798583984375 0 +v 0.1183494031429290771484375 0.02519028820097446441650390625 0 +v 0.20986039936542510986328125 0.1323056519031524658203125 0 +v 0.0494997389614582061767578125 0.101854600012302398681640625 0 +v 0.298750221729278564453125 0.255063712596893310546875 0 +v 0.3198337852954864501953125 0.19645096361637115478515625 0 +v 0.19362325966358184814453125 0.1122499406337738037109375 0 +v 0.1384368240833282470703125 0.19399078190326690673828125 0 +v 0.103448249399662017822265625 0 0 +v 0.1379310190677642822265625 0 0 +v 0.17241372168064117431640625 0 0 +v 0.20689649879932403564453125 0 0 +v 0.241379261016845703125 0 0 +v 0.275862038135528564453125 0 0 +v 
0.19999648630619049072265625 0.0185217820107936859130859375 0 +v 0.240828692913055419921875 0.100293911993503570556640625 0 +v 0.1310552060604095458984375 0.012036201544106006622314453125 0 +v 0.20974989235401153564453125 0.1157031953334808349609375 0 +v 0.19056685268878936767578125 0.0191228948533535003662109375 0 +v 0.221396386623382568359375 0.100014068186283111572265625 0 +v 0.265039622783660888671875 0.06614945828914642333984375 0 +v 0.30419862270355224609375 0.07535903155803680419921875 0 +v 0.21145476400852203369140625 0.05938319861888885498046875 0 +v 0.26563251018524169921875 0.013508674688637256622314453125 0 +v 0.259600162506103515625 0.06007795035839080810546875 0 +v 0.284885466098785400390625 0.103988029062747955322265625 0 +v 0.9999997615814208984375 0.20689649879932403564453125 0 +v 0.9999997615814208984375 0.241379261016845703125 0 +v 0.9999997615814208984375 0.275862038135528564453125 0 +v 0.9999997615814208984375 0.310344755649566650390625 0 +v 0.9999997615814208984375 0.3448275029659271240234375 0 +v 0.9999997615814208984375 0.3793102800846099853515625 0 +v 0.9999997615814208984375 0.4137929975986480712890625 0 +v 0.9999997615814208984375 0.448275744915008544921875 0 +v 0.60276329517364501953125 0.5448830127716064453125 0 +v 0.96366250514984130859375 0.3834414184093475341796875 0 +v 0.570196628570556640625 0.43860137462615966796875 0 +v 0.9764592647552490234375 0.4686510562896728515625 0 +v 0.677816331386566162109375 0.270007908344268798828125 0 +v 0.59204185009002685546875 0.572251796722412109375 0 +v 0.699479043483734130859375 0.297436892986297607421875 0 +v 0.813797652721405029296875 0.3965056240558624267578125 0 +v 0.725254118442535400390625 0.50132429599761962890625 0 +v 0.660173356533050537109375 0.29007756710052490234375 0 +v 0.618015289306640625 0.4287686049938201904296875 0 +v 0.56996476650238037109375 0.59087264537811279296875 0 +v 0.652103126049041748046875 0.4314183294773101806640625 0 +v 0.89654636383056640625 
0.3675617873668670654296875 0 +v 0.697428643703460693359375 0.45354259014129638671875 0 +v 0.729990422725677490234375 0.17162962257862091064453125 0 +v 0.793697535991668701171875 0.2239246368408203125 0 +v 0.53563272953033447265625 0.58990991115570068359375 0 +v 0.730121791362762451171875 0.3119449615478515625 0 +v 0.739550650119781494140625 0.490458667278289794921875 0 +v 0.679392635822296142578125 0.453696727752685546875 0 +v 0.99033868312835693359375 0.21689696609973907470703125 0 +v 0.716859519481658935546875 0.3960596024990081787109375 0 +v 0.55219233036041259765625 0.584475994110107421875 0 +v 0.96193611621856689453125 0.292147457599639892578125 0 +v 0.981829166412353515625 0.478370189666748046875 0 +v 0.822117507457733154296875 0.18984784185886383056640625 0 +v 0.672047674655914306640625 0.24536716938018798828125 0 +v 0.8820412158966064453125 0.45860385894775390625 0 +v 0.724167406558990478515625 0.3990252315998077392578125 0 +v 0.699621856212615966796875 0.3277203142642974853515625 0 +v 0.869488298892974853515625 0.454162299633026123046875 0 +v 0.638761579990386962890625 0.490305244922637939453125 0 +v 0.783234298229217529296875 0.288398444652557373046875 0 +v 0.706574499607086181640625 0.4148567616939544677734375 0 +v 0.95095241069793701171875 0.233420193195343017578125 0 +v 0.839188992977142333984375 0.237741768360137939453125 0 +v 0.994400501251220703125 0.4518215656280517578125 0 +v 0.8967609405517578125 0.4067332446575164794921875 0 +v 0.6003921031951904296875 0.58873951435089111328125 0 +v 0.9521424770355224609375 0.300028860569000244140625 0 +v 0.696462929248809814453125 0.247398674488067626953125 0 +v 0.672582089900970458984375 0.52893984317779541015625 0 +v 0.60724925994873046875 0.477646410465240478515625 0 +v 0.98342597484588623046875 0.3998015820980072021484375 0 +v 0.497776806354522705078125 0.581081867218017578125 0 +v 0.671383321285247802734375 0.3447180092334747314453125 0 +v 0.773455321788787841796875 0.456409454345703125 0 +v 
0.759282290935516357421875 0.3645445406436920166015625 0 +v 0.99189007282257080078125 0.3767412006855010986328125 0 +v 0.675688922405242919921875 0.244889438152313232421875 0 +v 0.92275631427764892578125 0.294076621532440185546875 0 +v 0.9543335437774658203125 0.3519361317157745361328125 0 +v 0.811938345432281494140625 0.47938442230224609375 0 +v 0.733288943767547607421875 0.4097261130809783935546875 0 +v 0.727546870708465576171875 0.2899134159088134765625 0 +v 0.795590221881866455078125 0.3445303738117218017578125 0 +v 0.62116909027099609375 0.49817943572998046875 0 +v 0.810838401317596435546875 0.3481918275356292724609375 0 +v 0.757363975048065185546875 0.3145731985569000244140625 0 +v 0.657318770885467529296875 0.51732599735260009765625 0 +v 0.773109853267669677734375 0.21687020361423492431640625 0 +v 0.718626201152801513671875 0.3359774649143218994140625 0 +v 0.734813630580902099609375 0.17649932205677032470703125 0 +v 0.93916070461273193359375 0.5063121318817138671875 0 +v 0.719388902187347412109375 0.3499927818775177001953125 0 +v 0.823717534542083740234375 0.2327725887298583984375 0 +v 0.704947888851165771484375 0.4186367690563201904296875 0 +v 0.639462649822235107421875 0.52067768573760986328125 0 +v 0.61505794525146484375 0.5524389743804931640625 0 +v 0.956122875213623046875 0.3304404914379119873046875 0 +v 0.751032173633575439453125 0.1559778749942779541015625 0 +v 0.590585231781005859375 0.4355314671993255615234375 0 +v 0.664665400981903076171875 0.4230543673038482666015625 0 +v 0.668218076229095458984375 0.286716639995574951171875 0 +v 0.652999579906463623046875 0.3442890942096710205078125 0 +v 0.627160489559173583984375 0.50245296955108642578125 0 +v 0.93825590610504150390625 0.2445695400238037109375 0 +v 0.664196908473968505859375 0.4016880691051483154296875 0 +v 0.569286823272705078125 0.51208055019378662109375 0 +v 0.9717628955841064453125 0.3638446629047393798828125 0 +v 0.861590802669525146484375 0.3590969741344451904296875 0 +v 
0.770127952098846435546875 0.5021054744720458984375 0 +v 0.793567240238189697265625 0.300651073455810546875 0 +v 0.845153868198394775390625 0.3827641308307647705078125 0 +v 0.867114365100860595703125 0.29426610469818115234375 0 +v 0.98244464397430419921875 0.4004484713077545166015625 0 +v 0.665871202945709228515625 0.4008794724941253662109375 0 +v 0.9568703174591064453125 0.277989864349365234375 0 +v 0.873319208621978759765625 0.444478809833526611328125 0 +v 0.50239312648773193359375 0.5400478839874267578125 0 +v 0.5265839099884033203125 0.53794562816619873046875 0 +v 0.9376628398895263671875 0.305188655853271484375 0 +v 0.771930634975433349609375 0.4130860269069671630859375 0 +v 0.799865305423736572265625 0.310930311679840087890625 0 +v 0.847307503223419189453125 0.4144564568996429443359375 0 +f 111 80 97 +f 97 80 58 +f 111 92 80 +f 45 111 97 +f 45 49 111 +f 69 92 111 +f 54 69 111 +f 45 14 49 +f 49 54 111 +f 75 54 49 +f 14 75 49 +f 85 68 54 +f 63 75 72 +f 72 75 14 +f 91 85 54 +f 73 63 72 +f 64 91 54 +f 134 73 72 +f 64 54 75 +f 64 75 57 +f 132 68 85 +f 57 75 63 +f 71 132 85 +f 73 57 63 +f 64 79 91 +f 90 57 73 +f 52 71 79 +f 93 132 71 +f 46 134 72 +f 43 90 98 +f 126 134 46 +f 134 90 73 +f 52 79 64 +f 71 85 91 +f 71 91 79 +f 52 64 133 +f 98 90 50 +f 90 43 57 +f 133 64 57 +f 43 133 57 +f 129 96 71 +f 71 96 93 +f 129 71 52 +f 113 129 52 +f 50 90 134 +f 43 106 133 +f 133 113 52 +f 40 50 134 +f 53 50 19 +f 98 106 43 +f 50 55 98 +f 19 50 40 +f 134 56 40 +f 50 53 55 +f 103 106 98 +f 106 113 133 +f 129 116 96 +f 103 98 55 +f 134 126 56 +f 36 103 55 +f 56 126 115 +f 84 116 38 +f 113 116 129 +f 103 17 106 +f 106 17 113 +f 36 55 53 +f 40 82 19 +f 19 82 53 +f 84 76 116 +f 116 76 96 +f 25 82 40 +f 25 40 56 +f 16 25 56 +f 38 116 113 +f 38 113 17 +f 104 17 103 +f 26 38 89 +f 74 76 84 +f 82 36 53 +f 16 56 115 +f 82 25 36 +f 26 84 38 +f 26 74 84 +f 28 16 115 +f 32 104 36 +f 89 38 17 +f 119 32 25 +f 32 36 25 +f 36 104 103 +f 25 16 119 +f 41 74 26 +f 27 41 26 +f 48 89 81 +f 104 89 17 
+f 122 27 26 +f 48 122 89 +f 89 122 26 +f 27 112 41 +f 130 89 104 +f 112 27 66 +f 66 27 122 +f 31 131 32 +f 32 131 104 +f 48 66 122 +f 128 66 48 +f 131 130 104 +f 102 31 32 +f 131 31 130 +f 65 34 109 +f 35 34 112 +f 35 112 66 +f 34 65 112 +f 128 35 66 +f 107 67 130 +f 130 67 89 +f 109 34 35 +f 29 119 16 +f 31 20 130 +f 119 60 32 +f 61 81 67 +f 67 81 89 +f 128 109 35 +f 88 117 60 +f 60 117 32 +f 88 83 117 +f 117 83 32 +f 108 18 119 +f 119 18 60 +f 81 128 48 +f 120 88 60 +f 83 102 32 +f 88 102 83 +f 81 22 128 +f 102 20 31 +f 18 127 60 +f 88 77 102 +f 108 127 18 +f 95 65 109 +f 124 20 102 +f 110 107 20 +f 20 107 130 +f 37 95 109 +f 37 109 128 +f 77 120 70 +f 127 120 60 +f 105 61 67 +f 110 105 107 +f 107 105 67 +f 77 88 120 +f 77 124 102 +f 22 81 61 +f 124 110 20 +f 87 61 42 +f 87 22 61 +f 62 120 127 +f 42 61 105 +f 22 37 128 +f 124 23 110 +f 110 23 105 +f 114 23 124 +f 62 70 120 +f 77 70 124 +f 59 127 108 +f 78 37 22 +f 101 42 105 +f 87 118 22 +f 99 59 108 +f 5 118 42 +f 42 118 87 +f 101 105 23 +f 101 114 8 +f 21 114 124 +f 30 101 8 +f 114 101 23 +f 123 124 70 +f 62 127 59 +f 101 30 42 +f 51 62 15 +f 44 123 70 +f 44 47 123 +f 123 21 124 +f 15 62 59 +f 62 44 70 +f 15 59 121 +f 121 59 99 +f 51 44 62 +f 47 21 123 +f 100 21 47 +f 125 44 51 +f 15 24 51 +f 7 86 30 +f 30 86 42 +f 3 78 22 +f 86 6 42 +f 6 5 42 +f 8 114 21 +f 8 7 30 +f 86 7 6 +f 33 100 47 +f 33 47 44 +f 5 4 118 +f 10 33 11 +f 9 8 21 +f 9 21 100 +f 3 22 118 +f 4 3 118 +f 10 9 100 +f 3 2 78 +f 78 2 37 +f 121 24 15 +f 33 10 100 +f 94 24 121 +f 24 125 51 +f 11 33 44 +f 39 12 125 +f 12 11 44 +f 39 125 24 +f 2 1 37 +f 12 44 125 +f 39 24 94 +f 39 13 12 +f 220 181 168 +f 153 181 151 +f 185 212 151 +f 220 185 181 +f 181 185 151 +f 185 209 212 +f 209 184 212 +f 185 197 209 +f 197 182 220 +f 220 182 185 +f 171 197 177 +f 182 197 185 +f 196 184 209 +f 232 196 223 +f 177 197 220 +f 223 196 209 +f 177 220 168 +f 197 171 209 +f 234 177 168 +f 171 223 209 +f 247 225 159 +f 223 225 232 +f 225 223 159 +f 177 245 171 +f 233 245 
177 +f 224 223 171 +f 225 247 149 +f 180 233 177 +f 159 228 247 +f 203 180 177 +f 235 171 245 +f 224 171 235 +f 247 228 149 +f 235 245 158 +f 224 159 223 +f 163 203 177 +f 158 233 180 +f 158 245 233 +f 242 149 228 +f 163 177 234 +f 163 234 249 +f 169 163 249 +f 230 183 235 +f 235 183 224 +f 250 205 159 +f 205 242 228 +f 158 180 203 +f 194 158 203 +f 205 228 159 +f 163 227 203 +f 169 227 163 +f 227 194 203 +f 158 230 235 +f 167 230 194 +f 194 230 158 +f 250 159 224 +f 191 169 206 +f 206 169 249 +f 227 167 194 +f 250 224 213 +f 213 224 183 +f 204 213 183 +f 250 155 205 +f 189 172 242 +f 201 189 242 +f 226 204 156 +f 213 155 250 +f 205 201 242 +f 204 183 156 +f 239 201 205 +f 204 155 213 +f 156 183 230 +f 167 227 192 +f 204 226 155 +f 155 239 205 +f 214 173 188 +f 198 167 237 +f 192 227 169 +f 167 156 230 +f 186 240 167 +f 167 240 156 +f 156 170 226 +f 214 239 173 +f 198 186 167 +f 239 155 173 +f 239 214 201 +f 169 191 192 +f 188 210 214 +f 214 210 201 +f 170 156 240 +f 241 240 186 +f 241 170 240 +f 161 155 226 +f 199 241 202 +f 198 241 186 +f 246 161 226 +f 246 226 170 +f 192 229 167 +f 166 195 216 +f 229 237 167 +f 199 170 241 +f 152 246 170 +f 195 198 237 +f 202 241 198 +f 195 237 229 +f 195 202 198 +f 199 152 170 +f 195 229 216 +f 236 173 161 +f 161 173 155 +f 157 161 246 +f 195 166 202 +f 218 152 199 +f 211 229 192 +f 218 199 202 +f 231 211 192 +f 152 157 246 +f 173 176 188 +f 248 176 173 +f 176 187 188 +f 154 139 217 +f 157 236 161 +f 154 236 139 +f 236 154 173 +f 236 157 140 +f 162 202 175 +f 162 218 202 +f 175 202 166 +f 139 236 140 +f 175 166 165 +f 216 229 211 +f 154 248 173 +f 222 216 211 +f 136 187 176 +f 221 208 216 +f 216 208 166 +f 142 218 162 +f 154 217 248 +f 248 137 176 +f 138 217 139 +f 243 222 207 +f 207 222 211 +f 138 137 248 +f 137 136 176 +f 217 138 248 +f 157 152 219 +f 152 218 219 +f 221 165 208 +f 208 165 166 +f 136 135 187 +f 219 218 142 +f 142 162 143 +f 219 140 157 +f 150 190 165 +f 142 141 219 +f 219 141 140 +f 165 190 175 +f 221 216 222 
+f 243 221 222 +f 143 162 175 +f 243 200 221 +f 150 215 190 +f 190 215 175 +f 160 150 165 +f 144 143 215 +f 215 143 175 +f 145 144 150 +f 160 165 221 +f 150 144 215 +f 164 160 221 +f 146 145 160 +f 160 145 150 +f 179 164 221 +f 164 178 160 +f 174 221 200 +f 174 179 221 +f 164 147 178 +f 178 146 160 +f 147 164 179 +f 147 146 178 +f 238 244 174 +f 174 244 179 +f 244 147 179 +f 238 147 244 +f 193 148 238 +f 238 148 147 +f 309 287 288 +f 309 304 287 +f 323 307 287 +f 287 307 288 +f 316 307 323 +f 323 287 304 +f 275 323 304 +f 292 324 323 +f 323 324 316 +f 268 292 275 +f 275 292 323 +f 324 306 316 +f 322 306 324 +f 286 317 268 +f 271 324 292 +f 268 271 292 +f 270 271 268 +f 271 322 324 +f 273 299 281 +f 271 314 322 +f 322 300 281 +f 303 314 271 +f 270 268 317 +f 282 270 298 +f 270 303 271 +f 270 282 303 +f 319 318 314 +f 294 314 303 +f 314 318 322 +f 300 328 281 +f 318 300 322 +f 302 273 328 +f 328 273 281 +f 291 283 299 +f 298 313 282 +f 282 313 303 +f 295 280 300 +f 298 325 313 +f 274 302 328 +f 273 291 299 +f 302 291 273 +f 300 274 328 +f 278 276 269 +f 280 274 300 +f 301 289 313 +f 313 294 303 +f 276 283 269 +f 325 301 313 +f 259 294 289 +f 289 294 313 +f 290 257 325 +f 269 291 272 +f 280 302 274 +f 295 300 318 +f 257 301 325 +f 294 277 314 +f 257 258 301 +f 259 277 294 +f 269 283 291 +f 259 289 301 +f 258 259 301 +f 261 319 277 +f 277 319 314 +f 272 291 302 +f 319 295 318 +f 272 302 280 +f 259 260 277 +f 308 305 295 +f 320 295 319 +f 295 305 280 +f 305 272 280 +f 269 312 278 +f 261 320 319 +f 305 293 272 +f 260 261 277 +f 285 269 272 +f 285 312 269 +f 262 267 320 +f 320 308 295 +f 296 308 263 +f 267 308 320 +f 312 255 256 +f 293 285 272 +f 308 293 305 +f 261 262 320 +f 308 321 293 +f 327 312 285 +f 327 255 312 +f 296 321 308 +f 293 310 285 +f 263 308 267 +f 262 263 267 +f 310 327 285 +f 254 327 253 +f 310 293 321 +f 327 254 255 +f 279 321 296 +f 263 264 296 +f 326 310 321 +f 264 279 296 +f 279 326 321 +f 284 297 326 +f 326 297 310 +f 310 253 327 +f 279 284 326 +f 
252 253 297 +f 297 253 310 +f 279 311 284 +f 284 315 297 +f 264 265 279 +f 265 311 279 +f 311 315 284 +f 251 252 315 +f 315 252 297 +f 265 266 311 +f 311 266 315 +f 266 251 315 +f 363 356 349 +f 356 99 349 +f 94 121 345 +f 343 356 363 +f 343 363 351 +f 345 121 99 +f 350 99 356 +f 343 351 344 +f 344 351 347 +f 347 351 371 +f 343 350 356 +f 350 345 99 +f 39 94 345 +f 343 374 350 +f 347 371 373 +f 359 348 381 +f 39 345 329 +f 359 347 348 +f 374 345 350 +f 347 359 344 +f 344 374 343 +f 373 365 376 +f 360 374 344 +f 329 13 39 +f 330 329 374 +f 374 329 345 +f 360 344 359 +f 373 371 365 +f 381 360 359 +f 381 354 360 +f 366 354 381 +f 348 347 373 +f 360 354 374 +f 361 348 373 +f 346 330 374 +f 369 341 376 +f 376 358 373 +f 340 358 376 +f 362 346 354 +f 366 381 348 +f 354 346 374 +f 364 366 348 +f 368 362 354 +f 332 331 346 +f 346 331 330 +f 375 372 358 +f 358 372 373 +f 367 361 372 +f 372 361 373 +f 366 368 354 +f 364 368 366 +f 362 332 346 +f 353 364 348 +f 333 332 362 +f 341 340 376 +f 378 353 348 +f 340 370 358 +f 333 362 368 +f 370 375 358 +f 364 357 368 +f 342 375 339 +f 378 348 361 +f 342 367 372 +f 353 357 364 +f 379 357 353 +f 375 342 372 +f 367 378 361 +f 377 342 339 +f 352 378 367 +f 339 375 370 +f 342 377 367 +f 340 339 370 +f 355 379 380 +f 334 333 357 +f 357 333 368 +f 379 353 378 +f 352 367 338 +f 338 367 377 +f 379 334 357 +f 339 338 377 +f 355 334 379 +f 379 378 380 +f 380 378 352 +f 337 380 352 +f 338 337 352 +f 380 335 355 +f 355 335 334 +f 336 335 380 +f 337 336 380 +f 435 446 396 +f 396 446 473 +f 414 466 473 +f 446 414 473 +f 58 435 396 +f 467 411 445 +f 435 411 446 +f 446 400 414 +f 445 411 435 +f 411 400 446 +f 414 410 466 +f 411 491 400 +f 467 491 411 +f 481 410 392 +f 392 410 414 +f 392 414 400 +f 488 392 491 +f 491 392 400 +f 392 474 481 +f 474 438 481 +f 452 440 467 +f 467 440 491 +f 392 478 474 +f 412 410 481 +f 438 412 481 +f 440 488 491 +f 454 495 438 +f 488 478 392 +f 454 438 474 +f 478 454 474 +f 454 494 495 +f 451 494 454 +f 463 483 488 +f 
397 488 440 +f 488 483 478 +f 478 469 454 +f 469 451 454 +f 484 397 452 +f 452 397 440 +f 423 495 494 +f 397 463 488 +f 483 469 478 +f 469 423 451 +f 451 423 494 +f 459 469 483 +f 403 431 463 +f 463 431 483 +f 422 484 452 +f 397 403 463 +f 443 403 288 +f 436 419 431 +f 431 419 483 +f 288 403 397 +f 471 394 469 +f 469 394 423 +f 436 459 419 +f 419 459 483 +f 459 471 469 +f 288 397 484 +f 403 443 431 +f 417 477 471 +f 471 477 394 +f 487 423 394 +f 309 288 484 +f 448 487 394 +f 288 307 443 +f 436 431 457 +f 462 436 457 +f 459 417 471 +f 490 485 477 +f 477 485 394 +f 485 448 394 +f 457 431 443 +f 436 479 459 +f 462 479 436 +f 485 490 448 +f 434 487 448 +f 307 316 443 +f 433 490 477 +f 472 457 443 +f 462 449 479 +f 444 434 448 +f 433 399 490 +f 490 399 448 +f 316 472 443 +f 464 449 457 +f 457 449 462 +f 479 417 459 +f 449 417 479 +f 316 306 472 +f 306 395 472 +f 472 395 457 +f 417 433 477 +f 399 444 448 +f 405 464 457 +f 449 496 417 +f 417 437 433 +f 395 405 457 +f 306 405 395 +f 409 496 464 +f 464 496 449 +f 393 399 433 +f 393 444 399 +f 496 437 417 +f 281 306 322 +f 405 461 464 +f 489 421 437 +f 401 393 433 +f 465 401 433 +f 437 465 433 +f 306 461 405 +f 437 421 465 +f 465 486 401 +f 401 486 393 +f 393 486 444 +f 447 434 444 +f 461 409 464 +f 427 447 444 +f 489 437 496 +f 493 427 486 +f 486 427 444 +f 493 486 465 +f 447 426 404 +f 441 489 496 +f 416 493 465 +f 441 496 409 +f 299 415 461 +f 461 415 409 +f 299 461 306 +f 482 447 427 +f 482 426 447 +f 415 441 409 +f 416 465 421 +f 453 425 426 +f 415 439 441 +f 432 482 493 +f 493 482 427 +f 406 416 489 +f 489 416 421 +f 299 439 415 +f 424 402 489 +f 299 306 281 +f 420 453 426 +f 456 420 482 +f 482 420 426 +f 424 489 441 +f 416 470 493 +f 460 424 441 +f 439 460 441 +f 476 402 424 +f 407 406 402 +f 402 406 489 +f 413 470 416 +f 406 413 416 +f 432 456 482 +f 432 493 470 +f 408 460 439 +f 406 429 413 +f 468 476 424 +f 389 432 388 +f 480 475 492 +f 460 468 424 +f 480 468 475 +f 430 470 413 +f 456 391 420 +f 480 407 476 +f 476 
407 402 +f 442 439 299 +f 468 480 476 +f 430 429 387 +f 407 429 406 +f 389 390 432 +f 432 390 456 +f 388 432 470 +f 390 391 456 +f 430 413 429 +f 430 388 470 +f 387 429 386 +f 480 492 407 +f 475 468 458 +f 386 429 407 +f 430 387 388 +f 276 442 283 +f 283 442 299 +f 442 408 439 +f 385 386 492 +f 492 386 407 +f 458 468 460 +f 408 458 460 +f 475 418 492 +f 450 458 408 +f 278 398 442 +f 442 398 408 +f 398 455 408 +f 384 418 475 +f 428 418 384 +f 428 492 418 +f 428 385 492 +f 384 475 458 +f 398 450 455 +f 383 384 458 +f 450 408 455 +f 428 384 385 +f 278 442 276 +f 278 450 398 +f 382 383 450 +f 450 383 458 +f 278 382 450 +f 256 382 278 +f 312 256 278 +f 517 410 412 +f 438 495 412 +f 535 542 517 +f 517 542 151 +f 535 517 412 +f 495 512 412 +f 512 535 412 +f 542 591 151 +f 576 512 423 +f 512 495 423 +f 512 586 535 +f 423 586 576 +f 576 586 512 +f 533 591 542 +f 533 542 535 +f 545 533 535 +f 168 153 591 +f 591 153 151 +f 554 545 586 +f 586 545 535 +f 533 558 591 +f 554 586 577 +f 577 586 551 +f 545 558 533 +f 554 558 545 +f 551 586 423 +f 554 581 558 +f 487 551 423 +f 577 581 554 +f 168 181 153 +f 168 591 558 +f 522 168 558 +f 487 567 551 +f 551 581 577 +f 553 567 434 +f 520 581 551 +f 434 567 487 +f 567 520 551 +f 561 520 553 +f 553 520 567 +f 581 522 558 +f 543 522 581 +f 520 543 581 +f 434 561 553 +f 582 522 543 +f 404 561 434 +f 575 234 522 +f 522 234 168 +f 249 575 537 +f 575 249 234 +f 537 575 522 +f 537 522 582 +f 543 520 531 +f 559 543 531 +f 531 520 521 +f 447 404 434 +f 559 582 543 +f 537 206 249 +f 521 520 538 +f 559 534 582 +f 538 520 561 +f 531 585 559 +f 538 523 521 +f 521 523 531 +f 583 206 537 +f 583 537 582 +f 519 583 582 +f 513 538 561 +f 523 585 531 +f 513 561 404 +f 538 585 523 +f 534 585 569 +f 426 425 404 +f 580 539 425 +f 425 539 404 +f 585 534 559 +f 583 519 206 +f 569 585 571 +f 580 513 539 +f 539 513 404 +f 525 515 513 +f 513 515 538 +f 453 580 425 +f 420 580 453 +f 530 519 534 +f 534 519 582 +f 420 391 580 +f 569 530 534 +f 519 191 206 +f 555 538 
515 +f 580 525 513 +f 556 191 519 +f 497 525 580 +f 560 556 519 +f 555 585 538 +f 530 560 519 +f 550 555 515 +f 525 550 515 +f 498 550 525 +f 541 530 588 +f 541 560 530 +f 588 530 569 +f 555 571 585 +f 536 571 555 +f 587 514 560 +f 560 514 556 +f 565 588 569 +f 541 587 560 +f 584 536 555 +f 574 565 569 +f 574 569 571 +f 588 587 541 +f 562 584 555 +f 192 191 231 +f 231 556 514 +f 231 191 556 +f 578 555 550 +f 536 574 571 +f 391 497 580 +f 498 578 550 +f 532 574 536 +f 547 587 588 +f 532 536 584 +f 497 498 525 +f 578 562 555 +f 547 588 565 +f 516 547 565 +f 566 514 587 +f 566 231 514 +f 498 499 578 +f 574 516 565 +f 548 532 584 +f 499 562 578 +f 499 500 562 +f 562 500 584 +f 518 516 532 +f 532 516 574 +f 547 564 587 +f 500 548 584 +f 207 211 231 +f 544 564 547 +f 564 566 587 +f 546 527 566 +f 566 527 231 +f 500 501 548 +f 548 518 532 +f 516 544 547 +f 564 546 566 +f 579 544 516 +f 546 207 527 +f 527 207 231 +f 501 518 548 +f 501 502 518 +f 518 502 516 +f 526 563 544 +f 544 563 564 +f 573 526 544 +f 502 579 516 +f 502 503 579 +f 579 573 544 +f 540 546 563 +f 503 573 579 +f 563 546 564 +f 540 563 529 +f 243 552 549 +f 546 552 207 +f 549 552 546 +f 552 243 207 +f 529 563 526 +f 503 504 573 +f 573 504 526 +f 504 529 526 +f 572 549 546 +f 504 589 529 +f 572 546 540 +f 505 540 529 +f 504 505 589 +f 589 505 529 +f 506 572 540 +f 549 200 243 +f 505 506 540 +f 590 200 549 +f 590 549 572 +f 507 590 572 +f 193 174 200 +f 506 507 572 +f 557 200 590 +f 508 557 590 +f 193 238 174 +f 557 193 200 +f 507 508 590 +f 557 524 193 +f 508 570 557 +f 570 524 557 +f 524 148 193 +f 570 568 524 +f 524 511 148 +f 508 509 570 +f 509 528 570 +f 510 568 528 +f 528 568 570 +f 510 511 568 +f 568 511 524 +f 509 510 528 +f 680 467 445 +f 647 680 445 +f 690 647 445 +f 617 452 680 +f 680 452 467 +f 646 684 647 +f 647 684 680 +f 669 646 647 +f 684 617 680 +f 669 647 690 +f 684 615 617 +f 608 615 684 +f 617 422 452 +f 660 669 690 +f 608 422 615 +f 484 422 608 +f 615 422 617 +f 681 684 646 +f 630 681 669 
+f 669 681 646 +f 631 620 660 +f 681 608 684 +f 649 669 660 +f 649 630 669 +f 681 674 608 +f 671 660 607 +f 620 649 660 +f 671 631 660 +f 620 631 649 +f 649 659 630 +f 626 631 671 +f 693 659 649 +f 630 659 681 +f 304 309 608 +f 608 309 484 +f 631 693 649 +f 674 304 608 +f 688 693 631 +f 674 681 659 +f 626 637 631 +f 687 674 659 +f 614 688 631 +f 693 688 659 +f 621 637 626 +f 665 614 637 +f 665 637 651 +f 637 614 631 +f 625 637 621 +f 614 665 688 +f 275 304 611 +f 665 652 688 +f 688 687 659 +f 652 687 688 +f 695 645 687 +f 687 645 674 +f 665 628 652 +f 611 304 674 +f 645 611 674 +f 662 695 687 +f 666 629 611 +f 672 637 625 +f 606 651 637 +f 652 616 687 +f 633 672 625 +f 656 606 672 +f 672 606 637 +f 629 275 611 +f 628 665 651 +f 648 616 652 +f 682 638 606 +f 606 638 651 +f 639 656 633 +f 666 611 645 +f 666 645 695 +f 685 662 616 +f 616 662 687 +f 682 628 638 +f 638 628 651 +f 662 666 695 +f 656 682 606 +f 656 672 633 +f 643 648 628 +f 628 648 652 +f 682 653 628 +f 644 286 629 +f 629 268 275 +f 644 629 666 +f 644 666 685 +f 286 268 629 +f 685 666 662 +f 656 653 682 +f 643 657 648 +f 648 657 616 +f 644 317 286 +f 298 317 644 +f 663 677 639 +f 677 656 639 +f 677 653 656 +f 667 643 628 +f 657 685 616 +f 618 685 657 +f 653 667 628 +f 643 635 657 +f 675 667 653 +f 683 668 667 +f 667 668 643 +f 655 663 639 +f 654 675 653 +f 654 653 677 +f 683 635 668 +f 668 635 643 +f 610 689 663 +f 663 689 677 +f 675 683 667 +f 689 654 677 +f 613 618 657 +f 685 697 644 +f 654 632 675 +f 635 622 657 +f 640 632 654 +f 655 610 663 +f 689 686 654 +f 686 640 654 +f 632 683 675 +f 683 622 635 +f 664 613 622 +f 622 613 657 +f 600 697 618 +f 618 697 685 +f 640 683 632 +f 610 686 689 +f 640 696 683 +f 642 686 610 +f 298 270 317 +f 670 298 605 +f 678 696 640 +f 605 298 644 +f 678 640 686 +f 673 661 622 +f 613 664 618 +f 697 605 644 +f 603 605 697 +f 670 325 298 +f 619 678 686 +f 661 664 622 +f 604 325 670 +f 604 670 605 +f 673 622 683 +f 600 601 697 +f 627 686 642 +f 696 673 683 +f 661 623 664 +f 
601 602 697 +f 600 618 664 +f 602 603 697 +f 627 624 691 +f 627 642 624 +f 661 676 623 +f 599 600 664 +f 634 673 612 +f 599 664 623 +f 603 604 605 +f 604 290 325 +f 676 599 623 +f 604 257 290 +f 598 661 673 +f 598 676 661 +f 627 619 686 +f 597 598 673 +f 691 619 627 +f 636 696 678 +f 636 678 658 +f 676 598 599 +f 679 636 658 +f 612 673 696 +f 679 612 636 +f 636 612 696 +f 641 624 698 +f 658 678 609 +f 634 597 673 +f 596 597 634 +f 596 634 612 +f 678 619 609 +f 698 624 650 +f 624 594 691 +f 609 619 691 +f 679 596 612 +f 658 596 679 +f 594 609 691 +f 595 609 594 +f 658 595 596 +f 594 624 641 +f 609 595 658 +f 692 641 698 +f 692 594 641 +f 593 594 692 +f 592 593 692 +f 694 592 692 +f 58 738 435 +f 58 725 738 +f 80 725 58 +f 80 92 725 +f 445 738 725 +f 445 435 738 +f 720 445 725 +f 92 720 725 +f 69 720 92 +f 720 736 445 +f 720 708 736 +f 54 751 69 +f 69 708 720 +f 751 708 69 +f 54 68 751 +f 751 68 708 +f 690 445 736 +f 750 690 736 +f 718 607 708 +f 708 750 736 +f 607 750 708 +f 728 708 68 +f 750 660 690 +f 607 660 750 +f 93 728 132 +f 132 728 68 +f 728 718 708 +f 734 671 718 +f 734 718 771 +f 718 671 607 +f 771 718 710 +f 718 728 710 +f 710 728 755 +f 734 724 671 +f 755 728 93 +f 626 724 762 +f 762 724 734 +f 734 771 762 +f 748 771 710 +f 762 771 748 +f 724 626 671 +f 755 93 96 +f 748 621 762 +f 762 621 626 +f 76 755 96 +f 755 748 710 +f 755 769 748 +f 775 769 755 +f 748 625 621 +f 745 625 748 +f 756 745 769 +f 775 755 76 +f 769 745 748 +f 745 633 625 +f 730 775 76 +f 74 730 76 +f 41 730 74 +f 756 769 775 +f 770 633 745 +f 737 756 775 +f 730 737 775 +f 776 737 730 +f 760 768 716 +f 756 768 745 +f 768 770 745 +f 716 768 756 +f 770 639 633 +f 760 770 768 +f 731 770 760 +f 41 776 730 +f 719 716 756 +f 731 716 719 +f 112 776 41 +f 719 756 737 +f 716 731 760 +f 655 639 770 +f 758 719 737 +f 731 655 770 +f 715 737 776 +f 610 655 749 +f 753 715 776 +f 749 655 731 +f 112 65 776 +f 766 758 715 +f 715 758 737 +f 749 731 754 +f 753 766 715 +f 740 733 719 +f 726 753 714 +f 65 753 
776 +f 746 731 719 +f 719 758 740 +f 740 766 727 +f 740 758 766 +f 727 766 723 +f 754 642 749 +f 749 642 610 +f 740 727 733 +f 733 746 719 +f 723 766 753 +f 711 746 773 +f 727 746 733 +f 714 753 65 +f 742 754 731 +f 726 723 753 +f 726 713 723 +f 714 713 726 +f 773 746 727 +f 773 727 709 +f 746 742 731 +f 711 742 746 +f 713 732 723 +f 763 778 742 +f 763 742 711 +f 742 778 754 +f 650 642 754 +f 727 723 732 +f 709 727 732 +f 743 650 778 +f 778 650 754 +f 773 763 711 +f 777 773 709 +f 752 763 773 +f 650 624 642 +f 694 698 650 +f 743 778 763 +f 764 752 777 +f 777 752 773 +f 712 743 763 +f 752 712 763 +f 694 692 698 +f 721 712 764 +f 764 712 752 +f 743 747 650 +f 739 747 743 +f 729 694 774 +f 747 694 650 +f 712 739 743 +f 721 739 712 +f 759 592 694 +f 744 729 706 +f 729 759 694 +f 774 694 747 +f 722 747 739 +f 729 744 759 +f 759 706 592 +f 741 722 739 +f 705 706 729 +f 744 706 759 +f 765 741 739 +f 722 774 747 +f 765 707 741 +f 741 707 722 +f 765 739 721 +f 707 774 722 +f 772 765 721 +f 717 767 774 +f 767 705 774 +f 774 705 729 +f 735 774 707 +f 735 707 765 +f 772 761 765 +f 703 704 767 +f 767 704 705 +f 701 735 765 +f 703 717 702 +f 735 717 774 +f 761 701 765 +f 702 717 735 +f 717 703 767 +f 700 702 735 +f 701 700 735 +f 700 757 702 +f 700 699 757 +f 757 699 702 +f 714 65 95 +f 796 713 714 +f 792 714 95 +f 792 796 714 +f 792 37 1 +f 732 713 796 +f 786 732 791 +f 37 792 95 +f 786 709 732 +f 791 732 796 +f 786 788 709 +f 788 777 709 +f 786 790 788 +f 793 790 786 +f 791 796 792 +f 790 777 788 +f 795 791 794 +f 764 777 790 +f 793 764 790 +f 794 791 792 +f 791 795 786 +f 795 793 786 +f 783 793 795 +f 793 721 764 +f 1 794 792 +f 1 784 794 +f 794 783 795 +f 784 783 794 +f 783 785 793 +f 782 789 785 +f 785 789 793 +f 789 721 793 +f 783 782 785 +f 789 787 721 +f 782 781 789 +f 787 772 721 +f 781 787 789 +f 781 780 787 +f 787 761 772 +f 779 761 787 +f 780 779 787 +f 779 701 761 +f 97 396 895 +f 396 473 895 +f 473 850 895 +f 473 466 850 +f 97 58 396 +f 850 896 895 +f 895 896 97 +f 
410 822 850 +f 850 822 896 +f 410 850 466 +f 822 828 896 +f 822 816 828 +f 828 884 896 +f 896 45 97 +f 517 816 822 +f 828 810 884 +f 410 517 822 +f 884 45 896 +f 844 810 816 +f 816 810 828 +f 151 844 816 +f 810 805 884 +f 848 807 45 +f 810 874 805 +f 844 874 810 +f 848 45 884 +f 807 14 45 +f 848 884 862 +f 862 884 805 +f 517 151 816 +f 844 151 874 +f 881 805 873 +f 881 862 805 +f 807 72 14 +f 873 805 874 +f 862 837 848 +f 815 877 848 +f 848 877 807 +f 807 877 72 +f 865 873 847 +f 881 837 862 +f 873 837 881 +f 847 873 874 +f 873 865 837 +f 847 874 151 +f 817 815 848 +f 877 46 72 +f 817 848 837 +f 212 847 151 +f 825 817 837 +f 825 837 865 +f 815 46 877 +f 817 46 815 +f 813 825 865 +f 817 883 46 +f 813 819 825 +f 825 878 817 +f 892 883 878 +f 878 883 817 +f 892 878 872 +f 872 878 825 +f 883 126 46 +f 813 865 847 +f 212 184 847 +f 184 813 847 +f 859 872 819 +f 819 872 825 +f 880 126 883 +f 880 883 892 +f 196 813 184 +f 834 839 859 +f 872 839 892 +f 196 824 813 +f 813 824 819 +f 851 880 892 +f 827 851 892 +f 834 827 839 +f 839 827 892 +f 859 839 872 +f 827 870 851 +f 852 859 819 +f 852 819 824 +f 196 887 824 +f 149 887 232 +f 232 887 196 +f 126 880 814 +f 115 126 814 +f 880 851 814 +f 858 852 887 +f 887 852 824 +f 870 827 834 +f 870 834 853 +f 853 834 859 +f 870 835 851 +f 225 149 232 +f 835 814 851 +f 149 858 887 +f 870 867 835 +f 835 879 814 +f 853 867 870 +f 811 879 835 +f 814 28 115 +f 812 898 852 +f 852 898 859 +f 242 858 149 +f 823 835 867 +f 823 811 835 +f 879 28 814 +f 898 853 859 +f 809 28 879 +f 898 812 853 +f 811 809 879 +f 846 809 811 +f 832 16 28 +f 864 823 867 +f 864 860 823 +f 823 860 811 +f 809 832 28 +f 864 867 853 +f 855 832 809 +f 846 855 809 +f 858 812 852 +f 861 864 853 +f 860 846 811 +f 900 812 858 +f 29 846 866 +f 29 16 832 +f 29 832 855 +f 812 861 853 +f 866 846 860 +f 889 863 812 +f 172 858 242 +f 812 863 861 +f 861 899 864 +f 172 900 858 +f 894 900 172 +f 838 860 864 +f 846 29 855 +f 189 836 172 +f 890 899 863 +f 863 899 861 +f 888 838 864 +f 
899 888 864 +f 833 836 189 +f 900 889 812 +f 886 889 818 +f 871 838 888 +f 833 894 836 +f 836 894 172 +f 833 189 201 +f 210 833 201 +f 843 889 900 +f 843 900 894 +f 889 886 863 +f 821 866 838 +f 843 894 833 +f 820 119 29 +f 866 860 838 +f 210 843 833 +f 888 899 890 +f 821 838 871 +f 866 868 29 +f 890 863 886 +f 866 876 868 +f 876 820 868 +f 868 820 29 +f 806 818 843 +f 808 843 210 +f 843 818 889 +f 187 869 188 +f 188 869 210 +f 818 890 886 +f 841 871 888 +f 349 876 866 +f 108 119 820 +f 890 841 888 +f 349 866 363 +f 866 821 831 +f 876 108 820 +f 869 808 210 +f 818 856 890 +f 831 821 871 +f 876 99 108 +f 371 831 365 +f 841 831 871 +f 882 841 890 +f 830 808 187 +f 830 187 135 +f 187 808 869 +f 831 363 866 +f 349 99 876 +f 806 843 891 +f 897 856 818 +f 831 351 363 +f 891 843 808 +f 371 351 831 +f 806 857 818 +f 875 897 818 +f 365 831 841 +f 830 842 808 +f 135 842 830 +f 885 875 857 +f 857 875 818 +f 885 857 806 +f 842 891 808 +f 803 849 891 +f 891 849 806 +f 854 885 806 +f 842 803 891 +f 849 854 806 +f 135 804 842 +f 801 875 885 +f 829 845 875 +f 875 845 897 +f 897 845 856 +f 804 803 842 +f 802 854 849 +f 800 829 875 +f 845 893 856 +f 803 802 849 +f 854 801 885 +f 882 890 856 +f 840 893 798 +f 829 893 845 +f 893 882 856 +f 882 365 841 +f 893 840 882 +f 802 801 854 +f 799 893 829 +f 882 840 365 +f 801 800 875 +f 800 799 829 +f 799 798 893 +f 798 826 840 +f 376 365 840 +f 798 797 826 +f 826 376 840 +f 369 376 826 +f 797 369 826 +f 797 341 369 From 6ebf9c7cbb804f97ef879abf81d9e5df56d673f4 Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 1 Jul 2024 22:45:59 -0400 Subject: [PATCH 13/96] debugging bits --- include/rxmesh/cavity_manager.cuh | 2 +- include/rxmesh/cavity_manager_impl.cuh | 98 ++++++++++++++++++++++++++ include/rxmesh/rxmesh_dynamic.cu | 19 +++-- include/rxmesh/rxmesh_dynamic.h | 92 +++++++++++++++++++++++- include/rxmesh/util/macros.h | 26 +++++++ 5 files changed, 231 insertions(+), 6 deletions(-) diff --git a/include/rxmesh/cavity_manager.cuh 
b/include/rxmesh/cavity_manager.cuh index f797d649..d118d50e 100644 --- a/include/rxmesh/cavity_manager.cuh +++ b/include/rxmesh/cavity_manager.cuh @@ -700,7 +700,7 @@ struct CavityManager /** * @brief give a patch q, we store the corresponding element in p in - * s_correspondence. Thus, s_correspondence is indexing via q's index space + * s_correspondence. Thus, s_correspondence is indexed via q's index space */ template __device__ __inline__ void populate_correspondence( diff --git a/include/rxmesh/cavity_manager_impl.cuh b/include/rxmesh/cavity_manager_impl.cuh index 15f752b2..fee47b40 100644 --- a/include/rxmesh/cavity_manager_impl.cuh +++ b/include/rxmesh/cavity_manager_impl.cuh @@ -2949,6 +2949,62 @@ __device__ __inline__ bool CavityManager::migrate_from_patch( bool b2 = m_s_src_mask_e(e2); if (b0 || b1 || b2) { + + + // uint16_t v0q = q_patch_info.ev[2 * e0 + 0].id; + // uint16_t v1q = q_patch_info.ev[2 * e0 + 1].id; + // + // uint16_t v2q = (q_patch_info.ev[2 * e1 + 1].id == v0q || + // q_patch_info.ev[2 * e1 + 1].id == v1q) ? 
+ // q_patch_info.ev[2 * e1 + 0].id : + // q_patch_info.ev[2 * e1 + 1].id; + // + // if (!(v0q != v1q && v0q != v2q && v1q != v2q)) { + // printf( + // "\n ## e0v=%u, %u, e1v=%u, %u, e2v=%u, %u, dirty + // = " + // "%d, is_locked = %d", + // q_patch_info.ev[2 * e0 + 0].id, + // q_patch_info.ev[2 * e0 + 1].id, + // q_patch_info.ev[2 * e1 + 0].id, + // q_patch_info.ev[2 * e1 + 1].id, + // q_patch_info.ev[2 * e2 + 0].id, + // q_patch_info.ev[2 * e2 + 1].id, + // m_context.m_patches_info[q].is_dirty(), + // m_context.m_patches_info[q].lock.is_locked()); + // } + // + // if ((!m_s_src_connect_mask_v(v0q) && + // !m_s_src_mask_v(v0q)) || + // (!m_s_src_connect_mask_v(v1q) && + // !m_s_src_mask_v(v1q)) && + // (!m_s_src_connect_mask_v(v2q) && + // !m_s_src_mask_v(v2q))) { + // + // printf("\n **v0q= %u, v1q= %u, v2q= %u", v0q, v1q, + // v2q); + // + // /*if (q_patch_info.is_owned(LocalVertexT(v0q))) { + // printf("\n **v0q = %f, %f, %f", + // coords(VertexHandle(q, v0q), 0), + // coords(VertexHandle(q, v0q), 1), + // coords(VertexHandle(q, v0q), 2)); + // } + // if (q_patch_info.is_owned(LocalVertexT(v1q))) { + // printf("\n **v1q = %f, %f, %f", + // coords(VertexHandle(q, v1q), 0), + // coords(VertexHandle(q, v1q), 1), + // coords(VertexHandle(q, v1q), 2)); + // } + // if (q_patch_info.is_owned(LocalVertexT(v2q))) { + // printf("\n **v2q = %f, %f, %f", + // coords(VertexHandle(q, v2q), 0), + // coords(VertexHandle(q, v2q), 1), + // coords(VertexHandle(q, v2q), 2)); + // }*/ + // } + + if (!b0) { assert(e0 < m_s_src_connect_mask_e.size()); m_s_src_connect_mask_e.set(e0, true); @@ -3232,6 +3288,48 @@ __device__ __inline__ LPPair CavityManager::migrate_edge( m_s_ev[2 * ep + 0] = v0p; m_s_ev[2 * ep + 1] = v1p; + // if (v0p == INVALID16 || v1p == INVALID16) { + // printf( + // "\n patch_id = %u, q= %u, v0q=%u, v1q= " + // "%u,v0p=%u, v1p= %u, q_edge= %u, o= %u, " + // "m_s_src_connect_mask_v(v0q)= " + // "%d,m_s_src_connect_mask_v(v1q)= %d, " + // "m_s_src_mask_v(v0q)= %u, 
m_s_src_mask_v(v1q)= %u, " + // "m_s_src_connect_mask_e=%d, " + // "m_s_src_mask_e=%d, " + // "q_patch_info.is_owned(v0q)= " + // "%d,q_patch_info.is_owned(v1q)= %d,", + // patch_id(), + // q, + // v0q, + // v1q, + // v0p, + // v1p, + // q_edge, + // o, + // m_s_src_connect_mask_v(v0q), + // m_s_src_connect_mask_v(v1q), + // m_s_src_mask_v(v0q), + // m_s_src_mask_v(v1q), + // m_s_src_connect_mask_e(q_edge), + // m_s_src_mask_e(q_edge), + // q_patch_info.is_owned(LocalVertexT(v0q)), + // q_patch_info.is_owned(LocalVertexT(v1q))); + // + // /*if (q_patch_info.is_owned(LocalVertexT(v0q))) { + // printf("\n v0q = %f, %f, %f", + // coords(VertexHandle(q, v0q), 0), + // coords(VertexHandle(q, v0q), 1), + // coords(VertexHandle(q, v0q), 2)); + // } + // if (q_patch_info.is_owned(LocalVertexT(v1q))) { + // printf("\n v1q = %f, %f, %f", + // coords(VertexHandle(q, v1q), 0), + // coords(VertexHandle(q, v1q), 1), + // coords(VertexHandle(q, v1q), 2)); + // }*/ + // } + // active bitmask is set in add_element // since it is owned by some other patch diff --git a/include/rxmesh/rxmesh_dynamic.cu b/include/rxmesh/rxmesh_dynamic.cu index d455e854..f655216c 100644 --- a/include/rxmesh/rxmesh_dynamic.cu +++ b/include/rxmesh/rxmesh_dynamic.cu @@ -70,6 +70,13 @@ __device__ __inline__ void hashtable_calibration(const Context context, LPPair lp = pi.get_lp().find(lid, nullptr, nullptr); + // if (lp.is_sentinel()) { + // printf("\n ## B=%u, T= %u, patch_id = %u, i= %u", + // blockIdx.x, + // threadIdx.x, + // pi.patch_id, + // i); + // } assert(!lp.is_sentinel()); owner = pi.patch_stash.get_patch(lp); @@ -2046,7 +2053,7 @@ __global__ static void check_ribbon_faces(const Context context, // vh.local_id(), // s_vf_offset[v_id], // s_vf_offset[v_id + 1]); - ::atomicAdd(d_check, 1); + ::atomicAdd(d_check, 1); break; } } @@ -2527,7 +2534,7 @@ bool RXMeshDynamic::validate() CUDA_ERROR(cudaFree(d_check)); - RXMESH_TRACE("RXMeshDynamic validatation finished"); + RXMESH_TRACE("RXMeshDynamic 
validation finished"); return success; } @@ -2857,7 +2864,7 @@ void RXMeshDynamic::update_host() RXMESH_TRACE("RXMeshDynamic updating host finished"); } -void RXMeshDynamic::update_polyscope() +void RXMeshDynamic::update_polyscope(std::string new_name) { #if USE_POLYSCOPE // for polyscope, we just remove the mesh and re-add it since polyscope does @@ -2865,7 +2872,11 @@ void RXMeshDynamic::update_polyscope() // if (this->m_polyscope_mesh_name.find("updated") != std::string::npos) { // polyscope::removeSurfaceMesh(this->m_polyscope_mesh_name, true); //} - this->m_polyscope_mesh_name = this->m_polyscope_mesh_name + "updated"; + if (new_name.empty()) { + this->m_polyscope_mesh_name = this->m_polyscope_mesh_name + "updated"; + } else { + this->m_polyscope_mesh_name = new_name; + } this->register_polyscope(); #endif } diff --git a/include/rxmesh/rxmesh_dynamic.h b/include/rxmesh/rxmesh_dynamic.h index 0df97196..355d4f9f 100644 --- a/include/rxmesh/rxmesh_dynamic.h +++ b/include/rxmesh/rxmesh_dynamic.h @@ -532,6 +532,96 @@ __global__ static void slice_patches(Context context, // s_new_num_faces); } + // check ribbons for new and old patch + + // auto check_ribbon = + // [](PatchInfo& info, char* name, PatchInfo& other_info) { + // // vertices + // for (uint16_t v = threadIdx.x; v < info.num_vertices[0]; + // v += blockThreads) { + // if (!info.is_deleted(LocalVertexT(v)) && + // !info.is_owned(LocalVertexT(v))) { + // + // LPPair lp = info.get_lp().find( + // v, nullptr, nullptr); + // + // if (lp.is_sentinel()) { + // printf( + // "\n @@ %s - vertex: B=%u, T= %u, patch_id " + // "= %u, v= %u, other_info.is_deleted= %d, " + // "other_info.is_owned= %d", + // name, + // blockIdx.x, + // threadIdx.x, + // info.patch_id, + // v, + // other_info.is_deleted(LocalVertexT(v)), + // other_info.is_owned(LocalVertexT(v))); + // } + // myAssert(!lp.is_sentinel()); + // } + // } + // + // + // // edges + // for (uint16_t e = threadIdx.x; e < info.num_edges[0]; + // e += 
blockThreads) { + // if (!info.is_deleted(LocalEdgeT(e)) && + // !info.is_owned(LocalEdgeT(e))) { + // + // LPPair lp = + // info.get_lp().find(e, nullptr, + // nullptr); + // + // if (lp.is_sentinel()) { + // printf( + // "\n @@ %s - edge: B=%u, T= %u, patch_id = " + // "%u, e= %u, other_info.is_deleted= %d, " + // "other_info.is_owned= %d", + // name, + // blockIdx.x, + // threadIdx.x, + // info.patch_id, + // e, + // other_info.is_deleted(LocalEdgeT(e)), + // other_info.is_owned(LocalEdgeT(e))); + // } + // myAssert(!lp.is_sentinel()); + // } + // } + // + // + // // faces + // for (uint16_t f = threadIdx.x; f < info.num_faces[0]; + // f += blockThreads) { + // if (!info.is_deleted(LocalFaceT(f)) && + // !info.is_owned(LocalFaceT(f))) { + // + // LPPair lp = + // info.get_lp().find(f, nullptr, + // nullptr); + // + // if (lp.is_sentinel()) { + // printf( + // "\n @@ %s - face: B=%u, T= %u, patch_id = " + // "%u, f= %u, other_info.is_deleted= %d, " + // "other_info.is_owned= %d", + // name, + // blockIdx.x, + // threadIdx.x, + // info.patch_id, + // f, + // other_info.is_deleted(LocalFaceT(f)), + // other_info.is_owned(LocalFaceT(f))); + // } + // myAssert(!lp.is_sentinel()); + // } + // } + // }; + // + // check_ribbon(old_pi, "old_pi", new_pi); + // check_ribbon(new_pi, "new_pi", old_pi); + #endif } } @@ -1012,6 +1102,6 @@ class RXMeshDynamic : public RXMeshStatic * coordinates as well. Thus, a call to `move(DEVICE, HOST)` should be done * to RXMesh-stored vertex coordinates before calling this function. 
*/ - void update_polyscope(); + void update_polyscope(std::string new_name = ""); }; } // namespace rxmesh \ No newline at end of file diff --git a/include/rxmesh/util/macros.h b/include/rxmesh/util/macros.h index cf418a78..b4cb241a 100644 --- a/include/rxmesh/util/macros.h +++ b/include/rxmesh/util/macros.h @@ -42,6 +42,32 @@ constexpr int MAX_OVERLAP_CAVITIES = 4; #define STRINGIFY(x) TOSTRING(x) #define TOSTRING(x) #x + +#ifndef myAssert +#ifdef __CUDA_ARCH__ +#define myAssert(condition) \ + if (!(condition)) { \ + printf( \ + "**********Assertion failed: %s, file %s, line %d, blockId= %d, " \ + "thread= %d\n", \ + #condition, \ + __FILE__, \ + __LINE__, \ + blockIdx.x, \ + threadIdx.x); /*asm("trap;");*/ \ + } +#else +#define myAssert(condition) \ + if (!(condition)) { \ + printf("**********Assertion failed: %s, file %s, line %d", \ + #condition, \ + __FILE__, \ + __LINE__); \ + } +#endif +#endif + + // CUDA_ERROR inline void HandleError(cudaError_t err, const char* file, int line) { From d0b4eabbc9941fc37c6f83ff9c2dc9e3a4e7f057 Mon Sep 17 00:00:00 2001 From: ahmed Date: Fri, 5 Jul 2024 17:28:22 -0400 Subject: [PATCH 14/96] attributes to dense matrix --- apps/MCF/mcf_sparse_matrix.cuh | 41 +++++---- include/rxmesh/attribute.h | 93 +++++++++++++++----- include/rxmesh/matrix/dense_matrix.cuh | 107 +++++++++++++++--------- include/rxmesh/matrix/sparse_matrix.cuh | 2 +- 4 files changed, 163 insertions(+), 80 deletions(-) diff --git a/apps/MCF/mcf_sparse_matrix.cuh b/apps/MCF/mcf_sparse_matrix.cuh index bdbdbd0a..3b817742 100644 --- a/apps/MCF/mcf_sparse_matrix.cuh +++ b/apps/MCF/mcf_sparse_matrix.cuh @@ -63,11 +63,10 @@ __global__ static void mcf_B_setup(const rxmesh::Context context, } template -__global__ static void mcf_A_X_setup( +__global__ static void mcf_A_setup( const rxmesh::Context context, const rxmesh::VertexAttribute coords, rxmesh::SparseMatrix A_mat, - rxmesh::DenseMatrix X_mat, const bool use_uniform_laplace, // for non-uniform const T time_step) { 
@@ -85,11 +84,6 @@ __global__ static void mcf_A_X_setup( uint32_t row_index = context.m_vertex_prefix[r_patch_id] + r_local_id; - // set up initial X matrix - X_mat(row_index, 0) = coords(p_id, 0); - X_mat(row_index, 1) = coords(p_id, 1); - X_mat(row_index, 2) = coords(p_id, 2); - // set up matrix A for (uint32_t v = 0; v < iter.size(); ++v) { VertexHandle r_id = iter[v]; @@ -155,9 +149,10 @@ void mcf_rxmesh_cusolver_chol(rxmesh::RXMeshStatic& rx, auto coords = rx.get_input_vertex_coordinates(); SparseMatrix A_mat(rx); - DenseMatrix X_mat(num_vertices, 3); DenseMatrix B_mat(num_vertices, 3); + std::shared_ptr> X_mat = coords->to_matrix(); + RXMESH_INFO("use_uniform_laplace: {}, time_step: {}", Arg.use_uniform_laplace, Arg.time_step); @@ -180,23 +175,22 @@ void mcf_rxmesh_cusolver_chol(rxmesh::RXMeshStatic& rx, LaunchBox launch_box_A_X; rx.prepare_launch_box({Op::VV}, launch_box_A_X, - (void*)mcf_A_X_setup, + (void*)mcf_A_setup, !Arg.use_uniform_laplace); - mcf_A_X_setup + mcf_A_setup <<>>(rx.get_context(), *coords, A_mat, - X_mat, Arg.use_uniform_laplace, Arg.time_step); // Solving the linear system using chol factorization and no reordering - A_mat.spmat_linear_solve(B_mat, X_mat, Solver::CHOL, Reorder::NONE); + A_mat.spmat_linear_solve(B_mat, *X_mat, Solver::CHOL, Reorder::NONE); - X_mat.move(rxmesh::DEVICE, rxmesh::HOST); + X_mat->move(rxmesh::DEVICE, rxmesh::HOST); const T tol = 0.001; T tmp_tol = tol; @@ -205,15 +199,15 @@ void mcf_rxmesh_cusolver_chol(rxmesh::RXMeshStatic& rx, uint32_t v_id = rx.map_to_global(vh); uint32_t v_linear_id = rx.linear_id(vh); - T a = X_mat(v_linear_id, 0); for (uint32_t i = 0; i < 3; ++i) { - tmp_tol = std::abs((X_mat(v_linear_id, i) - ground_truth[v_id][i]) / - ground_truth[v_id][i]); + tmp_tol = + std::abs(((*X_mat)(v_linear_id, i) - ground_truth[v_id][i]) / + ground_truth[v_id][i]); if (tmp_tol > tol) { RXMESH_WARN("val: {}, truth: {}, tol: {}\n", - X_mat(v_linear_id, i), + (*X_mat)(v_linear_id, i), ground_truth[v_id][i], 
tmp_tol); passed = false; @@ -222,5 +216,18 @@ void mcf_rxmesh_cusolver_chol(rxmesh::RXMeshStatic& rx, } }); + + rx.for_each_vertex(HOST, [&](const VertexHandle vh) { + uint32_t v_linear_id = rx.linear_id(vh); + + for (uint32_t i = 0; i < 3; ++i) { + (*coords)(vh, i) = (*X_mat)(v_linear_id, i); + } + }); + + // rx.export_obj("mcf_rxmesh_chol.obj", *coords); + // rx.get_polyscope_mesh()->updateVertexPositions(*coords); + // polyscope::show(); + EXPECT_TRUE(passed); } \ No newline at end of file diff --git a/include/rxmesh/attribute.h b/include/rxmesh/attribute.h index 530a3e0b..c980d997 100644 --- a/include/rxmesh/attribute.h +++ b/include/rxmesh/attribute.h @@ -7,6 +7,7 @@ #include "rxmesh/kernels/attribute.cuh" #include "rxmesh/kernels/collective.cuh" #include "rxmesh/kernels/util.cuh" +#include "rxmesh/matrix/dense_matrix.cuh" #include "rxmesh/patch_info.h" #include "rxmesh/rxmesh.h" #include "rxmesh/types.h" @@ -14,7 +15,6 @@ #include "rxmesh/util/log.h" #include "rxmesh/util/util.h" - #define GLM_ENABLE_EXPERIMENTAL #include #include @@ -25,6 +25,7 @@ class RXMeshTest; namespace rxmesh { +class RXMeshStatic; /** * @brief Base untyped attributes used as an interface for attribute container @@ -98,11 +99,11 @@ class Attribute : public AttributeBase * @param location where the attribute to be allocated * @param layout memory layout in case of num_attributes>1 */ - explicit Attribute(const char* name, - const uint32_t num_attributes, - locationT location, - const layoutT layout, - const RXMesh* rxmesh) + explicit Attribute(const char* name, + const uint32_t num_attributes, + locationT location, + const layoutT layout, + const RXMeshStatic* rxmesh) : AttributeBase(), m_rxmesh(rxmesh), m_h_patches_info(rxmesh->m_h_patches_info), @@ -155,10 +156,12 @@ class Attribute : public AttributeBase return this->operator()(m_rxmesh->map_to_local_face(i), j); } } + size_t rows() const { return size(); } + size_t cols() const { return this->get_num_attributes(); @@ -179,6 +182,50 
@@ class Attribute : public AttributeBase } } + /** + * @brief convert the attributes stored into a dense matrix where number of + * rows represent the number of mesh elements of this attribute and number + * of columns is the number of attributes + */ + std::shared_ptr> to_matrix() + { + std::shared_ptr> mat = + std::make_shared>(rows(), cols()); + + if constexpr (std::is_same_v) { + m_rxmesh->for_each_vertex(HOST, [&](const VertexHandle vh) { + uint32_t i = m_rxmesh->linear_id(vh); + + for (uint32_t j = 0; j < cols(); ++j) { + (*mat)(i, j) = this->operator()(vh, j); + } + }); + } + + if constexpr (std::is_same_v) { + m_rxmesh->for_each_edge(HOST, [&](const EdgeHandle eh) { + uint32_t i = m_rxmesh->linear_id(eh); + + for (uint32_t j = 0; j < cols(); ++j) { + (*mat)(i, j) = this->operator()(eh, j); + } + }); + } + + if constexpr (std::is_same_v) { + m_rxmesh->for_each_face(HOST, [&](const FaceHandle fh) { + uint32_t i = m_rxmesh->linear_id(fh); + + for (uint32_t j = 0; j < cols(); ++j) { + (*mat)(i, j) = this->operator()(fh, j); + } + }); + } + + mat->move(HOST, DEVICE); + + return mat; + } /** * @brief get the number of elements in a patch. 
The element type @@ -680,18 +727,18 @@ class Attribute : public AttributeBase } } - const RXMesh* m_rxmesh; - const PatchInfo* m_h_patches_info; - const PatchInfo* m_d_patches_info; - char* m_name; - uint32_t m_num_attributes; - locationT m_allocated; - T** m_h_attr; - T** m_h_ptr_on_device; - T** m_d_attr; - uint32_t m_max_num_patches; - layoutT m_layout; - double m_memory_mega_bytes; + const RXMeshStatic* m_rxmesh; + const PatchInfo* m_h_patches_info; + const PatchInfo* m_d_patches_info; + char* m_name; + uint32_t m_num_attributes; + locationT m_allocated; + T** m_h_attr; + T** m_h_ptr_on_device; + T** m_d_attr; + uint32_t m_max_num_patches; + layoutT m_layout; + double m_memory_mega_bytes; constexpr static uint32_t m_block_size = 256; }; @@ -760,11 +807,11 @@ class AttributeContainer * @return a shared pointer to the attribute */ template - std::shared_ptr add(const char* name, - uint32_t num_attributes, - locationT location, - layoutT layout, - const RXMesh* rxmesh) + std::shared_ptr add(const char* name, + uint32_t num_attributes, + locationT location, + layoutT layout, + const RXMeshStatic* rxmesh) { if (does_exist(name)) { RXMESH_WARN( diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index 289b69df..4761036f 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -16,65 +16,95 @@ namespace rxmesh { template struct DenseMatrix { - DenseMatrix(IndexT row_size, IndexT col_size) - : m_row_size(row_size), - m_col_size(col_size), + DenseMatrix(IndexT num_rows, + IndexT num_cols, + locationT location = LOCATION_ALL) + : m_num_rows(num_rows), + m_num_cols(num_cols), m_dendescr(NULL), - m_allocated(LOCATION_NONE), + m_h_val(nullptr), + m_d_val(nullptr), m_col_pad_bytes(0), m_col_pad_idx(0) { - m_allocated = m_allocated | DEVICE; - IndexT col_data_bytes = m_row_size * sizeof(T); + IndexT col_data_bytes = m_num_rows * sizeof(T); if (MemAlignSize != 0 && col_data_bytes % MemAlignSize 
!= 0) { m_col_pad_bytes = MemAlignSize - (col_data_bytes % MemAlignSize); m_col_pad_idx = m_col_pad_bytes / sizeof(T); } - CUDA_ERROR(cudaMalloc((void**)&m_d_val, bytes())); + allocate(location); CUSPARSE_ERROR(cusparseCreateDnMat(&m_dendescr, - m_row_size, - m_col_size, - m_row_size, // leading dim + m_num_rows, + m_num_cols, + m_num_rows, // leading dim m_d_val, CUDA_R_32F, CUSPARSE_ORDER_COL)); } + /** + * @brief return the leading dimension (row by default) + */ IndexT lead_dim() const { - return m_row_size; + return m_num_rows; } + /** + * @brief return number of rows + */ + IndexT rows() const + { + return m_num_rows; + } + + /** + * @brief return number of columns + */ + IndexT cols() const + { + return m_num_cols; + } + + /** + * @brief accessing a specific value in the matrix using the row and col + * index. Can be used on both host and device + */ __host__ __device__ T& operator()(const uint32_t row, const uint32_t col) { - assert(row < m_row_size); - assert(col < m_col_size); + assert(row < m_num_rows); + assert(col < m_num_cols); #ifdef __CUDA_ARCH__ - return m_d_val[col * (m_row_size + m_col_pad_idx) + row]; + return m_d_val[col * (m_num_rows + m_col_pad_idx) + row]; #else - return m_h_val[col * (m_row_size + m_col_pad_idx) + row]; + return m_h_val[col * (m_num_rows + m_col_pad_idx) + row]; #endif } - __host__ __device__ T& operator()(const uint32_t row, - const uint32_t col) const + /** + * @brief accessing a specific value in the matrix using the row and col + * index. 
Can be used on both host and device + */ + __host__ __device__ const T& operator()(const uint32_t row, + const uint32_t col) const { - assert(row < m_row_size); - assert(col < m_col_size); + assert(row < m_num_rows); + assert(col < m_num_cols); #ifdef __CUDA_ARCH__ - return m_d_val[col * (m_row_size + m_col_pad_idx) + row]; + return m_d_val[col * (m_num_rows + m_col_pad_idx) + row]; #else - return m_h_val[col * (m_row_size + m_col_pad_idx) + row]; + return m_h_val[col * (m_num_rows + m_col_pad_idx) + row]; #endif } /** - * @brief return the raw pointer based on the location specified + * @brief return the raw pointer based on the specified location (host vs. + * device) */ T* data(locationT location = DEVICE) const { @@ -87,21 +117,20 @@ struct DenseMatrix } assert(1 != 1); - return 0; + return nullptr; } /** - * @brief return the raw pointer to columns based on column index the - * location specified and + * @brief return the raw pointer pf a column. */ T* col_data(const uint32_t ld_idx, locationT location = DEVICE) const { if ((location & HOST) == HOST) { - return m_h_val + ld_idx * (m_row_size + m_col_pad_idx); + return m_h_val + ld_idx * (m_num_rows + m_col_pad_idx); } if ((location & DEVICE) == DEVICE) { - return m_d_val + ld_idx * (m_row_size + m_col_pad_idx); + return m_d_val + ld_idx * (m_num_rows + m_col_pad_idx); } if ((location & m_allocated) == location) { @@ -114,16 +143,16 @@ struct DenseMatrix } /** - * @brief return the total number bytes used by the array - */ + * @brief return the total number bytes used to allocate the matrix + */ IndexT bytes() const { - return (m_row_size + m_col_pad_idx) * m_col_size * sizeof(T); + return (m_num_rows + m_col_pad_idx) * m_num_cols * sizeof(T); } /** - * @brief move the data between host an device - */ + * @brief move the data between host and device + */ void move(locationT source, locationT target, cudaStream_t stream = NULL) { if (source == target) { @@ -161,8 +190,8 @@ struct DenseMatrix } /** - * @brief 
release the data on host or device - */ + * @brief release the data on host or device + */ void release(locationT location = LOCATION_ALL) { if (((location & HOST) == HOST) && ((m_allocated & HOST) == HOST)) { @@ -178,9 +207,10 @@ struct DenseMatrix } } + private: /** - * @brief allocate the data on host or device - */ + * @brief allocate the data on host or device + */ void allocate(locationT location) { if ((location & HOST) == HOST) { @@ -200,12 +230,11 @@ struct DenseMatrix } } - // TODO: something like attribute->move() cusparseDnMatDescr_t m_dendescr; locationT m_allocated; - IndexT m_row_size; - IndexT m_col_size; + IndexT m_num_rows; + IndexT m_num_cols; T* m_d_val; T* m_h_val; diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index b121bd5b..5b5a118c 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -556,7 +556,7 @@ struct SparseMatrix rxmesh::Solver solver, rxmesh::Reorder reorder) { - for (int i = 0; i < B_mat.m_col_size; ++i) { + for (int i = 0; i < B_mat.cols(); ++i) { cusparse_linear_solver_wrapper(solver, reorder, m_cusolver_sphandle, From 8d987ecc3734ed611db1f912ad9f3f7dccb9c25a Mon Sep 17 00:00:00 2001 From: ahmed Date: Sun, 7 Jul 2024 14:54:54 -0400 Subject: [PATCH 15/96] dense matrix -> attributes --- apps/MCF/mcf_sparse_matrix.cuh | 44 ++++++-------------------------- include/rxmesh/attribute.h | 46 +++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 38 deletions(-) diff --git a/apps/MCF/mcf_sparse_matrix.cuh b/apps/MCF/mcf_sparse_matrix.cuh index 3b817742..5b6aeb63 100644 --- a/apps/MCF/mcf_sparse_matrix.cuh +++ b/apps/MCF/mcf_sparse_matrix.cuh @@ -146,7 +146,8 @@ void mcf_rxmesh_cusolver_chol(rxmesh::RXMeshStatic& rx, constexpr uint32_t blockThreads = 256; uint32_t num_vertices = rx.get_num_vertices(); - auto coords = rx.get_input_vertex_coordinates(); + + auto coords = rx.get_input_vertex_coordinates(); SparseMatrix A_mat(rx); 
DenseMatrix B_mat(num_vertices, 3); @@ -190,44 +191,13 @@ void mcf_rxmesh_cusolver_chol(rxmesh::RXMeshStatic& rx, // Solving the linear system using chol factorization and no reordering A_mat.spmat_linear_solve(B_mat, *X_mat, Solver::CHOL, Reorder::NONE); + // move the results to the host X_mat->move(rxmesh::DEVICE, rxmesh::HOST); - const T tol = 0.001; - T tmp_tol = tol; - bool passed = true; - rx.for_each_vertex(HOST, [&](const VertexHandle vh) { - uint32_t v_id = rx.map_to_global(vh); - uint32_t v_linear_id = rx.linear_id(vh); - - - for (uint32_t i = 0; i < 3; ++i) { - tmp_tol = - std::abs(((*X_mat)(v_linear_id, i) - ground_truth[v_id][i]) / - ground_truth[v_id][i]); - - if (tmp_tol > tol) { - RXMESH_WARN("val: {}, truth: {}, tol: {}\n", - (*X_mat)(v_linear_id, i), - ground_truth[v_id][i], - tmp_tol); - passed = false; - break; - } - } - }); - - - rx.for_each_vertex(HOST, [&](const VertexHandle vh) { - uint32_t v_linear_id = rx.linear_id(vh); - - for (uint32_t i = 0; i < 3; ++i) { - (*coords)(vh, i) = (*X_mat)(v_linear_id, i); - } - }); + // copy the results to attributes + coords->from_matrix(X_mat.get()); // rx.export_obj("mcf_rxmesh_chol.obj", *coords); - // rx.get_polyscope_mesh()->updateVertexPositions(*coords); - // polyscope::show(); - - EXPECT_TRUE(passed); + rx.get_polyscope_mesh()->updateVertexPositions(*coords); + polyscope::show(); } \ No newline at end of file diff --git a/include/rxmesh/attribute.h b/include/rxmesh/attribute.h index c980d997..2662a73a 100644 --- a/include/rxmesh/attribute.h +++ b/include/rxmesh/attribute.h @@ -187,7 +187,7 @@ class Attribute : public AttributeBase * rows represent the number of mesh elements of this attribute and number * of columns is the number of attributes */ - std::shared_ptr> to_matrix() + std::shared_ptr> to_matrix() const { std::shared_ptr> mat = std::make_shared>(rows(), cols()); @@ -227,6 +227,50 @@ class Attribute : public AttributeBase return mat; } + /** + * @brief copy a dense matrix to this 
attribute. The copying happens on the + * host side, i.e., we copy the content of mat which is on the host to this + * attribute on the host side + * @param mat + */ + void from_matrix(DenseMatrix* mat) + { + assert(mat->rows() == rows()); + assert(mat->cols() == cols()); + + + if constexpr (std::is_same_v) { + m_rxmesh->for_each_vertex(HOST, [&](const VertexHandle vh) { + uint32_t i = m_rxmesh->linear_id(vh); + + for (uint32_t j = 0; j < cols(); ++j) { + this->operator()(vh, j) = (*mat)(i, j); + } + }); + } + + if constexpr (std::is_same_v) { + m_rxmesh->for_each_edge(HOST, [&](const EdgeHandle eh) { + uint32_t i = m_rxmesh->linear_id(eh); + + for (uint32_t j = 0; j < cols(); ++j) { + this->operator()(eh, j) = (*mat)(i, j); + } + }); + } + + if constexpr (std::is_same_v) { + m_rxmesh->for_each_face(HOST, [&](const FaceHandle fh) { + uint32_t i = m_rxmesh->linear_id(fh); + + for (uint32_t j = 0; j < cols(); ++j) { + this->operator()(fh, j) = (*mat)(i, j); + } + }); + } + } + + /** * @brief get the number of elements in a patch. 
The element type * corresponds to the template HandleT From a9699df98510a05d6f6468a1551693eb1e9276ec Mon Sep 17 00:00:00 2001 From: ahmed Date: Sun, 7 Jul 2024 23:19:25 -0400 Subject: [PATCH 16/96] refactor sparse matrix --- apps/MCF/mcf_sparse_matrix.cuh | 7 +- include/rxmesh/matrix/sparse_matrix.cuh | 459 +++++++++++++---------- tests/RXMesh_test/test_sparse_matrix.cuh | 14 +- 3 files changed, 265 insertions(+), 215 deletions(-) diff --git a/apps/MCF/mcf_sparse_matrix.cuh b/apps/MCF/mcf_sparse_matrix.cuh index 5b6aeb63..0c064080 100644 --- a/apps/MCF/mcf_sparse_matrix.cuh +++ b/apps/MCF/mcf_sparse_matrix.cuh @@ -196,8 +196,11 @@ void mcf_rxmesh_cusolver_chol(rxmesh::RXMeshStatic& rx, // copy the results to attributes coords->from_matrix(X_mat.get()); - - // rx.export_obj("mcf_rxmesh_chol.obj", *coords); + rx.get_polyscope_mesh()->updateVertexPositions(*coords); polyscope::show(); + + B_mat.release(); + X_mat->release(); + A_mat.release(); } \ No newline at end of file diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index 5b5a118c..a454c832 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -133,10 +133,12 @@ void permute_gather(IndexT* d_p, T* d_in, T* d_out, IndexT size) /** - * @brief the sparse matrix implementation and all the functions and soolvers - * related. Right now, only VV is supported and we would add more compatibility - * for EE, FF, VE...... - * This is device/host compatiable + * @brief Sparse matrix that represent the VV connectivity, i.e., it is a square + * matrix with number of rows/cols is equal to number of vertices and there is + * non-zero values at entry (i,j) only if the vertex i is connected to vertex j. + * The sparse matrix is stored as a CSR matrix and it is allocated on both host + * and device. The class also provides implementation for matrix-vector + * multiplication and linear solver—(using cuSolver and cuSparse as a back-end. 
*/ template struct SparseMatrix @@ -145,8 +147,11 @@ struct SparseMatrix : m_d_row_ptr(nullptr), m_d_col_idx(nullptr), m_d_val(nullptr), - m_row_size(0), - m_col_size(0), + m_h_row_ptr(nullptr), + m_h_col_idx(nullptr), + m_h_val(nullptr), + m_num_rows(0), + m_num_cols(0), m_nnz(0), m_context(rx.get_context()), m_cusparse_handle(NULL), @@ -154,6 +159,11 @@ struct SparseMatrix m_spdescr(NULL), m_spmm_buffer_size(0), m_spmv_buffer_size(0), + m_h_permute(nullptr), + m_d_permute(nullptr), + m_d_solver_row_ptr(nullptr), + m_d_solver_col_idx(nullptr), + m_d_solver_val(nullptr), m_use_reorder(false), m_reorder_allocated(false), m_allocated(LOCATION_NONE) @@ -165,8 +175,8 @@ struct SparseMatrix IndexT num_vertices = rx.get_num_vertices(); IndexT num_edges = rx.get_num_edges(); - m_row_size = num_vertices; - m_col_size = num_vertices; + m_num_rows = num_vertices; + m_num_cols = num_vertices; // row pointer allocation and init with prefix sum for CSR CUDA_ERROR(cudaMalloc((void**)&m_d_row_ptr, @@ -221,10 +231,12 @@ struct SparseMatrix launch_box.smem_bytes_dyn>>>( m_context, m_d_row_ptr, m_d_col_idx); - // val pointer allocation, actual value init should be in another - // function + // allocate value ptr CUDA_ERROR(cudaMalloc((void**)&m_d_val, m_nnz * sizeof(T))); + CUDA_ERROR(cudaMemset(m_d_val, 0, m_nnz * sizeof(T))); + m_allocated = m_allocated | DEVICE; + // create cusparse matrix CUSPARSE_ERROR(cusparseCreateMatDescr(&m_descr)); CUSPARSE_ERROR( cusparseSetMatType(m_descr, CUSPARSE_MATRIX_TYPE_GENERAL)); @@ -232,8 +244,8 @@ struct SparseMatrix cusparseSetMatIndexBase(m_descr, CUSPARSE_INDEX_BASE_ZERO)); CUSPARSE_ERROR(cusparseCreateCsr(&m_spdescr, - m_row_size, - m_col_size, + m_num_rows, + m_num_cols, m_nnz, m_d_row_ptr, m_d_col_idx, @@ -246,55 +258,92 @@ struct SparseMatrix CUSPARSE_ERROR(cusparseCreate(&m_cusparse_handle)); CUSOLVER_ERROR(cusolverSpCreate(&m_cusolver_sphandle)); - m_allocated = m_allocated | DEVICE; + + // allocate the host + m_h_val = 
static_cast(malloc(m_nnz * sizeof(T))); + m_h_row_ptr = + static_cast(malloc((m_num_rows + 1) * sizeof(IndexT))); + m_h_col_idx = static_cast(malloc(m_nnz * sizeof(IndexT))); + + CUDA_ERROR(cudaMemcpy( + m_h_val, m_d_val, m_nnz * sizeof(T), cudaMemcpyDeviceToHost)); + CUDA_ERROR(cudaMemcpy(m_h_col_idx, + m_d_col_idx, + m_nnz * sizeof(IndexT), + cudaMemcpyDeviceToHost)); + CUDA_ERROR(cudaMemcpy(m_h_row_ptr, + m_d_row_ptr, + (m_num_rows + 1) * sizeof(IndexT), + cudaMemcpyDeviceToHost)); + + m_allocated = m_allocated | HOST; } - void set_ones() + /** + * @brief set all entries in the matrix to ones on both host and device + */ + __host__ void set_ones() { - std::vector init_tmp_arr(m_nnz, 1); - CUDA_ERROR(cudaMemcpy(m_d_val, - init_tmp_arr.data(), - m_nnz * sizeof(T), - cudaMemcpyHostToDevice)); + std::fill_n(m_h_val, m_nnz, 1); + CUDA_ERROR(cudaMemcpy( + m_d_val, m_h_val, m_nnz * sizeof(T), cudaMemcpyHostToDevice)); } - __device__ IndexT get_val_idx(const VertexHandle& row_v, - const VertexHandle& col_v) + /** + * @brief set all entries in the matrix to zeros on both host and device + */ + __host__ void set_zeros() { - auto r_ids = row_v.unpack(); - uint32_t r_patch_id = r_ids.first; - uint16_t r_local_id = r_ids.second; + std::memset(m_h_val, 0, m_nnz * sizeof(T)); - auto c_ids = col_v.unpack(); - uint32_t c_patch_id = c_ids.first; - uint16_t c_local_id = c_ids.second; + CUDA_ERROR(cudaMemset(m_d_val, 0, m_nnz * sizeof(T))); + } - uint32_t col_index = m_context.m_vertex_prefix[c_patch_id] + c_local_id; - uint32_t row_index = m_context.m_vertex_prefix[r_patch_id] + r_local_id; + /** + * @brief return number of rows + */ + __device__ __host__ IndexT rows() const + { + return m_num_rows; + } - const IndexT start = m_d_row_ptr[row_index]; - const IndexT end = m_d_row_ptr[row_index + 1]; + /** + * @brief return number of cols + */ + __device__ __host__ IndexT cols() const + { + return m_num_cols; + } - for (IndexT i = start; i < end; ++i) { - if (m_d_col_idx[i] == 
col_index) { - return i; - } - } - assert(1 != 1); + /** + * @brief return number of non-zero values + */ + __device__ __host__ IndexT non_zeros() const + { + return m_nnz; } + /** + * @brief access the matrix using VertexHandle + */ __device__ T& operator()(const VertexHandle& row_v, const VertexHandle& col_v) { return m_d_val[get_val_idx(row_v, col_v)]; } + /** + * @brief access the matrix using VertexHandle + */ __device__ T& operator()(const VertexHandle& row_v, const VertexHandle& col_v) const { return m_d_val[get_val_idx(row_v, col_v)]; } + /** + * @brief access the matrix using row and col index + */ __device__ T& operator()(const IndexT x, const IndexT y) { const IndexT start = m_d_row_ptr[x]; @@ -308,6 +357,9 @@ struct SparseMatrix assert(1 != 1); } + /** + * @brief access the matrix using row and col index + */ __device__ T& operator()(const IndexT x, const IndexT y) const { const IndexT start = m_d_row_ptr[x]; @@ -321,10 +373,6 @@ struct SparseMatrix assert(1 != 1); } - __host__ __device__ IndexT& get_nnz() const - { - return m_nnz; - } __device__ IndexT& get_row_ptr_at(IndexT idx) const { @@ -341,11 +389,12 @@ struct SparseMatrix return m_d_val[idx]; } - void free_mat() + /** + * @brief release all allocated memory + */ + void release() { - CUDA_ERROR(cudaFree(m_d_row_ptr)); - CUDA_ERROR(cudaFree(m_d_col_idx)); - CUDA_ERROR(cudaFree(m_d_val)); + release(LOCATION_ALL); CUSPARSE_ERROR(cusparseDestroy(m_cusparse_handle)); CUSPARSE_ERROR(cusparseDestroyMatDescr(m_descr)); CUSOLVER_ERROR(cusolverSpDestroy(m_cusolver_sphandle)); @@ -359,47 +408,61 @@ struct SparseMatrix } } - /* ----- CUSPARSE SPMM & SPMV ----- */ - /** - * @brief wrap up the cusparse api for sparse matrix dense matrix - * multiplication buffer size calculation. 
+ * @brief move the data between host an device */ - void denmat_mul_buffer_size(rxmesh::DenseMatrix B_mat, - rxmesh::DenseMatrix C_mat, - cudaStream_t stream = 0) + void move(locationT source, locationT target, cudaStream_t stream = NULL) { - float alpha = 1.0f; - float beta = 0.0f; + if (source == target) { + RXMESH_WARN( + "SparseMatrix::move() source ({}) and target ({}) " + "are the same.", + location_to_string(source), + location_to_string(target)); + return; + } - cusparseSpMatDescr_t matA = m_spdescr; - cusparseDnMatDescr_t matB = B_mat.m_dendescr; - cusparseDnMatDescr_t matC = C_mat.m_dendescr; - void* dBuffer = NULL; + if ((source == HOST || source == DEVICE) && + ((source & m_allocated) != source)) { + RXMESH_ERROR( + "SparseMatrix::move() moving source is not valid" + " because it was not allocated on source i.e., {}", + location_to_string(source)); + return; + } - CUSPARSE_ERROR(cusparseSetStream(m_cusparse_handle, stream)); + if (((target & HOST) == HOST || (target & DEVICE) == DEVICE) && + ((target & m_allocated) != target)) { + RXMESH_ERROR("SparseMatrix::move() target {} is not allocated!", + location_to_string(target)); + return; + } - // allocate an external buffer if needed - CUSPARSE_ERROR(cusparseSpMM_bufferSize(m_cusparse_handle, - CUSPARSE_OPERATION_NON_TRANSPOSE, - CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha, - matA, - matB, - &beta, - matC, - CUDA_R_32F, - CUSPARSE_SPMM_ALG_DEFAULT, - &m_spmm_buffer_size)); + if (source == HOST && target == DEVICE) { + CUDA_ERROR(cudaMemcpyAsync(m_d_val, + m_h_val, + m_nnz * sizeof(T), + cudaMemcpyHostToDevice, + stream)); + } else if (source == DEVICE && target == HOST) { + CUDA_ERROR(cudaMemcpyAsync(m_h_val, + m_d_val, + m_nnz * sizeof(T), + cudaMemcpyDeviceToHost, + stream)); + } } /** - * @brief wrap up the cusparse api for sparse matrix dense matrix - * multiplication. + * @brief multiply the sparse matrix by a dense matrix. 
The function + * performs the multiplication as + * C = A*B + * where A is the sparse matrix, B is a dense matrix, and the result is a + * dense matrix C */ - void denmat_mul(rxmesh::DenseMatrix B_mat, - rxmesh::DenseMatrix C_mat, - cudaStream_t stream = 0) + __host__ void multiply_by_dense_matrix(rxmesh::DenseMatrix B_mat, + rxmesh::DenseMatrix C_mat, + cudaStream_t stream = 0) { float alpha = 1.0f; float beta = 0.0f; @@ -438,31 +501,6 @@ struct SparseMatrix CUDA_ERROR(cudaFree(dBuffer)); } - void arr_mul_buffer_size(T* in_arr, T* rt_arr, cudaStream_t stream = 0) - { - const float alpha = 1.0f; - const float beta = 0.0f; - - cusparseDnVecDescr_t vecx = NULL; - cusparseDnVecDescr_t vecy = NULL; - - CUSPARSE_ERROR( - cusparseCreateDnVec(&vecx, m_col_size, in_arr, CUDA_R_32F)); - CUSPARSE_ERROR( - cusparseCreateDnVec(&vecy, m_row_size, rt_arr, CUDA_R_32F)); - - CUSPARSE_ERROR(cusparseSpMV_bufferSize(m_cusparse_handle, - CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha, - m_spdescr, - vecx, - &beta, - vecy, - CUDA_R_32F, - CUSPARSE_SPMV_ALG_DEFAULT, - &m_spmv_buffer_size)); - } - /** * @brief wrap up the cusparse api for sparse matrix array * multiplication. 
@@ -477,9 +515,9 @@ struct SparseMatrix cusparseDnVecDescr_t vecy = NULL; CUSPARSE_ERROR( - cusparseCreateDnVec(&vecx, m_col_size, in_arr, CUDA_R_32F)); + cusparseCreateDnVec(&vecx, m_num_cols, in_arr, CUDA_R_32F)); CUSPARSE_ERROR( - cusparseCreateDnVec(&vecy, m_row_size, rt_arr, CUDA_R_32F)); + cusparseCreateDnVec(&vecy, m_num_rows, rt_arr, CUDA_R_32F)); CUSPARSE_ERROR(cusparseSetStream(m_cusparse_handle, stream)); @@ -516,7 +554,7 @@ struct SparseMatrix void spmat_denmat_mul_cw(rxmesh::DenseMatrix B_mat, rxmesh::DenseMatrix C_mat) { - for (int i = 0; i < B_mat.m_col_size; ++i) { + for (int i = 0; i < B_mat.m_num_cols; ++i) { arr_mul(B_mat.col_data(i), C_mat.col_data(i)); } } @@ -537,8 +575,8 @@ struct SparseMatrix reorder, m_cusolver_sphandle, m_descr, - m_row_size, - m_col_size, + m_num_rows, + m_num_cols, m_nnz, m_d_row_ptr, m_d_col_idx, @@ -561,8 +599,8 @@ struct SparseMatrix reorder, m_cusolver_sphandle, m_descr, - m_row_size, - m_col_size, + m_num_rows, + m_num_cols, m_nnz, m_d_row_ptr, m_d_col_idx, @@ -681,8 +719,8 @@ struct SparseMatrix /* --- LOW LEVEL API --- */ /** - * @brief The lower level api of reordering. Sprcify the reordering type or - * simply NONE for no reordering. This should be called at the begining of + * @brief The lower level api of reordering. Specify the reordering type or + * simply NONE for no reordering. This should be called at the beginning of * the solving process. Any other function call order would be undefined. * @param reorder: the reorder method applied. 
*/ @@ -718,19 +756,19 @@ struct SparseMatrix m_reorder_allocated = true; CUDA_ERROR(cudaMalloc((void**)&m_d_solver_val, m_nnz * sizeof(T))); CUDA_ERROR(cudaMalloc((void**)&m_d_solver_row_ptr, - (m_row_size + 1) * sizeof(IndexT))); + (m_num_rows + 1) * sizeof(IndexT))); CUDA_ERROR( cudaMalloc((void**)&m_d_solver_col_idx, m_nnz * sizeof(IndexT))); - m_h_permute = (IndexT*)malloc(m_row_size * sizeof(IndexT)); + m_h_permute = (IndexT*)malloc(m_num_rows * sizeof(IndexT)); CUDA_ERROR( - cudaMalloc((void**)&m_d_permute, m_row_size * sizeof(IndexT))); + cudaMalloc((void**)&m_d_permute, m_num_rows * sizeof(IndexT))); CUSOLVER_ERROR(cusolverSpCreate(&m_cusolver_sphandle)); if (reorder == Reorder::SYMRCM) { CUSOLVER_ERROR(cusolverSpXcsrsymrcmHost(m_cusolver_sphandle, - m_row_size, + m_num_rows, m_nnz, m_descr, m_h_row_ptr, @@ -738,7 +776,7 @@ struct SparseMatrix m_h_permute)); } else if (reorder == Reorder::SYMAMD) { CUSOLVER_ERROR(cusolverSpXcsrsymamdHost(m_cusolver_sphandle, - m_row_size, + m_num_rows, m_nnz, m_descr, m_h_row_ptr, @@ -746,7 +784,7 @@ struct SparseMatrix m_h_permute)); } else if (reorder == Reorder::NSTDIS) { CUSOLVER_ERROR(cusolverSpXcsrmetisndHost(m_cusolver_sphandle, - m_row_size, + m_num_rows, m_nnz, m_descr, m_h_row_ptr, @@ -757,7 +795,7 @@ struct SparseMatrix CUDA_ERROR(cudaMemcpyAsync(m_d_permute, m_h_permute, - m_row_size * sizeof(IndexT), + m_num_rows * sizeof(IndexT), cudaMemcpyHostToDevice)); // working space for permutation: B = A*Q*A^T @@ -772,8 +810,8 @@ struct SparseMatrix void* perm_buffer_cpu = NULL; CUSOLVER_ERROR(cusolverSpXcsrperm_bufferSizeHost(m_cusolver_sphandle, - m_row_size, - m_col_size, + m_num_rows, + m_num_cols, m_nnz, m_descr, m_h_row_ptr, @@ -789,8 +827,8 @@ struct SparseMatrix } CUSOLVER_ERROR(cusolverSpXcsrpermHost(m_cusolver_sphandle, - m_row_size, - m_col_size, + m_num_rows, + m_num_cols, m_nnz, m_descr, m_h_row_ptr, @@ -818,7 +856,7 @@ struct SparseMatrix cudaMemcpyHostToDevice)); 
CUDA_ERROR(cudaMemcpyAsync(m_d_solver_row_ptr, m_h_row_ptr, - (m_row_size + 1) * sizeof(IndexT), + (m_num_rows + 1) * sizeof(IndexT), cudaMemcpyHostToDevice)); CUDA_ERROR(cudaMemcpyAsync(m_d_solver_col_idx, m_h_col_idx, @@ -860,7 +898,7 @@ struct SparseMatrix m_internalDataInBytes = 0; m_workspaceInBytes = 0; CUSOLVER_ERROR(cusolverSpXcsrcholAnalysis(m_cusolver_sphandle, - m_row_size, + m_num_rows, m_nnz, m_descr, m_d_solver_row_ptr, @@ -876,7 +914,7 @@ struct SparseMatrix { if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpScsrcholBufferInfo(m_cusolver_sphandle, - m_row_size, + m_num_rows, m_nnz, m_descr, m_d_solver_val, @@ -889,7 +927,7 @@ struct SparseMatrix if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpDcsrcholBufferInfo(m_cusolver_sphandle, - m_row_size, + m_num_rows, m_nnz, m_descr, m_d_solver_val, @@ -919,7 +957,7 @@ struct SparseMatrix { if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpScsrcholFactor(m_cusolver_sphandle, - m_row_size, + m_num_rows, m_nnz, m_descr, m_d_solver_val, @@ -930,7 +968,7 @@ struct SparseMatrix } if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpDcsrcholFactor(m_cusolver_sphandle, - m_row_size, + m_num_rows, m_nnz, m_descr, m_d_solver_val, @@ -975,11 +1013,11 @@ struct SparseMatrix if (m_use_reorder) { /* purmute b and x*/ - CUDA_ERROR(cudaMalloc((void**)&d_solver_b, m_row_size * sizeof(T))); - detail::permute_gather(m_d_permute, d_b, d_solver_b, m_row_size); + CUDA_ERROR(cudaMalloc((void**)&d_solver_b, m_num_rows * sizeof(T))); + detail::permute_gather(m_d_permute, d_b, d_solver_b, m_num_rows); - CUDA_ERROR(cudaMalloc((void**)&d_solver_x, m_col_size * sizeof(T))); - detail::permute_gather(m_d_permute, d_x, d_solver_x, m_row_size); + CUDA_ERROR(cudaMalloc((void**)&d_solver_x, m_num_cols * sizeof(T))); + detail::permute_gather(m_d_permute, d_x, d_solver_x, m_num_rows); } else { d_solver_b = d_b; d_solver_x = d_x; @@ -987,7 +1025,7 @@ struct SparseMatrix if constexpr (std::is_same_v) { 
CUSOLVER_ERROR(cusolverSpScsrcholSolve(m_cusolver_sphandle, - m_row_size, + m_num_rows, d_solver_b, d_solver_x, m_chol_info, @@ -996,7 +1034,7 @@ struct SparseMatrix if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpDcsrcholSolve(m_cusolver_sphandle, - m_row_size, + m_num_rows, d_solver_b, d_solver_x, m_chol_info, @@ -1004,82 +1042,96 @@ struct SparseMatrix } if (m_use_reorder) { - detail::permute_scatter(m_d_permute, d_solver_x, d_x, m_row_size); + detail::permute_scatter(m_d_permute, d_solver_x, d_x, m_num_rows); GPU_FREE(d_solver_b); GPU_FREE(d_solver_x); } } - /* Host data compatibility */ - /** - * @brief move the data between host an device - */ - void move(locationT source, locationT target, cudaStream_t stream = NULL) + + private: + __device__ const IndexT get_val_idx(const VertexHandle& row_v, + const VertexHandle& col_v) const { - if (source == target) { - RXMESH_WARN( - "SparseMatrix::move() source ({}) and target ({}) " - "are the same.", - location_to_string(source), - location_to_string(target)); - return; - } + auto r_ids = row_v.unpack(); + uint32_t r_patch_id = r_ids.first; + uint16_t r_local_id = r_ids.second; - if ((source == HOST || source == DEVICE) && - ((source & m_allocated) != source)) { - RXMESH_ERROR( - "SparseMatrix::move() moving source is not valid" - " because it was not allocated on source i.e., {}", - location_to_string(source)); - } + auto c_ids = col_v.unpack(); + uint32_t c_patch_id = c_ids.first; + uint16_t c_local_id = c_ids.second; - if (((target & HOST) == HOST || (target & DEVICE) == DEVICE) && - ((target & m_allocated) != target)) { - RXMESH_WARN( - "SparseMatrix::move() allocating target before moving to {}", - location_to_string(target)); - allocate(target); - } + uint32_t col_index = m_context.m_vertex_prefix[c_patch_id] + c_local_id; + uint32_t row_index = m_context.m_vertex_prefix[r_patch_id] + r_local_id; - if (source == HOST && target == DEVICE) { - CUDA_ERROR(cudaMemcpyAsync(m_d_val, - m_h_val, - m_nnz * 
sizeof(T), - cudaMemcpyHostToDevice, - stream)); - CUDA_ERROR(cudaMemcpyAsync(m_d_row_ptr, - m_h_row_ptr, - (m_row_size + 1) * sizeof(IndexT), - cudaMemcpyHostToDevice, - stream)); - CUDA_ERROR(cudaMemcpyAsync(m_d_col_idx, - m_h_col_idx, - m_nnz * sizeof(IndexT), - cudaMemcpyHostToDevice, - stream)); - } else if (source == DEVICE && target == HOST) { - CUDA_ERROR(cudaMemcpyAsync(m_h_val, - m_d_val, - m_nnz * sizeof(T), - cudaMemcpyDeviceToHost, - stream)); - CUDA_ERROR(cudaMemcpyAsync(m_h_row_ptr, - m_d_row_ptr, - (m_row_size + 1) * sizeof(IndexT), - cudaMemcpyDeviceToHost, - stream)); - CUDA_ERROR(cudaMemcpyAsync(m_h_col_idx, - m_d_col_idx, - m_nnz * sizeof(IndexT), - cudaMemcpyDeviceToHost, - stream)); + const IndexT start = m_d_row_ptr[row_index]; + const IndexT end = m_d_row_ptr[row_index + 1]; + + for (IndexT i = start; i < end; ++i) { + if (m_d_col_idx[i] == col_index) { + return i; + } } + assert(1 != 1); } - /** - * @brief release the data on host or device - */ - void release(locationT location = LOCATION_ALL) + + void denmat_mul_buffer_size(rxmesh::DenseMatrix B_mat, + rxmesh::DenseMatrix C_mat, + cudaStream_t stream = 0) + { + float alpha = 1.0f; + float beta = 0.0f; + + cusparseSpMatDescr_t matA = m_spdescr; + cusparseDnMatDescr_t matB = B_mat.m_dendescr; + cusparseDnMatDescr_t matC = C_mat.m_dendescr; + void* dBuffer = NULL; + + CUSPARSE_ERROR(cusparseSetStream(m_cusparse_handle, stream)); + + // allocate an external buffer if needed + CUSPARSE_ERROR(cusparseSpMM_bufferSize(m_cusparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + matA, + matB, + &beta, + matC, + CUDA_R_32F, + CUSPARSE_SPMM_ALG_DEFAULT, + &m_spmm_buffer_size)); + } + + + void arr_mul_buffer_size(T* in_arr, T* rt_arr, cudaStream_t stream = 0) + { + const float alpha = 1.0f; + const float beta = 0.0f; + + cusparseDnVecDescr_t vecx = NULL; + cusparseDnVecDescr_t vecy = NULL; + + CUSPARSE_ERROR( + cusparseCreateDnVec(&vecx, m_num_cols, in_arr, 
CUDA_R_32F)); + CUSPARSE_ERROR( + cusparseCreateDnVec(&vecy, m_num_rows, rt_arr, CUDA_R_32F)); + + CUSPARSE_ERROR(cusparseSpMV_bufferSize(m_cusparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + m_spdescr, + vecx, + &beta, + vecy, + CUDA_R_32F, + CUSPARSE_SPMV_ALG_DEFAULT, + &m_spmv_buffer_size)); + } + + + void release(locationT location) { if (((location & HOST) == HOST) && ((m_allocated & HOST) == HOST)) { free(m_h_val); @@ -1100,9 +1152,6 @@ struct SparseMatrix } } - /** - * @brief allocate the data on host or device - */ void allocate(locationT location) { if ((location & HOST) == HOST) { @@ -1110,7 +1159,7 @@ struct SparseMatrix m_h_val = static_cast(malloc(m_nnz * sizeof(T))); m_h_row_ptr = - static_cast(malloc((m_row_size + 1) * sizeof(IndexT))); + static_cast(malloc((m_num_rows + 1) * sizeof(IndexT))); m_h_col_idx = static_cast(malloc(m_nnz * sizeof(IndexT))); m_allocated = m_allocated | HOST; @@ -1121,7 +1170,7 @@ struct SparseMatrix CUDA_ERROR(cudaMalloc((void**)&m_d_val, m_nnz * sizeof(T))); CUDA_ERROR(cudaMalloc((void**)&m_d_row_ptr, - (m_row_size + 1) * sizeof(IndexT))); + (m_num_rows + 1) * sizeof(IndexT))); CUDA_ERROR( cudaMalloc((void**)&m_d_col_idx, m_nnz * sizeof(IndexT))); @@ -1129,16 +1178,14 @@ struct SparseMatrix } } - - private: const Context m_context; cusparseHandle_t m_cusparse_handle; cusolverSpHandle_t m_cusolver_sphandle; cusparseSpMatDescr_t m_spdescr; cusparseMatDescr_t m_descr; - IndexT m_row_size; - IndexT m_col_size; + IndexT m_num_rows; + IndexT m_num_cols; IndexT m_nnz; // device csr data diff --git a/tests/RXMesh_test/test_sparse_matrix.cuh b/tests/RXMesh_test/test_sparse_matrix.cuh index 7125e396..bbc5b0fb 100644 --- a/tests/RXMesh_test/test_sparse_matrix.cuh +++ b/tests/RXMesh_test/test_sparse_matrix.cuh @@ -158,7 +158,7 @@ TEST(RXMeshStatic, SparseMatrix) CUDA_ERROR(cudaMalloc((void**)&d_result, (num_vertices) * sizeof(int))); SparseMatrix spmat(rx); - spmat.set_ones(); + spmat.set_identity(); 
spmat_multi_hardwired_kernel<<>>( d_arr_ones, spmat, d_result, num_vertices); @@ -193,7 +193,7 @@ TEST(RXMeshStatic, SparseMatrix) CUDA_ERROR(cudaFree(d_arr_ones)); CUDA_ERROR(cudaFree(d_result)); CUDA_ERROR(cudaFree(vet_degree)); - spmat.free_mat(); + spmat.release(); } /* First replace the sparse matrix entry with the edge length and then do spmv @@ -264,7 +264,7 @@ TEST(RXMeshStatic, SparseMatrixEdgeLen) CUDA_ERROR(cudaFree(d_arr_ref)); CUDA_ERROR(cudaFree(d_arr_ones)); CUDA_ERROR(cudaFree(d_result)); - spmat.free_mat(); + spmat.release(); } /* set up a simple AX=B system where A is a sparse matrix, B and C are dense @@ -310,7 +310,7 @@ TEST(RXMeshStatic, SparseMatrixSimpleSolve) GPUTimer timer; timer.start(); - A_mat.denmat_mul(X_mat, ret_mat); + A_mat.multiply_by_dense_matrix(X_mat, ret_mat); timer.stop(); RXMESH_TRACE("SPMM_rxmesh() took {} (ms) ", timer.elapsed_millis()); @@ -331,7 +331,7 @@ TEST(RXMeshStatic, SparseMatrixSimpleSolve) EXPECT_NEAR(h_ret_mat[i][j], h_B_mat[i][j], 1e-3); } } - A_mat.free_mat(); + A_mat.release(); } TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) @@ -380,7 +380,7 @@ TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) A_mat.spmat_chol_buffer_free(); - A_mat.denmat_mul(X_mat, ret_mat); + A_mat.multiply_by_dense_matrix(X_mat, ret_mat); std::vector> h_ret_mat(num_vertices); CUDA_ERROR(cudaMemcpy(h_ret_mat.data(), @@ -398,5 +398,5 @@ TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) EXPECT_NEAR(h_ret_mat[i][j], h_B_mat[i][j], 1e-3); } } - A_mat.free_mat(); + A_mat.release(); } \ No newline at end of file From f1c68b786b182ae410e2067d94549acf34cbf732 Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 8 Jul 2024 12:24:14 -0400 Subject: [PATCH 17/96] simplify accessing dense and sparse matrix --- apps/MCF/mcf_sparse_matrix.cuh | 26 +++---- include/rxmesh/attribute.h | 2 +- include/rxmesh/context.h | 66 ++++++++++++---- include/rxmesh/matrix/dense_matrix.cuh | 74 +++++++++++++++++- include/rxmesh/matrix/sparse_matrix.cuh | 95 
+++++++++++++++--------- include/rxmesh/rxmesh.cpp | 3 + tests/RXMesh_test/test_sparse_matrix.cuh | 43 +++++------ 7 files changed, 213 insertions(+), 96 deletions(-) diff --git a/apps/MCF/mcf_sparse_matrix.cuh b/apps/MCF/mcf_sparse_matrix.cuh index 0c064080..a1f7fc25 100644 --- a/apps/MCF/mcf_sparse_matrix.cuh +++ b/apps/MCF/mcf_sparse_matrix.cuh @@ -13,17 +13,11 @@ __global__ static void mcf_B_setup(const rxmesh::Context context, using namespace rxmesh; auto init_lambda = [&](VertexHandle& p_id, const VertexIterator& iter) { - auto r_ids = p_id.unpack(); - uint32_t r_patch_id = r_ids.first; - uint16_t r_local_id = r_ids.second; - - uint32_t row_index = context.m_vertex_prefix[r_patch_id] + r_local_id; - if (use_uniform_laplace) { - const T valence = static_cast(iter.size()); - B_mat(row_index, 0) = coords(p_id, 0) * valence; - B_mat(row_index, 1) = coords(p_id, 1) * valence; - B_mat(row_index, 2) = coords(p_id, 2) * valence; + const T valence = static_cast(iter.size()); + B_mat(p_id, 0) = coords(p_id, 0) * valence; + B_mat(p_id, 1) = coords(p_id, 1) * valence; + B_mat(p_id, 2) = coords(p_id, 2) * valence; } else { T v_weight = 0; @@ -42,9 +36,9 @@ __global__ static void mcf_B_setup(const rxmesh::Context context, } v_weight = 0.5 / v_weight; - B_mat(row_index, 0) = coords(p_id, 0) / v_weight; - B_mat(row_index, 1) = coords(p_id, 1) / v_weight; - B_mat(row_index, 2) = coords(p_id, 2) / v_weight; + B_mat(p_id, 0) = coords(p_id, 0) / v_weight; + B_mat(p_id, 1) = coords(p_id, 1) / v_weight; + B_mat(p_id, 2) = coords(p_id, 2) / v_weight; } }; @@ -82,8 +76,6 @@ __global__ static void mcf_A_setup( uint32_t r_patch_id = r_ids.first; uint16_t r_local_id = r_ids.second; - uint32_t row_index = context.m_vertex_prefix[r_patch_id] + r_local_id; - // set up matrix A for (uint32_t v = 0; v < iter.size(); ++v) { VertexHandle r_id = iter[v]; @@ -150,7 +142,7 @@ void mcf_rxmesh_cusolver_chol(rxmesh::RXMeshStatic& rx, auto coords = rx.get_input_vertex_coordinates(); SparseMatrix 
A_mat(rx); - DenseMatrix B_mat(num_vertices, 3); + DenseMatrix B_mat(rx, num_vertices, 3); std::shared_ptr> X_mat = coords->to_matrix(); @@ -196,7 +188,7 @@ void mcf_rxmesh_cusolver_chol(rxmesh::RXMeshStatic& rx, // copy the results to attributes coords->from_matrix(X_mat.get()); - + rx.get_polyscope_mesh()->updateVertexPositions(*coords); polyscope::show(); diff --git a/include/rxmesh/attribute.h b/include/rxmesh/attribute.h index 2662a73a..1d5b89b4 100644 --- a/include/rxmesh/attribute.h +++ b/include/rxmesh/attribute.h @@ -190,7 +190,7 @@ class Attribute : public AttributeBase std::shared_ptr> to_matrix() const { std::shared_ptr> mat = - std::make_shared>(rows(), cols()); + std::make_shared>(*m_rxmesh, rows(), cols()); if constexpr (std::is_same_v) { m_rxmesh->for_each_vertex(HOST, [&](const VertexHandle vh) { diff --git a/include/rxmesh/context.h b/include/rxmesh/context.h index 3c6a50ac..ebd76a38 100644 --- a/include/rxmesh/context.h +++ b/include/rxmesh/context.h @@ -26,9 +26,12 @@ class Context m_num_faces(nullptr), m_num_vertices(nullptr), m_num_patches(nullptr), - m_vertex_prefix(nullptr), - m_edge_prefix(nullptr), - m_face_prefix(nullptr), + m_d_vertex_prefix(nullptr), + m_d_edge_prefix(nullptr), + m_d_face_prefix(nullptr), + m_h_vertex_prefix(nullptr), + m_h_edge_prefix(nullptr), + m_h_face_prefix(nullptr), m_capacity_factor(0.0f), m_patches_info(nullptr), m_max_lp_capacity_v(0), @@ -37,10 +40,10 @@ class Context { } - __device__ Context(const Context&) = default; - __device__ Context(Context&&) = default; + __device__ Context(const Context&) = default; + __device__ Context(Context&&) = default; __device__ Context& operator=(const Context&) = default; - __device__ Context& operator=(Context&&) = default; + __device__ Context& operator=(Context&&) = default; /** * @brief Total number of edges in mesh @@ -80,6 +83,33 @@ class Context edge = edge_dir >> 1; } + __device__ __host__ __inline__ const uint32_t* vertex_prefix() const + { +#ifdef __CUDA_ARCH__ + 
return m_d_vertex_prefix; +#else + return m_h_vertex_prefix; +#endif + } + + __device__ __host__ __inline__ const uint32_t* edge_prefix() const + { +#ifdef __CUDA_ARCH__ + return m_d_edge_prefix; +#else + return m_h_edge_prefix; +#endif + } + + __device__ __host__ __inline__ const uint32_t* face_prefix() const + { +#ifdef __CUDA_ARCH__ + return m_d_face_prefix; +#else + return m_h_face_prefix; +#endif + } + /** * @brief get the owner handle of a given mesh element handle * @param handle the mesh element handle @@ -161,9 +191,12 @@ class Context const uint32_t num_patches, const uint32_t max_num_patches, const float capacity_factor, - uint32_t* vertex_prefix, - uint32_t* edge_prefix, - uint32_t* face_prefix, + uint32_t* d_vertex_prefix, + uint32_t* d_edge_prefix, + uint32_t* d_face_prefix, + uint32_t* h_vertex_prefix, + uint32_t* h_edge_prefix, + uint32_t* h_face_prefix, uint16_t max_lp_capacity_v, uint16_t max_lp_capacity_e, uint16_t max_lp_capacity_f, @@ -208,9 +241,13 @@ class Context sizeof(uint32_t), cudaMemcpyHostToDevice)); - m_vertex_prefix = vertex_prefix; - m_edge_prefix = edge_prefix; - m_face_prefix = face_prefix; + m_h_vertex_prefix = h_vertex_prefix; + m_h_edge_prefix = h_edge_prefix; + m_h_face_prefix = h_face_prefix; + + m_d_vertex_prefix = d_vertex_prefix; + m_d_edge_prefix = d_edge_prefix; + m_d_face_prefix = d_face_prefix; m_max_lp_capacity_v = max_lp_capacity_v; m_max_lp_capacity_e = max_lp_capacity_e; @@ -229,8 +266,9 @@ class Context uint32_t *m_num_edges, *m_num_faces, *m_num_vertices, *m_num_patches; // per-patch max v/e/f - uint32_t * m_max_num_vertices, *m_max_num_edges, *m_max_num_faces; - uint32_t * m_vertex_prefix, *m_edge_prefix, *m_face_prefix; + uint32_t *m_max_num_vertices, *m_max_num_edges, *m_max_num_faces; + uint32_t *m_d_vertex_prefix, *m_d_edge_prefix, *m_d_face_prefix, + *m_h_vertex_prefix, *m_h_edge_prefix, *m_h_face_prefix; uint16_t m_max_lp_capacity_v, m_max_lp_capacity_e, m_max_lp_capacity_f; PatchInfo* m_patches_info; 
float m_capacity_factor; diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index 4761036f..fd1fb448 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -3,6 +3,7 @@ #include "cusparse.h" #include "rxmesh/attribute.h" #include "rxmesh/context.h" +#include "rxmesh/rxmesh.h" #include "rxmesh/types.h" namespace rxmesh { @@ -16,10 +17,27 @@ namespace rxmesh { template struct DenseMatrix { - DenseMatrix(IndexT num_rows, - IndexT num_cols, - locationT location = LOCATION_ALL) - : m_num_rows(num_rows), + template + friend class SparseMatrix; + + DenseMatrix() + : m_allocated(LOCATION_NONE), + m_num_rows(0), + m_num_cols(0), + m_d_val(nullptr), + m_h_val(nullptr), + m_col_pad_bytes(0), + m_col_pad_idx(0) + { + } + + + DenseMatrix(const RXMesh& rx, + IndexT num_rows, + IndexT num_cols, + locationT location = LOCATION_ALL) + : m_context(rx.get_context()), + m_num_rows(num_rows), m_num_cols(num_cols), m_dendescr(NULL), m_h_val(nullptr), @@ -102,6 +120,53 @@ struct DenseMatrix #endif } + + /** + * @brief access the matrix using vertex/edge/face handle as a row index. + */ + template + __host__ __device__ T& operator()(const HandleT handle, const uint32_t col) + { + return this->operator()(get_row_id_from_handle(handle), col); + } + + /** + * @brief access the matrix using vertex/edge/face handle as a row index. 
+ */ + template + __host__ __device__ const T& operator()(const HandleT handle, + const uint32_t col) const + { + return this->operator()(get_row_id_from_handle(handle), col); + } + + /** + * @brief return the row index corresponding to specific vertex/edge/face + * handle + */ + template + __host__ __device__ const uint32_t + get_row_id_from_handle(const HandleT handle) const + { + auto id = handle.unpack(); + + uint32_t row; + + if constexpr (std::is_same_v) { + row = m_context.vertex_prefix()[id.first] + id.second; + } + + if constexpr (std::is_same_v) { + row = m_context.edge_prefix()[id.first] + id.second; + } + + if constexpr (std::is_same_v) { + row = m_context.face_prefix()[id.first] + id.second; + } + + return row; + } + /** * @brief return the raw pointer based on the specified location (host vs. * device) @@ -231,6 +296,7 @@ struct DenseMatrix } + const Context m_context; cusparseDnMatDescr_t m_dendescr; locationT m_allocated; IndexT m_num_rows; diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index a454c832..6de49fc1 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -70,7 +70,7 @@ __global__ static void sparse_mat_prescan(const rxmesh::Context context, auto ids = v_id.unpack(); uint32_t patch_id = ids.first; uint16_t local_id = ids.second; - row_ptr[context.m_vertex_prefix[patch_id] + local_id] = iter.size() + 1; + row_ptr[context.vertex_prefix()[patch_id] + local_id] = iter.size() + 1; }; auto block = cooperative_groups::this_thread_block(); @@ -90,14 +90,14 @@ __global__ static void sparse_mat_col_fill(const rxmesh::Context context, auto ids = v_id.unpack(); uint32_t patch_id = ids.first; uint16_t local_id = ids.second; - col_idx[row_ptr[context.m_vertex_prefix[patch_id] + local_id]] = - context.m_vertex_prefix[patch_id] + local_id; + col_idx[row_ptr[context.vertex_prefix()[patch_id] + local_id]] = + context.vertex_prefix()[patch_id] + local_id; for 
(uint32_t v = 0; v < iter.size(); ++v) { auto s_ids = iter[v].unpack(); uint32_t s_patch_id = s_ids.first; uint16_t s_local_id = s_ids.second; - col_idx[row_ptr[context.m_vertex_prefix[patch_id] + local_id] + v + - 1] = context.m_vertex_prefix[s_patch_id] + s_local_id; + col_idx[row_ptr[context.vertex_prefix()[patch_id] + local_id] + v + + 1] = context.vertex_prefix()[s_patch_id] + s_local_id; } }; @@ -133,17 +133,17 @@ void permute_gather(IndexT* d_p, T* d_in, T* d_out, IndexT size) /** - * @brief Sparse matrix that represent the VV connectivity, i.e., it is a square - * matrix with number of rows/cols is equal to number of vertices and there is - * non-zero values at entry (i,j) only if the vertex i is connected to vertex j. - * The sparse matrix is stored as a CSR matrix and it is allocated on both host - * and device. The class also provides implementation for matrix-vector - * multiplication and linear solver—(using cuSolver and cuSparse as a back-end. + * @brief Device-only sparse matrix that represent the VV connectivity, i.e., it + * is a square matrix with number of rows/cols is equal to number of vertices + * and there is non-zero values at entry (i,j) only if the vertex i is connected + * to vertex j. The sparse matrix is stored as a CSR matrix. The class also + * provides implementation for matrix-vector multiplication and linear + * solver—(using cuSolver and cuSparse as a back-end. 
*/ template struct SparseMatrix { - SparseMatrix(RXMeshStatic& rx) + SparseMatrix(const RXMeshStatic& rx) : m_d_row_ptr(nullptr), m_d_col_idx(nullptr), m_d_val(nullptr), @@ -166,6 +166,7 @@ struct SparseMatrix m_d_solver_val(nullptr), m_use_reorder(false), m_reorder_allocated(false), + m_d_cusparse_spmm_buffer(false), m_allocated(LOCATION_NONE) { using namespace rxmesh; @@ -373,22 +374,46 @@ struct SparseMatrix assert(1 != 1); } - - __device__ IndexT& get_row_ptr_at(IndexT idx) const + /** + * @brief return the row pointer of the CSR matrix + * @return + */ + __device__ __host__ const IndexT* row_ptr() const { - return m_d_row_ptr[idx]; +#ifdef __CUDA_ARCH__ + return m_d_row_ptr; +#else + return m_h_row_ptr; +#endif } - __device__ IndexT& get_col_idx_at(IndexT idx) const + /** + * @brief return the column index pointer of the CSR matrix + * @return + */ + __device__ __host__ const IndexT* col_idx() const { - return m_d_col_idx[idx]; +#ifdef __CUDA_ARCH__ + return m_d_col_idx; +#else + return m_h_col_idx; +#endif } - __device__ T& get_val_at(IndexT idx) const + /** + * @brief access the value of (1D array) array that holds the nnz in the CSR + * matrix + */ + __device__ __host__ T& get_val_at(IndexT idx) const { +#ifdef __CUDA_ARCH__ return m_d_val[idx]; +#else + return m_h_val[idx]; +#endif } + /** * @brief release all allocated memory */ @@ -406,6 +431,7 @@ struct SparseMatrix GPU_FREE(m_d_permute); free(m_h_permute); } + GPU_FREE(m_d_cusparse_spmm_buffer); } /** @@ -460,30 +486,27 @@ struct SparseMatrix * where A is the sparse matrix, B is a dense matrix, and the result is a * dense matrix C */ - __host__ void multiply_by_dense_matrix(rxmesh::DenseMatrix B_mat, - rxmesh::DenseMatrix C_mat, - cudaStream_t stream = 0) + __host__ void multiply_by_dense_matrix(rxmesh::DenseMatrix& B_mat, + rxmesh::DenseMatrix& C_mat, + cudaStream_t stream = 0) { float alpha = 1.0f; float beta = 0.0f; // A_mat.create_cusparse_handle(); - cusparseSpMatDescr_t matA = m_spdescr; - 
cusparseDnMatDescr_t matB = B_mat.m_dendescr; - cusparseDnMatDescr_t matC = C_mat.m_dendescr; - void* dBuffer = NULL; + cusparseSpMatDescr_t matA = m_spdescr; + cusparseDnMatDescr_t matB = B_mat.m_dendescr; + cusparseDnMatDescr_t matC = C_mat.m_dendescr; CUSPARSE_ERROR(cusparseSetStream(m_cusparse_handle, stream)); // allocate an external buffer if needed if (m_spmm_buffer_size == 0) { - RXMESH_WARN( - "Sparse matrix - Dense matrix multiplication buffer size not " - "initialized.", - "Calculate it now."); denmat_mul_buffer_size(B_mat, C_mat, stream); + CUDA_ERROR( + cudaMalloc(&m_d_cusparse_spmm_buffer, m_spmm_buffer_size)); } - CUDA_ERROR(cudaMalloc(&dBuffer, m_spmm_buffer_size)); + // execute SpMM CUSPARSE_ERROR(cusparseSpMM(m_cusparse_handle, @@ -496,9 +519,7 @@ struct SparseMatrix matC, CUDA_R_32F, CUSPARSE_SPMM_ALG_DEFAULT, - dBuffer)); - - CUDA_ERROR(cudaFree(dBuffer)); + m_d_cusparse_spmm_buffer)); } /** @@ -1061,8 +1082,8 @@ struct SparseMatrix uint32_t c_patch_id = c_ids.first; uint16_t c_local_id = c_ids.second; - uint32_t col_index = m_context.m_vertex_prefix[c_patch_id] + c_local_id; - uint32_t row_index = m_context.m_vertex_prefix[r_patch_id] + r_local_id; + uint32_t col_index = m_context.vertex_prefix()[c_patch_id] + c_local_id; + uint32_t row_index = m_context.vertex_prefix()[r_patch_id] + r_local_id; const IndexT start = m_d_row_ptr[row_index]; const IndexT end = m_d_row_ptr[row_index + 1]; @@ -1072,7 +1093,7 @@ struct SparseMatrix return i; } } - assert(1 != 1); + return 0; } @@ -1220,6 +1241,8 @@ struct SparseMatrix IndexT* m_d_solver_col_idx; T* m_d_solver_val; + void* m_d_cusparse_spmm_buffer; + // flags bool m_use_reorder; locationT m_allocated; diff --git a/include/rxmesh/rxmesh.cpp b/include/rxmesh/rxmesh.cpp index 0d02b61a..f132a6b0 100644 --- a/include/rxmesh/rxmesh.cpp +++ b/include/rxmesh/rxmesh.cpp @@ -103,6 +103,9 @@ void RXMesh::init(const std::vector>& fv, m_d_vertex_prefix, m_d_edge_prefix, m_d_face_prefix, + m_h_vertex_prefix, + 
m_h_edge_prefix, + m_h_face_prefix, max_lp_hashtable_capacity(), max_lp_hashtable_capacity(), max_lp_hashtable_capacity(), diff --git a/tests/RXMesh_test/test_sparse_matrix.cuh b/tests/RXMesh_test/test_sparse_matrix.cuh index bbc5b0fb..fc699cc2 100644 --- a/tests/RXMesh_test/test_sparse_matrix.cuh +++ b/tests/RXMesh_test/test_sparse_matrix.cuh @@ -15,7 +15,7 @@ __global__ static void sparse_mat_test(const rxmesh::Context context, auto ids = v_id.unpack(); uint32_t patch_id = ids.first; uint16_t local_id = ids.second; - vet_degree[context.m_vertex_prefix[patch_id] + local_id] = + vet_degree[context.vertex_prefix()[patch_id] + local_id] = iter.size() + 1; }; @@ -40,7 +40,7 @@ __global__ static void sparse_mat_edge_len_test( uint32_t r_patch_id = r_ids.first; uint16_t r_local_id = r_ids.second; - uint32_t row_index = context.m_vertex_prefix[r_patch_id] + r_local_id; + uint32_t row_index = context.vertex_prefix()[r_patch_id] + r_local_id; arr_ref[row_index] = 0; sparse_mat(v_id, v_id) = 0; @@ -71,10 +71,10 @@ __global__ void spmat_multi_hardwired_kernel(T* vec, int tid = threadIdx.x + blockIdx.x * blockDim.x; float sum = 0; if (tid < N) { - uint32_t start = sparse_mat.get_row_ptr_at(tid); - uint32_t end = sparse_mat.get_row_ptr_at(tid + 1); + uint32_t start = sparse_mat.row_ptr()[tid]; + uint32_t end = sparse_mat.row_ptr()[tid + 1]; for (int i = 0; i < end - start; i++) { - sum += vec[sparse_mat.get_col_idx_at(start + i)] * + sum += vec[sparse_mat.col_idx()[start + i]] * sparse_mat.get_val_at(start + i); } out[tid] = sum; @@ -96,19 +96,14 @@ __global__ static void simple_A_X_B_setup(const rxmesh::Context context, T v_weight = iter.size(); // reference value calculation - auto r_ids = v_id.unpack(); - uint32_t r_patch_id = r_ids.first; - uint16_t r_local_id = r_ids.second; - - uint32_t row_index = context.m_vertex_prefix[r_patch_id] + r_local_id; - B_mat(row_index, 0) = iter.size() * 7.4f; - B_mat(row_index, 1) = iter.size() * 2.6f; - B_mat(row_index, 2) = iter.size() * 
10.3f; + B_mat(v_id, 0) = iter.size() * 7.4f; + B_mat(v_id, 1) = iter.size() * 2.6f; + B_mat(v_id, 2) = iter.size() * 10.3f; - X_mat(row_index, 0) = coords(v_id, 0) * v_weight; - X_mat(row_index, 1) = coords(v_id, 1) * v_weight; - X_mat(row_index, 2) = coords(v_id, 2) * v_weight; + X_mat(v_id, 0) = coords(v_id, 0) * v_weight; + X_mat(v_id, 1) = coords(v_id, 1) * v_weight; + X_mat(v_id, 2) = coords(v_id, 2) * v_weight; vec3 vi_coord(coords(v_id, 0), coords(v_id, 1), coords(v_id, 2)); for (uint32_t v = 0; v < iter.size(); ++v) { @@ -158,7 +153,7 @@ TEST(RXMeshStatic, SparseMatrix) CUDA_ERROR(cudaMalloc((void**)&d_result, (num_vertices) * sizeof(int))); SparseMatrix spmat(rx); - spmat.set_identity(); + spmat.set_ones(); spmat_multi_hardwired_kernel<<>>( d_arr_ones, spmat, d_result, num_vertices); @@ -289,9 +284,9 @@ TEST(RXMeshStatic, SparseMatrixSimpleSolve) auto coords = rx.get_input_vertex_coordinates(); SparseMatrix A_mat(rx); - DenseMatrix X_mat(num_vertices, 3); - DenseMatrix B_mat(num_vertices, 3); - DenseMatrix ret_mat(num_vertices, 3); + DenseMatrix X_mat(rx, num_vertices, 3); + DenseMatrix B_mat(rx, num_vertices, 3); + DenseMatrix ret_mat(rx, num_vertices, 3); float time_step = 1.f; @@ -352,9 +347,9 @@ TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) auto coords = rx.get_input_vertex_coordinates(); SparseMatrix A_mat(rx); - DenseMatrix X_mat(num_vertices, 3); - DenseMatrix B_mat(num_vertices, 3); - DenseMatrix ret_mat(num_vertices, 3); + DenseMatrix X_mat(rx, num_vertices, 3); + DenseMatrix B_mat(rx, num_vertices, 3); + DenseMatrix ret_mat(rx, num_vertices, 3); float time_step = 1.f; @@ -374,7 +369,7 @@ TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) A_mat.spmat_chol_buffer_alloc(); A_mat.spmat_chol_factor(); - for (int i = 0; i < B_mat.m_col_size; ++i) { + for (int i = 0; i < B_mat.cols(); ++i) { A_mat.spmat_chol_solve(B_mat.col_data(i), X_mat.col_data(i)); } From 6cda5f37ac9e2faed3d48cfd9f6f40ba921c07cb Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 
8 Jul 2024 12:49:50 -0400 Subject: [PATCH 18/96] accessing sparse matrix on the host --- include/rxmesh/matrix/sparse_matrix.cuh | 80 +++++++++++-------------- 1 file changed, 34 insertions(+), 46 deletions(-) diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index 6de49fc1..a0255591 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -133,12 +133,13 @@ void permute_gather(IndexT* d_p, T* d_in, T* d_out, IndexT size) /** - * @brief Device-only sparse matrix that represent the VV connectivity, i.e., it + * @brief Sparse matrix that represent the VV connectivity, i.e., it * is a square matrix with number of rows/cols is equal to number of vertices * and there is non-zero values at entry (i,j) only if the vertex i is connected - * to vertex j. The sparse matrix is stored as a CSR matrix. The class also - * provides implementation for matrix-vector multiplication and linear - * solver—(using cuSolver and cuSparse as a back-end. + * to vertex j. The sparse matrix is stored as a CSR matrix. The matrix is + * accessible on both host and device. The class also provides implementation + * for matrix-vector multiplication and linear solver—(using cuSolver and + * cuSparse as a back-end. 
*/ template struct SparseMatrix @@ -324,51 +325,55 @@ struct SparseMatrix return m_nnz; } + /** * @brief access the matrix using VertexHandle */ - __device__ T& operator()(const VertexHandle& row_v, - const VertexHandle& col_v) + __device__ __host__ const T& operator()(const VertexHandle& row_v, + const VertexHandle& col_v) const { - return m_d_val[get_val_idx(row_v, col_v)]; + return this->operator()(get_row_id_from_handle(row_v), + get_row_id_from_handle(col_v)); } /** * @brief access the matrix using VertexHandle */ - __device__ T& operator()(const VertexHandle& row_v, - const VertexHandle& col_v) const + __device__ __host__ T& operator()(const VertexHandle& row_v, + const VertexHandle& col_v) { - return m_d_val[get_val_idx(row_v, col_v)]; + return this->operator()(get_row_id_from_handle(row_v), + get_row_id_from_handle(col_v)); } /** * @brief access the matrix using row and col index */ - __device__ T& operator()(const IndexT x, const IndexT y) + __device__ __host__ T& operator()(const IndexT x, const IndexT y) { - const IndexT start = m_d_row_ptr[x]; - const IndexT end = m_d_row_ptr[x + 1]; + const IndexT start = row_ptr()[x]; + const IndexT end = row_ptr()[x + 1]; for (IndexT i = start; i < end; ++i) { - if (m_d_col_idx[i] == y) { - return m_d_val[i]; + if (col_idx()[i] == y) { + return get_val_at(i); } - } + } assert(1 != 1); } /** * @brief access the matrix using row and col index */ - __device__ T& operator()(const IndexT x, const IndexT y) const + __device__ __host__ const T& operator()(const IndexT x, + const IndexT y) const { - const IndexT start = m_d_row_ptr[x]; - const IndexT end = m_d_row_ptr[x + 1]; + const IndexT start = row_ptr()[x]; + const IndexT end = row_ptr()[x + 1]; for (IndexT i = start; i < end; ++i) { - if (m_d_col_idx[i] == y) { - return m_d_val[i]; + if (col_idx()[i] == y) { + return get_val_at(i); } } assert(1 != 1); @@ -1069,34 +1074,17 @@ struct SparseMatrix } } - - private: - __device__ const IndexT get_val_idx(const VertexHandle& 
row_v, - const VertexHandle& col_v) const + /** + * @brief return the row index corresponding to specific vertex handle + */ + __device__ __host__ const uint32_t + get_row_id_from_handle(const VertexHandle& handle) const { - auto r_ids = row_v.unpack(); - uint32_t r_patch_id = r_ids.first; - uint16_t r_local_id = r_ids.second; - - auto c_ids = col_v.unpack(); - uint32_t c_patch_id = c_ids.first; - uint16_t c_local_id = c_ids.second; - - uint32_t col_index = m_context.vertex_prefix()[c_patch_id] + c_local_id; - uint32_t row_index = m_context.vertex_prefix()[r_patch_id] + r_local_id; - - const IndexT start = m_d_row_ptr[row_index]; - const IndexT end = m_d_row_ptr[row_index + 1]; - - for (IndexT i = start; i < end; ++i) { - if (m_d_col_idx[i] == col_index) { - return i; - } - } - return 0; + auto id = handle.unpack(); + return m_context.vertex_prefix()[id.first] + id.second; } - + private: void denmat_mul_buffer_size(rxmesh::DenseMatrix B_mat, rxmesh::DenseMatrix C_mat, cudaStream_t stream = 0) From 45fffb94cfde6bae6bc69267310f493d9a7a578b Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 8 Jul 2024 20:34:03 -0400 Subject: [PATCH 19/96] refactor solver api --- apps/MCF/mcf_sparse_matrix.cuh | 2 +- include/rxmesh/matrix/dense_matrix.cuh | 3 + include/rxmesh/matrix/sparse_matrix.cuh | 452 ++++++++++++----------- tests/RXMesh_test/test_sparse_matrix.cuh | 10 +- 4 files changed, 254 insertions(+), 213 deletions(-) diff --git a/apps/MCF/mcf_sparse_matrix.cuh b/apps/MCF/mcf_sparse_matrix.cuh index a1f7fc25..130226bb 100644 --- a/apps/MCF/mcf_sparse_matrix.cuh +++ b/apps/MCF/mcf_sparse_matrix.cuh @@ -181,7 +181,7 @@ void mcf_rxmesh_cusolver_chol(rxmesh::RXMeshStatic& rx, Arg.time_step); // Solving the linear system using chol factorization and no reordering - A_mat.spmat_linear_solve(B_mat, *X_mat, Solver::CHOL, Reorder::NONE); + A_mat.solve(B_mat, *X_mat, Solver::CHOL, Reorder::NONE); // move the results to the host X_mat->move(rxmesh::DEVICE, rxmesh::HOST); diff --git 
a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index fd1fb448..761da8b7 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -17,6 +17,9 @@ namespace rxmesh { template struct DenseMatrix { + static_assert(std::is_same_v || std::is_same_v, + "RXMesh::DenseMatrix supports only float or double"); + template friend class SparseMatrix; diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index a0255591..fb04b3ee 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -40,6 +40,7 @@ enum class Reorder NSTDIS = 3 }; +namespace detail { static int reorder_to_int(const Reorder& reorder) { switch (reorder) { @@ -58,7 +59,6 @@ static int reorder_to_int(const Reorder& reorder) } } -namespace detail { // this is the function for the CSR calculation template __global__ static void sparse_mat_prescan(const rxmesh::Context context, @@ -167,9 +167,13 @@ struct SparseMatrix m_d_solver_val(nullptr), m_use_reorder(false), m_reorder_allocated(false), - m_d_cusparse_spmm_buffer(false), + m_d_cusparse_spmm_buffer(nullptr), + m_d_cusparse_spmv_buffer(nullptr), m_allocated(LOCATION_NONE) { + static_assert(std::is_same_v || std::is_same_v, + "RXMesh::SparseMatrix supports only float or double"); + using namespace rxmesh; constexpr uint32_t blockThreads = 256; @@ -358,7 +362,7 @@ struct SparseMatrix if (col_idx()[i] == y) { return get_val_at(i); } - } + } assert(1 != 1); } @@ -422,7 +426,7 @@ struct SparseMatrix /** * @brief release all allocated memory */ - void release() + __host__ void release() { release(LOCATION_ALL); CUSPARSE_ERROR(cusparseDestroy(m_cusparse_handle)); @@ -437,12 +441,15 @@ struct SparseMatrix free(m_h_permute); } GPU_FREE(m_d_cusparse_spmm_buffer); + GPU_FREE(m_d_cusparse_spmv_buffer); } /** * @brief move the data between host an device */ - void move(locationT source, locationT target, cudaStream_t 
stream = NULL) + __host__ void move(locationT source, + locationT target, + cudaStream_t stream = NULL) { if (source == target) { RXMESH_WARN( @@ -484,17 +491,59 @@ struct SparseMatrix } } + /** + * @brief allocate the temp buffer needed for sparse matrix multiplication + * by a dense matrix + */ + __host__ void alloc_multiply_buffer(rxmesh::DenseMatrix B_mat, + rxmesh::DenseMatrix C_mat, + cudaStream_t stream = 0) + { + T alpha = 1.0; + T beta = 0.0; + + cusparseSpMatDescr_t matA = m_spdescr; + cusparseDnMatDescr_t matB = B_mat.m_dendescr; + cusparseDnMatDescr_t matC = C_mat.m_dendescr; + void* dBuffer = NULL; + + CUSPARSE_ERROR(cusparseSetStream(m_cusparse_handle, stream)); + + CUSPARSE_ERROR(cusparseSpMM_bufferSize(m_cusparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + matA, + matB, + &beta, + matC, + CUDA_R_32F, + CUSPARSE_SPMM_ALG_DEFAULT, + &m_spmm_buffer_size)); + CUDA_ERROR(cudaMalloc(&m_d_cusparse_spmm_buffer, m_spmm_buffer_size)); + } + /** * @brief multiply the sparse matrix by a dense matrix. The function * performs the multiplication as * C = A*B * where A is the sparse matrix, B is a dense matrix, and the result is a - * dense matrix C + * dense matrix C. + * This method requires extra buffer allocation for cusparse. User may want + * to call first alloce_multiply_buffer() (with the same parameters) first + * to do the allocation and so timing this method will reflect the timing + * for the multiplication operation only. Otherwise, this method calls + * alloce_multiply_buffer() if it is not called before. 
Note that this + * allocation happens only once and we then reuse it */ - __host__ void multiply_by_dense_matrix(rxmesh::DenseMatrix& B_mat, - rxmesh::DenseMatrix& C_mat, - cudaStream_t stream = 0) + __host__ void multiply(rxmesh::DenseMatrix& B_mat, + rxmesh::DenseMatrix& C_mat, + cudaStream_t stream = 0) { + assert(cols() == B_mat.cols()); + assert(rows() == C_mat.rows()); + assert(B_mat.cols() == C_mat.cols()); + float alpha = 1.0f; float beta = 0.0f; @@ -506,10 +555,8 @@ struct SparseMatrix CUSPARSE_ERROR(cusparseSetStream(m_cusparse_handle, stream)); // allocate an external buffer if needed - if (m_spmm_buffer_size == 0) { - denmat_mul_buffer_size(B_mat, C_mat, stream); - CUDA_ERROR( - cudaMalloc(&m_d_cusparse_spmm_buffer, m_spmm_buffer_size)); + if (m_d_cusparse_spmm_buffer == nullptr) { + alloc_multiply_buffer(B_mat, C_mat, stream); } @@ -528,17 +575,63 @@ struct SparseMatrix } /** - * @brief wrap up the cusparse api for sparse matrix array - * multiplication. + * @brief allocate the temp buffer needed for sparse matrix multiplication + * by a dense vector + */ + __host__ void alloc_multiply_buffer(T* in_arr, + T* rt_arr, + cudaStream_t stream = 0) + { + const T alpha = 1.0; + const T beta = 0.0; + + cusparseDnVecDescr_t vecx = NULL; + cusparseDnVecDescr_t vecy = NULL; + + CUSPARSE_ERROR( + cusparseCreateDnVec(&vecx, m_num_cols, in_arr, CUDA_R_32F)); + CUSPARSE_ERROR( + cusparseCreateDnVec(&vecy, m_num_rows, rt_arr, CUDA_R_32F)); + + CUSPARSE_ERROR(cusparseSpMV_bufferSize(m_cusparse_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + m_spdescr, + vecx, + &beta, + vecy, + CUDA_R_32F, + CUSPARSE_SPMV_ALG_DEFAULT, + &m_spmv_buffer_size)); + CUSPARSE_ERROR(cusparseDestroyDnVec(vecx)); + CUSPARSE_ERROR(cusparseDestroyDnVec(vecy)); + + CUDA_ERROR(cudaMalloc(&m_d_cusparse_spmv_buffer, m_spmv_buffer_size)); + } + + + /** + * @brief multiply the sparse matrix by a dense vector. 
The function + * performs the multiplication as + * Y = A*X + * where A is the sparse matrix, X is a dense vector, and the result is a + * dense vector Y. + * This method requires extra buffer allocation for cusparse. User may want + * to call first alloce_multiply_buffer() (with the same parameters) first + * to do the allocation and so timing this method will reflect the timing + * for the multiplication operation only. Otherwise, this method calls + * alloce_multiply_buffer() if it is not called before. Note that this + * allocation happens only once and we then reuse it + * TODO allow this function to take a DenseMatrix instead that represent a + * dense vector, i.e., one column with multiple rows */ - void arr_mul(T* in_arr, T* rt_arr, cudaStream_t stream = 0) + __host__ void multiply(T* in_arr, T* rt_arr, cudaStream_t stream = 0) { - const float alpha = 1.0f; - const float beta = 0.0f; + const T alpha = 1.0; + const T beta = 0.0; - void* buffer = NULL; - cusparseDnVecDescr_t vecx = NULL; - cusparseDnVecDescr_t vecy = NULL; + cusparseDnVecDescr_t vecx = NULL; + cusparseDnVecDescr_t vecy = NULL; CUSPARSE_ERROR( cusparseCreateDnVec(&vecx, m_num_cols, in_arr, CUDA_R_32F)); @@ -547,15 +640,10 @@ struct SparseMatrix CUSPARSE_ERROR(cusparseSetStream(m_cusparse_handle, stream)); - if (m_spmv_buffer_size == 0) { - RXMESH_WARN( - "Sparse matrix - Array multiplication buffer size not " - "initialized." 
- "Calculate it now."); - arr_mul_buffer_size(in_arr, rt_arr, stream); + if (m_d_cusparse_spmv_buffer == nullptr) { + alloc_multiply_buffer(in_arr, rt_arr, stream); } - CUDA_ERROR(cudaMalloc(&buffer, m_spmv_buffer_size)); CUSPARSE_ERROR(cusparseSpMV(m_cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -566,36 +654,43 @@ struct SparseMatrix vecy, CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT, - buffer)); + m_d_cusparse_spmv_buffer)); CUSPARSE_ERROR(cusparseDestroyDnVec(vecx)); CUSPARSE_ERROR(cusparseDestroyDnVec(vecy)); - CUDA_ERROR(cudaFree(buffer)); } /** - * @brief do the sparse matrix dense matrix multiplication using sparse - * matrix array multiplication in a column wise way + * @brief multiply the sparse matrix by a dense matrix. The function + * performs the multiplication as + * C = A*B + * where A is the sparse matrix, B is a dense matrix, and the result is a + * dense matrix C. + * This is similar to the multiply() function above but instead of extract + * the columns for B and multiply them separately as sparse matrix dense + * vector multiplication */ - void spmat_denmat_mul_cw(rxmesh::DenseMatrix B_mat, - rxmesh::DenseMatrix C_mat) + void multiply_cw(rxmesh::DenseMatrix B_mat, + rxmesh::DenseMatrix C_mat, + cudaStream_t stream = 0) { + assert(cols() == B_mat.cols()); + assert(rows() == C_mat.rows()); + assert(B_mat.cols() == C_mat.cols()); + for (int i = 0; i < B_mat.m_num_cols; ++i) { - arr_mul(B_mat.col_data(i), C_mat.col_data(i)); + multiply(B_mat.col_data(i), C_mat.col_data(i), stream); } } - - /* ----- SOLVER ----- */ - - /* --- HIGH LEVEL API --- */ + /** * @brief solve the Ax=b for x where x and b are all array */ - void spmat_linear_solve(T* B_arr, - T* X_arr, - rxmesh::Solver solver, - rxmesh::Reorder reorder) + void solve(T* B_arr, + T* X_arr, + rxmesh::Solver solver, + rxmesh::Reorder reorder) { cusparse_linear_solver_wrapper(solver, reorder, @@ -615,10 +710,10 @@ struct SparseMatrix * @brief solve the AX=B for X where X and B are all dense 
matrix and we * would solve it in a column wise manner */ - void spmat_linear_solve(rxmesh::DenseMatrix B_mat, - rxmesh::DenseMatrix X_mat, - rxmesh::Solver solver, - rxmesh::Reorder reorder) + void solve(rxmesh::DenseMatrix B_mat, + rxmesh::DenseMatrix X_mat, + rxmesh::Solver solver, + rxmesh::Reorder reorder) { for (int i = 0; i < B_mat.cols(); ++i) { cusparse_linear_solver_wrapper(solver, @@ -636,111 +731,6 @@ struct SparseMatrix } } - /** - * @brief wrap up the cusolver api for solving linear systems. This is a - * lower level api - */ - void cusparse_linear_solver_wrapper(const rxmesh::Solver solver, - const rxmesh::Reorder reorder, - cusolverSpHandle_t handle, - cusparseMatDescr_t descrA, - int rowsA, - int colsA, - int nnzA, - int* d_csrRowPtrA, - int* d_csrColIndA, - T* d_csrValA, - T* d_b, - T* d_x) - { - if constexpr ((!std::is_same_v)&&( - !std::is_same_v)) { - RXMESH_ERROR( - "Unsupported type for cusparse: {}" - "Only float and double are supported", - typeid(T).name()); - } - - double tol = 1.e-12; - int singularity = 0; /* -1 if A is invertible under tol. 
*/ - - /* solve B*z = Q*b */ - if (solver == Solver::CHOL) { - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpScsrlsvchol(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_b, - tol, - reorder_to_int(reorder), - d_x, - &singularity)); - } - - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpDcsrlsvchol(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_b, - tol, - reorder_to_int(reorder), - d_x, - &singularity)); - } - - } else if (solver == Solver::QR) { - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpScsrlsvqr(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_b, - tol, - reorder_to_int(reorder), - d_x, - &singularity)); - } - - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpDcsrlsvqr(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_b, - tol, - reorder_to_int(reorder), - d_x, - &singularity)); - } - } else { - RXMESH_ERROR( - "Only Solver::CHOL and Solver::QR is supported, use CUDA 12.x " - "for " - "Solver::LU"); - } - CUDA_ERROR(cudaDeviceSynchronize()); - - if (0 <= singularity) { - RXMESH_WARN( - "WARNING: the matrix is singular at row {} under tol ({})", - singularity, - tol); - } - } /* --- LOW LEVEL API --- */ @@ -1085,61 +1075,6 @@ struct SparseMatrix } private: - void denmat_mul_buffer_size(rxmesh::DenseMatrix B_mat, - rxmesh::DenseMatrix C_mat, - cudaStream_t stream = 0) - { - float alpha = 1.0f; - float beta = 0.0f; - - cusparseSpMatDescr_t matA = m_spdescr; - cusparseDnMatDescr_t matB = B_mat.m_dendescr; - cusparseDnMatDescr_t matC = C_mat.m_dendescr; - void* dBuffer = NULL; - - CUSPARSE_ERROR(cusparseSetStream(m_cusparse_handle, stream)); - - // allocate an external buffer if needed - CUSPARSE_ERROR(cusparseSpMM_bufferSize(m_cusparse_handle, - CUSPARSE_OPERATION_NON_TRANSPOSE, - CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha, - matA, - matB, - &beta, - matC, - CUDA_R_32F, - 
CUSPARSE_SPMM_ALG_DEFAULT, - &m_spmm_buffer_size)); - } - - - void arr_mul_buffer_size(T* in_arr, T* rt_arr, cudaStream_t stream = 0) - { - const float alpha = 1.0f; - const float beta = 0.0f; - - cusparseDnVecDescr_t vecx = NULL; - cusparseDnVecDescr_t vecy = NULL; - - CUSPARSE_ERROR( - cusparseCreateDnVec(&vecx, m_num_cols, in_arr, CUDA_R_32F)); - CUSPARSE_ERROR( - cusparseCreateDnVec(&vecy, m_num_rows, rt_arr, CUDA_R_32F)); - - CUSPARSE_ERROR(cusparseSpMV_bufferSize(m_cusparse_handle, - CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha, - m_spdescr, - vecx, - &beta, - vecy, - CUDA_R_32F, - CUSPARSE_SPMV_ALG_DEFAULT, - &m_spmv_buffer_size)); - } - - void release(locationT location) { if (((location & HOST) == HOST) && ((m_allocated & HOST) == HOST)) { @@ -1187,6 +1122,108 @@ struct SparseMatrix } } + /** + * @brief wrapper for cuSolver API for solving linear systems using cuSolver + * High-level API + */ + void cusparse_linear_solver_wrapper(const rxmesh::Solver solver, + const rxmesh::Reorder reorder, + cusolverSpHandle_t handle, + cusparseMatDescr_t descrA, + int rowsA, + int colsA, + int nnzA, + int* d_csrRowPtrA, + int* d_csrColIndA, + T* d_csrValA, + T* d_b, + T* d_x) + { + double tol = 1.e-12; + + int singularity = 0; /* -1 if A is invertible under tol. 
*/ + + /* solve B*z = Q*b */ + if (solver == Solver::CHOL) { + if constexpr (std::is_same_v) { + CUSOLVER_ERROR( + cusolverSpScsrlsvchol(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + detail::reorder_to_int(reorder), + d_x, + &singularity)); + } + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR( + cusolverSpDcsrlsvchol(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + detail::reorder_to_int(reorder), + d_x, + &singularity)); + } + + } else if (solver == Solver::QR) { + if constexpr (std::is_same_v) { + CUSOLVER_ERROR( + cusolverSpScsrlsvqr(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + detail::reorder_to_int(reorder), + d_x, + &singularity)); + } + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR( + cusolverSpDcsrlsvqr(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + detail::reorder_to_int(reorder), + d_x, + &singularity)); + } + } else { + RXMESH_ERROR( + "Only Solver::CHOL and Solver::QR is supported, use CUDA 12.x " + "for Solver::LU"); + } + + + if (0 <= singularity) { + RXMESH_WARN( + "WARNING: the matrix is singular at row {} under tol ({})", + singularity, + tol); + } + } + const Context m_context; cusparseHandle_t m_cusparse_handle; cusolverSpHandle_t m_cusolver_sphandle; @@ -1230,6 +1267,7 @@ struct SparseMatrix T* m_d_solver_val; void* m_d_cusparse_spmm_buffer; + void* m_d_cusparse_spmv_buffer; // flags bool m_use_reorder; diff --git a/tests/RXMesh_test/test_sparse_matrix.cuh b/tests/RXMesh_test/test_sparse_matrix.cuh index fc699cc2..95ea3dac 100644 --- a/tests/RXMesh_test/test_sparse_matrix.cuh +++ b/tests/RXMesh_test/test_sparse_matrix.cuh @@ -237,7 +237,7 @@ TEST(RXMeshStatic, SparseMatrixEdgeLen) launch_box.smem_bytes_dyn>>>( rx.get_context(), *coords, spmat, d_arr_ref); - spmat.arr_mul(d_arr_ones, d_result); + spmat.multiply(d_arr_ones, d_result); // copy 
the value back to host std::vector h_arr_ref(num_vertices); @@ -299,13 +299,13 @@ TEST(RXMeshStatic, SparseMatrixSimpleSolve) launch_box.smem_bytes_dyn>>>( rx.get_context(), *coords, A_mat, X_mat, B_mat, time_step); - A_mat.spmat_linear_solve(B_mat, X_mat, Solver::CHOL, Reorder::NSTDIS); + A_mat.solve(B_mat, X_mat, Solver::CHOL, Reorder::NSTDIS); // timing begins for spmm GPUTimer timer; timer.start(); - A_mat.multiply_by_dense_matrix(X_mat, ret_mat); + A_mat.multiply(X_mat, ret_mat); timer.stop(); RXMESH_TRACE("SPMM_rxmesh() took {} (ms) ", timer.elapsed_millis()); @@ -362,7 +362,7 @@ TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) launch_box.smem_bytes_dyn>>>( rx.get_context(), *coords, A_mat, X_mat, B_mat, time_step); - // A_mat.spmat_linear_solve(B_mat, X_mat, Solver::CHOL, Reorder::NSTDIS); + // A_mat.solve(B_mat, X_mat, Solver::CHOL, Reorder::NSTDIS); A_mat.spmat_chol_reorder(Reorder::NSTDIS); A_mat.spmat_chol_analysis(); @@ -375,7 +375,7 @@ TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) A_mat.spmat_chol_buffer_free(); - A_mat.multiply_by_dense_matrix(X_mat, ret_mat); + A_mat.multiply(X_mat, ret_mat); std::vector> h_ret_mat(num_vertices); CUDA_ERROR(cudaMemcpy(h_ret_mat.data(), From d9e5aa68325c2445aca8238fd224de29c083f0cc Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 8 Jul 2024 20:53:57 -0400 Subject: [PATCH 20/96] renaming mcf files --- apps/MCF/CMakeLists.txt | 14 +- apps/MCF/{mcf_rxmesh.h => mcf_cg.h} | 25 +- ...parse_matrix.cuh => mcf_cusolver_chol.cuh} | 5 +- ...{mcf_rxmesh_kernel.cuh => mcf_kernels.cuh} | 0 apps/MCF/mcf_openmesh.h | 470 ------------------ 5 files changed, 9 insertions(+), 505 deletions(-) rename apps/MCF/{mcf_rxmesh.h => mcf_cg.h} (92%) rename apps/MCF/{mcf_sparse_matrix.cuh => mcf_cusolver_chol.cuh} (97%) rename apps/MCF/{mcf_rxmesh_kernel.cuh => mcf_kernels.cuh} (100%) delete mode 100644 apps/MCF/mcf_openmesh.h diff --git a/apps/MCF/CMakeLists.txt b/apps/MCF/CMakeLists.txt index b242058b..99706293 100644 --- 
a/apps/MCF/CMakeLists.txt +++ b/apps/MCF/CMakeLists.txt @@ -2,15 +2,9 @@ add_executable(MCF) set(SOURCE_LIST mcf.cu - mcf_rxmesh_kernel.cuh - mcf_openmesh.h - mcf_rxmesh.h - mcf_sparse_matrix.cuh -) - -set(COMMON_LIST - ../common/openmesh_trimesh.h - ../common/openmesh_report.h + mcf_kernels.cuh + mcf_cg.h + mcf_cusolver_chol.cuh ) target_sources(MCF @@ -35,8 +29,6 @@ source_group(TREE ${CMAKE_CURRENT_LIST_DIR} PREFIX "MCF" FILES ${SOURCE_LIST}) target_link_libraries( MCF PRIVATE RXMesh PRIVATE gtest_main - PRIVATE OpenMeshCore - PRIVATE OpenMeshTools ) #gtest_discover_tests( MCF ) \ No newline at end of file diff --git a/apps/MCF/mcf_rxmesh.h b/apps/MCF/mcf_cg.h similarity index 92% rename from apps/MCF/mcf_rxmesh.h rename to apps/MCF/mcf_cg.h index 138b5c13..9e649e14 100644 --- a/apps/MCF/mcf_rxmesh.h +++ b/apps/MCF/mcf_cg.h @@ -1,15 +1,13 @@ #pragma once #include -#include "mcf_rxmesh_kernel.cuh" #include "rxmesh/attribute.h" #include "rxmesh/reduce_handle.h" #include "rxmesh/rxmesh_static.h" #include "rxmesh/util/report.h" #include "rxmesh/util/timer.h" - -#include "mcf_sparse_matrix.cuh" +#include "mcf_kernels.cuh" template void axpy(rxmesh::RXMeshStatic& rx, @@ -49,8 +47,7 @@ void init_PR(rxmesh::RXMeshStatic& rx, } template -void mcf_rxmesh_cg(rxmesh::RXMeshStatic& rx, - const std::vector>& ground_truth) +void mcf_cg(rxmesh::RXMeshStatic& rx) { using namespace rxmesh; constexpr uint32_t blockThreads = 256; @@ -224,22 +221,6 @@ void mcf_rxmesh_cg(rxmesh::RXMeshStatic& rx, // output to obj // rxmesh.export_obj("mcf_rxmesh.obj", *X); - // Verify - const T tol = 0.001; - bool passed = true; - rx.for_each_vertex(HOST, [&](const VertexHandle& vh) { - uint32_t v_id = rx.map_to_global(vh); - - for (uint32_t i = 0; i < 3; ++i) { - if (std::abs(((*X)(vh, i) - ground_truth[v_id][i]) / - ground_truth[v_id][i]) > tol) { - passed = false; - break; - } - } - }); - - EXPECT_TRUE(passed); // Finalize report report.add_member("start_residual", delta_0); @@ -254,7 +235,7 @@ 
void mcf_rxmesh_cg(rxmesh::RXMeshStatic& rx, td.dyn_smem = launch_box_matvec.smem_bytes_dyn; td.static_smem = launch_box_matvec.smem_bytes_static; td.num_reg = launch_box_matvec.num_registers_per_thread; - td.passed.push_back(passed); + td.passed.push_back(true); td.time_ms.push_back(timer.elapsed_millis() / float(num_cg_iter_taken)); report.add_test(td); report.write(Arg.output_folder + "/rxmesh", diff --git a/apps/MCF/mcf_sparse_matrix.cuh b/apps/MCF/mcf_cusolver_chol.cuh similarity index 97% rename from apps/MCF/mcf_sparse_matrix.cuh rename to apps/MCF/mcf_cusolver_chol.cuh index 130226bb..82a751bc 100644 --- a/apps/MCF/mcf_sparse_matrix.cuh +++ b/apps/MCF/mcf_cusolver_chol.cuh @@ -4,6 +4,8 @@ #include "rxmesh/matrix/sparse_matrix.cuh" #include "rxmesh/rxmesh_static.h" +#include "mcf_kernels.cuh" + template __global__ static void mcf_B_setup(const rxmesh::Context context, const rxmesh::VertexAttribute coords, @@ -131,8 +133,7 @@ __global__ static void mcf_A_setup( } template -void mcf_rxmesh_cusolver_chol(rxmesh::RXMeshStatic& rx, - const std::vector>& ground_truth) +void mcf_cusolver_chol(rxmesh::RXMeshStatic& rx) { using namespace rxmesh; constexpr uint32_t blockThreads = 256; diff --git a/apps/MCF/mcf_rxmesh_kernel.cuh b/apps/MCF/mcf_kernels.cuh similarity index 100% rename from apps/MCF/mcf_rxmesh_kernel.cuh rename to apps/MCF/mcf_kernels.cuh diff --git a/apps/MCF/mcf_openmesh.h b/apps/MCF/mcf_openmesh.h deleted file mode 100644 index 9550d5ea..00000000 --- a/apps/MCF/mcf_openmesh.h +++ /dev/null @@ -1,470 +0,0 @@ -#pragma once -#include "../common/openmesh_report.h" -#include "../common/openmesh_trimesh.h" -#include "rxmesh/util/timer.h" - -#include "rxmesh/geometry_util.cuh" - -/** - * axpy3() - */ -template -void axpy3(const std::vector>& X, - const T alpha, - const T beta, - std::vector>& Y, - const int num_omp_threads) -{ - // Y = beta*Y + alpha*X - - int size = static_cast(X.size()); -#pragma omp parallel for schedule(static) 
num_threads(num_omp_threads) - for (int i = 0; i < size; ++i) { - Y[i][0] *= beta; - Y[i][1] *= beta; - Y[i][2] *= beta; - - Y[i][0] += alpha * X[i][0]; - Y[i][1] += alpha * X[i][1]; - Y[i][2] += alpha * X[i][2]; - } -} - -/** - * dot3() - */ -template -T dot3(const std::vector>& A, - const std::vector>& B, - const int num_omp_threads) -{ - - T ret = 0; - int size = static_cast(A.size()); -#pragma omp parallel for schedule(static) num_threads(num_omp_threads) \ - reduction(+ : ret) - for (int i = 0; i < size; ++i) { - T partial = 0; - for (size_t j = 0; j < A[i].size(); ++j) { - partial += A[i][j] * B[i][j]; - } - ret += partial; - } - - return ret; -} - -/** - * partial_voronoi_area() - */ -template -T partial_voronoi_area(const int p_id, // center - const int q_id, // before center - const int r_id, // after center - const TriMesh& mesh) -{ - // compute partial Voronoi area of the center vertex that is associated with - // the triangle p->q->r (oriented ccw) - - TriMesh::VertexIter p_it = mesh.vertices_begin() + p_id; - TriMesh::VertexIter q_it = mesh.vertices_begin() + q_id; - TriMesh::VertexIter r_it = mesh.vertices_begin() + r_id; - - assert((*p_it).idx() == p_id); - assert((*q_it).idx() == q_id); - assert((*r_it).idx() == r_id); - - const rxmesh::vec3 p( - mesh.point(*p_it)[0], mesh.point(*p_it)[1], mesh.point(*p_it)[2]); - const rxmesh::vec3 q( - mesh.point(*q_it)[0], mesh.point(*q_it)[1], mesh.point(*q_it)[2]); - const rxmesh::vec3 r( - mesh.point(*r_it)[0], mesh.point(*r_it)[1], mesh.point(*r_it)[2]); - - return rxmesh::partial_voronoi_area(p, q, r); -} - -/** - * edge_cotan_weight() - */ - -template -T edge_cotan_weight(const int p_id, - const int r_id, - const int q_id, - const int s_id, - const TriMesh& mesh) -{ - // Get the edge weight between the two verteices p-r where - // q and s composes the diamond around p-r - - TriMesh::VertexIter p_it = mesh.vertices_begin() + p_id; - TriMesh::VertexIter r_it = mesh.vertices_begin() + r_id; - 
TriMesh::VertexIter q_it = mesh.vertices_begin() + q_id; - TriMesh::VertexIter s_it = mesh.vertices_begin() + s_id; - - const rxmesh::vec3 p( - mesh.point(*p_it)[0], mesh.point(*p_it)[1], mesh.point(*p_it)[2]); - const rxmesh::vec3 r( - mesh.point(*r_it)[0], mesh.point(*r_it)[1], mesh.point(*r_it)[2]); - const rxmesh::vec3 q( - mesh.point(*q_it)[0], mesh.point(*q_it)[1], mesh.point(*q_it)[2]); - const rxmesh::vec3 s( - mesh.point(*s_it)[0], mesh.point(*s_it)[1], mesh.point(*s_it)[2]); - - return rxmesh::edge_cotan_weight(p, r, q, s); -} - - -template -void mcf_matvec(TriMesh& mesh, - const std::vector>& in, - std::vector>& out, - const int num_omp_threads) -{ - // Matrix vector multiplication operation based on uniform Laplacian weight - // defined in Equation 7 in Implicit Fairing of Irregular Meshes using - // Diffusion and Curvature Flow paper - - // Ideally we should compute the vertex weight first in one loop over the - // one-ring and then do another loop to do the matvect operation. We choose - // to optimize this by saving one loop and incrementally compute the vertex - // weight. Note the vertex weight in case of uniform Laplace is the valence - // inversed, otherwise it is 0.5/voronoi_area. We build this voronoi_area - // incrementally which makes the code looks a bit ugly. - - // To compute the vertex cotan weight, we use the following configuration - // where P is the center vertex we want to compute vertex weight for. - // Looping over P's one ring should gives q->r->s. 
- /* r - / | \ - / | \ - s | q - \ | / - \ | / - p - */ - -#pragma omp parallel for schedule(static) num_threads(num_omp_threads) - for (int p_id = 0; p_id < int(mesh.n_vertices()); ++p_id) { - TriMesh::VertexIter p_iter = mesh.vertices_begin() + p_id; - - // Off-diagonal entries - rxmesh::vec3 x(T(0)); - T sum_e_weight(0); - - // vertex weight - T v_weight(0); - - // The last vertex in the one ring - TriMesh::VertexVertexIter q_iter = mesh.vv_iter(*p_iter); - --q_iter; - assert(q_iter.is_valid()); - - // the second vertex in the one ring - TriMesh::VertexVertexIter s_iter = mesh.vv_iter(*p_iter); - ++s_iter; - assert(s_iter.is_valid()); - - for (TriMesh::VertexVertexIter r_iter = mesh.vv_iter(*p_iter); - r_iter.is_valid(); - ++r_iter) { - - int r_id = (*r_iter).idx(); - - - T e_weight = 0; - if (Arg.use_uniform_laplace) { - e_weight = 1; - } else { - e_weight = std::max( - T(0.0), - edge_cotan_weight( - p_id, r_id, (*q_iter).idx(), (*s_iter).idx(), mesh)); - ++s_iter; - } - - e_weight *= static_cast(Arg.time_step); - sum_e_weight += e_weight; - - x[0] -= e_weight * in[r_id][0]; - x[1] -= e_weight * in[r_id][1]; - x[2] -= e_weight * in[r_id][2]; - - if (Arg.use_uniform_laplace) { - ++v_weight; - } else { - T tri_area = - partial_voronoi_area(p_id, (*q_iter).idx(), r_id, mesh); - - v_weight += (tri_area > 0) ? 
tri_area : 0; - - q_iter++; - assert(q_iter == r_iter); - } - } - - // Diagonal entry - if (Arg.use_uniform_laplace) { - v_weight = 1.0 / v_weight; - } else { - v_weight = 0.5 / v_weight; - } - - assert(!std::isnan(v_weight)); - assert(!std::isinf(v_weight)); - - T diag = ((1.0 / v_weight) + sum_e_weight); - out[p_id][0] = x[0] + diag * in[p_id][0]; - out[p_id][1] = x[1] + diag * in[p_id][1]; - out[p_id][2] = x[2] + diag * in[p_id][2]; - } -} - - -/** - * cg() - */ -template -void cg(TriMesh& mesh, - std::vector>& X, - std::vector>& B, - std::vector>& R, - std::vector>& P, - std::vector>& S, - uint32_t& num_cg_iter_taken, - T& start_residual, - T& stop_residual, - const int num_omp_threads) -{ - // CG solver. Solve for the three coordinates simultaneously - - // s = Ax - mcf_matvec(mesh, X, S, num_omp_threads); - - - // r = b - s = b - Ax - // p = r -#pragma omp parallel for schedule(static) num_threads(num_omp_threads) - for (int i = 0; i < int(mesh.n_vertices()); ++i) { - R[i][0] = B[i][0] - S[i][0]; - R[i][1] = B[i][1] - S[i][1]; - R[i][2] = B[i][2] - S[i][2]; - - P[i][0] = R[i][0]; - P[i][1] = R[i][1]; - P[i][2] = R[i][2]; - } - - // delta_new = - T delta_new = dot3(R, R, num_omp_threads); - - // delta_0 = delta_new - const T delta_0(delta_new); - - start_residual = delta_0; - uint32_t iter = 0; - while (iter < Arg.max_num_cg_iter) { - // s = Ap - mcf_matvec(mesh, P, S, num_omp_threads); - - // alpha = delta_new / - T alpha = dot3(S, P, num_omp_threads); - alpha = delta_new / alpha; - - - // x = x + alpha*p - axpy3(P, alpha, T(1), X, num_omp_threads); - - // r = r - alpha*s - axpy3(S, -alpha, T(1), R, num_omp_threads); - - // delta_old = delta_new - T delta_old(delta_new); - - // delta_new = - delta_new = dot3(R, R, num_omp_threads); - - // beta = delta_new/delta_old - T beta(delta_new / delta_old); - - // exit if error is getting too low across three coordinates - if (delta_new < Arg.cg_tolerance * Arg.cg_tolerance * delta_0) { - break; - } - - // p = beta*p + 
r - axpy3(R, T(1), beta, P, num_omp_threads); - - ++iter; - } - num_cg_iter_taken = iter; - stop_residual = delta_new; -} - -/** - * implicit_smoothing() - */ -template -void implicit_smoothing(TriMesh& mesh, - std::vector>& X, - uint32_t& num_cg_iter_taken, - float& time, - T& start_residual, - T& stop_residual, - const int num_omp_threads) -{ - - for (TriMesh::VertexIter v_it = mesh.vertices_begin(); - v_it != mesh.vertices_end(); - ++v_it) { - ASSERT_FALSE(mesh.is_boundary(*v_it)) - << "OpenMesh MCF only takes watertight/closed mesh without " - "boundaries"; - } - - // CG containers - std::vector> B(X), R(X), P(X), S(X); - -#pragma omp parallel for - for (int v_id = 0; v_id < mesh.n_vertices(); ++v_id) { - TriMesh::VertexIter v_iter = mesh.vertices_begin() + v_id; - - // LHS - X[v_id][0] = mesh.point(*v_iter)[0]; - X[v_id][1] = mesh.point(*v_iter)[1]; - X[v_id][2] = mesh.point(*v_iter)[2]; - - // RHS - T v_weight = 1; - - if (Arg.use_uniform_laplace) { - v_weight = static_cast(mesh.valence(*v_iter)); - } - // will fix it later for cotan weight - - B[v_id][0] = X[v_id][0] * v_weight; - B[v_id][1] = X[v_id][1] * v_weight; - B[v_id][2] = X[v_id][2] * v_weight; - } - - if (!Arg.use_uniform_laplace) { - // fix RHS (B) -#pragma omp parallel for - for (int v_id = 0; v_id < int(mesh.n_vertices()); ++v_id) { - TriMesh::VertexIter v_iter = mesh.vertices_begin() + v_id; - - T v_weight(0); - - TriMesh::VertexVertexIter q_iter = mesh.vv_iter(*v_iter); - --q_iter; - assert(q_iter.is_valid()); - - for (TriMesh::VertexVertexIter vv_iter = mesh.vv_iter(*v_iter); - vv_iter.is_valid(); - ++vv_iter) { - - T tri_area = partial_voronoi_area( - v_id, (*q_iter).idx(), (*vv_iter).idx(), mesh); - - v_weight += (tri_area > 0) ? 
tri_area : 0; - - q_iter++; - assert(q_iter == vv_iter); - } - v_weight = 0.5 / v_weight; - B[v_id][0] = X[v_id][0] / v_weight; - B[v_id][1] = X[v_id][1] / v_weight; - B[v_id][2] = X[v_id][2] / v_weight; - } - } - - num_cg_iter_taken = 0; - - // solve - rxmesh::CPUTimer timer; - timer.start(); - - cg(mesh, - X, - B, - R, - P, - S, - num_cg_iter_taken, - start_residual, - stop_residual, - num_omp_threads); - - timer.stop(); - time = timer.elapsed_millis(); -} - -template -void mcf_openmesh(const int num_omp_threads, - TriMesh& input_mesh, - std::vector>& smoothed_coord) -{ - // Report - OpenMeshReport report("MCF_OpenMesh"); - report.command_line(Arg.argc, Arg.argv); - report.system(); - report.model_data(Arg.obj_file_name, input_mesh); - std::string method = - "OpenMesh " + std::to_string(num_omp_threads) + " Core"; - report.add_member("method", method); - report.add_member("time_step", Arg.time_step); - report.add_member("cg_tolerance", Arg.cg_tolerance); - report.add_member("use_uniform_laplace", Arg.use_uniform_laplace); - report.add_member("max_num_cg_iter", Arg.max_num_cg_iter); - - - // implicit smoothing - uint32_t num_cg_iter_taken = 0; - float time = 0; - T start_residual; - T stop_residual; - - implicit_smoothing(input_mesh, - smoothed_coord, - num_cg_iter_taken, - time, - start_residual, - stop_residual, - num_omp_threads); - - RXMESH_TRACE( - "mcf_openmesh() took {} (ms) and {} iterations (i.e., {} ms/iter) ", - time, - num_cg_iter_taken, - time / float(num_cg_iter_taken)); - - - // write output - // #pragma omp parallel for - // for (int v_id = 0; v_id < int(input_mesh.n_vertices()); ++v_id) { - // TriMesh::VertexIter v_iter = input_mesh.vertices_begin() + v_id; - // input_mesh.point(*v_iter)[0] = smoothed_coord[v_id][0]; - // input_mesh.point(*v_iter)[1] = smoothed_coord[v_id][1]; - // input_mesh.point(*v_iter)[2] = smoothed_coord[v_id][2]; - // } - // std::string fn = STRINGIFY(OUTPUT_DIR) "mcf_openmesh.obj"; - // if 
(!OpenMesh::IO::write_mesh(input_mesh, fn)) { - // RXMESH_WARN("OpenMesh cannot write mesh to file {}", fn); - // } - - // Finalize report - report.add_member("start_residual", start_residual); - report.add_member("end_residual", stop_residual); - report.add_member("num_cg_iter_taken", num_cg_iter_taken); - report.add_member("total_time (ms)", time); - rxmesh::TestData td; - td.test_name = "MCF"; - td.num_threads = num_omp_threads; - td.time_ms.push_back(time / float(num_cg_iter_taken)); - td.passed.push_back(true); - report.add_test(td); - report.write( - Arg.output_folder + "/openmesh", - "MCF_OpenMesh_" + rxmesh::extract_file_name(Arg.obj_file_name)); -} \ No newline at end of file From 5257167c31dba7073f6d6c6a9894099503ca5595 Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 8 Jul 2024 23:41:25 -0400 Subject: [PATCH 21/96] missing file --- apps/MCF/mcf.cu | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/apps/MCF/mcf.cu b/apps/MCF/mcf.cu index ce018182..3bff3bb9 100644 --- a/apps/MCF/mcf.cu +++ b/apps/MCF/mcf.cu @@ -4,7 +4,6 @@ #include -#include "../common/openmesh_trimesh.h" #include "gtest/gtest.h" #include "rxmesh/attribute.h" #include "rxmesh/rxmesh_static.h" @@ -26,9 +25,8 @@ struct arg int argc; } Arg; -#include "mcf_openmesh.h" -#include "mcf_rxmesh.h" -#include "mcf_sparse_matrix.cuh" +#include "mcf_cg.h" +#include "mcf_cusolver_chol.cuh" TEST(App, MCF) @@ -41,22 +39,11 @@ TEST(App, MCF) RXMeshStatic rx(Arg.obj_file_name); - TriMesh input_mesh; - ASSERT_TRUE(OpenMesh::IO::read_mesh(input_mesh, Arg.obj_file_name)); - - - // OpenMesh Impl - std::vector> ground_truth(rx.get_num_vertices()); - for (auto& g : ground_truth) { - g.resize(3); - } - mcf_openmesh(omp_get_max_threads(), input_mesh, ground_truth); - // RXMesh Impl - mcf_rxmesh_cg(rx, ground_truth); + mcf_cg(rx); // RXMesh cusolver Impl - mcf_rxmesh_cusolver_chol(rx, ground_truth); + mcf_cusolver_chol(rx); } int main(int argc, char** argv) From 
e9a49994fb246dad120b3d8220cd4b8a09ca495c Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 8 Jul 2024 23:46:26 -0400 Subject: [PATCH 22/96] gaussian curvature: convert gold to attribute to simplify comparison --- apps/GaussianCurvature/gaussian_curvature.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/GaussianCurvature/gaussian_curvature.cu b/apps/GaussianCurvature/gaussian_curvature.cu index daa2d514..948bbb81 100644 --- a/apps/GaussianCurvature/gaussian_curvature.cu +++ b/apps/GaussianCurvature/gaussian_curvature.cu @@ -62,11 +62,11 @@ void gaussian_curvature_rxmesh(const std::vector& gaussian_curvature_gold) // Verify v_gc.move(rxmesh::DEVICE, rxmesh::HOST); + // convert gold to attribute to compare against + auto gold = *rx.add_vertex_attribute(gaussian_curvature_gold, "gold"); + rx.for_each_vertex(HOST, [&](const VertexHandle& vh) { - uint32_t v_id = rx.map_to_global(vh); - EXPECT_NEAR(std::abs(gaussian_curvature_gold[v_id]), - std::abs(v_gc(vh, 0)), - 0.001); + EXPECT_NEAR(std::abs(gold(vh)), std::abs(v_gc(vh)), 0.001); }); #if USE_POLYSCOPE From 6f43f5d32fe775c8b84a33dfaf7cd2ad19365ded Mon Sep 17 00:00:00 2001 From: ahmed Date: Sat, 13 Jul 2024 10:42:10 -0400 Subject: [PATCH 23/96] test eigen --- CMakeLists.txt | 6 ++ apps/SurfaceTracking/CMakeLists.txt | 8 +-- {apps/SurfaceTracking => cmake}/eigen.cmake | 0 include/rxmesh/matrix/sparse_matrix.cuh | 5 +- tests/RXMesh_test/CMakeLists.txt | 1 + tests/RXMesh_test/rxmesh_test_main.cu | 1 + tests/RXMesh_test/test_eigen.cuh | 74 +++++++++++++++++++++ 7 files changed, 84 insertions(+), 11 deletions(-) rename {apps/SurfaceTracking => cmake}/eigen.cmake (100%) create mode 100644 tests/RXMesh_test/test_eigen.cuh diff --git a/CMakeLists.txt b/CMakeLists.txt index a9f53e03..d036c085 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,6 +171,12 @@ find_package(CUDAToolkit REQUIRED) target_link_libraries(RXMesh INTERFACE CUDA::cusparse) target_link_libraries(RXMesh INTERFACE 
CUDA::cusolver) +#Eigen +include("cmake/eigen.cmake") +target_link_libraries(RXMesh INTERFACE Eigen3::Eigen) +# https://eigen.tuxfamily.org/dox/TopicCUDA.html +target_compile_definitions(RXMesh INTERFACE "EIGEN_DEFAULT_DENSE_INDEX_TYPE=int") + include(GoogleTest) add_subdirectory(apps) add_subdirectory(tests) diff --git a/apps/SurfaceTracking/CMakeLists.txt b/apps/SurfaceTracking/CMakeLists.txt index ab33882a..6240c127 100644 --- a/apps/SurfaceTracking/CMakeLists.txt +++ b/apps/SurfaceTracking/CMakeLists.txt @@ -27,8 +27,6 @@ if (WIN32) PRIVATE _CRT_SECURE_NO_WARNINGS) endif() -include("eigen.cmake") - set_target_properties(SurfaceTracking PROPERTIES FOLDER "apps") set_property(TARGET SurfaceTracking PROPERTY CUDA_SEPARABLE_COMPILATION ON) @@ -37,11 +35,7 @@ source_group(TREE ${CMAKE_CURRENT_LIST_DIR} PREFIX "SurfaceTracking" FILES ${SOU target_link_libraries(SurfaceTracking PRIVATE RXMesh - PRIVATE gtest_main - PRIVATE Eigen3::Eigen + PRIVATE gtest_main ) -# https://eigen.tuxfamily.org/dox/TopicCUDA.html -target_compile_definitions(SurfaceTracking PUBLIC "EIGEN_DEFAULT_DENSE_INDEX_TYPE=int") - #gtest_discover_tests( SurfaceTracking ) \ No newline at end of file diff --git a/apps/SurfaceTracking/eigen.cmake b/cmake/eigen.cmake similarity index 100% rename from apps/SurfaceTracking/eigen.cmake rename to cmake/eigen.cmake diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index fb04b3ee..91c94ac5 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -170,10 +170,7 @@ struct SparseMatrix m_d_cusparse_spmm_buffer(nullptr), m_d_cusparse_spmv_buffer(nullptr), m_allocated(LOCATION_NONE) - { - static_assert(std::is_same_v || std::is_same_v, - "RXMesh::SparseMatrix supports only float or double"); - + { using namespace rxmesh; constexpr uint32_t blockThreads = 256; diff --git a/tests/RXMesh_test/CMakeLists.txt b/tests/RXMesh_test/CMakeLists.txt index a42652be..52e24db8 100644 --- 
a/tests/RXMesh_test/CMakeLists.txt +++ b/tests/RXMesh_test/CMakeLists.txt @@ -22,6 +22,7 @@ set( SOURCE_LIST test_patch_slicing.cuh test_multi_queries.cuh test_wasted_work.cuh + test_eigen.cuh ) target_sources( RXMesh_test diff --git a/tests/RXMesh_test/rxmesh_test_main.cu b/tests/RXMesh_test/rxmesh_test_main.cu index 2b89df15..cc86bf55 100644 --- a/tests/RXMesh_test/rxmesh_test_main.cu +++ b/tests/RXMesh_test/rxmesh_test_main.cu @@ -31,6 +31,7 @@ struct RXMeshTestArg #include "test_patch_slicing.cuh" #include "test_multi_queries.cuh" #include "test_wasted_work.cuh" +#include "test_eigen.cuh" // clang-format on int main(int argc, char** argv) diff --git a/tests/RXMesh_test/test_eigen.cuh b/tests/RXMesh_test/test_eigen.cuh new file mode 100644 index 00000000..804b1c6e --- /dev/null +++ b/tests/RXMesh_test/test_eigen.cuh @@ -0,0 +1,74 @@ +#include "gtest/gtest.h" + +#include "rxmesh/attribute.h" +#include "rxmesh/rxmesh_static.h" + +#include "rxmesh/kernels/for_each.cuh" + +#include + +template +__global__ static void eigen_norm(const rxmesh::Context context, + const rxmesh::VertexAttribute in_attr, + rxmesh::VertexAttribute out_attr) +{ + using namespace rxmesh; + + auto normalize = [&](VertexHandle& vh) { + Eigen::Vector3d attr; + + attr << in_attr(vh, 0), in_attr(vh, 1), in_attr(vh, 2); + + out_attr(vh) = attr.norm(); + }; + + for_each(context, normalize); +} + + +TEST(Attribute, Eigen) +{ + using namespace rxmesh; + + cuda_query(0); + + std::string obj_path = STRINGIFY(INPUT_DIR) "dragon.obj"; + + RXMeshStatic rx(obj_path); + + auto in_attr = *rx.add_vertex_attribute("vAttrIn", 3); + + auto out_attr = *rx.add_vertex_attribute("vAttrOut", 1); + + rx.for_each_vertex(HOST, [&](VertexHandle vh) { + for (int i = 0; i < 3; ++i) { + in_attr(vh, i) = rand() % rx.get_num_vertices(); + } + }); + + in_attr.move(HOST, DEVICE); + + constexpr uint32_t blockThreads = 256; + + LaunchBox launch_box; + + rx.prepare_launch_box({}, launch_box, (void*)eigen_norm); + + eigen_norm + 
<<>>(rx.get_context(), in_attr, out_attr); + + cudaDeviceSynchronize(); + + out_attr.move(DEVICE, HOST); + + rx.for_each_vertex(HOST, [&](VertexHandle vh) { + double n = in_attr(vh, 0) * in_attr(vh, 0) + + in_attr(vh, 1) * in_attr(vh, 1) + + in_attr(vh, 2) * in_attr(vh, 2); + n = std::sqrt(n); + + EXPECT_NEAR(n, out_attr(vh), 0.0001); + }); +} From 601e378e77e4299ef20db9dc1af28e7509810fa1 Mon Sep 17 00:00:00 2001 From: ahmed Date: Sat, 13 Jul 2024 16:56:44 -0400 Subject: [PATCH 24/96] minor bug fixes --- include/rxmesh/matrix/sparse_matrix.cuh | 2 +- tests/RXMesh_test/test_attribute.cuh | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index 91c94ac5..b72fc21f 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -537,7 +537,7 @@ struct SparseMatrix rxmesh::DenseMatrix& C_mat, cudaStream_t stream = 0) { - assert(cols() == B_mat.cols()); + assert(cols() == B_mat.rows()); assert(rows() == C_mat.rows()); assert(B_mat.cols() == C_mat.cols()); diff --git a/tests/RXMesh_test/test_attribute.cuh b/tests/RXMesh_test/test_attribute.cuh index 05deae03..2b9bfc67 100644 --- a/tests/RXMesh_test/test_attribute.cuh +++ b/tests/RXMesh_test/test_attribute.cuh @@ -130,10 +130,14 @@ TEST(Attribute, Reduce) ASSERT_EQ(cudaDeviceSynchronize(), cudaSuccess); uint32_t result = 0; - rx.for_each_edge(rxmesh::HOST, [&](const rxmesh::EdgeHandle eh) { - auto pl = eh.unpack(); - result = std::max(result, pl.first * pl.second); - }); + rx.for_each_edge( + rxmesh::HOST, + [&](const rxmesh::EdgeHandle eh) { + auto pl = eh.unpack(); + result = std::max(result, pl.first * pl.second); + }, + NULL, + false); EXPECT_EQ(output, result); } From 8f0ba20a96b5a1e91223d3a076c080984f245beb Mon Sep 17 00:00:00 2001 From: ahmed Date: Wed, 17 Jul 2024 19:42:17 -0400 Subject: [PATCH 25/96] refactor SparseMatrix allow complex type --- 
include/rxmesh/matrix/dense_matrix.cuh | 51 ++- include/rxmesh/matrix/sparse_matrix.cuh | 417 ++++++++++++++++-------- include/rxmesh/util/cuda_query.h | 8 + 3 files changed, 328 insertions(+), 148 deletions(-) diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index 761da8b7..7949a771 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -69,7 +69,7 @@ struct DenseMatrix /** * @brief return the leading dimension (row by default) */ - IndexT lead_dim() const + __host__ __device__ IndexT lead_dim() const { return m_num_rows; } @@ -77,7 +77,7 @@ struct DenseMatrix /** * @brief return number of rows */ - IndexT rows() const + __host__ __device__ IndexT rows() const { return m_num_rows; } @@ -85,7 +85,7 @@ struct DenseMatrix /** * @brief return number of columns */ - IndexT cols() const + __host__ __device__ IndexT cols() const { return m_num_cols; } @@ -174,7 +174,7 @@ struct DenseMatrix * @brief return the raw pointer based on the specified location (host vs. * device) */ - T* data(locationT location = DEVICE) const + __host__ __device__ T* data(locationT location = DEVICE) const { if ((location & HOST) == HOST) { return m_h_val; @@ -191,7 +191,8 @@ struct DenseMatrix /** * @brief return the raw pointer pf a column. 
*/ - T* col_data(const uint32_t ld_idx, locationT location = DEVICE) const + __host__ const T* col_data(const uint32_t ld_idx, + locationT location = DEVICE) const { if ((location & HOST) == HOST) { return m_h_val + ld_idx * (m_num_rows + m_col_pad_idx); @@ -201,19 +202,41 @@ struct DenseMatrix return m_d_val + ld_idx * (m_num_rows + m_col_pad_idx); } - if ((location & m_allocated) == location) { - RXMESH_ERROR("Requested data not allocated on {}", - location_to_string(location)); + if ((location & m_allocated) != location) { + RXMESH_ERROR( + "DenseMatrix::col_data() Requested data not allocated on {}", + location_to_string(location)); } - assert(1 != 1); - return 0; + return nullptr; + } + + /** + * @brief return the raw pointer pf a column. + */ + __host__ T* col_data(const uint32_t ld_idx, locationT location = DEVICE) + { + if ((location & HOST) == HOST) { + return m_h_val + ld_idx * (m_num_rows + m_col_pad_idx); + } + + if ((location & DEVICE) == DEVICE) { + return m_d_val + ld_idx * (m_num_rows + m_col_pad_idx); + } + + if ((location & m_allocated) != location) { + RXMESH_ERROR( + "DenseMatrix::col_data() Requested data not allocated on {}", + location_to_string(location)); + } + + return nullptr; } /** * @brief return the total number bytes used to allocate the matrix */ - IndexT bytes() const + __host__ __device__ IndexT bytes() const { return (m_num_rows + m_col_pad_idx) * m_num_cols * sizeof(T); } @@ -221,7 +244,9 @@ struct DenseMatrix /** * @brief move the data between host and device */ - void move(locationT source, locationT target, cudaStream_t stream = NULL) + __host__ void move(locationT source, + locationT target, + cudaStream_t stream = NULL) { if (source == target) { RXMESH_WARN( @@ -260,7 +285,7 @@ struct DenseMatrix /** * @brief release the data on host or device */ - void release(locationT location = LOCATION_ALL) + __host__ void release(locationT location = LOCATION_ALL) { if (((location & HOST) == HOST) && ((m_allocated & HOST) == HOST)) { 
free(m_h_val); diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index b72fc21f..8492611c 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -41,23 +41,6 @@ enum class Reorder }; namespace detail { -static int reorder_to_int(const Reorder& reorder) -{ - switch (reorder) { - case Reorder::NONE: - return 0; - case Reorder::SYMRCM: - return 1; - case Reorder::SYMAMD: - return 2; - case Reorder::NSTDIS: - return 3; - default: { - RXMESH_ERROR("reorder_to_int() unknown input reorder"); - return 0; - } - } -} // this is the function for the CSR calculation template @@ -107,28 +90,6 @@ __global__ static void sparse_mat_col_fill(const rxmesh::Context context, query.dispatch(block, shrd_alloc, col_fillin); } -// d_out[d_p[i]] = d_in[i] -template -void permute_scatter(IndexT* d_p, T* d_in, T* d_out, IndexT size) -{ - thrust::device_ptr t_p(d_p); - thrust::device_ptr t_i(d_in); - thrust::device_ptr t_o(d_out); - - thrust::scatter(thrust::device, t_i, t_i + size, t_p, t_o); -} - -// d_out[i] = d_in[d_p[i]] -template -void permute_gather(IndexT* d_p, T* d_in, T* d_out, IndexT size) -{ - thrust::device_ptr t_p(d_p); - thrust::device_ptr t_i(d_in); - thrust::device_ptr t_o(d_out); - - thrust::gather(thrust::device, t_p, t_p + size, t_i, t_o); -} - } // namespace detail @@ -170,7 +131,7 @@ struct SparseMatrix m_d_cusparse_spmm_buffer(nullptr), m_d_cusparse_spmv_buffer(nullptr), m_allocated(LOCATION_NONE) - { + { using namespace rxmesh; constexpr uint32_t blockThreads = 256; @@ -243,6 +204,8 @@ struct SparseMatrix CUSPARSE_ERROR(cusparseCreateMatDescr(&m_descr)); CUSPARSE_ERROR( cusparseSetMatType(m_descr, CUSPARSE_MATRIX_TYPE_GENERAL)); + CUSPARSE_ERROR( + cusparseSetMatDiagType(m_descr, CUSPARSE_DIAG_TYPE_NON_UNIT)); CUSPARSE_ERROR( cusparseSetMatIndexBase(m_descr, CUSPARSE_INDEX_BASE_ZERO)); @@ -256,7 +219,7 @@ struct SparseMatrix CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, 
CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F)); + cuda_type())); CUSPARSE_ERROR(cusparseCreate(&m_cusparse_handle)); CUSOLVER_ERROR(cusolverSpCreate(&m_cusolver_sphandle)); @@ -488,13 +451,14 @@ struct SparseMatrix } } + /** * @brief allocate the temp buffer needed for sparse matrix multiplication * by a dense matrix */ - __host__ void alloc_multiply_buffer(rxmesh::DenseMatrix B_mat, - rxmesh::DenseMatrix C_mat, - cudaStream_t stream = 0) + __host__ void alloc_multiply_buffer(const rxmesh::DenseMatrix& B_mat, + rxmesh::DenseMatrix& C_mat, + cudaStream_t stream = 0) { T alpha = 1.0; T beta = 0.0; @@ -514,7 +478,7 @@ struct SparseMatrix matB, &beta, matC, - CUDA_R_32F, + cuda_type(), CUSPARSE_SPMM_ALG_DEFAULT, &m_spmm_buffer_size)); CUDA_ERROR(cudaMalloc(&m_d_cusparse_spmm_buffer, m_spmm_buffer_size)); @@ -566,7 +530,7 @@ struct SparseMatrix matB, &beta, matC, - CUDA_R_32F, + cuda_type(), CUSPARSE_SPMM_ALG_DEFAULT, m_d_cusparse_spmm_buffer)); } @@ -586,9 +550,9 @@ struct SparseMatrix cusparseDnVecDescr_t vecy = NULL; CUSPARSE_ERROR( - cusparseCreateDnVec(&vecx, m_num_cols, in_arr, CUDA_R_32F)); + cusparseCreateDnVec(&vecx, m_num_cols, in_arr, cuda_type())); CUSPARSE_ERROR( - cusparseCreateDnVec(&vecy, m_num_rows, rt_arr, CUDA_R_32F)); + cusparseCreateDnVec(&vecy, m_num_rows, rt_arr, cuda_type())); CUSPARSE_ERROR(cusparseSpMV_bufferSize(m_cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -597,7 +561,7 @@ struct SparseMatrix vecx, &beta, vecy, - CUDA_R_32F, + cuda_type(), CUSPARSE_SPMV_ALG_DEFAULT, &m_spmv_buffer_size)); CUSPARSE_ERROR(cusparseDestroyDnVec(vecx)); @@ -631,9 +595,9 @@ struct SparseMatrix cusparseDnVecDescr_t vecy = NULL; CUSPARSE_ERROR( - cusparseCreateDnVec(&vecx, m_num_cols, in_arr, CUDA_R_32F)); + cusparseCreateDnVec(&vecx, m_num_cols, in_arr, cuda_type())); CUSPARSE_ERROR( - cusparseCreateDnVec(&vecy, m_num_rows, rt_arr, CUDA_R_32F)); + cusparseCreateDnVec(&vecy, m_num_rows, rt_arr, cuda_type())); CUSPARSE_ERROR(cusparseSetStream(m_cusparse_handle, 
stream)); @@ -649,7 +613,7 @@ struct SparseMatrix vecx, &beta, vecy, - CUDA_R_32F, + cuda_type(), CUSPARSE_SPMV_ALG_DEFAULT, m_d_cusparse_spmv_buffer)); @@ -667,9 +631,9 @@ struct SparseMatrix * the columns for B and multiply them separately as sparse matrix dense * vector multiplication */ - void multiply_cw(rxmesh::DenseMatrix B_mat, - rxmesh::DenseMatrix C_mat, - cudaStream_t stream = 0) + void multiply_cw(const rxmesh::DenseMatrix& B_mat, + rxmesh::DenseMatrix& C_mat, + cudaStream_t stream = 0) { assert(cols() == B_mat.cols()); assert(rows() == C_mat.rows()); @@ -679,15 +643,16 @@ struct SparseMatrix multiply(B_mat.col_data(i), C_mat.col_data(i), stream); } } - + /** - * @brief solve the Ax=b for x where x and b are all array + * @brief solve the Ax=b for x */ - void solve(T* B_arr, + void solve(const T* B_arr, T* X_arr, rxmesh::Solver solver, - rxmesh::Reorder reorder) + rxmesh::Reorder reorder, + cudaStream_t stream = 0) { cusparse_linear_solver_wrapper(solver, reorder, @@ -700,17 +665,19 @@ struct SparseMatrix m_d_col_idx, m_d_val, B_arr, - X_arr); + X_arr, + stream); } /** * @brief solve the AX=B for X where X and B are all dense matrix and we * would solve it in a column wise manner */ - void solve(rxmesh::DenseMatrix B_mat, - rxmesh::DenseMatrix X_mat, - rxmesh::Solver solver, - rxmesh::Reorder reorder) + void solve(const rxmesh::DenseMatrix& B_mat, + rxmesh::DenseMatrix& X_mat, + rxmesh::Solver solver, + rxmesh::Reorder reorder, + cudaStream_t stream = 0) { for (int i = 0; i < B_mat.cols(); ++i) { cusparse_linear_solver_wrapper(solver, @@ -724,7 +691,8 @@ struct SparseMatrix m_d_col_idx, m_d_val, B_mat.col_data(i), - X_mat.col_data(i)); + X_mat.col_data(i), + stream); } } @@ -740,8 +708,9 @@ struct SparseMatrix void spmat_chol_reorder(rxmesh::Reorder reorder) { if (reorder == Reorder::NONE) { - RXMESH_INFO("None reordering is specified", - "Continue without reordering"); + RXMESH_WARN( + "SparseMatrix::spmat_chol_reorder() No reordering is " + 
"specified. Continue without reordering!"); m_use_reorder = false; if (m_reorder_allocated) { @@ -882,7 +851,7 @@ struct SparseMatrix m_nnz * sizeof(IndexT), cudaMemcpyHostToDevice)); - detail::permute_gather(d_val_permute, m_d_val, m_d_solver_val, m_nnz); + permute_gather(d_val_permute, m_d_val, m_d_solver_val, m_nnz); free(h_val_permute); GPU_FREE(d_val_permute); @@ -1004,7 +973,8 @@ struct SparseMatrix } if (0 <= singularity) { RXMESH_WARN( - "WARNING: the matrix is singular at row {} under tol ({})", + "SparseMatrix::spmat_chol_factor() The matrix is singular at " + "row {} under tol ({})", singularity, tol); } @@ -1025,12 +995,12 @@ struct SparseMatrix T* d_solver_x; if (m_use_reorder) { - /* purmute b and x*/ + // purmute b and x CUDA_ERROR(cudaMalloc((void**)&d_solver_b, m_num_rows * sizeof(T))); - detail::permute_gather(m_d_permute, d_b, d_solver_b, m_num_rows); + permute_gather(m_d_permute, d_b, d_solver_b, m_num_rows); CUDA_ERROR(cudaMalloc((void**)&d_solver_x, m_num_cols * sizeof(T))); - detail::permute_gather(m_d_permute, d_x, d_solver_x, m_num_rows); + permute_gather(m_d_permute, d_x, d_solver_x, m_num_rows); } else { d_solver_b = d_b; d_solver_x = d_x; @@ -1055,7 +1025,7 @@ struct SparseMatrix } if (m_use_reorder) { - detail::permute_scatter(m_d_permute, d_solver_x, d_x, m_num_rows); + permute_scatter(m_d_permute, d_solver_x, d_x, m_num_rows); GPU_FREE(d_solver_b); GPU_FREE(d_solver_x); } @@ -1130,96 +1100,273 @@ struct SparseMatrix int rowsA, int colsA, int nnzA, - int* d_csrRowPtrA, - int* d_csrColIndA, - T* d_csrValA, - T* d_b, - T* d_x) + const int* d_csrRowPtrA, + const int* d_csrColIndA, + const T* d_csrValA, + const T* d_b, + T* d_x, + cudaStream_t stream) { + CUSOLVER_ERROR(cusolverSpSetStream(handle, stream)); + double tol = 1.e-12; - int singularity = 0; /* -1 if A is invertible under tol. */ + // -1 if A is invertible under tol. 
+ int singularity = 0; - /* solve B*z = Q*b */ if (solver == Solver::CHOL) { if constexpr (std::is_same_v) { - CUSOLVER_ERROR( - cusolverSpScsrlsvchol(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_b, - tol, - detail::reorder_to_int(reorder), - d_x, - &singularity)); + CUSOLVER_ERROR(cusolverSpScsrlsvchol(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + reorder_to_int(reorder), + d_x, + &singularity)); + } + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpCcsrlsvchol(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + reorder_to_int(reorder), + d_x, + &singularity)); } if constexpr (std::is_same_v) { - CUSOLVER_ERROR( - cusolverSpDcsrlsvchol(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_b, - tol, - detail::reorder_to_int(reorder), - d_x, - &singularity)); + CUSOLVER_ERROR(cusolverSpDcsrlsvchol(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + reorder_to_int(reorder), + d_x, + &singularity)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpZcsrlsvchol(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + reorder_to_int(reorder), + d_x, + &singularity)); } } else if (solver == Solver::QR) { if constexpr (std::is_same_v) { - CUSOLVER_ERROR( - cusolverSpScsrlsvqr(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_b, - tol, - detail::reorder_to_int(reorder), - d_x, - &singularity)); + CUSOLVER_ERROR(cusolverSpScsrlsvqr(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + reorder_to_int(reorder), + d_x, + &singularity)); + } + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpCcsrlsvqr(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + reorder_to_int(reorder), + 
d_x, + &singularity)); + } + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpDcsrlsvqr(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + reorder_to_int(reorder), + d_x, + &singularity)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpZcsrlsvqr(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + reorder_to_int(reorder), + d_x, + &singularity)); + } + } else if (solver == Solver::LU) { + RXMESH_ERROR( + "SparseMatrix: LU Solver is run on the host. Make sure your " + "data resides on the host before calling the solver"); + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpScsrlsvluHost(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + reorder_to_int(reorder), + d_x, + &singularity)); + } + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpCcsrlsvluHost(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + reorder_to_int(reorder), + d_x, + &singularity)); } if constexpr (std::is_same_v) { - CUSOLVER_ERROR( - cusolverSpDcsrlsvqr(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_b, - tol, - detail::reorder_to_int(reorder), - d_x, - &singularity)); + CUSOLVER_ERROR(cusolverSpDcsrlsvluHost(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + reorder_to_int(reorder), + d_x, + &singularity)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpZcsrlsvluHost(handle, + rowsA, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + d_b, + tol, + reorder_to_int(reorder), + d_x, + &singularity)); } } else { RXMESH_ERROR( - "Only Solver::CHOL and Solver::QR is supported, use CUDA 12.x " - "for Solver::LU"); + "SparseMatrix::cusparse_linear_solver_wrapper() Unsupported " + "solver type."); } if (0 <= singularity) { RXMESH_WARN( - "WARNING: the matrix is 
singular at row {} under tol ({})", + "SparseMatrix::cusparse_linear_solver_wrapper() The matrix is " + "singular at row {} under tol ({})", singularity, tol); } } + cudaDataType_t cuda_type() const + { + if (std::is_same_v) { + return CUDA_R_32F; + } else if (std::is_same_v) { + return CUDA_R_64F; + } else if (std::is_same_v) { + return CUDA_C_32F; + } else if (std::is_same_v) { + return CUDA_C_64F; + } else { + RXMESH_ERROR( + "SparseMatrix unsupported type. SparseMatrix can support " + "different data type but for the solver, only float, double, " + "cuComplex, and cuDoubleComplex are supported"); + } + } + + int reorder_to_int(const Reorder& reorder) const + { + switch (reorder) { + case Reorder::NONE: + return 0; + case Reorder::SYMRCM: + return 1; + case Reorder::SYMAMD: + return 2; + case Reorder::NSTDIS: + return 3; + default: { + RXMESH_ERROR("reorder_to_int() unknown input reorder"); + return 0; + } + } + } + + + void permute_scatter(IndexT* d_p, T* d_in, T* d_out, IndexT size) + { + // d_out[d_p[i]] = d_in[i] + thrust::device_ptr t_p(d_p); + thrust::device_ptr t_i(d_in); + thrust::device_ptr t_o(d_out); + + thrust::scatter(thrust::device, t_i, t_i + size, t_p, t_o); + } + + void permute_gather(IndexT* d_p, T* d_in, T* d_out, IndexT size) + { + // d_out[i] = d_in[d_p[i]] + thrust::device_ptr t_p(d_p); + thrust::device_ptr t_i(d_in); + thrust::device_ptr t_o(d_out); + + thrust::gather(thrust::device, t_p, t_p + size, t_i, t_o); + } const Context m_context; cusparseHandle_t m_cusparse_handle; diff --git a/include/rxmesh/util/cuda_query.h b/include/rxmesh/util/cuda_query.h index 30661485..98ac9b6f 100644 --- a/include/rxmesh/util/cuda_query.h +++ b/include/rxmesh/util/cuda_query.h @@ -104,6 +104,14 @@ inline cudaDeviceProp cuda_query(const int dev) RXMESH_TRACE("Peak Memory Bandwidth: {0:f}(GB/s)", maxBW); RXMESH_TRACE("Kernels compiled for compute capability: {}", cuda_arch()); + int cusolver_major = -1, cusolver_minor = -1, cusolver_patch = -1; + 
CUSOLVER_ERROR(cusolverGetProperty(MAJOR_VERSION, &cusolver_major)); + CUSOLVER_ERROR(cusolverGetProperty(MINOR_VERSION, &cusolver_minor)); + CUSOLVER_ERROR(cusolverGetProperty(PATCH_LEVEL, &cusolver_patch)); + RXMESH_TRACE("Using cuSolver Version {}.{}.{}", + cusolver_major, + cusolver_minor, + cusolver_patch); if (!dev_prop.managedMemory) { RXMESH_ERROR( From ff22645af2749260075b8e434247310549fd97c0 Mon Sep 17 00:00:00 2001 From: ahmed Date: Wed, 17 Jul 2024 19:59:37 -0400 Subject: [PATCH 26/96] sparse matrix refactor --- include/rxmesh/matrix/sparse_matrix.cuh | 57 +---------------- .../rxmesh/matrix/sparse_matrix_kernels.cuh | 62 +++++++++++++++++++ 2 files changed, 64 insertions(+), 55 deletions(-) create mode 100644 include/rxmesh/matrix/sparse_matrix_kernels.cuh diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index 8492611c..63161ff3 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -3,8 +3,6 @@ #include "cusparse.h" #include "rxmesh/attribute.h" #include "rxmesh/context.h" -#include "rxmesh/query.cuh" -#include "rxmesh/types.h" #include "thrust/device_ptr.h" #include "thrust/execution_policy.h" @@ -14,6 +12,8 @@ #include "cusolverSp_LOWLEVEL_PREVIEW.h" #include "rxmesh/matrix/dense_matrix.cuh" +#include "rxmesh/matrix/sparse_matrix_kernels.cuh" + namespace rxmesh { /** @@ -40,59 +40,6 @@ enum class Reorder NSTDIS = 3 }; -namespace detail { - -// this is the function for the CSR calculation -template -__global__ static void sparse_mat_prescan(const rxmesh::Context context, - IndexT* row_ptr) -{ - using namespace rxmesh; - - auto init_lambda = [&](VertexHandle& v_id, const VertexIterator& iter) { - auto ids = v_id.unpack(); - uint32_t patch_id = ids.first; - uint16_t local_id = ids.second; - row_ptr[context.vertex_prefix()[patch_id] + local_id] = iter.size() + 1; - }; - - auto block = cooperative_groups::this_thread_block(); - Query query(context); - 
ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, init_lambda); -} - -template -__global__ static void sparse_mat_col_fill(const rxmesh::Context context, - IndexT* row_ptr, - IndexT* col_idx) -{ - using namespace rxmesh; - - auto col_fillin = [&](VertexHandle& v_id, const VertexIterator& iter) { - auto ids = v_id.unpack(); - uint32_t patch_id = ids.first; - uint16_t local_id = ids.second; - col_idx[row_ptr[context.vertex_prefix()[patch_id] + local_id]] = - context.vertex_prefix()[patch_id] + local_id; - for (uint32_t v = 0; v < iter.size(); ++v) { - auto s_ids = iter[v].unpack(); - uint32_t s_patch_id = s_ids.first; - uint16_t s_local_id = s_ids.second; - col_idx[row_ptr[context.vertex_prefix()[patch_id] + local_id] + v + - 1] = context.vertex_prefix()[s_patch_id] + s_local_id; - } - }; - - auto block = cooperative_groups::this_thread_block(); - Query query(context); - ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, col_fillin); -} - -} // namespace detail - - /** * @brief Sparse matrix that represent the VV connectivity, i.e., it * is a square matrix with number of rows/cols is equal to number of vertices diff --git a/include/rxmesh/matrix/sparse_matrix_kernels.cuh b/include/rxmesh/matrix/sparse_matrix_kernels.cuh new file mode 100644 index 00000000..b9911eaa --- /dev/null +++ b/include/rxmesh/matrix/sparse_matrix_kernels.cuh @@ -0,0 +1,62 @@ +#pragma once +#include "cusolverSp.h" +#include "cusparse.h" + +#include "rxmesh/context.h" +#include "rxmesh/query.cuh" + +namespace rxmesh { + +namespace detail { + +// this is the function for the CSR calculation +template +__global__ static void sparse_mat_prescan(const rxmesh::Context context, + IndexT* row_ptr) +{ + using namespace rxmesh; + + auto init_lambda = [&](VertexHandle& v_id, const VertexIterator& iter) { + auto ids = v_id.unpack(); + uint32_t patch_id = ids.first; + uint16_t local_id = ids.second; + row_ptr[context.vertex_prefix()[patch_id] + local_id] = iter.size() + 1; + }; + + 
auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, init_lambda); +} + +template +__global__ static void sparse_mat_col_fill(const rxmesh::Context context, + IndexT* row_ptr, + IndexT* col_idx) +{ + using namespace rxmesh; + + auto col_fillin = [&](VertexHandle& v_id, const VertexIterator& iter) { + auto ids = v_id.unpack(); + uint32_t patch_id = ids.first; + uint16_t local_id = ids.second; + col_idx[row_ptr[context.vertex_prefix()[patch_id] + local_id]] = + context.vertex_prefix()[patch_id] + local_id; + for (uint32_t v = 0; v < iter.size(); ++v) { + auto s_ids = iter[v].unpack(); + uint32_t s_patch_id = s_ids.first; + uint16_t s_local_id = s_ids.second; + col_idx[row_ptr[context.vertex_prefix()[patch_id] + local_id] + v + + 1] = context.vertex_prefix()[s_patch_id] + s_local_id; + } + }; + + auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, col_fillin); +} + +} // namespace detail + +} // namespace rxmesh \ No newline at end of file From e149c1078fdec4b17bf90d730653adfb24299729 Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 18 Jul 2024 12:11:31 -0400 Subject: [PATCH 27/96] reorg low-level API --- apps/MCF/mcf_cusolver_chol.cuh | 11 ++- include/rxmesh/matrix/sparse_matrix.cuh | 108 +++++++++++++---------- tests/RXMesh_test/test_sparse_matrix.cuh | 2 - 3 files changed, 71 insertions(+), 50 deletions(-) diff --git a/apps/MCF/mcf_cusolver_chol.cuh b/apps/MCF/mcf_cusolver_chol.cuh index 82a751bc..35eac6ea 100644 --- a/apps/MCF/mcf_cusolver_chol.cuh +++ b/apps/MCF/mcf_cusolver_chol.cuh @@ -182,7 +182,16 @@ void mcf_cusolver_chol(rxmesh::RXMeshStatic& rx) Arg.time_step); // Solving the linear system using chol factorization and no reordering - A_mat.solve(B_mat, *X_mat, Solver::CHOL, Reorder::NONE); + //A_mat.solve(B_mat, *X_mat, Solver::CHOL, Reorder::NONE); + + 
A_mat.spmat_chol_reorder(Reorder::NSTDIS); + A_mat.spmat_chol_analysis(); + A_mat.spmat_chol_buffer_alloc(); + A_mat.spmat_chol_factor(); + + for (int i = 0; i < B_mat.cols(); ++i) { + A_mat.spmat_chol_solve(B_mat.col_data(i), X_mat->col_data(i)); + } // move the results to the host X_mat->move(rxmesh::DEVICE, rxmesh::HOST); diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index 63161ff3..52c00ef5 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -73,6 +73,11 @@ struct SparseMatrix m_d_solver_row_ptr(nullptr), m_d_solver_col_idx(nullptr), m_d_solver_val(nullptr), + m_h_solver_row_ptr(nullptr), + m_h_solver_col_idx(nullptr), + m_h_solver_val(nullptr), + m_h_permute_map(nullptr), + m_d_permute_map(nullptr), m_use_reorder(false), m_reorder_allocated(false), m_d_cusparse_spmm_buffer(nullptr), @@ -171,6 +176,7 @@ struct SparseMatrix CUSPARSE_ERROR(cusparseCreate(&m_cusparse_handle)); CUSOLVER_ERROR(cusolverSpCreate(&m_cusolver_sphandle)); + CUSOLVER_ERROR(cusolverSpCreateCsrcholInfo(&m_chol_info)); // allocate the host m_h_val = static_cast(malloc(m_nnz * sizeof(T))); @@ -339,6 +345,7 @@ struct SparseMatrix CUSPARSE_ERROR(cusparseDestroy(m_cusparse_handle)); CUSPARSE_ERROR(cusparseDestroyMatDescr(m_descr)); CUSOLVER_ERROR(cusolverSpDestroy(m_cusolver_sphandle)); + CUSOLVER_ERROR(cusolverSpDestroyCsrcholInfo(m_chol_info)); if (m_reorder_allocated) { GPU_FREE(m_d_solver_val); @@ -346,7 +353,10 @@ struct SparseMatrix GPU_FREE(m_d_solver_col_idx); GPU_FREE(m_d_permute); free(m_h_permute); + GPU_FREE(m_d_permute_map); + free(m_h_permute_map); } + GPU_FREE(m_chol_buffer); GPU_FREE(m_d_cusparse_spmm_buffer); GPU_FREE(m_d_cusparse_spmv_buffer); } @@ -646,6 +656,37 @@ struct SparseMatrix /* --- LOW LEVEL API --- */ + /** + * @brief allocate all temp buffers needed for the solver low-level API + */ + void solver_alloc(rxmesh::Reorder reorder) + { + if (reorder == Reorder::NONE) { + 
return; + } + + m_reorder_allocated = true; + CUDA_ERROR(cudaMalloc((void**)&m_d_solver_val, m_nnz * sizeof(T))); + CUDA_ERROR(cudaMalloc((void**)&m_d_solver_row_ptr, + (m_num_rows + 1) * sizeof(IndexT))); + CUDA_ERROR( + cudaMalloc((void**)&m_d_solver_col_idx, m_nnz * sizeof(IndexT))); + + + m_h_solver_val = (T*)malloc(m_nnz * sizeof(T)); + m_h_solver_row_ptr = (IndexT*)malloc((m_num_rows + 1) * sizeof(IndexT)); + m_h_solver_col_idx = (IndexT*)malloc(m_nnz * sizeof(IndexT)); + + m_h_permute = (IndexT*)malloc(m_num_rows * sizeof(IndexT)); + CUDA_ERROR( + cudaMalloc((void**)&m_d_permute, m_num_rows * sizeof(IndexT))); + + m_h_permute_map = static_cast(malloc(m_nnz * sizeof(IndexT))); + + CUDA_ERROR( + cudaMalloc((void**)&m_d_permute_map, m_nnz * sizeof(IndexT))); + } + /** * @brief The lower level api of reordering. Specify the reordering type or * simply NONE for no reordering. This should be called at the beginning of @@ -654,21 +695,13 @@ struct SparseMatrix */ void spmat_chol_reorder(rxmesh::Reorder reorder) { + solver_alloc(reorder); + if (reorder == Reorder::NONE) { RXMESH_WARN( "SparseMatrix::spmat_chol_reorder() No reordering is " "specified. 
Continue without reordering!"); m_use_reorder = false; - - if (m_reorder_allocated) { - GPU_FREE(m_d_solver_val); - GPU_FREE(m_d_solver_row_ptr); - GPU_FREE(m_d_solver_col_idx); - GPU_FREE(m_d_permute); - free(m_h_permute); - m_reorder_allocated = false; - } - return; } @@ -681,19 +714,6 @@ struct SparseMatrix m_use_reorder = true; - // allocate the purmutated csr - m_reorder_allocated = true; - CUDA_ERROR(cudaMalloc((void**)&m_d_solver_val, m_nnz * sizeof(T))); - CUDA_ERROR(cudaMalloc((void**)&m_d_solver_row_ptr, - (m_num_rows + 1) * sizeof(IndexT))); - CUDA_ERROR( - cudaMalloc((void**)&m_d_solver_col_idx, m_nnz * sizeof(IndexT))); - - m_h_permute = (IndexT*)malloc(m_num_rows * sizeof(IndexT)); - CUDA_ERROR( - cudaMalloc((void**)&m_d_permute, m_num_rows * sizeof(IndexT))); - - CUSOLVER_ERROR(cusolverSpCreate(&m_cusolver_sphandle)); if (reorder == Reorder::SYMRCM) { CUSOLVER_ERROR(cusolverSpXcsrsymrcmHost(m_cusolver_sphandle, @@ -730,10 +750,7 @@ struct SparseMatrix // working space for permutation: B = A*Q*A^T // the permutation for matrix A which works only for the col and row // indices, the val will be done on device with the d/h_val_permute - IndexT* h_val_permute = - static_cast(malloc(m_nnz * sizeof(IndexT))); - IndexT* d_val_permute; - CUDA_ERROR(cudaMalloc((void**)&d_val_permute, m_nnz * sizeof(IndexT))); + size_t size_perm = 0; void* perm_buffer_cpu = NULL; @@ -752,7 +769,7 @@ struct SparseMatrix perm_buffer_cpu = (void*)malloc(sizeof(char) * size_perm); for (int j = 0; j < m_nnz; j++) { - h_val_permute[j] = j; + m_h_permute_map[j] = j; } CUSOLVER_ERROR(cusolverSpXcsrpermHost(m_cusolver_sphandle, @@ -764,7 +781,7 @@ struct SparseMatrix m_h_col_idx, m_h_permute, m_h_permute, - h_val_permute, + m_h_permute_map, perm_buffer_cpu)); @@ -775,14 +792,10 @@ struct SparseMatrix // tmp_h_val[j] = m_h_val[j]; // } // for (int j = 0; j < m_nnz; j++) { - // m_h_val[j] = tmp_h_val[h_val_permute[j]]; + // m_h_val[j] = tmp_h_val[m_h_permute_map[j]]; // } // copy the 
purmutated csr from the host - CUDA_ERROR(cudaMemcpyAsync(m_d_solver_val, - m_h_val, - m_nnz * sizeof(T), - cudaMemcpyHostToDevice)); CUDA_ERROR(cudaMemcpyAsync(m_d_solver_row_ptr, m_h_row_ptr, (m_num_rows + 1) * sizeof(IndexT), @@ -793,15 +806,13 @@ struct SparseMatrix cudaMemcpyHostToDevice)); // do the permutation for val indices on device - CUDA_ERROR(cudaMemcpyAsync(d_val_permute, - h_val_permute, + CUDA_ERROR(cudaMemcpyAsync(m_d_permute_map, + m_h_permute_map, m_nnz * sizeof(IndexT), cudaMemcpyHostToDevice)); - permute_gather(d_val_permute, m_d_val, m_d_solver_val, m_nnz); + permute_gather(m_d_permute_map, m_d_val, m_d_solver_val, m_nnz); - free(h_val_permute); - GPU_FREE(d_val_permute); // restore the host data back to the original if (on_host) { @@ -809,6 +820,8 @@ struct SparseMatrix } else { release(HOST); } + + free(perm_buffer_cpu); } /** @@ -823,7 +836,6 @@ struct SparseMatrix m_d_solver_val = m_d_val; } - CUSOLVER_ERROR(cusolverSpCreateCsrcholInfo(&m_chol_info)); m_internalDataInBytes = 0; m_workspaceInBytes = 0; CUSOLVER_ERROR(cusolverSpXcsrcholAnalysis(m_cusolver_sphandle, @@ -870,13 +882,6 @@ struct SparseMatrix CUDA_ERROR(cudaMalloc((void**)&m_chol_buffer, m_workspaceInBytes)); } - /** - * @brief The lower level api of matrix factorization buffer release. 
- */ - void spmat_chol_buffer_free() - { - CUDA_ERROR(cudaFree(m_chol_buffer)); - } /** * @brief The lower level api of matrix factorization and save the @@ -1258,6 +1263,7 @@ struct SparseMatrix tol); } } + cudaDataType_t cuda_type() const { if (std::is_same_v) { @@ -1357,6 +1363,14 @@ struct SparseMatrix IndexT* m_d_solver_col_idx; T* m_d_solver_val; + + IndexT* m_h_solver_row_ptr; + IndexT* m_h_solver_col_idx; + T* m_h_solver_val; + + IndexT* m_h_permute_map; + IndexT* m_d_permute_map; + void* m_d_cusparse_spmm_buffer; void* m_d_cusparse_spmv_buffer; diff --git a/tests/RXMesh_test/test_sparse_matrix.cuh b/tests/RXMesh_test/test_sparse_matrix.cuh index 95ea3dac..c13438c9 100644 --- a/tests/RXMesh_test/test_sparse_matrix.cuh +++ b/tests/RXMesh_test/test_sparse_matrix.cuh @@ -373,8 +373,6 @@ TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) A_mat.spmat_chol_solve(B_mat.col_data(i), X_mat.col_data(i)); } - A_mat.spmat_chol_buffer_free(); - A_mat.multiply(X_mat, ret_mat); std::vector> h_ret_mat(num_vertices); From 6580f2125dba03fdb529462f25f1d85ad7ed4f28 Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 18 Jul 2024 13:48:40 -0400 Subject: [PATCH 28/96] refactor SparseMatrix --- apps/MCF/mcf_cusolver_chol.cuh | 15 +- include/rxmesh/matrix/dense_matrix.cuh | 7 +- include/rxmesh/matrix/sparse_matrix.cuh | 332 ++++++++++++----------- tests/RXMesh_test/test_sparse_matrix.cuh | 18 +- 4 files changed, 187 insertions(+), 185 deletions(-) diff --git a/apps/MCF/mcf_cusolver_chol.cuh b/apps/MCF/mcf_cusolver_chol.cuh index 35eac6ea..805cf3f9 100644 --- a/apps/MCF/mcf_cusolver_chol.cuh +++ b/apps/MCF/mcf_cusolver_chol.cuh @@ -182,16 +182,15 @@ void mcf_cusolver_chol(rxmesh::RXMeshStatic& rx) Arg.time_step); // Solving the linear system using chol factorization and no reordering - //A_mat.solve(B_mat, *X_mat, Solver::CHOL, Reorder::NONE); + // A_mat.solve(B_mat, *X_mat, Solver::CHOL, Reorder::NONE); - A_mat.spmat_chol_reorder(Reorder::NSTDIS); - A_mat.spmat_chol_analysis(); - 
A_mat.spmat_chol_buffer_alloc(); - A_mat.spmat_chol_factor(); + A_mat.solver_permute_alloc(PermuteMethod::NSTDIS); + A_mat.permute(PermuteMethod::NSTDIS); + A_mat.analyze_pattern(); + A_mat.post_analyze_alloc(); + A_mat.factorize(); + A_mat.solve(B_mat, *X_mat); - for (int i = 0; i < B_mat.cols(); ++i) { - A_mat.spmat_chol_solve(B_mat.col_data(i), X_mat->col_data(i)); - } // move the results to the host X_mat->move(rxmesh::DEVICE, rxmesh::HOST); diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index 7949a771..85ca6140 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -130,7 +130,7 @@ struct DenseMatrix template __host__ __device__ T& operator()(const HandleT handle, const uint32_t col) { - return this->operator()(get_row_id_from_handle(handle), col); + return this->operator()(get_row_id(handle), col); } /** @@ -140,7 +140,7 @@ struct DenseMatrix __host__ __device__ const T& operator()(const HandleT handle, const uint32_t col) const { - return this->operator()(get_row_id_from_handle(handle), col); + return this->operator()(get_row_id(handle), col); } /** @@ -148,8 +148,7 @@ struct DenseMatrix * handle */ template - __host__ __device__ const uint32_t - get_row_id_from_handle(const HandleT handle) const + __host__ __device__ const uint32_t get_row_id(const HandleT handle) const { auto id = handle.unpack(); diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index 52c00ef5..01be8561 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -32,7 +32,7 @@ enum class Solver * permutation, SYMAMD for Symmetric Approximate Minimum Degree Algorithm based * on Quotient Graph, NSTDIS for Nested Dissection */ -enum class Reorder +enum class PermuteMethod { NONE = 0, SYMRCM = 1, @@ -75,13 +75,15 @@ struct SparseMatrix m_d_solver_val(nullptr), m_h_solver_row_ptr(nullptr), m_h_solver_col_idx(nullptr), 
- m_h_solver_val(nullptr), m_h_permute_map(nullptr), m_d_permute_map(nullptr), m_use_reorder(false), m_reorder_allocated(false), m_d_cusparse_spmm_buffer(nullptr), m_d_cusparse_spmv_buffer(nullptr), + m_chol_buffer(nullptr), + m_d_solver_b(nullptr), + m_d_solver_x(nullptr), m_allocated(LOCATION_NONE) { using namespace rxmesh; @@ -249,8 +251,7 @@ struct SparseMatrix __device__ __host__ const T& operator()(const VertexHandle& row_v, const VertexHandle& col_v) const { - return this->operator()(get_row_id_from_handle(row_v), - get_row_id_from_handle(col_v)); + return this->operator()(get_row_id(row_v), get_row_id(col_v)); } /** @@ -259,8 +260,7 @@ struct SparseMatrix __device__ __host__ T& operator()(const VertexHandle& row_v, const VertexHandle& col_v) { - return this->operator()(get_row_id_from_handle(row_v), - get_row_id_from_handle(col_v)); + return this->operator()(get_row_id(row_v), get_row_id(col_v)); } /** @@ -335,6 +335,15 @@ struct SparseMatrix #endif } + /** + * @brief return the row index corresponding to specific vertex handle + */ + __device__ __host__ const uint32_t + get_row_id(const VertexHandle& handle) const + { + auto id = handle.unpack(); + return m_context.vertex_prefix()[id.first] + id.second; + } /** * @brief release all allocated memory @@ -352,8 +361,11 @@ struct SparseMatrix GPU_FREE(m_d_solver_row_ptr); GPU_FREE(m_d_solver_col_idx); GPU_FREE(m_d_permute); - free(m_h_permute); + GPU_FREE(m_d_solver_x); + GPU_FREE(m_d_solver_b); GPU_FREE(m_d_permute_map); + + free(m_h_permute); free(m_h_permute_map); } GPU_FREE(m_chol_buffer); @@ -413,9 +425,9 @@ struct SparseMatrix * @brief allocate the temp buffer needed for sparse matrix multiplication * by a dense matrix */ - __host__ void alloc_multiply_buffer(const rxmesh::DenseMatrix& B_mat, - rxmesh::DenseMatrix& C_mat, - cudaStream_t stream = 0) + __host__ void alloc_multiply_buffer(const DenseMatrix& B_mat, + DenseMatrix& C_mat, + cudaStream_t stream = 0) { T alpha = 1.0; T beta = 0.0; @@ -454,9 
+466,9 @@ struct SparseMatrix * alloce_multiply_buffer() if it is not called before. Note that this * allocation happens only once and we then reuse it */ - __host__ void multiply(rxmesh::DenseMatrix& B_mat, - rxmesh::DenseMatrix& C_mat, - cudaStream_t stream = 0) + __host__ void multiply(DenseMatrix& B_mat, + DenseMatrix& C_mat, + cudaStream_t stream = 0) { assert(cols() == B_mat.rows()); assert(rows() == C_mat.rows()); @@ -588,9 +600,9 @@ struct SparseMatrix * the columns for B and multiply them separately as sparse matrix dense * vector multiplication */ - void multiply_cw(const rxmesh::DenseMatrix& B_mat, - rxmesh::DenseMatrix& C_mat, - cudaStream_t stream = 0) + void multiply_cw(const DenseMatrix& B_mat, + DenseMatrix& C_mat, + cudaStream_t stream = 0) { assert(cols() == B_mat.cols()); assert(rows() == C_mat.rows()); @@ -602,39 +614,15 @@ struct SparseMatrix } - /** - * @brief solve the Ax=b for x - */ - void solve(const T* B_arr, - T* X_arr, - rxmesh::Solver solver, - rxmesh::Reorder reorder, - cudaStream_t stream = 0) - { - cusparse_linear_solver_wrapper(solver, - reorder, - m_cusolver_sphandle, - m_descr, - m_num_rows, - m_num_cols, - m_nnz, - m_d_row_ptr, - m_d_col_idx, - m_d_val, - B_arr, - X_arr, - stream); - } - /** * @brief solve the AX=B for X where X and B are all dense matrix and we * would solve it in a column wise manner */ - void solve(const rxmesh::DenseMatrix& B_mat, - rxmesh::DenseMatrix& X_mat, - rxmesh::Solver solver, - rxmesh::Reorder reorder, - cudaStream_t stream = 0) + void solve(const DenseMatrix& B_mat, + DenseMatrix& X_mat, + Solver solver, + PermuteMethod reorder, + cudaStream_t stream = 0) { for (int i = 0; i < B_mat.cols(); ++i) { cusparse_linear_solver_wrapper(solver, @@ -653,38 +641,72 @@ struct SparseMatrix } } + /** + * @brief solve the Ax=b for x + */ + void solve(const T* B_arr, + T* X_arr, + Solver solver, + PermuteMethod reorder, + cudaStream_t stream = 0) + { + cusparse_linear_solver_wrapper(solver, + reorder, + 
m_cusolver_sphandle, + m_descr, + m_num_rows, + m_num_cols, + m_nnz, + m_d_row_ptr, + m_d_col_idx, + m_d_val, + B_arr, + X_arr, + stream); + } + /* --- LOW LEVEL API --- */ /** * @brief allocate all temp buffers needed for the solver low-level API */ - void solver_alloc(rxmesh::Reorder reorder) + void solver_permute_alloc(PermuteMethod reorder) { - if (reorder == Reorder::NONE) { + if (reorder == PermuteMethod::NONE) { return; } - m_reorder_allocated = true; - CUDA_ERROR(cudaMalloc((void**)&m_d_solver_val, m_nnz * sizeof(T))); - CUDA_ERROR(cudaMalloc((void**)&m_d_solver_row_ptr, - (m_num_rows + 1) * sizeof(IndexT))); - CUDA_ERROR( - cudaMalloc((void**)&m_d_solver_col_idx, m_nnz * sizeof(IndexT))); + if (!m_reorder_allocated) { + m_reorder_allocated = true; + CUDA_ERROR(cudaMalloc((void**)&m_d_solver_val, m_nnz * sizeof(T))); + CUDA_ERROR(cudaMalloc((void**)&m_d_solver_row_ptr, + (m_num_rows + 1) * sizeof(IndexT))); + CUDA_ERROR(cudaMalloc((void**)&m_d_solver_col_idx, + m_nnz * sizeof(IndexT))); + m_h_solver_row_ptr = + (IndexT*)malloc((m_num_rows + 1) * sizeof(IndexT)); + m_h_solver_col_idx = (IndexT*)malloc(m_nnz * sizeof(IndexT)); - m_h_solver_val = (T*)malloc(m_nnz * sizeof(T)); - m_h_solver_row_ptr = (IndexT*)malloc((m_num_rows + 1) * sizeof(IndexT)); - m_h_solver_col_idx = (IndexT*)malloc(m_nnz * sizeof(IndexT)); + m_h_permute = (IndexT*)malloc(m_num_rows * sizeof(IndexT)); + CUDA_ERROR( + cudaMalloc((void**)&m_d_permute, m_num_rows * sizeof(IndexT))); - m_h_permute = (IndexT*)malloc(m_num_rows * sizeof(IndexT)); - CUDA_ERROR( - cudaMalloc((void**)&m_d_permute, m_num_rows * sizeof(IndexT))); + m_h_permute_map = + static_cast(malloc(m_nnz * sizeof(IndexT))); - m_h_permute_map = static_cast(malloc(m_nnz * sizeof(IndexT))); + CUDA_ERROR( + cudaMalloc((void**)&m_d_permute_map, m_nnz * sizeof(IndexT))); - CUDA_ERROR( - cudaMalloc((void**)&m_d_permute_map, m_nnz * sizeof(IndexT))); + CUDA_ERROR( + cudaMalloc((void**)&m_d_solver_x, m_num_cols * sizeof(T))); + 
CUDA_ERROR( + cudaMalloc((void**)&m_d_solver_b, m_num_rows * sizeof(T))); + } + std::memcpy( + m_h_solver_row_ptr, m_h_row_ptr, (m_num_rows + 1) * sizeof(IndexT)); + std::memcpy(m_h_solver_col_idx, m_h_col_idx, m_nnz * sizeof(IndexT)); } /** @@ -693,55 +715,49 @@ struct SparseMatrix * the solving process. Any other function call order would be undefined. * @param reorder: the reorder method applied. */ - void spmat_chol_reorder(rxmesh::Reorder reorder) + void permute(PermuteMethod reorder = PermuteMethod::NSTDIS) { - solver_alloc(reorder); + solver_permute_alloc(reorder); - if (reorder == Reorder::NONE) { + if (reorder == PermuteMethod::NONE) { RXMESH_WARN( - "SparseMatrix::spmat_chol_reorder() No reordering is " - "specified. Continue without reordering!"); + "SparseMatrix::permute() No reordering is specified. Continue " + "without reordering!"); m_use_reorder = false; return; } - /*check on host*/ - bool on_host = true; - if ((HOST & m_allocated) != HOST) { - move(DEVICE, HOST); - on_host = false; - } - m_use_reorder = true; - if (reorder == Reorder::SYMRCM) { + if (reorder == PermuteMethod::SYMRCM) { CUSOLVER_ERROR(cusolverSpXcsrsymrcmHost(m_cusolver_sphandle, m_num_rows, m_nnz, m_descr, - m_h_row_ptr, - m_h_col_idx, + m_h_solver_row_ptr, + m_h_solver_col_idx, m_h_permute)); - } else if (reorder == Reorder::SYMAMD) { + } else if (reorder == PermuteMethod::SYMAMD) { CUSOLVER_ERROR(cusolverSpXcsrsymamdHost(m_cusolver_sphandle, m_num_rows, m_nnz, m_descr, - m_h_row_ptr, - m_h_col_idx, + m_h_solver_row_ptr, + m_h_solver_col_idx, m_h_permute)); - } else if (reorder == Reorder::NSTDIS) { + } else if (reorder == PermuteMethod::NSTDIS) { CUSOLVER_ERROR(cusolverSpXcsrmetisndHost(m_cusolver_sphandle, m_num_rows, m_nnz, m_descr, - m_h_row_ptr, - m_h_col_idx, + m_h_solver_row_ptr, + m_h_solver_col_idx, NULL, m_h_permute)); } + // copy permutation to the device CUDA_ERROR(cudaMemcpyAsync(m_d_permute, m_h_permute, m_num_rows * sizeof(IndexT), @@ -749,8 +765,13 @@ struct 
SparseMatrix // working space for permutation: B = A*Q*A^T // the permutation for matrix A which works only for the col and row - // indices, the val will be done on device with the d/h_val_permute + // indices, the val will be done on device with the m_d_permute_map + // only on the device since we don't need to access the permuted val on + // the host at all + for (int j = 0; j < m_nnz; j++) { + m_h_permute_map[j] = j; + } size_t size_perm = 0; void* perm_buffer_cpu = NULL; @@ -760,67 +781,45 @@ struct SparseMatrix m_num_cols, m_nnz, m_descr, - m_h_row_ptr, - m_h_col_idx, + m_h_solver_row_ptr, + m_h_solver_col_idx, m_h_permute, m_h_permute, &size_perm)); perm_buffer_cpu = (void*)malloc(sizeof(char) * size_perm); - for (int j = 0; j < m_nnz; j++) { - m_h_permute_map[j] = j; - } - + // permute the matrix CUSOLVER_ERROR(cusolverSpXcsrpermHost(m_cusolver_sphandle, m_num_rows, m_num_cols, m_nnz, m_descr, - m_h_row_ptr, - m_h_col_idx, + m_h_solver_row_ptr, + m_h_solver_col_idx, m_h_permute, m_h_permute, m_h_permute_map, perm_buffer_cpu)); - - // do the permutation for val indices on host - // T* tmp_h_val = static_cast(malloc(m_nnz * sizeof(T))); - - // for (int j = 0; j < m_nnz; j++) { - // tmp_h_val[j] = m_h_val[j]; - // } - // for (int j = 0; j < m_nnz; j++) { - // m_h_val[j] = tmp_h_val[m_h_permute_map[j]]; - // } - - // copy the purmutated csr from the host + // copy the permute csr from the device CUDA_ERROR(cudaMemcpyAsync(m_d_solver_row_ptr, - m_h_row_ptr, + m_h_solver_row_ptr, (m_num_rows + 1) * sizeof(IndexT), cudaMemcpyHostToDevice)); CUDA_ERROR(cudaMemcpyAsync(m_d_solver_col_idx, - m_h_col_idx, + m_h_solver_col_idx, m_nnz * sizeof(IndexT), cudaMemcpyHostToDevice)); - // do the permutation for val indices on device + // do the permutation for val on device CUDA_ERROR(cudaMemcpyAsync(m_d_permute_map, m_h_permute_map, m_nnz * sizeof(IndexT), cudaMemcpyHostToDevice)); - permute_gather(m_d_permute_map, m_d_val, m_d_solver_val, m_nnz); - // restore the host data 
back to the original - if (on_host) { - move(DEVICE, HOST); - } else { - release(HOST); - } - free(perm_buffer_cpu); } @@ -828,7 +827,7 @@ struct SparseMatrix * @brief The lower level api of matrix analysis. Generating a member value * of type csrcholInfo_t for cucolver. */ - void spmat_chol_analysis() + void analyze_pattern() { if (!m_use_reorder) { m_d_solver_row_ptr = m_d_row_ptr; @@ -836,8 +835,6 @@ struct SparseMatrix m_d_solver_val = m_d_val; } - m_internalDataInBytes = 0; - m_workspaceInBytes = 0; CUSOLVER_ERROR(cusolverSpXcsrcholAnalysis(m_cusolver_sphandle, m_num_rows, m_nnz, @@ -851,8 +848,11 @@ struct SparseMatrix * @brief The lower level api of matrix factorization buffer calculation and * allocation. The buffer is a member variable. */ - void spmat_chol_buffer_alloc() + void post_analyze_alloc() { + m_internalDataInBytes = 0; + m_workspaceInBytes = 0; + if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpScsrcholBufferInfo(m_cusolver_sphandle, m_num_rows, @@ -887,7 +887,7 @@ struct SparseMatrix * @brief The lower level api of matrix factorization and save the * factorization result in to the buffer. */ - void spmat_chol_factor() + void factorize() { if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpScsrcholFactor(m_cusolver_sphandle, @@ -925,8 +925,8 @@ struct SparseMatrix } if (0 <= singularity) { RXMESH_WARN( - "SparseMatrix::spmat_chol_factor() The matrix is singular at " - "row {} under tol ({})", + "SparseMatrix::factorize() The matrix is singular at row {} " + "under tol ({})", singularity, tol); } @@ -934,24 +934,40 @@ struct SparseMatrix /** * @brief The lower level api of solving the linear system after using - * cholesky factorization. The format follows Ax=b to solve x, where A is - * the sparse matrix, x and b are device array. As long as A doesn't change. - * This function could be called for many different b and x. - * @param d_b: device array of b - * @param d_x: device array of x + * factorization. 
The format follows Ax=b to solve x, where A is this sparse + * matrix, x and b are device array. As long as A doesn't change. This + * function could be called for many different b and x. + * @param B_mat: right hand side + * @param X_mat: output solution */ - void spmat_chol_solve(T* d_b, T* d_x) + void solve(DenseMatrix& B_mat, + DenseMatrix& X_mat, + cudaStream_t stream = NULL) { + CUSOLVER_ERROR(cusolverSpSetStream(m_cusolver_sphandle, stream)); + for (int i = 0; i < B_mat.cols(); ++i) { + solve(B_mat.col_data(i), X_mat.col_data(i)); + } + } + /** + * @brief The lower level api of solving the linear system after using + * factorization. The format follows Ax=b to solve x, where A is the sparse + * matrix, x and b are device array. As long as A doesn't change. This + * function could be called for many different b and x. + * @param d_b: right hand side + * @param d_x: output solution + */ + void solve(T* d_b, T* d_x) + { T* d_solver_b; T* d_solver_x; if (m_use_reorder) { // purmute b and x - CUDA_ERROR(cudaMalloc((void**)&d_solver_b, m_num_rows * sizeof(T))); + d_solver_b = m_d_solver_b; + d_solver_x = m_d_solver_x; permute_gather(m_d_permute, d_b, d_solver_b, m_num_rows); - - CUDA_ERROR(cudaMalloc((void**)&d_solver_x, m_num_cols * sizeof(T))); permute_gather(m_d_permute, d_x, d_solver_x, m_num_rows); } else { d_solver_b = d_b; @@ -978,20 +994,9 @@ struct SparseMatrix if (m_use_reorder) { permute_scatter(m_d_permute, d_solver_x, d_x, m_num_rows); - GPU_FREE(d_solver_b); - GPU_FREE(d_solver_x); } } - /** - * @brief return the row index corresponding to specific vertex handle - */ - __device__ __host__ const uint32_t - get_row_id_from_handle(const VertexHandle& handle) const - { - auto id = handle.unpack(); - return m_context.vertex_prefix()[id.first] + id.second; - } private: void release(locationT location) @@ -1045,19 +1050,19 @@ struct SparseMatrix * @brief wrapper for cuSolver API for solving linear systems using cuSolver * High-level API */ - void 
cusparse_linear_solver_wrapper(const rxmesh::Solver solver, - const rxmesh::Reorder reorder, - cusolverSpHandle_t handle, - cusparseMatDescr_t descrA, - int rowsA, - int colsA, - int nnzA, - const int* d_csrRowPtrA, - const int* d_csrColIndA, - const T* d_csrValA, - const T* d_b, - T* d_x, - cudaStream_t stream) + void cusparse_linear_solver_wrapper(const Solver solver, + const PermuteMethod reorder, + cusolverSpHandle_t handle, + cusparseMatDescr_t descrA, + int rowsA, + int colsA, + int nnzA, + const int* d_csrRowPtrA, + const int* d_csrColIndA, + const T* d_csrValA, + const T* d_b, + T* d_x, + cudaStream_t stream) { CUSOLVER_ERROR(cusolverSpSetStream(handle, stream)); @@ -1282,25 +1287,24 @@ struct SparseMatrix } } - int reorder_to_int(const Reorder& reorder) const + int reorder_to_int(const PermuteMethod& reorder) const { switch (reorder) { - case Reorder::NONE: + case PermuteMethod::NONE: return 0; - case Reorder::SYMRCM: + case PermuteMethod::SYMRCM: return 1; - case Reorder::SYMAMD: + case PermuteMethod::SYMAMD: return 2; - case Reorder::NSTDIS: + case PermuteMethod::NSTDIS: return 3; default: { - RXMESH_ERROR("reorder_to_int() unknown input reorder"); + RXMESH_ERROR("reorder_to_int() unknown input"); return 0; } } } - void permute_scatter(IndexT* d_p, T* d_in, T* d_out, IndexT size) { // d_out[d_p[i]] = d_in[i] @@ -1366,11 +1370,13 @@ struct SparseMatrix IndexT* m_h_solver_row_ptr; IndexT* m_h_solver_col_idx; - T* m_h_solver_val; IndexT* m_h_permute_map; IndexT* m_d_permute_map; + T* m_d_solver_b; + T* m_d_solver_x; + void* m_d_cusparse_spmm_buffer; void* m_d_cusparse_spmv_buffer; diff --git a/tests/RXMesh_test/test_sparse_matrix.cuh b/tests/RXMesh_test/test_sparse_matrix.cuh index c13438c9..2e4695b8 100644 --- a/tests/RXMesh_test/test_sparse_matrix.cuh +++ b/tests/RXMesh_test/test_sparse_matrix.cuh @@ -299,7 +299,7 @@ TEST(RXMeshStatic, SparseMatrixSimpleSolve) launch_box.smem_bytes_dyn>>>( rx.get_context(), *coords, A_mat, X_mat, B_mat, time_step); - 
A_mat.solve(B_mat, X_mat, Solver::CHOL, Reorder::NSTDIS); + A_mat.solve(B_mat, X_mat, Solver::CHOL, PermuteMethod::NSTDIS); // timing begins for spmm GPUTimer timer; @@ -364,15 +364,13 @@ TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) // A_mat.solve(B_mat, X_mat, Solver::CHOL, Reorder::NSTDIS); - A_mat.spmat_chol_reorder(Reorder::NSTDIS); - A_mat.spmat_chol_analysis(); - A_mat.spmat_chol_buffer_alloc(); - A_mat.spmat_chol_factor(); - - for (int i = 0; i < B_mat.cols(); ++i) { - A_mat.spmat_chol_solve(B_mat.col_data(i), X_mat.col_data(i)); - } - + A_mat.solver_permute_alloc(PermuteMethod::NSTDIS); + A_mat.permute(PermuteMethod::NSTDIS); + A_mat.analyze_pattern(); + A_mat.post_analyze_alloc(); + A_mat.factorize(); + A_mat.solve(B_mat, X_mat); + A_mat.multiply(X_mat, ret_mat); std::vector> h_ret_mat(num_vertices); From 76368fc62b30cfe3c2eb81ed8b2c984cb5a1571f Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 18 Jul 2024 13:56:24 -0400 Subject: [PATCH 29/96] Sparse Matrix pre_solve --- apps/MCF/mcf_cusolver_chol.cuh | 6 +----- include/rxmesh/matrix/sparse_matrix.cuh | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/apps/MCF/mcf_cusolver_chol.cuh b/apps/MCF/mcf_cusolver_chol.cuh index 805cf3f9..c8a2d0b3 100644 --- a/apps/MCF/mcf_cusolver_chol.cuh +++ b/apps/MCF/mcf_cusolver_chol.cuh @@ -184,11 +184,7 @@ void mcf_cusolver_chol(rxmesh::RXMeshStatic& rx) // Solving the linear system using chol factorization and no reordering // A_mat.solve(B_mat, *X_mat, Solver::CHOL, Reorder::NONE); - A_mat.solver_permute_alloc(PermuteMethod::NSTDIS); - A_mat.permute(PermuteMethod::NSTDIS); - A_mat.analyze_pattern(); - A_mat.post_analyze_alloc(); - A_mat.factorize(); + A_mat.pre_solve(PermuteMethod::NSTDIS); A_mat.solve(B_mat, *X_mat); diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index 01be8561..ea6d3933 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ 
-932,6 +932,20 @@ struct SparseMatrix } } + /** + * @brief Call all the necessary functions to permute and factorize the + * sparse matrix before calling the solve() method below. After calling this + * pre_solve(), solver() can be called with multiple right hand sides + */ + void pre_solve(PermuteMethod reorder = PermuteMethod::NSTDIS) + { + solver_permute_alloc(PermuteMethod::NSTDIS); + permute(PermuteMethod::NSTDIS); + analyze_pattern(); + post_analyze_alloc(); + factorize(); + } + /** * @brief The lower level api of solving the linear system after using * factorization. The format follows Ax=b to solve x, where A is this sparse From e24c5b4c74f43ba7b974e8abe9811eee98da0b92 Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 18 Jul 2024 14:52:48 -0400 Subject: [PATCH 30/96] test LU --- apps/MCF/mcf_cusolver_chol.cuh | 15 +- include/rxmesh/matrix/sparse_matrix.cuh | 270 ++++++++++++++--------- tests/RXMesh_test/test_sparse_matrix.cuh | 6 +- 3 files changed, 177 insertions(+), 114 deletions(-) diff --git a/apps/MCF/mcf_cusolver_chol.cuh b/apps/MCF/mcf_cusolver_chol.cuh index c8a2d0b3..9c640ecd 100644 --- a/apps/MCF/mcf_cusolver_chol.cuh +++ b/apps/MCF/mcf_cusolver_chol.cuh @@ -181,14 +181,25 @@ void mcf_cusolver_chol(rxmesh::RXMeshStatic& rx) Arg.use_uniform_laplace, Arg.time_step); - // Solving the linear system using chol factorization and no reordering - // A_mat.solve(B_mat, *X_mat, Solver::CHOL, Reorder::NONE); + // To Use LU, we have to move the data to the host + // A_mat.move(DEVICE, HOST); + // B_mat.move(DEVICE, HOST); + // X_mat->move(DEVICE, HOST); + // A_mat.solve(B_mat, *X_mat, Solver::LU, PermuteMethod::NSTDIS); + + // Solving using QR or CHOL + // A_mat.solve(B_mat, *X_mat, Solver::QR, PermuteMethod::NSTDIS); + // A_mat.solve(B_mat, *X_mat, Solver::CHOL, PermuteMethod::NSTDIS); + + // Solving using CHOL A_mat.pre_solve(PermuteMethod::NSTDIS); A_mat.solve(B_mat, *X_mat); // move the results to the host + // if we use LU, the data will be on the host and 
we should not move the + // device to the host X_mat->move(rxmesh::DEVICE, rxmesh::HOST); // copy the results to attributes diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index ea6d3933..30390134 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -625,19 +625,13 @@ struct SparseMatrix cudaStream_t stream = 0) { for (int i = 0; i < B_mat.cols(); ++i) { - cusparse_linear_solver_wrapper(solver, - reorder, - m_cusolver_sphandle, - m_descr, - m_num_rows, - m_num_cols, - m_nnz, - m_d_row_ptr, - m_d_col_idx, - m_d_val, - B_mat.col_data(i), - X_mat.col_data(i), - stream); + cusparse_linear_solver_wrapper( + solver, + reorder, + m_cusolver_sphandle, + B_mat.col_data(i, solver == Solver::LU ? HOST : DEVICE), + X_mat.col_data(i, solver == Solver::LU ? HOST : DEVICE), + stream); } } @@ -650,19 +644,8 @@ struct SparseMatrix PermuteMethod reorder, cudaStream_t stream = 0) { - cusparse_linear_solver_wrapper(solver, - reorder, - m_cusolver_sphandle, - m_descr, - m_num_rows, - m_num_cols, - m_nnz, - m_d_row_ptr, - m_d_col_idx, - m_d_val, - B_arr, - X_arr, - stream); + cusparse_linear_solver_wrapper( + solver, reorder, m_cusolver_sphandle, B_arr, X_arr, stream); } @@ -671,7 +654,7 @@ struct SparseMatrix /** * @brief allocate all temp buffers needed for the solver low-level API */ - void solver_permute_alloc(PermuteMethod reorder) + void permute_alloc(PermuteMethod reorder) { if (reorder == PermuteMethod::NONE) { return; @@ -717,7 +700,7 @@ struct SparseMatrix */ void permute(PermuteMethod reorder = PermuteMethod::NSTDIS) { - solver_permute_alloc(reorder); + permute_alloc(reorder); if (reorder == PermuteMethod::NONE) { RXMESH_WARN( @@ -768,7 +751,7 @@ struct SparseMatrix // indices, the val will be done on device with the m_d_permute_map // only on the device since we don't need to access the permuted val on // the host at all - +#pragma omp parallel for for (int j = 0; j < m_nnz; j++) 
{ m_h_permute_map[j] = j; } @@ -866,6 +849,19 @@ struct SparseMatrix &m_workspaceInBytes)); } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpCcsrcholBufferInfo(m_cusolver_sphandle, + m_num_rows, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_chol_info, + &m_internalDataInBytes, + &m_workspaceInBytes)); + } + if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpDcsrcholBufferInfo(m_cusolver_sphandle, m_num_rows, @@ -879,6 +875,19 @@ struct SparseMatrix &m_workspaceInBytes)); } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpZcsrcholBufferInfo(m_cusolver_sphandle, + m_num_rows, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_chol_info, + &m_internalDataInBytes, + &m_workspaceInBytes)); + } + CUDA_ERROR(cudaMalloc((void**)&m_chol_buffer, m_workspaceInBytes)); } @@ -900,6 +909,18 @@ struct SparseMatrix m_chol_info, m_chol_buffer)); } + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpCcsrcholFactor(m_cusolver_sphandle, + m_num_rows, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_chol_info, + m_chol_buffer)); + } if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpDcsrcholFactor(m_cusolver_sphandle, m_num_rows, @@ -911,6 +932,17 @@ struct SparseMatrix m_chol_info, m_chol_buffer)); } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpZcsrcholFactor(m_cusolver_sphandle, + m_num_rows, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_chol_info, + m_chol_buffer)); + } double tol = 1.0e-8; int singularity; @@ -919,10 +951,18 @@ struct SparseMatrix CUSOLVER_ERROR(cusolverSpScsrcholZeroPivot( m_cusolver_sphandle, m_chol_info, tol, &singularity)); } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpCcsrcholZeroPivot( + m_cusolver_sphandle, m_chol_info, tol, &singularity)); + } if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpDcsrcholZeroPivot( 
m_cusolver_sphandle, m_chol_info, tol, &singularity)); } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpZcsrcholZeroPivot( + m_cusolver_sphandle, m_chol_info, tol, &singularity)); + } if (0 <= singularity) { RXMESH_WARN( "SparseMatrix::factorize() The matrix is singular at row {} " @@ -939,7 +979,7 @@ struct SparseMatrix */ void pre_solve(PermuteMethod reorder = PermuteMethod::NSTDIS) { - solver_permute_alloc(PermuteMethod::NSTDIS); + permute_alloc(PermuteMethod::NSTDIS); permute(PermuteMethod::NSTDIS); analyze_pattern(); post_analyze_alloc(); @@ -997,6 +1037,15 @@ struct SparseMatrix m_chol_buffer)); } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpCcsrcholSolve(m_cusolver_sphandle, + m_num_rows, + d_solver_b, + d_solver_x, + m_chol_info, + m_chol_buffer)); + } + if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpDcsrcholSolve(m_cusolver_sphandle, m_num_rows, @@ -1005,6 +1054,14 @@ struct SparseMatrix m_chol_info, m_chol_buffer)); } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpZcsrcholSolve(m_cusolver_sphandle, + m_num_rows, + d_solver_b, + d_solver_x, + m_chol_info, + m_chol_buffer)); + } if (m_use_reorder) { permute_scatter(m_d_permute, d_solver_x, d_x, m_num_rows); @@ -1067,13 +1124,6 @@ struct SparseMatrix void cusparse_linear_solver_wrapper(const Solver solver, const PermuteMethod reorder, cusolverSpHandle_t handle, - cusparseMatDescr_t descrA, - int rowsA, - int colsA, - int nnzA, - const int* d_csrRowPtrA, - const int* d_csrColIndA, - const T* d_csrValA, const T* d_b, T* d_x, cudaStream_t stream) @@ -1088,12 +1138,12 @@ struct SparseMatrix if (solver == Solver::CHOL) { if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpScsrlsvchol(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, + rows(), + non_zeros(), + m_descr, + m_d_val, + m_d_row_ptr, + m_d_col_idx, d_b, tol, reorder_to_int(reorder), @@ -1103,12 +1153,12 @@ struct SparseMatrix if constexpr (std::is_same_v) { 
CUSOLVER_ERROR(cusolverSpCcsrlsvchol(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, + rows(), + non_zeros(), + m_descr, + m_d_val, + m_d_row_ptr, + m_d_col_idx, d_b, tol, reorder_to_int(reorder), @@ -1118,12 +1168,12 @@ struct SparseMatrix if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpDcsrlsvchol(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, + rows(), + non_zeros(), + m_descr, + m_d_val, + m_d_row_ptr, + m_d_col_idx, d_b, tol, reorder_to_int(reorder), @@ -1132,12 +1182,13 @@ struct SparseMatrix } if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpZcsrlsvchol(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, + rows(), + non_zeros(), + m_d_val, + m_descr, + m_d_val, + m_d_row_ptr, + m_d_col_idx, d_b, tol, reorder_to_int(reorder), @@ -1148,12 +1199,12 @@ struct SparseMatrix } else if (solver == Solver::QR) { if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpScsrlsvqr(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, + rows(), + non_zeros(), + m_descr, + m_d_val, + m_d_row_ptr, + m_d_col_idx, d_b, tol, reorder_to_int(reorder), @@ -1163,12 +1214,12 @@ struct SparseMatrix if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpCcsrlsvqr(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, + rows(), + non_zeros(), + m_descr, + m_d_val, + m_d_row_ptr, + m_d_col_idx, d_b, tol, reorder_to_int(reorder), @@ -1178,12 +1229,13 @@ struct SparseMatrix if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpDcsrlsvqr(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, + rows(), + non_zeros(), + m_d_val, + m_descr, + m_d_val, + m_d_row_ptr, + m_d_col_idx, d_b, tol, reorder_to_int(reorder), @@ -1192,12 +1244,12 @@ struct SparseMatrix } if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpZcsrlsvqr(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, 
+ rows(), + non_zeros(), + m_descr, + m_d_val, + m_d_row_ptr, + m_d_col_idx, d_b, tol, reorder_to_int(reorder), @@ -1211,12 +1263,12 @@ struct SparseMatrix if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpScsrlsvluHost(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, + rows(), + non_zeros(), + m_descr, + m_h_val, + m_h_row_ptr, + m_h_col_idx, d_b, tol, reorder_to_int(reorder), @@ -1226,12 +1278,12 @@ struct SparseMatrix if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpCcsrlsvluHost(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, + rows(), + non_zeros(), + m_descr, + m_h_val, + m_h_row_ptr, + m_h_col_idx, d_b, tol, reorder_to_int(reorder), @@ -1241,12 +1293,12 @@ struct SparseMatrix if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpDcsrlsvluHost(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, + rows(), + non_zeros(), + m_descr, + m_h_val, + m_h_row_ptr, + m_h_col_idx, d_b, tol, reorder_to_int(reorder), @@ -1255,12 +1307,12 @@ struct SparseMatrix } if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpZcsrlsvluHost(handle, - rowsA, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, + rows(), + non_zeros(), + m_descr, + m_h_val, + m_h_row_ptr, + m_h_col_idx, d_b, tol, reorder_to_int(reorder), diff --git a/tests/RXMesh_test/test_sparse_matrix.cuh b/tests/RXMesh_test/test_sparse_matrix.cuh index 2e4695b8..25fdc2df 100644 --- a/tests/RXMesh_test/test_sparse_matrix.cuh +++ b/tests/RXMesh_test/test_sparse_matrix.cuh @@ -362,15 +362,15 @@ TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) launch_box.smem_bytes_dyn>>>( rx.get_context(), *coords, A_mat, X_mat, B_mat, time_step); - // A_mat.solve(B_mat, X_mat, Solver::CHOL, Reorder::NSTDIS); + // A_mat.solve(B_mat, X_mat, Solver::CHOL, PermuteMethod::NSTDIS); - A_mat.solver_permute_alloc(PermuteMethod::NSTDIS); + A_mat.permute_alloc(PermuteMethod::NSTDIS); A_mat.permute(PermuteMethod::NSTDIS); 
A_mat.analyze_pattern(); A_mat.post_analyze_alloc(); A_mat.factorize(); A_mat.solve(B_mat, X_mat); - + A_mat.multiply(X_mat, ret_mat); std::vector> h_ret_mat(num_vertices); From a36fd8aa32099458379af0e594c1d0452aa7d3ab Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 18 Jul 2024 15:44:10 -0400 Subject: [PATCH 31/96] sparse matrix types --- include/rxmesh/matrix/sparse_matrix.cuh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index 30390134..2b846dd7 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -1345,6 +1345,22 @@ struct SparseMatrix return CUDA_C_32F; } else if (std::is_same_v) { return CUDA_C_64F; + } else if (std::is_same_v) { + return CUDA_R_8I; + } else if (std::is_same_v) { + return CUDA_R_8U; + } else if (std::is_same_v) { + return CUDA_R_16I; + } else if (std::is_same_v) { + return CUDA_R_16U; + } else if (std::is_same_v || std::is_same_v) { + return CUDA_R_32I; + } else if (std::is_same_v) { + return CUDA_R_32U; + } else if (std::is_same_v) { + return CUDA_R_64I; + } else if (std::is_same_v) { + return CUDA_R_64U; } else { RXMESH_ERROR( "SparseMatrix unsupported type. 
SparseMatrix can support " From b6a9e721c9b5f77a1ea697eba4c64bff0b063f8f Mon Sep 17 00:00:00 2001 From: ahmed Date: Fri, 19 Jul 2024 10:55:27 -0400 Subject: [PATCH 32/96] identify boundary vertices --- include/rxmesh/attribute.h | 16 ++++---- include/rxmesh/kernels/boundary.cuh | 46 ++++++++++++++++++++++ include/rxmesh/matrix/dense_matrix.cuh | 10 +++++ include/rxmesh/matrix/sparse_matrix.cuh | 4 ++ include/rxmesh/rxmesh_static.h | 51 +++++++++++++++++++++++++ tests/RXMesh_test/CMakeLists.txt | 1 + tests/RXMesh_test/rxmesh_test_main.cu | 1 + tests/RXMesh_test/test_boundary.cuh | 36 +++++++++++++++++ 8 files changed, 156 insertions(+), 9 deletions(-) create mode 100644 include/rxmesh/kernels/boundary.cuh create mode 100644 tests/RXMesh_test/test_boundary.cuh diff --git a/include/rxmesh/attribute.h b/include/rxmesh/attribute.h index 1d5b89b4..5d7c1de4 100644 --- a/include/rxmesh/attribute.h +++ b/include/rxmesh/attribute.h @@ -7,7 +7,6 @@ #include "rxmesh/kernels/attribute.cuh" #include "rxmesh/kernels/collective.cuh" #include "rxmesh/kernels/util.cuh" -#include "rxmesh/matrix/dense_matrix.cuh" #include "rxmesh/patch_info.h" #include "rxmesh/rxmesh.h" #include "rxmesh/types.h" @@ -15,6 +14,8 @@ #include "rxmesh/util/log.h" #include "rxmesh/util/util.h" +#include "rxmesh/matrix/dense_matrix.cuh" + #define GLM_ENABLE_EXPERIMENTAL #include #include @@ -385,9 +386,7 @@ class Attribute : public AttributeBase */ void reset(const T value, locationT location, cudaStream_t stream = NULL) { - if ((location & DEVICE) == DEVICE) { - - assert((m_allocated & DEVICE) == DEVICE); + if (((location & DEVICE) == DEVICE) && is_host_allocated()) { const int threads = 256; detail::template memset_attribute @@ -399,8 +398,8 @@ class Attribute : public AttributeBase } - if ((location & HOST) == HOST) { - assert((m_allocated & HOST) == HOST); + if (((location & HOST) == HOST) && is_host_allocated()) { + #pragma omp parallel for for (int p = 0; p < static_cast(m_rxmesh->get_num_patches()); 
++p) { @@ -480,7 +479,7 @@ class Attribute : public AttributeBase */ void release(locationT location = LOCATION_ALL) { - if (((location & HOST) == HOST) && ((m_allocated & HOST) == HOST)) { + if (((location & HOST) == HOST) && is_host_allocated()) { for (uint32_t p = 0; p < m_rxmesh->get_max_num_patches(); ++p) { free(m_h_attr[p]); } @@ -489,8 +488,7 @@ class Attribute : public AttributeBase m_allocated = m_allocated & (~HOST); } - if (((location & DEVICE) == DEVICE) && - ((m_allocated & DEVICE) == DEVICE)) { + if (((location & DEVICE) == DEVICE) && is_device_allocated()) { for (uint32_t p = 0; p < m_rxmesh->get_max_num_patches(); ++p) { GPU_FREE(m_h_ptr_on_device[p]); } diff --git a/include/rxmesh/kernels/boundary.cuh b/include/rxmesh/kernels/boundary.cuh new file mode 100644 index 00000000..44af7fce --- /dev/null +++ b/include/rxmesh/kernels/boundary.cuh @@ -0,0 +1,46 @@ +#pragma once +#include "rxmesh/attribute.h" +#include "rxmesh/context.h" + +#include "rxmesh/bitmask.cuh" +#include "rxmesh/query.cuh" + +namespace rxmesh { + +namespace detail { +template +__global__ void identify_boundary_vertices(const Context context, + VertexAttribute boundary_v) +{ + auto block = cooperative_groups::this_thread_block(); + + Query query(context); + + ShmemAllocator shrd_alloc; + + Bitmask bd_e(query.get_patch_info().num_edges[0], shrd_alloc); + + auto boundary_edges = [&](EdgeHandle& e_id, const FaceIterator& iter) { + if (iter.size() < 2) { + bd_e.set(e_id.local_id(), true); + } + }; + + query.dispatch(block, shrd_alloc, boundary_edges); + + block.sync(); + + + auto boundary_vertices = [&](EdgeHandle& e_id, const VertexIterator& iter) { + if (bd_e(e_id.local_id())) { + boundary_v(iter[0], 0) = T(1); + boundary_v(iter[1], 0) = T(1); + } + }; + + query.dispatch(block, shrd_alloc, boundary_vertices); +} +} // namespace detail + + +} // namespace rxmesh \ No newline at end of file diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh 
index 85ca6140..ba4d6e80 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -90,6 +90,16 @@ struct DenseMatrix return m_num_cols; } + /** + * @brief set all entries in the matrix to zeros on both host and device + */ + __host__ void set_zeros() + { + std::memset(m_h_val, 0, bytes()); + + CUDA_ERROR(cudaMemset(m_d_val, 0, bytes())); + } + /** * @brief accessing a specific value in the matrix using the row and col * index. Can be used on both host and device diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index 2b846dd7..72a96046 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -4,6 +4,8 @@ #include "rxmesh/attribute.h" #include "rxmesh/context.h" +#include "rxmesh/launch_box.h" + #include "thrust/device_ptr.h" #include "thrust/execution_policy.h" #include "thrust/gather.h" @@ -14,6 +16,8 @@ #include "rxmesh/matrix/sparse_matrix_kernels.cuh" +#include "rxmesh/launch_box.h" + namespace rxmesh { /** diff --git a/include/rxmesh/rxmesh_static.h b/include/rxmesh/rxmesh_static.h index 0c5fca49..afe5433a 100644 --- a/include/rxmesh/rxmesh_static.h +++ b/include/rxmesh/rxmesh_static.h @@ -17,6 +17,9 @@ #include "rxmesh/util/import_obj.h" #include "rxmesh/util/log.h" #include "rxmesh/util/timer.h" + +#include "rxmesh/kernels/boundary.cuh" + #if USE_POLYSCOPE #include "polyscope/surface_mesh.h" #endif @@ -1006,6 +1009,54 @@ class RXMeshStatic : public RXMesh m_attr_container->remove(name.c_str()); } + /** + * @brief populate boundary_v with 1 if the vertex is a boundary vertex and + * 0 otherwise. Only the first attribute (i.e., boundary_v(vh, 0)) will be + * populated. Possible types of T is bool or int (and maybe float). The + * results will be first calculated on device and then move to the host is + * boundary_v is allocated on the host. 
+ */ + template + void get_boundary_vertices(VertexAttribute& boundary_v, + bool move_to_host = true, + cudaStream_t stream = NULL) const + { + if (!boundary_v.is_device_allocated()) { + RXMESH_ERROR( + "RXMeshStatic::get_boundary_vertices the input/output " + "VertexAttribute (i.e., boundary_v) should be allocated on " + "device since the boundary vertices are identified first on " + "the device (before optionally moving them to the host). " + "Returning without calculating the boundary vertices!"); + return; + } + + boundary_v.reset(0, LOCATION_ALL); + + constexpr uint32_t blockThreads = 256; + + LaunchBox lb; + + prepare_launch_box( + {Op::EF, Op::EV}, + lb, + (void*)detail::identify_boundary_vertices, + false, + false, + false, + [&](uint32_t v, uint32_t e, uint32_t f) { + return detail::mask_num_bytes(e) + + ShmemAllocator::default_alignment; + }); + + detail::identify_boundary_vertices + <<>>( + get_context(), boundary_v); + + if (move_to_host && boundary_v.is_host_allocated()) { + boundary_v.move(DEVICE, HOST, stream); + } + } /** * @brief return a shared pointer the input vertex position diff --git a/tests/RXMesh_test/CMakeLists.txt b/tests/RXMesh_test/CMakeLists.txt index 52e24db8..fdd88c66 100644 --- a/tests/RXMesh_test/CMakeLists.txt +++ b/tests/RXMesh_test/CMakeLists.txt @@ -23,6 +23,7 @@ set( SOURCE_LIST test_multi_queries.cuh test_wasted_work.cuh test_eigen.cuh + test_boundary.cuh ) target_sources( RXMesh_test diff --git a/tests/RXMesh_test/rxmesh_test_main.cu b/tests/RXMesh_test/rxmesh_test_main.cu index cc86bf55..cf46b521 100644 --- a/tests/RXMesh_test/rxmesh_test_main.cu +++ b/tests/RXMesh_test/rxmesh_test_main.cu @@ -32,6 +32,7 @@ struct RXMeshTestArg #include "test_multi_queries.cuh" #include "test_wasted_work.cuh" #include "test_eigen.cuh" +#include "test_boundary.cuh" // clang-format on int main(int argc, char** argv) diff --git a/tests/RXMesh_test/test_boundary.cuh b/tests/RXMesh_test/test_boundary.cuh new file mode 100644 index 
00000000..c69bb89e --- /dev/null +++ b/tests/RXMesh_test/test_boundary.cuh @@ -0,0 +1,36 @@ +#include "gtest/gtest.h" + +#include "rxmesh/rxmesh_static.h" + +TEST(RXMeshStatic, BoundaryVertex) +{ + using namespace rxmesh; + + cuda_query(rxmesh_args.device_id); + + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "bunnyhead.obj"); + + auto v_bd = *rx.add_vertex_attribute("vBoundary", 1); + + rx.get_boundary_vertices(v_bd); + + uint32_t num_bd_vertices = 0; + + rx.for_each_vertex( + HOST, + [&](const VertexHandle& vh) { + if (v_bd(vh)) { + num_bd_vertices++; + } + }, + NULL, + false); + + EXPECT_EQ(num_bd_vertices, 98); + + // auto ps = rx.get_polyscope_mesh(); + // ps->addVertexScalarQuantity("vBoundary", *v_bd); + // polyscope::show(); + + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); +} \ No newline at end of file From f05145e255de9886aabf6e84363dec13367b6743 Mon Sep 17 00:00:00 2001 From: ahmed Date: Fri, 19 Jul 2024 10:55:41 -0400 Subject: [PATCH 33/96] bunny head --- .gitignore | 1 + input/bunnyhead.obj | 4566 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 4567 insertions(+) create mode 100644 input/bunnyhead.obj diff --git a/.gitignore b/.gitignore index cff3e42b..52c3f813 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ input/* !input/cloth.obj !input/plane_5.obj !input/sphere1.obj +!input/bunnyhead.obj build/ include/rxmesh/util/git_sha1.cpp .vscode/ diff --git a/input/bunnyhead.obj b/input/bunnyhead.obj new file mode 100644 index 00000000..8370a3ce --- /dev/null +++ b/input/bunnyhead.obj @@ -0,0 +1,4566 @@ +#### +# +# OBJ File Generated by Meshlab +# +#### +# Object bunnyhead.obj +# +# Vertices: 1550 +# Faces: 3000 +# +#### +v -0.135664 -0.074190 0.522640 +v -0.261942 0.060498 -0.237245 +v -0.246716 0.066792 -0.301494 +v -0.218417 0.169934 0.113615 +v -0.452224 -0.418911 0.264231 +v -0.215609 0.173376 0.151984 +v -0.094483 0.173554 0.287146 +v -0.457597 0.069166 0.159663 +v -0.205472 0.272751 -0.153300 +v -0.110833 0.177003 0.241123 +v 
-0.220588 0.380744 -0.424053 +v -0.359331 0.169205 0.245717 +v -0.336963 0.174853 0.294162 +v -0.262853 0.174240 0.139340 +v -0.193063 0.162031 0.040476 +v -0.230045 0.455395 -0.575286 +v -0.143980 0.459410 -0.696865 +v -0.210359 0.015281 -0.114105 +v 0.396133 0.238983 -0.121771 +v -0.054217 0.162263 0.267555 +v 0.005192 -0.108565 0.433182 +v -0.077091 -0.181218 0.555384 +v -0.028913 0.132026 0.335261 +v -0.045900 0.143821 0.377995 +v -0.067072 0.147775 0.413401 +v -0.066661 0.125623 0.449246 +v -0.428380 0.025147 0.131401 +v 0.458611 0.570073 -0.237410 +v 0.055879 0.278073 0.121910 +v -0.192221 0.043657 -0.204300 +v -0.275609 0.344825 -0.276322 +v -0.285775 0.401836 -0.432668 +v -0.506451 -0.353073 0.175130 +v -0.173959 0.215336 -0.110253 +v -0.245974 0.354139 -0.298264 +v 0.352839 0.411223 -0.107777 +v -0.311785 0.162186 0.100277 +v -0.289191 0.169463 0.112778 +v -0.511184 -0.006604 0.268786 +v 0.290082 0.495160 -0.081808 +v 0.213435 -0.240378 -0.040125 +v -0.451182 -0.449950 0.212359 +v -0.492726 0.024540 0.227583 +v 0.187207 0.429800 -0.030991 +v -0.299159 0.308114 -0.222621 +v 0.073513 0.345532 -0.008302 +v -0.303586 0.480106 -0.608543 +v -0.290526 0.482221 -0.590852 +v -0.256115 0.339728 -0.257885 +v 0.106976 0.324916 0.094797 +v -0.260159 0.481309 -0.591794 +v 0.430051 0.549874 -0.170630 +v -0.012487 0.125005 0.258952 +v -0.497903 0.105098 0.244664 +v 0.167634 0.409414 0.005884 +v -0.171098 -0.304830 0.679712 +v 0.273364 -0.209903 0.245266 +v 0.279452 -0.208197 0.213033 +v -0.151090 0.147099 -0.065477 +v -0.066987 -0.304219 0.545050 +v -0.190147 0.356908 -0.411574 +v -0.231340 0.216362 -0.025237 +v -0.318705 0.462555 -0.565106 +v -0.535575 -0.314130 0.249474 +v 0.174627 0.108849 0.001790 +v -0.416091 -0.108785 0.035854 +v 0.043805 0.064513 0.171277 +v 0.039566 -0.371579 0.504988 +v -0.463550 0.101882 0.397926 +v -0.241079 0.118100 -0.518820 +v -0.239539 0.499960 -0.637031 +v -0.269415 -0.162064 0.717437 +v -0.528538 -0.238385 0.560171 +v 0.396163 0.209652 
-0.025791 +v 0.059334 0.338624 0.019462 +v -0.006723 0.102352 0.275609 +v -0.020945 0.160280 0.212261 +v -0.298059 0.142527 0.473431 +v -0.203281 -0.094898 -0.055907 +v 0.027603 0.302657 0.014609 +v -0.389575 0.144461 0.396984 +v -0.119776 0.110406 0.508161 +v 0.528920 0.324198 -0.173333 +v -0.483120 -0.565944 0.194035 +v 0.060429 -0.147154 0.361472 +v -0.150584 0.323072 -0.474738 +v -0.166412 0.337732 -0.439251 +v -0.439425 0.113866 0.414378 +v -0.483175 -0.009465 0.473392 +v 0.185535 0.401931 0.032655 +v -0.351588 -0.359523 0.007141 +v 0.120006 0.352001 0.070545 +v -0.510017 -0.353089 0.322113 +v -0.236848 -0.392075 0.632857 +v 0.076135 0.087188 0.001715 +v -0.013111 -0.058765 0.041505 +v 0.185855 -0.305889 0.451263 +v -0.015634 -0.016925 0.047596 +v -0.308422 0.437302 -0.511552 +v -0.295086 -0.348302 0.708736 +v -0.260514 -0.368817 0.690433 +v -0.258943 -0.384592 0.666842 +v 0.364491 0.539905 -0.109027 +v -0.279405 -0.403044 0.626865 +v 0.550588 0.367392 -0.254594 +v -0.152987 0.444911 -0.658467 +v -0.432847 -0.501615 0.381238 +v 0.407405 0.280835 -0.162646 +v -0.031285 0.146061 0.275049 +v -0.308908 -0.369823 0.683918 +v -0.272864 0.177056 0.072398 +v -0.304075 -0.389115 0.655308 +v -0.268312 -0.423089 0.553440 +v -0.303640 0.179654 0.248888 +v -0.178788 0.152203 0.053178 +v -0.178530 0.272873 -0.701618 +v 0.001095 -0.342296 0.486836 +v -0.342695 -0.372271 0.658783 +v -0.045769 0.129292 0.409593 +v 0.246066 -0.248651 0.367637 +v -0.328672 -0.404617 0.628894 +v -0.294881 -0.420455 0.576380 +v -0.292686 -0.451443 0.527707 +v 0.239459 -0.224623 0.325781 +v -0.172337 0.174175 -0.048630 +v -0.370639 -0.403468 0.611320 +v -0.343428 -0.415398 0.592498 +v -0.321583 -0.424881 0.559202 +v -0.325543 -0.447299 0.502654 +v -0.461427 -0.481610 0.190244 +v -0.196056 0.345056 -0.350124 +v -0.508411 0.086657 0.264180 +v -0.317825 -0.434782 0.528466 +v -0.287402 0.463384 -0.552819 +v 0.291276 0.214092 -0.108116 +v 0.533393 0.427866 -0.319812 +v -0.212514 0.342518 -0.306659 +v 
-0.525246 -0.329813 0.334760 +v -0.374965 -0.360698 0.655316 +v -0.357556 -0.391445 0.636573 +v -0.307910 -0.214506 -0.048889 +v -0.398988 -0.366183 0.650247 +v 0.397527 0.220692 -0.093443 +v -0.180650 0.420773 -0.583672 +v -0.294858 0.483763 -0.631814 +v -0.268176 0.498012 -0.633360 +v -0.354185 -0.243553 0.712595 +v -0.369783 -0.172440 0.730220 +v -0.397504 -0.171036 0.715702 +v -0.354317 -0.427996 0.508356 +v -0.139577 0.154003 -0.363381 +v -0.195430 0.106997 -0.435495 +v -0.376631 -0.092299 0.696357 +v -0.379453 -0.142994 0.720336 +v -0.353537 -0.088922 0.704397 +v -0.428808 -0.360706 0.645301 +v -0.416811 -0.389500 0.613695 +v -0.448317 -0.020010 0.141765 +v -0.523799 -0.338025 0.214203 +v -0.443549 -0.433388 0.180196 +v -0.234826 -0.349855 0.705899 +v -0.279359 -0.024903 -0.037713 +v -0.390339 -0.409478 0.480175 +v -0.138728 0.368277 -0.610028 +v -0.365261 -0.424194 0.471550 +v -0.213036 0.225840 -0.646544 +v -0.421863 -0.398687 0.583989 +v -0.396692 -0.411841 0.523091 +v -0.356562 -0.195122 0.731805 +v -0.341435 -0.177032 0.735736 +v -0.350104 -0.137207 0.727268 +v -0.340496 -0.028243 0.667518 +v -0.547693 -0.227564 0.349063 +v -0.273449 0.429900 -0.495968 +v 0.065323 0.125643 -0.019256 +v -0.208075 -0.375713 0.637860 +v -0.010238 -0.054716 0.395389 +v -0.432249 -0.439051 0.122926 +v -0.206584 0.201355 -0.028851 +v -0.439583 -0.394995 0.493603 +v -0.511893 0.051110 0.281396 +v -0.450172 -0.379175 0.604872 +v -0.454976 -0.391283 0.560310 +v -0.460869 -0.390198 0.524777 +v 0.025292 0.232298 0.152394 +v -0.229626 -0.410723 0.559360 +v -0.058632 0.187455 0.180991 +v -0.353336 0.135995 0.457074 +v -0.045972 0.152829 0.329118 +v -0.535270 -0.214065 0.215833 +v 0.106335 -0.165634 0.123215 +v -0.324935 -0.121731 0.723272 +v -0.362878 -0.020522 0.653105 +v 0.029176 0.306524 0.062536 +v -0.363368 0.081090 0.545774 +v -0.452740 -0.383071 0.464919 +v -0.157148 -0.500610 -0.170976 +v -0.085293 0.140918 0.453873 +v -0.283325 0.292422 -0.171240 +v -0.471525 -0.361392 
0.431489 +v -0.232464 0.168167 -0.588334 +v -0.247945 0.281474 -0.137141 +v -0.194203 -0.160045 -0.062614 +v -0.464678 -0.364693 0.400598 +v -0.249618 0.065310 0.590210 +v -0.487034 -0.369774 0.494988 +v -0.433543 0.117634 0.152013 +v -0.137825 0.274275 -0.381424 +v -0.272612 0.087628 -0.363814 +v -0.503033 0.098159 0.306387 +v 0.133747 0.079864 0.055101 +v 0.021169 0.028146 0.059990 +v 0.002852 0.244630 0.137799 +v -0.536306 -0.168807 0.349502 +v 0.247118 0.250087 0.032267 +v -0.499527 -0.338537 0.383893 +v -0.486881 -0.359916 0.355586 +v -0.407414 -0.266340 0.003316 +v -0.391498 0.131577 0.432792 +v -0.524252 -0.220014 0.510604 +v -0.490098 -0.370402 0.538993 +v -0.320020 -0.158670 0.733516 +v -0.325528 0.069466 0.581397 +v -0.234842 0.374321 -0.375565 +v -0.359940 -0.387300 0.023523 +v -0.474363 -0.369444 0.591938 +v -0.406604 -0.399610 0.438114 +v 0.189196 -0.285621 0.437463 +v -0.524043 -0.132787 0.326125 +v -0.326400 -0.548815 -0.085623 +v -0.267413 -0.122292 -0.051044 +v -0.290815 -0.413285 -0.056253 +v -0.294399 -0.095349 -0.039360 +v -0.168079 -0.110205 -0.053711 +v -0.163246 -0.449882 -0.141638 +v -0.210535 -0.460016 -0.121677 +v -0.172833 0.192924 -0.634157 +v 0.361372 0.244126 -0.129437 +v -0.305590 -0.258998 0.733273 +v -0.330344 -0.225986 0.725808 +v -0.114972 -0.342789 0.537112 +v -0.299173 -0.130572 0.721241 +v -0.298647 -0.022388 0.665387 +v 0.008455 -0.024548 0.307114 +v -0.487857 -0.088616 0.395198 +v -0.146874 0.049793 -0.106748 +v -0.272100 0.078290 -0.301063 +v 0.203662 0.119941 0.044854 +v 0.271883 0.164896 0.038822 +v -0.485798 -0.555431 0.235704 +v -0.471743 -0.539858 0.153384 +v -0.517994 -0.138074 0.202761 +v -0.499903 -0.132985 0.142694 +v -0.525083 -0.256890 0.586611 +v -0.467094 -0.558652 0.128990 +v -0.473417 -0.067735 0.150067 +v -0.201382 0.406939 -0.530177 +v -0.041636 -0.393538 -0.170636 +v -0.311138 -0.301258 0.725293 +v -0.283618 -0.236291 0.736914 +v -0.290095 -0.181539 0.729141 +v -0.296924 -0.083579 0.700865 +v -0.501898 
-0.210609 0.609783 +v -0.136347 0.137719 -0.303710 +v -0.521737 -0.204610 0.447290 +v -0.430219 0.052267 0.131846 +v 0.193433 0.268207 -0.124584 +v -0.261850 -0.300005 0.735027 +v -0.259456 -0.220158 0.733122 +v -0.404856 -0.077298 0.667675 +v -0.290807 0.078015 0.577977 +v -0.263286 -0.329666 0.722814 +v 0.360783 0.458485 -0.209434 +v 0.360669 0.390167 -0.208290 +v 0.341203 0.295762 -0.163666 +v 0.488059 0.542687 -0.294840 +v -0.474235 0.007568 0.203415 +v -0.517176 -0.190375 0.164328 +v 0.267422 -0.211840 0.269907 +v 0.130043 0.205758 0.109707 +v -0.049033 0.166566 0.226648 +v 0.393444 0.358901 -0.208547 +v 0.243521 0.241942 -0.125294 +v 0.022809 0.106121 0.203620 +v 0.411052 0.384689 -0.226762 +v -0.474318 -0.396854 0.247230 +v -0.270579 -0.095007 0.695717 +v -0.156186 0.216080 -0.594268 +v 0.126290 0.135415 0.129042 +v -0.163370 -0.521330 -0.171626 +v -0.269072 0.264690 -0.108781 +v -0.126637 0.196769 -0.324266 +v 0.409992 0.415735 -0.238028 +v 0.428605 0.354353 -0.217817 +v -0.290853 0.371303 -0.546026 +v 0.037307 -0.107474 0.365050 +v 0.051488 -0.101169 0.307165 +v 0.248881 0.466120 -0.043162 +v -0.163967 -0.016373 -0.045550 +v -0.167099 0.042604 -0.153855 +v -0.531628 -0.243285 0.189968 +v 0.497507 0.270144 -0.151347 +v -0.236116 -0.514857 0.559106 +v -0.275089 -0.476660 0.531257 +v -0.265962 -0.445014 0.532601 +v 0.361943 0.281471 -0.156803 +v 0.323715 0.392202 -0.191571 +v -0.271356 -0.272475 0.738750 +v -0.361619 -0.267754 -0.018540 +v 0.110474 -0.331139 0.471451 +v -0.198448 -0.392683 0.570831 +v 0.442946 0.468321 -0.272510 +v 0.456262 0.421063 -0.269545 +v -0.513984 -0.329025 0.165619 +v -0.132587 0.206293 -0.213386 +v 0.496149 0.453630 -0.310795 +v -0.479343 -0.388848 0.281973 +v 0.011167 -0.112925 0.033519 +v -0.155626 0.291823 -0.321526 +v 0.232535 0.443584 -0.005970 +v -0.474675 0.018469 0.479495 +v 0.304765 0.256850 -0.139292 +v -0.464870 -0.557895 0.352963 +v -0.237815 -0.552732 0.575668 +v -0.403376 0.054282 0.089704 +v -0.244182 -0.423223 
0.540923 +v -0.220703 -0.298227 0.727488 +v -0.232556 -0.250127 0.734550 +v -0.234973 -0.206666 0.722612 +v 0.513562 0.407233 -0.304256 +v 0.025535 0.077847 0.201175 +v -0.029714 0.061255 0.031203 +v -0.136244 0.368820 -0.739529 +v -0.257970 -0.008797 0.647905 +v -0.157474 0.227057 -0.664415 +v -0.128701 0.229522 -0.389458 +v -0.468706 -0.092267 0.101390 +v -0.172569 0.184346 -0.599707 +v -0.128097 -0.413083 -0.148645 +v -0.236331 -0.451217 0.533116 +v -0.218298 -0.429163 0.535959 +v -0.230450 0.105604 -0.496381 +v -0.209117 0.115153 -0.502753 +v -0.028357 0.018428 0.038317 +v -0.381198 -0.448436 0.426289 +v -0.037001 0.176995 0.200381 +v -0.159729 0.195233 -0.531448 +v -0.444228 -0.461941 0.143120 +v -0.195716 -0.364950 -0.087084 +v -0.218302 -0.535352 0.575409 +v -0.136842 0.293601 -0.530144 +v 0.551631 0.382289 -0.280573 +v 0.034013 0.016461 0.132705 +v 0.048742 0.037851 0.135393 +v 0.544565 0.346901 -0.216590 +v -0.131495 0.339369 -0.627912 +v -0.374196 -0.085303 0.008715 +v -0.144626 0.247406 -0.652531 +v -0.547428 -0.269266 0.337999 +v -0.486485 -0.030125 0.458801 +v -0.399731 0.157008 0.234122 +v -0.142078 0.032079 -0.047800 +v 0.191167 0.424774 -0.075768 +v 0.287194 0.285814 -0.021011 +v -0.391529 0.098610 0.496983 +v -0.195779 -0.521186 0.578636 +v -0.187763 0.165914 -0.600260 +v -0.449688 -0.412334 0.178193 +v -0.046010 0.132883 0.025001 +v -0.346324 0.360516 -0.425253 +v 0.435926 0.227643 -0.092502 +v -0.156245 0.060685 -0.171457 +v -0.157653 0.254424 -0.694657 +v -0.486753 -0.090721 0.457546 +v -0.297029 0.029773 -0.041992 +v -0.433112 -0.475888 0.354653 +v -0.181828 -0.473663 0.560817 +v -0.208742 -0.451421 0.540959 +v -0.310657 0.187802 -0.031338 +v 0.091631 0.138119 0.153003 +v -0.127626 0.329982 -0.728142 +v 0.208330 0.168988 -0.071291 +v -0.175535 0.037858 0.566542 +v 0.007330 -0.019492 0.071561 +v -0.011667 0.141421 0.006310 +v -0.022604 0.100813 0.017542 +v -0.171607 -0.509304 0.582542 +v -0.127682 0.176147 -0.230067 +v 0.241705 0.284876 
-0.142512 +v -0.234917 0.146308 -0.565376 +v 0.020664 0.001594 0.228021 +v -0.529184 -0.288098 0.488511 +v 0.264030 -0.244471 0.347258 +v 0.232473 -0.255448 0.387385 +v -0.404057 0.149551 0.195287 +v -0.072000 -0.048098 0.469928 +v 0.487442 0.538695 -0.262410 +v -0.149408 -0.495880 0.581013 +v -0.035561 -0.102764 0.473732 +v 0.390265 0.203035 -0.052499 +v -0.181482 0.160063 -0.563557 +v -0.036487 -0.443697 -0.185939 +v 0.420213 0.459049 -0.163984 +v -0.288394 0.180750 0.279675 +v -0.257091 0.455064 -0.553583 +v -0.475728 -0.038536 0.544413 +v -0.396709 -0.295042 0.007127 +v 0.000882 0.036571 0.043355 +v -0.500975 -0.328322 0.411199 +v -0.174999 0.058422 -0.227651 +v -0.217403 -0.241809 -0.062607 +v -0.417563 0.091509 0.112474 +v -0.145423 0.260525 -0.691961 +v 0.293754 0.447007 -0.022959 +v -0.497839 -0.075579 0.327315 +v 0.057833 0.218403 -0.045283 +v 0.077073 0.175441 -0.048065 +v -0.504172 -0.103550 0.217598 +v -0.517323 -0.206483 0.571837 +v -0.485829 -0.305042 0.099450 +v -0.137200 0.102995 -0.129923 +v -0.299360 -0.291618 -0.045004 +v -0.535567 -0.288411 0.214215 +v -0.248258 -0.297031 -0.055540 +v -0.484839 -0.560575 0.213135 +v 0.541450 0.325489 -0.226232 +v -0.303593 -0.537941 -0.101659 +v -0.128394 -0.482492 0.577049 +v -0.341594 0.162310 0.158864 +v -0.319577 -0.310432 -0.032670 +v -0.204159 -0.326559 -0.062640 +v -0.525961 -0.255588 0.463521 +v -0.505634 -0.022887 0.315937 +v -0.030145 0.115161 0.380661 +v -0.516001 -0.334766 0.556152 +v -0.068663 0.125631 0.035837 +v -0.253394 -0.543970 -0.132533 +v -0.330684 -0.501268 -0.065197 +v -0.104160 -0.470816 0.572963 +v -0.131016 -0.385863 0.526463 +v 0.172552 0.377647 0.056825 +v -0.395801 -0.410438 0.553265 +v -0.222548 -0.522906 -0.139416 +v -0.202728 -0.045961 -0.050554 +v 0.532194 0.374460 -0.213861 +v -0.117670 -0.054532 -0.032460 +v -0.053709 -0.461514 -0.185086 +v -0.202469 0.348872 -0.672249 +v -0.255035 -0.383601 -0.064760 +v -0.355906 -0.142786 -0.015882 +v -0.436354 -0.412147 0.133822 +v -0.090871 
-0.431165 0.547585 +v -0.111518 -0.127175 -0.041486 +v -0.087792 -0.375932 0.516073 +v 0.221691 0.129988 -0.000487 +v -0.192766 0.089995 -0.377068 +v -0.081728 0.148748 0.049349 +v -0.116249 -0.512483 -0.186804 +v -0.092506 -0.496912 -0.187549 +v 0.060764 0.212588 0.155623 +v -0.223145 0.181815 0.039640 +v -0.144496 -0.203075 -0.058737 +v -0.127948 -0.290953 -0.070578 +v -0.345546 0.416660 -0.541433 +v -0.352950 -0.326267 -0.008110 +v 0.163921 0.106149 0.077693 +v -0.314855 -0.152975 -0.042360 +v -0.280288 -0.229933 -0.054234 +v -0.500830 -0.052171 0.272358 +v -0.093418 -0.403812 0.528662 +v -0.243082 -0.012056 -0.053804 +v -0.068599 -0.146767 -0.028012 +v -0.227342 -0.106738 -0.056508 +v -0.144216 0.247826 -0.599744 +v -0.166254 -0.409228 -0.124541 +v 0.029004 -0.012369 0.148981 +v -0.250152 0.200438 0.015722 +v 0.204203 0.200080 -0.095583 +v -0.387629 -0.128915 0.005119 +v -0.281760 -0.331205 -0.040180 +v -0.125272 -0.260623 -0.062034 +v -0.072607 -0.413310 -0.170564 +v 0.141890 0.232277 -0.093299 +v -0.088486 -0.455534 0.565235 +v -0.128393 -0.462092 -0.168755 +v -0.489382 0.090248 0.206344 +v 0.087465 0.167538 0.148307 +v 0.132157 0.094714 0.000081 +v 0.321458 0.229703 0.040737 +v -0.362267 -0.048077 0.002590 +v -0.006545 0.069655 0.287990 +v -0.475547 -0.525547 0.189441 +v 0.473152 0.318247 -0.211969 +v -0.027101 -0.141648 -0.010172 +v -0.363231 -0.219470 -0.021079 +v -0.339845 -0.188809 -0.034804 +v -0.211279 -0.298891 -0.060842 +v -0.336486 -0.252477 -0.034026 +v -0.492193 -0.335066 0.124410 +v 0.073616 -0.188848 0.359709 +v -0.251078 -0.337158 -0.050150 +v -0.517682 -0.122201 0.261729 +v -0.207116 -0.398957 -0.097433 +v -0.006664 0.186580 0.000208 +v -0.260749 -0.485609 -0.106981 +v -0.220077 -0.549637 -0.146092 +v 0.220963 -0.265437 0.407182 +v -0.307626 -0.473831 -0.071457 +v -0.112816 0.175712 0.114261 +v -0.142124 0.219895 -0.486118 +v -0.398853 -0.202227 -0.007240 +v -0.019581 -0.425340 -0.186245 +v -0.483618 -0.043418 0.520747 +v -0.068880 -0.449223 
0.563006 +v -0.046971 -0.325683 0.516321 +v 0.196435 -0.257649 -0.067544 +v -0.137110 0.014366 -0.030897 +v 0.182351 -0.270383 -0.090471 +v 0.174097 -0.243515 -0.069026 +v 0.166735 -0.280985 -0.103474 +v -0.508167 0.020702 0.338370 +v 0.181399 -0.218746 -0.029771 +v 0.136535 -0.203262 -0.020047 +v 0.046806 -0.070647 0.224021 +v -0.256340 0.075888 -0.346475 +v -0.481320 -0.063359 0.387142 +v 0.157942 -0.297529 -0.115931 +v 0.034020 -0.025251 0.197509 +v -0.256037 0.371415 -0.346384 +v 0.142596 -0.317269 -0.131942 +v -0.531740 -0.211836 0.408137 +v 0.120753 -0.214587 -0.049311 +v -0.123331 0.372974 -0.692329 +v 0.135743 -0.342564 -0.147125 +v -0.120336 0.392110 -0.726539 +v 0.101429 -0.327301 -0.144340 +v 0.116400 -0.233719 -0.075354 +v -0.044726 -0.437787 0.556115 +v 0.148288 -0.193658 0.014848 +v -0.057542 0.053337 0.030848 +v 0.109438 -0.349303 -0.151670 +v 0.142788 -0.269609 -0.102988 +v 0.097296 -0.251355 -0.093679 +v 0.083619 -0.357382 -0.155467 +v 0.103378 -0.299787 -0.130224 +v 0.079500 -0.185462 0.006638 +v 0.061534 -0.158318 0.068283 +v 0.059800 -0.368317 -0.158816 +v 0.077695 -0.283781 -0.121193 +v -0.158433 0.079568 -0.251649 +v 0.012136 0.293425 0.039627 +v -0.136294 0.116002 -0.170283 +v 0.071747 -0.195058 -0.020971 +v 0.325943 0.265559 -0.011377 +v 0.050775 -0.136564 0.079093 +v 0.409391 0.489428 -0.244682 +v 0.062416 -0.312304 -0.139235 +v 0.042854 -0.174975 0.019786 +v -0.195122 0.495283 -0.673990 +v 0.113159 0.286815 -0.078958 +v 0.067389 -0.221399 -0.061567 +v 0.027816 -0.197075 -0.018891 +v 0.003598 -0.189701 -0.010911 +v 0.028513 -0.152348 0.033770 +v -0.040020 0.184364 0.026442 +v -0.395052 -0.340243 0.024020 +v 0.019051 -0.393425 -0.178019 +v 0.038877 -0.380029 -0.168280 +v 0.045687 -0.333836 -0.145931 +v -0.000702 -0.408482 -0.183862 +v -0.509621 -0.172466 0.538505 +v -0.014101 -0.435184 0.552431 +v 0.040124 -0.260800 -0.100071 +v -0.238088 0.080709 -0.392754 +v 0.018186 -0.305762 -0.130100 +v 0.026721 -0.231268 -0.064801 +v -0.005907 
-0.219037 -0.039737 +v -0.514877 -0.296807 0.154108 +v 0.475922 0.247220 -0.110916 +v 0.477311 0.407259 -0.278207 +v 0.007860 -0.257330 -0.088054 +v -0.003785 -0.291039 -0.114630 +v -0.004308 -0.152824 0.002111 +v -0.051326 -0.350145 -0.143699 +v -0.024439 -0.207757 -0.027541 +v -0.335346 -0.073586 -0.016419 +v -0.009646 -0.100048 0.017739 +v -0.081631 -0.327217 -0.117839 +v -0.045325 -0.270992 -0.080324 +v -0.036356 -0.094735 0.003243 +v -0.028610 -0.371017 0.507512 +v -0.085334 0.047600 0.025696 +v -0.114010 0.036199 0.004723 +v -0.099379 -0.374244 -0.142343 +v -0.069396 -0.298520 -0.098532 +v -0.063056 -0.255965 -0.060474 +v -0.324239 0.151779 0.055449 +v -0.071877 -0.102497 -0.020883 +v 0.114861 -0.178239 0.055643 +v -0.232082 0.503724 -0.663553 +v -0.454704 -0.468698 0.276583 +v -0.087825 0.187963 0.156664 +v -0.137277 -0.325325 -0.090414 +v -0.147503 -0.364296 -0.113508 +v -0.401364 0.113664 0.104105 +v -0.077385 -0.020576 -0.002459 +v -0.150623 0.433665 -0.736392 +v -0.175406 0.432240 -0.726508 +v -0.114059 -0.018780 -0.018040 +v -0.089048 0.024345 0.018992 +v 0.132083 0.204174 -0.079798 +v -0.119203 0.069732 0.013391 +v -0.094998 0.163736 0.067021 +v -0.003279 -0.412371 0.535879 +v -0.450888 -0.385281 0.361552 +v -0.131257 0.189807 -0.370748 +v -0.192902 0.178635 -0.623901 +v -0.101857 0.117465 0.046305 +v -0.020729 0.206744 0.168826 +v 0.102611 0.063836 0.089556 +v -0.131578 0.154175 -0.171012 +v 0.354299 0.537799 -0.131431 +v -0.411591 -0.432958 0.379375 +v -0.472529 -0.042758 0.188366 +v -0.255951 0.004162 -0.064333 +v -0.126702 0.123254 0.048993 +v -0.174322 0.076494 0.550512 +v -0.477175 -0.357045 0.124612 +v -0.322890 -0.027832 -0.021133 +v -0.406098 -0.355162 0.041200 +v -0.478074 -0.378103 0.161890 +v -0.147784 0.161188 0.100055 +v 0.011421 -0.386950 0.518226 +v 0.006243 -0.319288 0.475905 +v -0.519040 -0.312589 0.459196 +v 0.385078 0.442896 -0.226895 +v -0.519518 -0.162733 0.183426 +v 0.031997 0.178001 0.179178 +v 0.025680 -0.079023 0.083848 +v 
0.269574 -0.205657 0.183117 +v -0.218940 0.092552 -0.432521 +v -0.150761 0.398614 -0.738951 +v -0.253750 0.169678 0.405777 +v -0.160404 0.361281 -0.529855 +v -0.496137 -0.016609 0.352596 +v 0.087083 -0.344358 0.480257 +v -0.449930 -0.447601 0.256807 +v 0.022620 0.204316 0.170109 +v -0.328952 0.295031 -0.263616 +v -0.144750 0.236149 -0.542681 +v 0.247235 0.466462 -0.076562 +v -0.513595 -0.171790 0.463181 +v -0.201675 0.440414 -0.706142 +v 0.018168 -0.358534 0.495327 +v -0.116368 -0.362174 0.524624 +v -0.270817 0.095781 -0.412198 +v -0.480240 -0.569057 0.300839 +v -0.479396 0.111977 0.343930 +v -0.482158 -0.547655 0.284345 +v 0.522844 0.310172 -0.219243 +v -0.513304 0.029944 0.303043 +v -0.246564 0.092049 -0.449282 +v -0.158639 0.214521 -0.639317 +v -0.234311 0.331980 -0.252450 +v -0.026313 0.062987 0.366766 +v 0.524152 0.380347 -0.292297 +v 0.533757 0.357275 -0.274409 +v -0.208735 0.149433 -0.587893 +v 0.302533 0.294758 -0.157834 +v 0.067635 0.250551 0.137673 +v -0.498479 0.069680 0.218187 +v 0.419932 0.568066 -0.215557 +v -0.262869 0.310413 -0.196048 +v 0.000274 -0.367801 -0.162991 +v -0.325671 0.223890 -0.133321 +v -0.429173 -0.368191 0.077735 +v -0.420869 -0.395555 0.089114 +v -0.129310 0.051183 -0.012368 +v -0.331415 0.353137 -0.376785 +v -0.408322 0.013174 0.090250 +v -0.184061 0.125223 -0.454525 +v -0.479830 -0.224347 0.090878 +v 0.093801 0.200404 0.132632 +v -0.410495 -0.447479 0.065856 +v -0.478442 0.050246 0.185083 +v -0.452652 -0.572644 0.084406 +v -0.459280 -0.014268 0.556605 +v -0.143412 0.247831 -0.248006 +v -0.144013 0.105787 -0.260448 +v -0.363896 0.129874 0.083222 +v -0.376930 0.087122 0.063317 +v -0.419888 -0.043258 0.066655 +v -0.033646 0.037659 0.405226 +v -0.455287 -0.139581 0.066538 +v -0.472406 -0.264409 0.067079 +v -0.394637 -0.394763 0.055135 +v -0.410527 -0.419982 0.080252 +v -0.431457 -0.489710 0.080205 +v -0.436724 -0.535909 0.066514 +v 0.032430 0.044854 0.174920 +v -0.441594 -0.568022 0.061687 +v 0.088547 0.237765 0.134928 +v -0.036306 
0.235847 0.045751 +v -0.383070 -0.020652 0.037929 +v 0.057250 -0.279291 0.419381 +v 0.547177 0.400054 -0.263366 +v -0.440122 -0.093224 0.067347 +v -0.354414 0.118532 0.054716 +v 0.241647 0.435102 0.007400 +v 0.108667 0.234355 0.121362 +v -0.296030 0.035448 0.621492 +v 0.104981 0.292642 0.112783 +v -0.396258 -0.053273 0.042451 +v -0.441547 -0.179825 0.029660 +v -0.461770 -0.200075 0.051612 +v -0.450452 -0.236070 0.029832 +v -0.457076 -0.280887 0.045437 +v -0.385209 -0.422008 0.036848 +v -0.428919 -0.560233 0.038686 +v 0.364614 0.191705 -0.019471 +v 0.252371 0.398141 -0.148287 +v -0.289637 0.092017 -0.292767 +v -0.328360 0.141131 0.034460 +v -0.139263 0.293451 -0.469280 +v 0.119205 -0.276603 0.415380 +v 0.113440 -0.233390 0.352147 +v -0.425287 -0.183994 0.012011 +v -0.436312 -0.272124 0.019735 +v -0.271693 0.110431 -0.441407 +v -0.002418 0.005302 0.055951 +v 0.009480 0.249114 -0.000672 +v -0.337082 0.115438 0.023518 +v -0.373308 0.022665 0.035720 +v -0.367820 -0.015110 0.015045 +v -0.166574 -0.104305 0.579021 +v -0.343051 -0.404422 0.001652 +v -0.373999 -0.440900 0.008117 +v 0.047704 0.312661 -0.008416 +v -0.331925 0.074744 -0.005605 +v -0.343416 0.067804 0.015801 +v -0.063880 -0.078830 -0.009620 +v -0.410260 -0.492586 0.029501 +v -0.417953 -0.549402 0.014977 +v -0.408894 -0.561015 -0.003087 +v 0.096498 0.367503 -0.008881 +v -0.328878 0.119152 -0.018562 +v -0.322523 0.028518 -0.011139 +v 0.134047 -0.319927 0.464245 +v -0.125337 0.413538 -0.706045 +v -0.043352 -0.021076 0.021992 +v 0.278138 0.155110 -0.010986 +v 0.150041 0.305553 0.094971 +v 0.044561 -0.067802 0.187481 +v 0.540690 0.339109 -0.251278 +v 0.250389 0.143257 0.023224 +v -0.388458 -0.510824 -0.010971 +v 0.431224 0.373913 -0.148968 +v 0.065985 0.312221 -0.030671 +v -0.315175 0.059458 -0.040517 +v 0.147063 -0.292549 0.440388 +v 0.165085 -0.259476 0.410162 +v -0.133934 0.303568 -0.617728 +v -0.324539 0.148020 -0.010725 +v 0.057222 0.045318 0.050305 +v -0.427236 -0.018218 0.090276 +v 0.332302 0.186047 0.013101 
+v -0.525998 -0.250007 0.425320 +v -0.267005 0.038322 -0.155497 +v -0.343202 -0.438306 -0.022823 +v -0.456540 0.095813 0.167174 +v -0.393275 -0.559160 -0.021679 +v 0.159148 -0.312050 0.456423 +v 0.211255 -0.242487 0.377658 +v 0.103335 0.353689 -0.044663 +v -0.321118 0.086820 -0.045152 +v -0.292381 0.007099 -0.036298 +v -0.541220 -0.249583 0.377138 +v -0.191741 0.133729 -0.527177 +v -0.446200 0.136836 0.224356 +v -0.537769 -0.302390 0.342053 +v 0.120480 0.379170 -0.038307 +v 0.201285 0.144339 -0.045263 +v 0.044216 -0.104658 0.108214 +v -0.297228 -0.383532 -0.036324 +v -0.311293 -0.369353 -0.019720 +v -0.440575 -0.021384 0.114009 +v 0.118859 0.342711 -0.067410 +v -0.321946 0.148222 -0.082100 +v 0.037270 0.242673 -0.028271 +v 0.329986 0.482441 -0.055907 +v -0.506629 -0.158932 0.494215 +v -0.357748 -0.534047 -0.050961 +v -0.373029 -0.560324 -0.041740 +v 0.206425 -0.275881 0.425583 +v 0.159311 0.412588 -0.033522 +v -0.335990 -0.268491 0.717035 +v -0.456996 -0.322246 0.655014 +v -0.167028 -0.363085 0.582585 +v -0.308836 0.070349 -0.081161 +v -0.290827 0.045818 -0.086705 +v -0.349804 -0.561840 -0.065159 +v -0.489978 -0.271970 0.105031 +v -0.382776 0.051059 0.568200 +v -0.460241 -0.007802 0.178892 +v -0.489751 -0.086849 0.523598 +v 0.473968 0.513775 -0.300033 +v 0.496869 0.519400 -0.313354 +v -0.313895 0.108600 -0.061512 +v -0.310380 0.126067 -0.089976 +v -0.311817 0.097533 -0.122700 +v -0.300859 0.074540 -0.128125 +v 0.195029 0.364211 0.060286 +v 0.266420 0.462706 -0.119352 +v 0.292173 0.458339 -0.151275 +v -0.324261 0.160074 -0.123483 +v 0.315130 0.205265 -0.094491 +v 0.241419 -0.239459 -0.012760 +v -0.221699 0.126081 -0.540198 +v -0.199561 0.138613 -0.559376 +v 0.224614 0.407010 0.032258 +v -0.205044 -0.019522 -0.054259 +v 0.459425 0.550921 -0.214958 +v -0.307711 0.120251 -0.136066 +v -0.422318 0.005424 0.582686 +v 0.094755 0.080160 0.008769 +v 0.247554 0.398121 0.016867 +v -0.331688 0.189266 -0.135764 +v -0.282542 0.063129 -0.171530 +v -0.448976 -0.382379 0.116172 +v 
-0.479067 0.116334 0.221808 +v 0.266416 0.435718 -0.000660 +v -0.149007 0.309265 -0.723357 +v -0.522143 -0.326011 0.516416 +v 0.338281 0.466792 -0.189083 +v -0.287019 0.111401 -0.365559 +v -0.335641 0.204894 -0.199499 +v -0.138484 0.271633 -0.560514 +v -0.320507 0.191696 -0.073570 +v 0.529924 0.302769 -0.180991 +v -0.309039 0.108837 -0.191063 +v -0.297415 0.090966 -0.208331 +v -0.211145 0.073725 0.571587 +v -0.193448 0.224360 -0.669072 +v 0.520948 0.459113 -0.327725 +v -0.177612 0.207148 -0.657697 +v 0.373281 0.535716 -0.181518 +v -0.323252 0.170041 -0.166935 +v -0.300112 0.136975 -0.200647 +v -0.184087 0.008597 -0.087151 +v -0.168418 0.334255 -0.720995 +v 0.353900 0.508515 -0.179951 +v 0.414863 0.545810 -0.231594 +v -0.386572 -0.421723 0.424384 +v -0.543941 -0.247703 0.247759 +v -0.323564 0.206474 -0.241404 +v -0.414296 0.148523 0.340256 +v -0.304698 0.120899 -0.230790 +v -0.496784 -0.012652 0.386924 +v -0.150303 -0.371492 0.542154 +v -0.486896 -0.027910 0.420164 +v 0.033864 -0.048726 0.144974 +v 0.443574 0.320482 -0.201769 +v 0.296933 0.489267 -0.040530 +v -0.130256 0.288734 -0.695458 +v 0.022070 -0.066149 0.326257 +v 0.454773 0.281872 -0.170472 +v -0.335008 0.390165 -0.456810 +v 0.437934 0.561131 -0.246419 +v -0.496579 -0.188931 0.124069 +v -0.536091 -0.187568 0.240064 +v -0.290463 0.150252 -0.252432 +v -0.333869 0.266844 -0.237513 +v -0.340044 0.240295 -0.259681 +v -0.288310 0.178830 -0.263009 +v -0.294751 0.132697 -0.278071 +v -0.158140 0.121670 0.512545 +v -0.282529 0.081501 -0.250898 +v -0.524560 -0.287169 0.398320 +v -0.017900 0.093810 0.339999 +v -0.532516 -0.269577 0.535253 +v -0.514149 -0.244052 0.148438 +v -0.341758 0.272927 -0.284789 +v -0.335612 0.243671 -0.288821 +v -0.423820 -0.412474 0.365806 +v 0.220039 0.313418 -0.141715 +v -0.162591 0.137080 -0.414971 +v -0.246014 0.018124 -0.115475 +v 0.347551 0.511939 -0.077620 +v -0.212218 0.167846 0.081532 +v -0.185100 -0.370418 0.609287 +v -0.280534 0.186441 -0.317535 +v -0.389730 0.161739 0.290665 +v 
-0.316851 0.282019 -0.203880 +v -0.220982 0.429733 -0.678389 +v 0.211295 0.390874 -0.121205 +v -0.107356 0.161756 0.402255 +v -0.098464 0.151986 0.435573 +v -0.343048 0.266813 -0.307178 +v -0.149209 -0.445933 0.552985 +v -0.291770 0.209465 -0.306718 +v -0.296785 0.118035 -0.301495 +v 0.377686 0.510569 -0.103305 +v -0.440910 -0.432660 0.301502 +v -0.062036 0.192778 0.051692 +v -0.150093 0.463166 -0.725426 +v -0.287059 0.361687 -0.325677 +v -0.476369 -0.533498 0.299161 +v -0.344804 0.297530 -0.322891 +v -0.336990 0.268291 -0.339641 +v -0.274221 0.210560 -0.381160 +v -0.115247 0.154707 0.074313 +v -0.474824 -0.563533 0.326557 +v -0.111985 -0.196714 -0.050098 +v -0.161363 0.160103 0.435686 +v -0.284037 0.144136 -0.330512 +v -0.142470 0.100978 -0.209602 +v -0.479381 -0.554819 0.176064 +v -0.488023 -0.100980 0.134201 +v -0.466873 -0.072830 0.118464 +v -0.146482 0.174124 -0.116058 +v -0.349530 0.296016 -0.356356 +v -0.470928 -0.345961 0.627336 +v -0.339547 0.287881 -0.374929 +v 0.357027 0.205687 0.014294 +v -0.296427 0.252270 -0.373385 +v -0.041159 0.244025 0.069374 +v -0.502223 -0.085113 0.272642 +v -0.482012 -0.061863 0.433781 +v -0.350183 0.384738 -0.483835 +v 0.251553 0.218413 0.074772 +v -0.266683 0.188786 -0.405938 +v -0.276302 0.166312 -0.346889 +v -0.466315 -0.510872 0.309252 +v -0.274892 0.377972 -0.367154 +v -0.332113 0.300707 -0.409353 +v 0.033360 0.056479 0.030300 +v -0.137129 0.098564 -0.086278 +v 0.502249 0.369679 -0.271400 +v 0.494211 0.429592 -0.215076 +v -0.362624 -0.296169 0.689933 +v -0.288653 0.172787 0.378376 +v -0.170981 0.153465 -0.011717 +v -0.271642 0.140954 -0.422327 +v -0.476612 -0.514549 0.254246 +v -0.498746 -0.120706 0.430740 +v -0.284972 0.277473 -0.434476 +v -0.278938 0.241498 -0.400939 +v -0.518981 -0.289113 0.424598 +v -0.071584 0.163689 0.342449 +v -0.350669 0.322840 -0.415214 +v -0.337668 0.344715 -0.473125 +v -0.253755 0.191768 -0.484714 +v -0.263309 0.280874 -0.494055 +v -0.260166 0.242977 -0.472176 +v -0.188940 -0.349148 0.661026 +v 
-0.294899 0.311412 -0.465385 +v -0.178566 -0.280920 -0.062121 +v -0.253623 0.180433 0.216655 +v -0.258764 0.164436 -0.474825 +v -0.343016 0.380592 -0.514176 +v -0.312696 0.342190 -0.491134 +v -0.251471 0.234318 -0.502507 +v -0.155994 0.260465 -0.229882 +v 0.428277 0.575499 -0.188679 +v -0.490003 0.009966 0.431513 +v 0.089971 0.344133 0.064967 +v -0.242420 0.175691 -0.556726 +v -0.322887 0.388000 -0.542283 +v -0.275164 0.349586 -0.539945 +v -0.324782 -0.072559 0.698788 +v -0.493634 -0.121601 0.525283 +v -0.265532 0.317954 -0.523669 +v -0.239079 0.269613 -0.562136 +v -0.192874 0.030441 -0.153460 +v -0.330902 0.414676 -0.566282 +v -0.245948 0.311080 -0.565927 +v -0.138406 0.060387 -0.057240 +v -0.257316 0.494856 -0.658966 +v -0.234605 0.356090 -0.616323 +v -0.361340 0.107623 0.506739 +v -0.226092 0.331281 -0.619255 +v -0.239377 0.217779 -0.550242 +v -0.214290 0.168904 -0.609879 +v -0.308398 0.460464 -0.622723 +v -0.288950 0.449430 -0.629116 +v -0.220371 0.298209 -0.623207 +v -0.218543 0.261814 -0.626661 +v -0.064333 -0.222424 -0.041179 +v -0.277897 0.426142 -0.617428 +v 0.460999 0.544447 -0.280049 +v -0.260276 0.399827 -0.610559 +v -0.385251 -0.205717 0.719032 +v -0.275379 0.476038 -0.651321 +v -0.245605 0.375538 -0.610365 +v -0.201435 0.312109 -0.669914 +v 0.144358 0.136160 -0.042243 +v -0.241965 0.467419 -0.678048 +v -0.251835 0.446879 -0.658149 +v -0.233478 0.492656 -0.684434 +v -0.205340 0.394307 -0.682697 +v 0.059040 -0.120324 0.128659 +v -0.420574 0.054377 0.529839 +v -0.255489 0.026455 0.624085 +v -0.512450 -0.363535 0.272717 +v -0.326779 0.325419 -0.311328 +v -0.306242 -0.477061 0.513876 +v -0.384683 -0.325009 0.674170 +v -0.464670 -0.477891 0.232729 +v 0.097554 0.069237 0.112693 +v -0.466140 -0.072798 0.589440 +v -0.475816 -0.166794 0.623948 +v 0.464503 0.519969 -0.216358 +v -0.487772 -0.273583 0.642195 +v -0.227434 0.002843 -0.082009 +v -0.341247 -0.538948 0.499509 +v -0.340013 -0.576660 0.511206 +v -0.221243 -0.365102 0.678948 +v -0.484934 -0.126999 
0.583795 +v -0.482024 -0.223974 0.640249 +v -0.494859 -0.378529 0.223051 +v -0.066922 0.089262 0.030247 +v -0.236206 0.029770 -0.164450 +v -0.358674 -0.566334 0.493439 +v -0.541911 -0.299335 0.308665 +v 0.129246 0.185755 0.125451 +v 0.438142 0.529181 -0.264889 +v -0.501338 -0.306883 0.618563 +v -0.167013 0.087647 -0.309612 +v -0.071876 -0.046329 -0.010551 +v -0.508059 -0.332396 0.586055 +v -0.513096 -0.255291 0.611027 +v -0.377359 -0.554846 0.470983 +v -0.136223 0.240750 -0.472927 +v 0.301910 0.181499 -0.063658 +v 0.056857 0.301305 0.101881 +v -0.119365 0.352178 -0.717028 +v -0.056497 0.222301 0.126743 +v -0.324613 0.413735 -0.484774 +v 0.046776 0.312835 0.079051 +v -0.428954 0.087201 0.475946 +v -0.447179 0.029625 0.527212 +v -0.501775 -0.175133 0.580877 +v -0.519469 -0.290720 0.598232 +v 0.279763 0.482922 -0.105393 +v -0.127500 0.222275 -0.313806 +v 0.024217 0.074877 0.016362 +v -0.511313 0.047713 0.342991 +v -0.449091 0.065454 0.477450 +v 0.545024 0.401532 -0.304654 +v 0.091324 0.295642 -0.060882 +v -0.330743 -0.346809 0.693212 +v -0.304710 -0.324756 0.719649 +v -0.396636 -0.494836 0.427194 +v -0.492431 -0.018669 0.236436 +v -0.504493 -0.000894 0.246723 +v 0.049069 0.127517 0.182316 +v -0.009542 0.038077 0.294371 +v 0.075082 0.091640 0.154400 +v -0.397953 -0.560668 0.451712 +v 0.097520 0.209755 -0.067457 +v -0.551023 -0.238714 0.299319 +v -0.472745 0.129115 0.273070 +v -0.023412 0.261245 0.083495 +v -0.470782 0.059987 0.440985 +v 0.037910 -0.306040 0.453867 +v -0.414856 -0.568600 0.434623 +v -0.133898 0.242609 -0.289196 +v -0.090297 0.192247 0.090117 +v -0.182395 0.081980 -0.328807 +v -0.218628 0.037088 -0.201219 +v -0.430407 -0.575961 0.416224 +v -0.133263 0.140308 -0.246610 +v 0.315264 0.172390 -0.032609 +v -0.425765 -0.539461 0.410541 +v -0.436795 -0.559227 0.402830 +v 0.219707 0.200460 0.093419 +v 0.282815 0.211707 0.064256 +v -0.248750 0.141950 -0.535704 +v 0.473605 0.269994 -0.087112 +v -0.129722 0.384438 -0.657272 +v 0.362218 0.281938 -0.046612 +v 
-0.504074 0.010553 0.390482 +v -0.497481 0.052970 0.394288 +v 0.314483 0.321797 -0.059257 +v -0.448920 -0.551934 0.380954 +v 0.473085 0.370742 -0.169620 +v 0.198613 0.256612 0.042205 +v -0.533371 -0.150391 0.303992 +v -0.535682 -0.168991 0.267512 +v -0.529070 -0.300095 0.560177 +v -0.164338 0.004926 -0.057400 +v -0.323082 -0.561535 0.522172 +v 0.126850 0.388798 0.002706 +v 0.223170 0.315736 0.006893 +v -0.521131 -0.351625 0.253287 +v -0.243571 0.041572 -0.206295 +v -0.511103 -0.019263 0.286661 +v -0.544682 -0.282822 0.265204 +v 0.422917 0.495209 -0.169715 +v 0.447186 0.284325 -0.079734 +v -0.279698 -0.546239 0.550213 +v -0.454501 -0.128455 0.642911 +v -0.446481 -0.162374 0.669700 +v 0.405079 0.541447 -0.132955 +v 0.421906 0.519331 -0.164275 +v -0.429269 -0.036735 0.614652 +v 0.531246 0.453833 -0.273138 +v -0.151558 0.390161 -0.600596 +v -0.445344 -0.268905 0.673602 +v -0.447240 -0.076204 0.622317 +v 0.189813 0.332408 -0.123246 +v -0.261169 0.138522 -0.489874 +v -0.143770 -0.075212 -0.043021 +v -0.441557 -0.203929 0.675476 +v -0.411431 -0.134811 0.695491 +v -0.407814 -0.325488 0.673950 +v -0.431871 -0.306758 0.675179 +v -0.458274 -0.196930 0.656057 +v -0.469994 -0.248443 0.656148 +v -0.321384 0.250917 -0.330922 +v -0.197639 0.195943 -0.643799 +v -0.464755 0.013979 0.513658 +v 0.031092 0.139956 -0.013901 +v 0.408299 0.229231 -0.015350 +v -0.427192 -0.095702 0.660044 +v -0.174600 0.473071 -0.723640 +v -0.202210 0.469320 -0.711382 +v 0.372971 0.448883 -0.122947 +v -0.347131 -0.488712 0.475973 +v 0.289130 0.391901 -0.172092 +v -0.412039 -0.276115 0.682708 +v 0.390715 0.324157 -0.098002 +v 0.153742 0.290961 -0.105106 +v 0.548844 0.427159 -0.298022 +v -0.298408 -0.499026 0.522268 +v -0.271055 -0.524180 0.545150 +v 0.310547 0.169214 0.000644 +v -0.301911 -0.547503 0.533607 +v -0.244174 -0.144086 0.697858 +v 0.210387 0.346579 0.043194 +v -0.424951 -0.172622 0.694979 +v -0.393304 -0.178022 -0.008841 +v -0.408618 -0.193829 0.707104 +v 0.518230 0.325869 -0.239919 +v 0.274391 
0.224814 -0.118266 +v -0.259979 0.114360 -0.483514 +v -0.143327 0.339209 -0.550485 +v -0.208865 -0.269186 0.724914 +v -0.258840 -0.547819 0.564039 +v -0.265802 -0.051827 0.674500 +v 0.351474 0.248351 0.006529 +v -0.170556 0.104393 -0.367139 +v -0.215864 0.075833 -0.353499 +v -0.415143 -0.232870 0.688332 +v -0.214242 -0.321604 0.715076 +v -0.208700 -0.233211 0.718518 +v -0.242930 -0.087353 0.676843 +v 0.178710 0.215741 0.091952 +v -0.176349 -0.213287 -0.061987 +v 0.264946 0.234668 0.050571 +v -0.186506 -0.399474 0.544378 +v 0.231750 0.285452 0.006496 +v 0.510595 0.523310 -0.287264 +v 0.220776 0.167482 0.077121 +v -0.308378 0.147094 -0.154049 +v -0.209316 -0.339797 0.699024 +v -0.183142 -0.326620 0.679833 +v -0.187737 -0.293108 0.707869 +v -0.185938 -0.256889 0.706279 +v -0.187157 -0.223940 0.695801 +v -0.198950 -0.192791 0.689375 +v -0.216779 -0.129197 0.667291 +v 0.152974 0.260866 -0.103776 +v -0.225497 -0.028674 0.641844 +v -0.072279 -0.479578 -0.186019 +v 0.280101 0.339357 -0.167274 +v 0.046952 0.029181 0.105442 +v -0.101152 -0.475839 -0.181815 +v -0.227967 0.054162 -0.265158 +v -0.139492 -0.528904 -0.181989 +v 0.119007 0.074492 0.035646 +v -0.343486 -0.297615 0.707465 +v -0.325088 0.167733 -0.080762 +v 0.466211 0.261366 -0.145788 +v -0.466687 -0.399551 0.202270 +v -0.226908 0.202769 -0.609193 +v -0.169808 -0.259375 0.684997 +v -0.169777 -0.200503 0.659240 +v -0.209215 -0.090756 0.644460 +v -0.216530 0.191334 -0.626283 +v 0.209857 0.328074 0.032683 +v 0.083115 -0.274290 0.405750 +v 0.197595 0.276973 0.032459 +v 0.192569 0.405837 -0.096301 +v -0.136731 0.110612 0.031877 +v 0.078563 0.350327 0.035277 +v 0.218806 0.228233 0.074233 +v 0.063351 -0.358691 0.491722 +v -0.205750 0.029695 0.598536 +v -0.278873 -0.526695 -0.114320 +v 0.181064 0.234197 0.069991 +v 0.143225 0.331350 -0.092129 +v -0.310757 0.391569 -0.429320 +v -0.387958 -0.280653 0.683453 +v -0.127001 0.073110 0.528698 +v 0.022209 0.032599 0.196166 +v 0.003848 0.069011 0.243793 +v -0.188009 -0.554999 
-0.159303 +v -0.313779 0.241095 -0.119840 +v -0.342541 -0.456836 0.476773 +v 0.507614 0.490120 -0.323492 +v -0.157168 -0.325270 0.627183 +v 0.014837 0.272744 0.003947 +v -0.204668 -0.030715 0.623285 +v -0.341921 0.323005 -0.348841 +v -0.307395 0.214894 -0.282833 +v 0.504997 0.386324 -0.198157 +v 0.484797 0.518281 -0.243448 +v 0.251014 0.196075 0.076065 +v -0.140783 -0.252274 0.630102 +v -0.187864 -0.097326 0.613565 +v -0.179459 -0.018657 0.587014 +v 0.460117 0.419353 -0.185229 +v -0.291863 0.103704 -0.326386 +v 0.126875 0.164565 0.134998 +v 0.029020 0.171434 -0.022906 +v -0.137246 0.428828 -0.674824 +v -0.429380 -0.231366 0.011371 +v -0.148636 -0.281478 0.643689 +v -0.189012 -0.149213 0.643918 +v -0.291103 0.199573 -0.012649 +v 0.489209 0.341200 -0.154431 +v 0.468314 0.357293 -0.240309 +v 0.031017 0.144759 0.190261 +v -0.249073 -0.179812 -0.062599 +v 0.057530 -0.324206 0.459306 +v 0.073971 -0.147976 0.111774 +v 0.112087 0.090416 0.118839 +v -0.129294 -0.229132 0.608442 +v -0.144380 -0.177280 0.599872 +v -0.165773 -0.047677 0.566186 +v 0.141140 0.336685 0.086430 +v -0.158537 0.020543 0.552027 +v -0.337966 0.440480 -0.554952 +v -0.452098 0.030261 0.161437 +v -0.506933 -0.352862 0.522457 +v 0.277621 0.334590 -0.041392 +v 0.369882 0.394835 -0.119017 +v 0.449815 0.320777 -0.119630 +v 0.487689 0.307645 -0.122395 +v 0.494688 0.277011 -0.107769 +v 0.022476 0.007900 0.088619 +v -0.334031 0.438618 -0.584975 +v -0.143273 -0.336119 0.579038 +v -0.136370 -0.306004 0.598610 +v -0.117376 -0.287995 0.583508 +v -0.133584 0.263545 -0.447231 +v -0.147274 -0.128775 0.567888 +v -0.152637 0.111694 -0.327149 +v 0.292873 0.426875 -0.030114 +v 0.053404 0.033294 0.082537 +v 0.315705 0.248821 0.019253 +v 0.311653 0.199267 0.043487 +v 0.305167 0.369709 -0.068003 +v -0.325690 0.443320 -0.536872 +v -0.349482 0.329823 -0.387152 +v -0.165420 -0.410819 0.535668 +v -0.144857 0.110669 -0.008102 +v -0.127831 -0.327586 0.563748 +v -0.122567 -0.196359 0.587835 +v -0.154535 -0.014805 0.547879 +v 
-0.134366 0.045116 0.534857 +v -0.173621 0.161603 0.098167 +v -0.151276 0.180090 -0.456711 +v -0.169860 0.158733 -0.488003 +v 0.336775 0.344915 -0.187195 +v 0.511647 0.284828 -0.133401 +v -0.195590 0.493741 -0.701559 +v -0.131087 -0.122259 0.550883 +v -0.150291 -0.058461 0.534847 +v -0.133719 0.007444 0.529991 +v -0.138131 0.082446 -0.027972 +v -0.455668 -0.322920 0.071951 +v -0.432670 -0.310500 0.039040 +v 0.053832 0.101964 0.176618 +v 0.435043 0.222235 -0.068879 +v 0.511175 0.329603 -0.158663 +v -0.352555 0.358972 -0.458019 +v -0.098467 -0.214253 0.578663 +v -0.089080 -0.111210 0.522739 +v -0.299395 0.104301 -0.261651 +v 0.385849 0.556079 -0.173447 +v -0.174532 0.331939 -0.372663 +v 0.438836 0.232920 -0.047768 +v -0.091767 -0.281568 0.569259 +v 0.219998 0.142024 0.059052 +v -0.103976 0.065976 0.515034 +v -0.338204 0.025778 0.623054 +v 0.268469 0.364124 -0.032541 +v 0.351773 0.193199 -0.060187 +v 0.022452 -0.012411 0.106235 +v 0.024808 -0.045780 0.105124 +v -0.224992 0.066455 -0.316376 +v 0.401889 0.321418 -0.190329 +v -0.249175 -0.048673 -0.045189 +v 0.361319 0.464909 -0.105390 +v -0.453157 -0.547961 0.102501 +v -0.090333 -0.320497 0.546192 +v -0.059554 -0.240711 0.558908 +v 0.026124 -0.109686 0.056543 +v -0.099718 -0.159131 0.556141 +v -0.104543 -0.050517 0.489403 +v -0.444652 -0.514870 0.105225 +v 0.394256 0.564547 -0.150857 +v -0.335653 0.230337 -0.196538 +v 0.193609 -0.208782 0.003096 +v 0.377177 0.373124 -0.117912 +v 0.006194 0.104327 0.228536 +v -0.206123 0.310412 -0.235620 +v -0.071550 -0.341564 0.523125 +v -0.068082 -0.263685 0.561909 +v 0.172838 0.275717 0.058042 +v -0.112756 -0.086319 0.516260 +v -0.130418 -0.038244 0.509205 +v -0.106345 0.017445 0.508863 +v -0.078397 0.072472 0.473527 +v -0.241624 0.146291 0.474768 +v 0.316987 0.508215 -0.065662 +v -0.061054 -0.215134 0.558078 +v -0.042751 -0.189055 0.529520 +v -0.086937 0.037613 0.479861 +v -0.090975 0.116751 0.485356 +v -0.231071 0.422192 -0.519410 +v 0.261843 0.180640 0.060012 +v 0.170924 0.169034 
0.111318 +v -0.357182 0.041810 0.597277 +v -0.208529 0.172824 0.399609 +v -0.157652 0.175568 0.370721 +v 0.443001 0.260528 -0.051876 +v -0.126213 0.310975 -0.684402 +v -0.179033 0.376033 -0.715427 +v -0.134506 -0.012102 -0.030703 +v -0.035308 -0.284444 0.525655 +v -0.306522 0.212875 -0.063144 +v -0.089788 -0.006517 0.475709 +v -0.456248 -0.520290 0.346252 +v -0.142992 0.124202 -0.065095 +v -0.146307 0.179343 0.328185 +v 0.236778 -0.206025 0.282162 +v -0.190786 0.319137 -0.285116 +v -0.236701 0.161029 0.438474 +v -0.281514 0.117994 0.521529 +v -0.047626 -0.350565 0.507743 +v -0.322556 0.098568 0.542818 +v -0.009942 -0.255576 0.509022 +v -0.451369 -0.403154 0.310307 +v -0.058407 -0.013778 0.447094 +v 0.141443 -0.213587 0.325274 +v -0.077437 0.100302 0.473166 +v 0.191046 -0.201360 0.297737 +v -0.004088 0.088823 0.015682 +v -0.136634 0.150912 0.076913 +v -0.116244 0.175962 0.143739 +v -0.508000 -0.122163 0.390354 +v 0.153112 -0.173468 0.176223 +v -0.309296 0.359867 -0.350162 +v -0.297013 0.232355 -0.078767 +v -0.005547 -0.304415 0.486860 +v -0.227908 0.285727 -0.156470 +v -0.054095 -0.145950 0.519343 +v -0.068347 -0.099893 0.501298 +v -0.272994 0.180645 0.327456 +v -0.508418 -0.104017 0.318065 +v 0.002072 0.137990 0.209785 +v -0.200366 0.487161 -0.648251 +v 0.163757 0.252666 0.066699 +v 0.174521 -0.184894 0.075362 +v 0.131565 -0.244403 0.377217 +v -0.111244 0.176987 0.192718 +v 0.256372 -0.203036 0.156437 +v 0.238832 -0.197894 0.238506 +v -0.189885 0.308900 -0.694469 +v -0.485179 -0.551230 0.260002 +v -0.158001 0.143039 0.058356 +v -0.026021 -0.217340 0.523157 +v 0.218270 -0.195115 0.093593 +v 0.106952 -0.160733 0.173609 +v -0.058782 0.031513 0.453075 +v 0.282339 0.467574 -0.021771 +v 0.388790 0.509451 -0.220033 +v -0.196208 0.176345 0.000575 +v -0.492421 -0.081188 0.212079 +v -0.417796 -0.398038 0.409868 +v 0.077024 -0.212848 0.366218 +v 0.327664 0.520276 -0.096902 +v 0.322055 0.507416 -0.133366 +v -0.509706 0.073784 0.329636 +v -0.165047 0.023019 -0.098865 +v 
-0.432131 0.147452 0.278967 +v -0.491022 0.115707 0.285556 +v 0.158251 -0.187205 0.255255 +v 0.093035 -0.194863 0.312581 +v -0.045298 -0.039812 0.441359 +v -0.535790 -0.190398 0.379820 +v -0.118703 0.136815 0.481108 +v -0.224011 0.108924 0.539832 +v -0.047180 0.045200 0.437064 +v -0.323353 0.466021 -0.598394 +v -0.264596 0.220682 -0.027129 +v 0.046790 -0.075736 0.249770 +v 0.172686 -0.227111 0.361140 +v 0.255838 -0.222639 0.043480 +v 0.261745 -0.233820 0.015474 +v -0.055125 -0.189029 -0.028362 +v -0.142680 0.145897 0.472394 +v 0.492590 0.303082 -0.203508 +v -0.188358 0.282638 -0.198715 +v -0.160694 0.169588 0.145744 +v 0.466778 0.242635 -0.080636 +v 0.089887 -0.230521 0.359969 +v 0.108518 -0.211621 0.317544 +v 0.105242 -0.184597 0.283295 +v -0.252128 0.412910 -0.474499 +v -0.037371 -0.003436 0.422749 +v -0.047908 0.081874 0.421731 +v -0.138702 0.175499 0.183264 +v -0.173162 0.305080 -0.292806 +v -0.537475 -0.281310 0.373881 +v -0.248998 0.178006 0.082745 +v 0.160410 -0.203732 0.308175 +v 0.127517 -0.188780 0.267253 +v 0.082827 -0.307742 0.442588 +v 0.075170 -0.145246 0.318911 +v 0.252308 -0.213860 0.072662 +v 0.144627 0.268320 0.093170 +v 0.126686 -0.170185 0.206183 +v 0.143187 0.228852 0.089280 +v 0.003512 -0.152166 0.467334 +v -0.458646 0.135365 0.306794 +v -0.021214 -0.053290 0.417869 +v -0.339633 0.156603 0.407473 +v -0.368817 0.162365 0.347525 +v 0.251511 -0.206688 0.100122 +v 0.221392 -0.190608 0.149802 +v 0.192095 -0.184968 0.223194 +v 0.105334 -0.166562 0.235974 +v -0.292020 0.156035 0.439047 +v 0.177306 0.194553 0.109775 +v 0.195791 -0.192721 0.264351 +v 0.252523 -0.204176 0.126573 +v -0.179642 0.390627 -0.536776 +v -0.493280 -0.119922 0.470909 +v -0.318656 0.125881 0.499268 +v -0.293795 0.263837 -0.129411 +v 0.019560 -0.243979 0.472852 +v -0.160689 0.481872 -0.704296 +v 0.213082 -0.203476 0.044663 +v -0.453328 -0.496843 0.138500 +v 0.226717 -0.191097 0.191169 +v -0.024691 -0.007950 0.399497 +v -0.185498 0.366299 -0.464871 +v -0.206768 0.384980 -0.467767 
+v 0.131849 -0.173520 0.104430 +v 0.081120 -0.171524 0.061782 +v 0.539906 0.464587 -0.304908 +v 0.272353 -0.233149 0.322267 +v -0.506511 0.038883 0.254773 +v -0.135485 0.085997 -0.000572 +v 0.144924 -0.172270 0.142416 +v 0.449177 0.467508 -0.191192 +v 0.023045 -0.289959 0.465537 +v 0.016223 -0.195867 0.472304 +v -0.297045 0.175167 0.187434 +v -0.202075 0.055563 -0.263052 +v -0.005616 -0.027116 0.345723 +v -0.490400 0.082093 0.380145 +v 0.404316 0.252455 -0.023473 +v -0.159870 0.471424 -0.678066 +v -0.482202 -0.124332 0.105744 +v -0.546239 -0.197443 0.322105 +v 0.277115 -0.221569 0.294470 +v 0.522248 0.507442 -0.306515 +v 0.065390 -0.113456 0.183430 +v -0.015537 -0.002970 0.363982 +v -0.080781 0.177337 0.197940 +v -0.056571 0.205305 0.154587 +v 0.042662 -0.260115 0.435087 +v -0.219644 0.182970 0.323634 +v -0.229831 0.178911 0.370550 +v 0.005770 0.264494 0.116275 +v -0.019989 0.031651 0.352027 +v 0.078618 -0.128428 0.219213 +v -0.330260 0.171600 0.345331 +v -0.350163 0.145937 0.100928 +v 0.485021 0.475380 -0.227435 +v 0.372908 0.230663 0.011270 +v -0.273694 0.200031 0.008252 +v 0.040805 -0.214601 0.440475 +v 0.028890 -0.175088 0.447243 +v -0.313204 0.168179 0.011297 +v 0.073655 -0.129296 0.283692 +v 0.090060 -0.152739 0.256797 +v -0.160628 -0.543028 -0.171420 +v -0.246070 0.237692 -0.054825 +v -0.218099 0.238026 -0.076158 +v -0.429580 0.133324 0.179226 +v 0.032091 -0.141693 0.420680 +v -0.190474 0.248068 -0.132753 +v -0.166960 0.245263 -0.173818 +v -0.486869 -0.036776 0.225018 +v 0.086149 -0.146312 0.160366 +v -0.196021 0.132971 0.503573 +v -0.502744 -0.343143 0.459607 +v 0.051634 -0.169366 0.401842 +v 0.186108 0.220346 -0.104812 +v 0.165571 0.135867 0.099307 +v -0.519985 -0.159296 0.410735 +v -0.156037 0.135074 -0.015256 +v -0.164387 0.138978 0.024094 +v -0.340296 0.418757 -0.514377 +v -0.023712 0.148672 0.231732 +v 0.518583 0.445662 -0.248673 +v -0.130665 0.440964 -0.715634 +v -0.477947 -0.166148 0.099286 +v -0.449236 0.130229 0.354371 +v 0.124596 0.150818 
-0.049723 +v -0.265254 0.393993 -0.417531 +v -0.215713 -0.499806 -0.136171 +v -0.204006 0.452395 -0.596864 +v -0.176904 0.453710 -0.630979 +v -0.156763 0.204619 -0.130319 +v -0.136406 0.161232 -0.141701 +v 0.004016 -0.072764 0.386375 +v -0.106268 0.171349 0.348125 +v -0.310870 0.431998 -0.600567 +v -0.197304 0.182284 0.282076 +v -0.189581 0.179877 0.226744 +v -0.306613 0.163372 0.071696 +v -0.295254 0.180387 0.029288 +v -0.301282 0.168654 -0.211449 +v -0.079973 0.207649 0.111471 +v 0.009123 0.009909 0.263645 +v -0.406537 -0.458253 0.395461 +v -0.126931 0.418657 -0.731866 +v 0.238403 0.153178 -0.044591 +v -0.515115 -0.317697 0.385824 +v -0.006396 0.012574 0.300883 +v 0.027068 -0.046539 0.278307 +v -0.393143 0.132670 0.123490 +v -0.272999 0.022124 -0.079759 +# 1550 vertices, 0 vertices normals + +f 1203 370 679 +f 316 312 806 +f 608 653 1127 +f 1301 353 476 +f 354 353 1177 +f 47 1408 48 +f 30 410 372 +f 1544 607 1523 +f 617 1121 237 +f 33 631 1020 +f 110 1051 118 +f 1211 806 807 +f 452 439 484 +f 148 171 154 +f 738 46 758 +f 46 1196 745 +f 1170 1169 1187 +f 763 1500 379 +f 667 352 105 +f 1101 1136 1138 +f 100 1051 110 +f 1169 1156 1148 +f 915 835 1224 +f 113 305 326 +f 1290 1279 1311 +f 428 899 398 +f 1267 859 1161 +f 1049 330 136 +f 1420 1421 1394 +f 858 965 1082 +f 827 1315 836 +f 1219 1076 1164 +f 120 393 124 +f 1277 1287 1250 +f 1272 1205 627 +f 1389 414 863 +f 1211 844 316 +f 1051 100 1052 +f 198 26 25 +f 1523 905 17 +f 127 121 126 +f 379 1500 1231 +f 157 142 156 +f 878 1546 409 +f 1367 1344 1409 +f 722 247 877 +f 1141 1115 1103 +f 110 101 100 +f 358 663 335 +f 1141 1114 1143 +f 186 113 326 +f 1169 1170 1156 +f 221 226 435 +f 883 873 882 +f 341 326 340 +f 853 165 227 +f 605 412 207 +f 1496 924 74 +f 1353 1368 515 +f 419 571 73 +f 1246 221 435 +f 641 343 152 +f 165 853 1210 +f 1356 5 903 +f 317 286 5 +f 798 311 1161 +f 631 33 628 +f 314 499 628 +f 152 343 681 +f 1308 1320 60 +f 1121 843 845 +f 1394 1421 1402 +f 1471 634 1065 +f 607 1544 642 +f 501 424 497 +f 384 
730 1252 +f 1282 977 937 +f 320 831 1389 +f 1310 318 639 +f 1126 905 608 +f 355 754 105 +f 515 60 1320 +f 316 806 1211 +f 1308 655 1320 +f 646 1236 1198 +f 595 596 589 +f 1253 975 1244 +f 40 1044 651 +f 999 978 600 +f 990 1026 868 +f 1468 612 678 +f 97 228 760 +f 1016 1015 1023 +f 171 148 170 +f 1429 477 111 +f 1419 371 579 +f 585 988 1414 +f 1062 779 830 +f 1375 1529 1530 +f 1252 212 1261 +f 2 247 3 +f 908 1005 1215 +f 1408 47 984 +f 678 1282 1468 +f 739 746 775 +f 408 212 730 +f 1487 1455 1471 +f 157 140 142 +f 1197 1201 1158 +f 678 977 1282 +f 990 868 28 +f 785 91 480 +f 480 91 430 +f 1344 838 379 +f 466 1516 1238 +f 759 747 739 +f 344 332 408 +f 677 829 676 +f 1206 1207 1542 +f 408 730 344 +f 728 717 716 +f 936 212 408 +f 237 1121 845 +f 1208 290 1503 +f 384 98 730 +f 975 464 1244 +f 628 33 314 +f 1198 654 68 +f 208 724 86 +f 191 1464 1237 +f 1196 46 75 +f 642 1544 535 +f 443 506 437 +f 1170 1187 1188 +f 1166 1167 1155 +f 420 499 578 +f 778 818 819 +f 1252 730 212 +f 1178 1175 459 +f 678 612 593 +f 727 511 1228 +f 1156 328 1148 +f 1168 1167 56 +f 1432 646 310 +f 482 485 339 +f 1166 1017 955 +f 1150 1157 1174 +f 25 897 198 +f 706 1049 1134 +f 859 798 1161 +f 105 754 667 +f 1204 1007 940 +f 397 1163 276 +f 1086 1132 1232 +f 519 516 518 +f 1021 436 369 +f 240 260 261 +f 1254 1269 1255 +f 1167 1168 1155 +f 304 340 305 +f 1199 1003 334 +f 1105 52 1104 +f 1277 1250 1251 +f 955 1167 1166 +f 1166 161 1017 +f 396 1357 1345 +f 859 1267 440 +f 655 1308 241 +f 1121 617 1190 +f 1353 591 117 +f 1185 1020 631 +f 774 46 745 +f 149 992 148 +f 1215 921 908 +f 1229 1255 1256 +f 77 281 1521 +f 830 54 1062 +f 564 555 547 +f 1478 17 1456 +f 1012 1495 1218 +f 587 96 318 +f 1538 597 1494 +f 1038 1520 929 +f 1449 1445 1401 +f 852 868 1026 +f 1395 888 103 +f 1326 1297 1325 +f 759 739 775 +f 902 888 790 +f 1241 1221 735 +f 1236 646 1432 +f 584 674 258 +f 1272 1243 1281 +f 493 862 1233 +f 117 1368 1353 +f 871 875 857 +f 1477 1151 1496 +f 941 643 1489 +f 536 530 534 +f 1394 1402 500 +f 
1419 1286 371 +f 75 738 80 +f 772 97 760 +f 977 678 362 +f 192 242 262 +f 244 865 1548 +f 645 521 433 +f 352 666 1049 +f 1330 22 1329 +f 1266 679 370 +f 390 201 668 +f 136 1465 1134 +f 517 362 678 +f 294 862 1304 +f 812 820 90 +f 888 1328 790 +f 608 905 607 +f 1436 1401 1445 +f 183 167 182 +f 535 533 1036 +f 852 672 868 +f 660 1144 426 +f 536 534 541 +f 1374 1318 284 +f 70 1078 818 +f 20 1521 281 +f 1236 654 1198 +f 749 533 535 +f 1251 1079 579 +f 758 46 774 +f 1416 1144 660 +f 723 708 597 +f 804 1245 27 +f 1124 1496 74 +f 747 740 739 +f 1340 864 381 +f 24 434 23 +f 225 718 696 +f 742 756 743 +f 279 1349 1481 +f 148 154 149 +f 732 708 723 +f 807 1482 1211 +f 718 697 696 +f 743 756 744 +f 1435 1376 1322 +f 27 158 804 +f 694 707 714 +f 898 883 882 +f 284 1318 331 +f 1039 1037 1490 +f 75 46 738 +f 1291 857 875 +f 863 1328 40 +f 771 744 756 +f 718 225 736 +f 1094 1191 1193 +f 749 535 1544 +f 1076 1197 1158 +f 431 497 957 +f 737 718 736 +f 793 792 801 +f 709 831 320 +f 387 377 366 +f 579 1079 1419 +f 675 1315 827 +f 1277 839 1287 +f 1008 601 647 +f 810 809 823 +f 907 657 912 +f 1467 1055 39 +f 600 978 71 +f 1165 823 815 +f 811 799 810 +f 401 668 367 +f 1482 807 1163 +f 84 425 492 +f 352 667 666 +f 685 671 486 +f 260 240 239 +f 5 1356 317 +f 1233 862 294 +f 799 808 810 +f 945 1517 652 +f 426 1144 754 +f 253 1479 869 +f 1417 1350 1319 +f 1451 1108 144 +f 1163 807 276 +f 277 1245 804 +f 1535 975 1253 +f 1521 20 109 +f 359 173 1061 +f 409 948 878 +f 247 2 877 +f 1294 1286 1419 +f 26 119 25 +f 1000 894 653 +f 1456 17 905 +f 844 1465 136 +f 26 1425 119 +f 909 898 908 +f 823 809 815 +f 1049 706 352 +f 1169 1148 1168 +f 1477 1496 1124 +f 266 27 1245 +f 733 734 704 +f 1179 2 3 +f 314 578 499 +f 1164 1334 1219 +f 1326 1325 1331 +f 253 252 918 +f 181 1467 39 +f 819 818 390 +f 1126 608 1127 +f 729 662 656 +f 136 1134 1049 +f 63 1408 1253 +f 343 641 342 +f 908 921 909 +f 754 355 426 +f 1523 607 905 +f 978 997 993 +f 953 947 946 +f 905 1126 1456 +f 954 947 953 +f 71 978 146 +f 1288 
951 950 +f 370 867 1288 +f 13 114 12 +f 1244 63 1253 +f 873 872 882 +f 917 84 492 +f 405 174 1333 +f 1499 1514 1507 +f 70 1112 1078 +f 668 819 390 +f 390 818 1078 +f 48 1408 63 +f 656 943 729 +f 983 668 201 +f 845 335 663 +f 146 978 993 +f 145 146 993 +f 47 145 984 +f 617 367 983 +f 668 983 367 +f 909 921 923 +f 401 819 668 +f 1265 63 1244 +f 999 997 978 +f 1075 1074 1085 +f 983 1190 617 +f 992 169 148 +f 1030 435 226 +f 139 1007 1116 +f 346 619 187 +f 192 262 970 +f 434 24 119 +f 659 657 907 +f 109 53 1521 +f 1002 365 1040 +f 1114 1141 1103 +f 330 1049 666 +f 1328 888 1395 +f 1103 1115 1102 +f 810 808 809 +f 461 477 1429 +f 333 642 535 +f 1015 1016 1092 +f 845 663 237 +f 284 1234 1374 +f 533 749 1080 +f 623 1393 884 +f 90 441 812 +f 1541 509 602 +f 1354 1352 1453 +f 773 794 507 +f 839 1277 302 +f 77 346 281 +f 107 376 1346 +f 170 222 171 +f 535 1036 333 +f 1517 265 652 +f 749 1544 1523 +f 829 677 451 +f 1093 55 795 +f 53 1318 1374 +f 260 269 261 +f 1380 1450 1444 +f 622 1314 1292 +f 4 14 6 +f 326 305 340 +f 968 464 975 +f 370 1288 950 +f 924 720 74 +f 1427 1350 1417 +f 867 370 1203 +f 1096 2 1179 +f 169 170 148 +f 1054 1055 43 +f 80 1213 551 +f 658 1400 210 +f 26 198 1332 +f 1390 273 834 +f 1396 40 1395 +f 5 647 903 +f 1489 1372 941 +f 1458 251 492 +f 971 374 1452 +f 167 126 157 +f 552 1072 621 +f 579 302 1251 +f 1085 1074 107 +f 1224 835 209 +f 1471 1368 634 +f 306 275 1304 +f 1324 1280 1 +f 649 872 893 +f 1056 1285 380 +f 740 691 732 +f 1003 1199 842 +f 104 102 112 +f 48 134 51 +f 1348 10 1536 +f 611 1060 1173 +f 507 394 773 +f 1289 1270 1239 +f 1204 1116 1007 +f 1488 1372 1489 +f 143 1300 1034 +f 953 972 976 +f 1355 1309 1343 +f 1384 115 1273 +f 1466 124 393 +f 680 325 733 +f 12 1473 429 +f 1179 1474 1096 +f 1338 896 1534 +f 1523 17 1227 +f 850 832 116 +f 381 832 333 +f 61 1461 1462 +f 1427 131 1350 +f 605 691 412 +f 128 127 442 +f 361 12 429 +f 1139 329 1171 +f 257 1529 16 +f 1429 889 461 +f 864 373 832 +f 1034 19 143 +f 1130 307 1176 +f 924 489 1263 +f 36 
1264 1248 +f 1004 93 1095 +f 1460 693 1424 +f 167 442 127 +f 172 153 155 +f 906 31 529 +f 454 470 591 +f 281 10 7 +f 170 169 240 +f 885 1130 1176 +f 155 153 171 +f 85 296 1507 +f 487 1225 1025 +f 1537 958 1536 +f 1154 1109 1131 +f 359 777 173 +f 1519 1195 1268 +f 1340 381 1036 +f 42 160 348 +f 216 1546 138 +f 1102 1018 1011 +f 239 240 796 +f 1157 1139 1172 +f 869 682 881 +f 156 142 1116 +f 1129 1135 1006 +f 1017 176 955 +f 64 1095 1024 +f 123 133 129 +f 220 73 571 +f 1115 1141 149 +f 279 57 1381 +f 755 455 751 +f 451 631 829 +f 590 587 494 +f 86 1147 644 +f 1155 327 272 +f 22 1311 1289 +f 72 242 261 +f 1243 383 1222 +f 260 239 308 +f 824 1336 803 +f 1505 202 1504 +f 1392 927 1510 +f 642 608 607 +f 154 153 1115 +f 365 1002 195 +f 463 595 603 +f 680 27 325 +f 846 622 1292 +f 896 949 1534 +f 1138 1135 1092 +f 1214 1157 1189 +f 1139 1171 1172 +f 1202 1111 1133 +f 1494 1549 1506 +f 1447 1327 1351 +f 297 865 296 +f 38 1429 111 +f 1311 1270 1289 +f 768 2 1096 +f 328 308 327 +f 1357 1388 1345 +f 295 1535 989 +f 1003 205 711 +f 332 1046 408 +f 690 1549 1494 +f 995 166 987 +f 338 347 401 +f 1441 81 188 +f 328 327 1148 +f 1100 1079 1251 +f 925 1216 1120 +f 959 931 952 +f 1386 1434 1457 +f 78 188 1453 +f 1432 705 1236 +f 1136 1101 1149 +f 332 1021 386 +f 152 681 886 +f 959 952 967 +f 44 363 795 +f 955 176 890 +f 3 247 525 +f 581 595 589 +f 102 176 1017 +f 341 1161 186 +f 340 303 378 +f 1267 378 899 +f 378 303 377 +f 1037 1541 602 +f 326 341 186 +f 1071 1074 1075 +f 1078 967 390 +f 955 890 798 +f 174 134 99 +f 1117 797 156 +f 186 94 104 +f 303 366 377 +f 160 451 178 +f 111 1539 1538 +f 1086 1232 1287 +f 101 1017 161 +f 1035 1039 1490 +f 1093 1196 966 +f 897 1405 198 +f 1291 875 901 +f 311 176 94 +f 112 101 110 +f 140 126 121 +f 1015 1129 1053 +f 76 491 1207 +f 221 183 226 +f 1299 1264 36 +f 899 377 398 +f 1069 1303 1153 +f 1309 1289 1239 +f 885 1176 389 +f 413 373 864 +f 95 1046 175 +f 147 240 169 +f 440 470 454 +f 1032 1023 1015 +f 1424 1357 1403 +f 55 1093 966 +f 720 766 1073 
+f 147 992 1154 +f 378 377 899 +f 129 165 1210 +f 288 237 663 +f 1493 1372 13 +f 118 140 112 +f 259 1052 268 +f 118 139 140 +f 606 446 609 +f 378 1267 1161 +f 186 1161 311 +f 128 122 127 +f 216 93 217 +f 1223 1317 757 +f 1537 1536 10 +f 364 1084 1162 +f 385 1361 386 +f 104 112 121 +f 1182 796 940 +f 200 204 196 +f 168 442 184 +f 121 112 140 +f 566 1284 407 +f 1427 1417 963 +f 1128 36 403 +f 1333 1423 1462 +f 131 61 11 +f 118 1051 139 +f 142 140 139 +f 1182 1051 259 +f 141 468 422 +f 377 387 398 +f 1330 1329 1385 +f 1119 1013 1109 +f 1155 161 1166 +f 946 925 956 +f 430 422 480 +f 1052 100 272 +f 1051 1052 259 +f 182 156 922 +f 127 104 121 +f 623 345 853 +f 1343 60 515 +f 141 467 1235 +f 196 227 163 +f 145 48 146 +f 302 579 1184 +f 770 8 486 +f 796 259 239 +f 122 186 104 +f 963 688 319 +f 204 1393 227 +f 949 25 24 +f 1359 1425 26 +f 658 1525 1439 +f 177 1460 1440 +f 1060 416 1173 +f 936 408 1046 +f 1293 87 61 +f 1387 1365 1469 +f 345 1543 1053 +f 903 601 376 +f 1131 1117 1116 +f 139 1051 940 +f 150 133 442 +f 163 227 165 +f 241 1308 1269 +f 168 180 163 +f 798 1212 955 +f 1138 1136 1135 +f 1186 982 987 +f 1418 1273 6 +f 1420 726 1421 +f 1030 1090 435 +f 880 254 73 +f 827 1183 838 +f 1135 1136 304 +f 833 1246 435 +f 1444 1386 1377 +f 554 364 215 +f 1334 1164 1296 +f 764 212 936 +f 899 452 470 +f 439 899 428 +f 1030 1027 1043 +f 1305 471 821 +f 281 7 20 +f 691 605 708 +f 848 823 1165 +f 1043 254 1090 +f 442 167 183 +f 196 180 184 +f 1061 854 1098 +f 308 269 260 +f 122 104 127 +f 180 168 184 +f 1039 1063 1037 +f 1365 1436 1445 +f 1176 669 389 +f 1498 1487 1514 +f 1154 1114 1109 +f 206 1246 1513 +f 73 254 419 +f 150 163 165 +f 633 68 654 +f 1537 1379 1426 +f 1160 1262 215 +f 1382 995 448 +f 982 973 987 +f 841 857 1291 +f 139 940 1007 +f 1116 1204 1131 +f 833 635 1513 +f 880 73 220 +f 1015 1135 1129 +f 615 884 1393 +f 1129 1210 345 +f 509 632 1363 +f 298 320 1389 +f 808 746 788 +f 183 182 226 +f 1206 331 1207 +f 591 614 633 +f 452 572 614 +f 566 407 465 +f 150 168 163 +f 
1230 1171 1188 +f 108 238 306 +f 815 847 1165 +f 1177 1252 1261 +f 1546 216 409 +f 1320 454 1353 +f 591 1353 454 +f 406 1122 513 +f 188 78 1441 +f 236 505 503 +f 756 438 792 +f 1489 1337 1338 +f 1428 878 777 +f 204 216 217 +f 423 314 159 +f 287 262 242 +f 1397 661 1047 +f 409 1513 635 +f 138 1095 93 +f 365 195 980 +f 432 948 635 +f 93 1004 317 +f 531 1404 173 +f 855 883 1216 +f 333 832 850 +f 298 651 44 +f 680 765 786 +f 317 1004 1020 +f 1080 1227 164 +f 994 991 894 +f 615 217 317 +f 64 423 159 +f 1143 1114 1154 +f 80 551 194 +f 1081 1132 364 +f 160 1185 368 +f 1236 1065 117 +f 287 1150 262 +f 814 1194 813 +f 750 590 741 +f 1536 958 404 +f 1022 1096 1070 +f 836 847 827 +f 144 1108 106 +f 591 452 614 +f 1531 315 688 +f 1474 1069 1028 +f 981 986 973 +f 529 35 224 +f 1534 1348 1338 +f 1343 1321 60 +f 504 565 789 +f 814 895 1194 +f 1056 487 460 +f 854 1061 870 +f 1259 1069 1152 +f 778 1275 681 +f 1096 1022 768 +f 887 625 1550 +f 82 1205 1297 +f 1006 304 123 +f 787 895 1202 +f 938 667 1144 +f 493 1416 866 +f 636 556 312 +f 307 1130 273 +f 1120 909 923 +f 468 411 424 +f 1273 1418 632 +f 1528 290 443 +f 462 481 1159 +f 1342 446 1113 +f 1153 641 456 +f 806 1026 990 +f 1300 1073 1034 +f 814 851 834 +f 752 1140 812 +f 1468 1268 1195 +f 1144 667 754 +f 445 1522 939 +f 899 439 452 +f 449 505 232 +f 1373 927 502 +f 291 673 199 +f 602 1363 1379 +f 418 927 1392 +f 1526 996 825 +f 816 19 1034 +f 313 285 293 +f 1276 669 1176 +f 330 938 580 +f 866 19 108 +f 1385 1455 1472 +f 850 1382 1341 +f 1411 773 124 +f 1005 908 649 +f 282 1304 275 +f 603 588 594 +f 108 19 238 +f 580 285 313 +f 1286 1294 74 +f 719 701 699 +f 1360 1411 124 +f 1471 1355 1368 +f 930 1219 1077 +f 234 79 444 +f 1087 1376 1201 +f 474 762 837 +f 1479 1524 869 +f 90 320 55 +f 1260 826 1140 +f 1174 1157 1214 +f 1159 203 462 +f 481 463 957 +f 1113 453 234 +f 1154 1131 1204 +f 89 321 965 +f 893 1454 45 +f 461 179 477 +f 639 783 1310 +f 294 1304 282 +f 1276 1176 307 +f 1319 9 1417 +f 707 692 66 +f 1178 447 1175 +f 503 475 
235 +f 712 702 710 +f 750 741 1029 +f 765 692 337 +f 479 1142 727 +f 141 422 498 +f 280 1158 1437 +f 312 556 806 +f 316 313 312 +f 885 1111 1130 +f 290 197 1180 +f 542 543 537 +f 1000 994 894 +f 452 538 572 +f 391 1206 1542 +f 141 1235 468 +f 336 1045 292 +f 826 709 820 +f 1435 710 1437 +f 207 1549 605 +f 1509 963 1417 +f 1422 1402 1421 +f 1276 285 282 +f 1211 1482 1465 +f 475 604 339 +f 1495 1223 939 +f 1390 852 1026 +f 1421 726 1358 +f 739 740 746 +f 37 429 38 +f 1124 74 1294 +f 1012 1099 1470 +f 636 293 274 +f 1469 1444 1377 +f 330 316 136 +f 556 1026 806 +f 477 179 62 +f 608 642 1341 +f 839 302 660 +f 771 792 793 +f 752 712 710 +f 276 990 28 +f 349 603 604 +f 1117 1109 797 +f 743 719 699 +f 510 1274 347 +f 12 892 13 +f 1192 1432 725 +f 445 706 1107 +f 1105 1012 822 +f 105 706 355 +f 59 1518 1347 +f 1358 1378 1411 +f 27 266 325 +f 505 1528 1200 +f 20 189 109 +f 1068 509 1541 +f 1424 693 1407 +f 964 822 28 +f 446 598 453 +f 705 1394 1487 +f 942 1519 1518 +f 1391 179 461 +f 554 1081 364 +f 476 353 528 +f 591 633 654 +f 611 417 1060 +f 267 885 389 +f 627 842 1199 +f 1414 494 583 +f 974 1070 30 +f 592 593 612 +f 825 764 936 +f 1226 385 504 +f 1517 945 1364 +f 1506 830 779 +f 1244 464 1265 +f 242 72 287 +f 203 79 234 +f 449 431 349 +f 456 152 1152 +f 1065 1487 1471 +f 618 436 1021 +f 834 851 1390 +f 224 35 137 +f 1115 153 270 +f 604 475 349 +f 480 501 784 +f 1351 1337 643 +f 876 1512 1406 +f 1196 1039 966 +f 811 810 840 +f 443 1208 506 +f 1301 1302 384 +f 468 1235 411 +f 728 1228 218 +f 79 473 444 +f 799 800 759 +f 1159 1235 203 +f 234 444 299 +f 1378 725 761 +f 231 1235 467 +f 769 232 438 +f 1008 130 944 +f 505 1200 508 +f 482 594 258 +f 1123 417 175 +f 712 1242 50 +f 779 1399 361 +f 621 388 1532 +f 669 283 389 +f 1043 1027 1031 +f 1425 1359 1326 +f 929 1520 960 +f 338 617 237 +f 1133 885 267 +f 1261 212 764 +f 892 1442 13 +f 1050 758 787 +f 475 339 235 +f 427 438 508 +f 988 585 596 +f 1425 693 665 +f 992 149 1143 +f 349 431 603 +f 283 478 1515 +f 1237 547 555 +f 
773 1411 761 +f 1390 556 273 +f 1360 1401 1430 +f 1507 1514 85 +f 453 1113 446 +f 554 1151 1477 +f 411 497 424 +f 369 436 565 +f 1401 1360 1449 +f 1426 1363 1418 +f 602 187 1486 +f 574 641 1153 +f 1351 643 1447 +f 278 253 869 +f 1248 1264 1317 +f 1019 1011 263 +f 13 404 114 +f 162 629 776 +f 782 1545 65 +f 1084 757 1317 +f 232 508 438 +f 1397 658 210 +f 1319 664 1369 +f 413 358 373 +f 231 233 1305 +f 1227 749 1523 +f 1464 546 558 +f 1304 862 108 +f 1165 1540 848 +f 906 1366 1005 +f 394 120 773 +f 233 467 586 +f 236 1528 505 +f 134 405 51 +f 1137 751 1073 +f 310 748 760 +f 1133 560 1202 +f 565 904 731 +f 777 359 1428 +f 1374 638 77 +f 59 920 1531 +f 863 298 1389 +f 768 800 828 +f 133 128 442 +f 1112 959 1078 +f 382 135 782 +f 521 1082 1047 +f 1089 1480 1088 +f 7 1534 949 +f 782 135 816 +f 1375 1478 559 +f 202 1505 1369 +f 929 867 1038 +f 1036 381 333 +f 1282 1268 1468 +f 164 106 1108 +f 1252 353 1301 +f 1511 1387 191 +f 131 11 224 +f 1091 362 517 +f 1021 592 626 +f 1341 1382 448 +f 664 49 673 +f 1433 1502 1501 +f 82 627 1205 +f 1243 1271 1281 +f 176 311 890 +f 1417 9 1508 +f 1366 32 1203 +f 18 887 1022 +f 942 125 1391 +f 839 426 355 +f 1395 40 1328 +f 1461 644 1451 +f 574 209 656 +f 622 846 1396 +f 31 49 35 +f 1257 336 1033 +f 728 218 407 +f 581 573 582 +f 1021 540 592 +f 1453 980 1354 +f 1345 1388 1331 +f 309 218 511 +f 1005 1366 679 +f 1260 790 414 +f 625 471 162 +f 1367 291 1454 +f 661 1397 181 +f 496 467 141 +f 1524 715 682 +f 850 642 333 +f 1479 918 337 +f 218 1228 511 +f 232 769 784 +f 395 429 1494 +f 821 299 444 +f 610 540 344 +f 317 1356 615 +f 267 1173 1133 +f 1492 1502 1446 +f 18 849 1014 +f 1026 556 1390 +f 238 19 816 +f 1191 1140 1322 +f 422 430 498 +f 1307 1458 1313 +f 1433 1402 1422 +f 748 772 760 +f 429 395 361 +f 373 843 116 +f 187 619 1486 +f 78 1327 1447 +f 465 407 309 +f 1283 717 1284 +f 849 18 1398 +f 575 569 674 +f 945 245 1364 +f 782 65 488 +f 774 781 1194 +f 610 750 606 +f 1196 75 194 +f 485 1178 197 +f 108 862 866 +f 474 1340 762 +f 1204 940 
147 +f 597 1500 723 +f 48 145 47 +f 504 416 1226 +f 613 457 618 +f 162 586 629 +f 463 603 431 +f 309 407 218 +f 281 346 1485 +f 1252 1301 384 +f 1382 166 995 +f 815 788 1183 +f 473 203 1235 +f 727 66 479 +f 625 887 1014 +f 1242 712 752 +f 373 335 843 +f 1511 1001 1483 +f 1372 1493 941 +f 96 639 318 +f 820 709 320 +f 369 386 1021 +f 15 115 1519 +f 525 574 1153 +f 651 363 44 +f 491 1491 1057 +f 598 472 453 +f 459 458 1178 +f 312 313 293 +f 1079 1339 1419 +f 15 1519 942 +f 529 224 934 +f 1115 1125 1102 +f 480 422 424 +f 851 1396 846 +f 1454 199 45 +f 396 1371 399 +f 689 550 1259 +f 950 923 1266 +f 856 1525 81 +f 388 1045 315 +f 519 532 522 +f 479 66 357 +f 834 273 1130 +f 300 1398 974 +f 893 872 675 +f 821 1014 849 +f 1275 778 401 +f 1296 466 248 +f 134 63 99 +f 999 559 1278 +f 1359 1332 1326 +f 766 1137 1073 +f 773 120 124 +f 1352 271 1406 +f 839 355 83 +f 714 66 727 +f 1185 631 368 +f 261 269 72 +f 1207 1057 1542 +f 1426 1418 1537 +f 1084 1317 1247 +f 399 1371 1370 +f 1463 599 1464 +f 234 299 1113 +f 59 1347 920 +f 1213 731 703 +f 1060 417 416 +f 234 453 462 +f 908 898 882 +f 762 164 351 +f 270 193 824 +f 1386 1444 1450 +f 1108 1147 164 +f 712 1035 29 +f 500 1433 85 +f 1200 1528 437 +f 460 185 648 +f 339 594 482 +f 944 907 933 +f 675 872 1315 +f 1226 1123 385 +f 603 595 588 +f 197 290 1528 +f 1127 997 999 +f 684 697 718 +f 18 974 1398 +f 339 485 235 +f 750 610 344 +f 1340 474 358 +f 1178 485 482 +f 785 225 91 +f 984 1535 1408 +f 1268 1282 1347 +f 700 67 331 +f 319 1427 963 +f 192 222 242 +f 426 839 660 +f 1456 1126 1278 +f 1504 202 291 +f 296 21 1507 +f 742 684 718 +f 82 1332 1405 +f 421 621 1532 +f 849 1398 1091 +f 469 39 1054 +f 563 577 585 +f 103 622 1395 +f 1497 1409 1231 +f 566 630 1284 +f 171 222 192 +f 1135 1015 1092 +f 200 1513 409 +f 19 866 1184 +f 1113 299 1342 +f 1200 427 508 +f 261 242 222 +f 885 1133 1111 +f 1144 1233 938 +f 740 734 733 +f 450 479 357 +f 802 420 578 +f 501 449 784 +f 599 539 546 +f 588 584 594 +f 101 272 100 +f 259 308 239 +f 1528 236 
197 +f 329 269 328 +f 913 453 472 +f 50 92 966 +f 525 1303 3 +f 235 197 236 +f 736 784 769 +f 272 327 268 +f 616 1033 336 +f 331 1206 700 +f 400 720 1073 +f 680 733 704 +f 450 586 467 +f 268 327 308 +f 339 604 594 +f 576 581 589 +f 1028 1259 550 +f 482 402 1178 +f 1197 1160 215 +f 111 37 38 +f 171 192 155 +f 505 508 232 +f 375 776 747 +f 1343 1309 1321 +f 261 222 170 +f 1189 1157 1172 +f 551 703 926 +f 1343 1368 1355 +f 465 309 430 +f 753 861 528 +f 465 430 91 +f 1492 1483 524 +f 1399 856 892 +f 496 450 467 +f 233 231 467 +f 1384 1362 626 +f 1017 101 102 +f 481 988 596 +f 329 1156 1171 +f 329 328 1156 +f 1139 72 329 +f 1172 1230 1221 +f 1230 1172 1171 +f 430 309 498 +f 1323 1 1279 +f 1171 1170 1188 +f 274 307 636 +f 66 692 713 +f 79 203 473 +f 1014 471 625 +f 849 1091 821 +f 1387 1511 1492 +f 269 308 328 +f 66 714 707 +f 1157 287 1139 +f 1180 197 458 +f 1488 1338 1348 +f 1354 195 223 +f 12 361 892 +f 1463 1464 191 +f 1377 599 1463 +f 540 1021 332 +f 1469 191 1387 +f 1220 1187 1229 +f 1230 1188 1240 +f 1188 1187 1220 +f 396 1403 1357 +f 1235 1159 411 +f 1150 243 262 +f 1278 559 1456 +f 1458 130 348 +f 517 593 609 +f 1208 443 290 +f 481 596 463 +f 1235 231 473 +f 512 402 482 +f 1504 62 1505 +f 45 906 1005 +f 876 1405 1415 +f 151 886 1274 +f 1442 1441 1493 +f 1169 1168 1187 +f 287 72 1139 +f 1525 658 69 +f 596 595 463 +f 124 1466 1481 +f 351 164 1147 +f 575 674 584 +f 431 957 463 +f 188 81 219 +f 102 94 176 +f 1336 223 195 +f 1241 1222 1221 +f 1221 1222 1189 +f 1120 883 898 +f 737 736 769 +f 1431 1421 1358 +f 936 95 825 +f 594 604 603 +f 501 431 449 +f 235 485 197 +f 51 405 16 +f 670 460 702 +f 411 957 497 +f 577 576 589 +f 354 1009 67 +f 480 784 785 +f 7 949 20 +f 1171 1156 1170 +f 1240 1188 1239 +f 1199 1174 1214 +f 1385 1329 1309 +f 501 497 431 +f 349 503 449 +f 407 1284 728 +f 494 1414 472 +f 785 784 736 +f 629 734 747 +f 424 422 468 +f 1365 1445 1459 +f 1178 458 197 +f 626 612 1195 +f 1503 290 1180 +f 1248 1317 1223 +f 655 241 859 +f 641 152 456 +f 1309 1239 1295 
+f 1256 1220 1229 +f 531 173 777 +f 1331 1325 1345 +f 735 1258 1279 +f 1532 937 421 +f 153 172 193 +f 1470 1495 1012 +f 993 985 145 +f 1299 1094 1247 +f 303 1149 324 +f 1443 1434 1386 +f 1254 1212 798 +f 1239 1220 1256 +f 399 1440 1403 +f 1472 1438 1330 +f 1438 1472 1499 +f 1531 920 1532 +f 1279 1 735 +f 1 1280 735 +f 627 82 876 +f 822 52 1105 +f 1413 1316 1412 +f 143 1184 371 +f 545 530 536 +f 1254 1255 1212 +f 1256 1255 1269 +f 714 716 715 +f 1270 1240 1239 +f 1279 1270 1311 +f 1222 1214 1189 +f 1044 40 1396 +f 1325 1324 1345 +f 1105 902 1306 +f 674 570 258 +f 1081 1100 1249 +f 523 532 553 +f 1071 1066 1074 +f 1187 56 1229 +f 1031 1027 1013 +f 123 304 305 +f 859 241 1269 +f 1295 1256 1308 +f 1370 22 1330 +f 735 1230 1258 +f 1271 1222 1241 +f 824 193 1336 +f 996 782 488 +f 383 1199 1214 +f 537 519 542 +f 542 520 527 +f 537 561 532 +f 539 523 546 +f 1522 445 1107 +f 547 1237 1464 +f 371 1184 579 +f 1167 955 1212 +f 2 768 828 +f 735 1221 1230 +f 267 389 1515 +f 1112 70 1146 +f 523 522 532 +f 1262 1151 554 +f 1250 1100 1251 +f 364 1132 1084 +f 1132 1086 757 +f 207 1506 1549 +f 1355 1471 1455 +f 94 102 104 +f 399 1370 1438 +f 1256 1269 1308 +f 1239 1256 1295 +f 243 334 1003 +f 842 205 1003 +f 1415 1327 1512 +f 124 1481 1349 +f 720 924 766 +f 1088 214 229 +f 665 491 879 +f 1184 143 19 +f 519 537 532 +f 561 562 553 +f 705 1432 1192 +f 1369 1505 9 +f 388 1072 292 +f 1174 1199 334 +f 897 914 1415 +f 817 41 1316 +f 698 1313 1458 +f 516 519 41 +f 414 790 863 +f 561 537 543 +f 561 543 576 +f 546 523 553 +f 1157 1150 287 +f 580 316 330 +f 1086 1217 757 +f 825 95 1526 +f 705 1192 726 +f 22 1290 1311 +f 399 1403 396 +f 1512 1327 1352 +f 1275 886 681 +f 1316 41 522 +f 296 1533 21 +f 536 541 544 +f 527 530 542 +f 573 575 582 +f 303 304 1136 +f 1317 1264 1247 +f 1374 1521 53 +f 322 1145 283 +f 939 1522 1495 +f 850 1341 642 +f 352 706 105 +f 386 1361 332 +f 798 859 1254 +f 582 575 584 +f 726 1420 705 +f 1371 1323 1290 +f 1329 22 1289 +f 542 519 518 +f 549 543 542 +f 1386 1450 1443 
+f 1425 1326 1407 +f 1377 1457 539 +f 1469 1463 191 +f 753 1483 1001 +f 1376 1435 1437 +f 753 528 524 +f 538 452 514 +f 846 852 1390 +f 980 1453 188 +f 319 1293 1427 +f 1493 13 1442 +f 653 997 1127 +f 536 544 569 +f 557 536 569 +f 965 1083 1082 +f 573 549 575 +f 545 536 557 +f 705 1420 1394 +f 543 549 573 +f 539 1316 523 +f 1279 1258 1240 +f 1020 1185 286 +f 396 1323 1371 +f 558 564 547 +f 396 1345 1312 +f 587 590 96 +f 1440 1460 1424 +f 318 583 587 +f 842 876 1406 +f 1459 640 1444 +f 672 1292 964 +f 270 153 193 +f 1050 787 1202 +f 569 544 548 +f 549 545 557 +f 1260 1140 1299 +f 549 557 575 +f 1316 522 523 +f 1377 1386 1457 +f 1340 358 864 +f 577 562 576 +f 1229 1212 1255 +f 1001 1511 1237 +f 634 117 1065 +f 1188 1220 1239 +f 1370 1290 22 +f 1280 1241 735 +f 1325 1281 1324 +f 1325 1272 1281 +f 76 53 23 +f 1512 876 1415 +f 1547 244 1542 +f 1124 1294 1477 +f 1223 403 1248 +f 582 588 595 +f 575 557 569 +f 549 542 545 +f 553 532 561 +f 1414 913 472 +f 1423 32 1527 +f 1368 117 634 +f 1483 753 524 +f 1370 1371 1290 +f 1512 1352 1406 +f 1289 1309 1329 +f 416 417 1226 +f 1279 1290 1323 +f 1409 62 1504 +f 1248 403 36 +f 1272 1297 1205 +f 268 1052 272 +f 918 1392 256 +f 569 568 674 +f 913 1414 988 +f 1213 703 551 +f 590 598 741 +f 539 599 1377 +f 851 814 1396 +f 914 1351 1327 +f 403 1470 1099 +f 895 814 721 +f 382 996 478 +f 470 452 591 +f 1358 726 1378 +f 725 726 1192 +f 669 322 283 +f 434 1425 665 +f 562 558 553 +f 1203 679 1366 +f 665 879 434 +f 563 583 564 +f 1287 1217 1086 +f 725 1378 726 +f 569 548 568 +f 1065 705 1487 +f 1209 675 1367 +f 1385 1309 1355 +f 1280 1324 1271 +f 825 488 1181 +f 24 189 949 +f 1414 583 563 +f 442 168 150 +f 1407 1326 1388 +f 660 302 1416 +f 489 1496 1151 +f 1242 441 92 +f 414 1389 831 +f 577 596 585 +f 1306 1260 1299 +f 36 1306 1299 +f 581 582 595 +f 632 1362 1384 +f 576 543 573 +f 564 558 563 +f 103 888 902 +f 598 590 494 +f 524 1548 1410 +f 620 1177 1261 +f 761 725 760 +f 93 317 217 +f 1353 515 1320 +f 1345 1324 1312 +f 876 842 627 +f 1382 
850 116 +f 577 589 596 +f 481 913 988 +f 1147 1108 644 +f 598 446 1029 +f 639 861 783 +f 1057 1207 491 +f 487 1056 380 +f 517 299 1091 +f 313 316 580 +f 1343 515 1368 +f 1385 1355 1455 +f 957 1159 481 +f 526 645 415 +f 494 472 598 +f 585 1414 563 +f 571 419 1042 +f 913 462 453 +f 813 651 1044 +f 1515 1526 611 +f 172 243 711 +f 1477 1339 1100 +f 1077 1160 930 +f 1434 1412 1457 +f 117 591 654 +f 1261 1181 620 +f 1297 1272 1325 +f 1178 402 447 +f 1001 1237 555 +f 475 503 349 +f 913 481 462 +f 1536 1372 1488 +f 353 1252 1177 +f 205 1406 271 +f 1425 1407 693 +f 130 1008 42 +f 795 363 781 +f 1287 445 1217 +f 31 35 529 +f 1531 1532 315 +f 1218 1163 397 +f 567 674 568 +f 562 561 576 +f 897 896 914 +f 973 953 976 +f 648 638 460 +f 794 761 228 +f 1424 1388 1357 +f 1438 1507 21 +f 1475 1533 865 +f 78 1453 1352 +f 1133 1173 560 +f 368 631 451 +f 2 828 877 +f 258 570 512 +f 264 689 1259 +f 357 586 450 +f 1102 1011 1103 +f 1533 1475 177 +f 248 466 211 +f 1530 1529 144 +f 783 555 1310 +f 594 584 258 +f 551 926 1063 +f 741 598 1029 +f 563 558 562 +f 43 1467 671 +f 223 271 1354 +f 385 369 504 +f 360 965 860 +f 1411 1430 1358 +f 902 790 1260 +f 576 573 581 +f 310 760 1432 +f 528 861 476 +f 610 609 593 +f 795 781 745 +f 1222 383 1214 +f 420 717 1283 +f 1215 1266 921 +f 620 211 466 +f 1195 612 1468 +f 399 1438 21 +f 380 1225 487 +f 414 831 1260 +f 1532 920 1347 +f 904 613 1068 +f 1472 1330 1385 +f 761 760 228 +f 234 462 203 +f 896 897 25 +f 1099 1012 1105 +f 542 530 545 +f 1432 760 725 +f 1295 1321 1309 +f 1514 1394 500 +f 1381 1349 279 +f 1287 839 83 +f 1484 1491 693 +f 87 319 86 +f 588 582 584 +f 1067 319 688 +f 1484 693 1460 +f 1332 1359 26 +f 1096 1474 1070 +f 219 88 1040 +f 59 1531 125 +f 125 1531 34 +f 1541 1063 926 +f 1518 125 942 +f 49 31 673 +f 533 1080 356 +f 835 943 656 +f 1342 299 517 +f 593 592 610 +f 826 820 812 +f 457 565 436 +f 1042 1011 1018 +f 1498 1455 1487 +f 1514 1487 1394 +f 21 1440 399 +f 1286 400 371 +f 1375 600 71 +f 764 1181 1261 +f 626 618 1021 +f 383 1243 
1272 +f 618 911 613 +f 1490 29 1035 +f 802 695 420 +f 50 966 1035 +f 547 1464 558 +f 332 344 540 +f 6 1537 1418 +f 30 1474 410 +f 533 356 1340 +f 675 838 1344 +f 664 1319 137 +f 751 1034 1073 +f 129 133 150 +f 403 1099 1128 +f 911 1362 509 +f 1091 246 362 +f 194 1063 1039 +f 117 654 1236 +f 733 691 740 +f 54 486 671 +f 1388 1326 1331 +f 1322 1376 1087 +f 488 65 1181 +f 28 672 964 +f 208 319 1067 +f 1209 1367 1454 +f 939 1223 1217 +f 257 1333 1462 +f 208 1257 724 +f 934 1366 906 +f 1448 1335 1076 +f 1258 1230 1240 +f 859 440 655 +f 1536 1488 1348 +f 781 363 1194 +f 1033 351 1257 +f 1024 359 1098 +f 1240 1270 1279 +f 746 740 732 +f 1228 728 716 +f 1469 1365 1444 +f 211 65 455 +f 1012 1218 822 +f 816 135 238 +f 1410 1548 297 +f 753 1001 783 +f 1072 552 916 +f 1094 1162 1247 +f 1507 1438 1499 +f 1292 1314 964 +f 503 235 236 +f 730 98 344 +f 119 1425 434 +f 444 473 1305 +f 651 813 363 +f 1410 1501 1492 +f 864 358 413 +f 1035 712 50 +f 113 133 123 +f 1304 108 306 +f 1046 95 936 +f 751 455 1034 +f 1502 1433 1422 +f 1339 1079 1100 +f 24 25 119 +f 1077 1263 489 +f 38 14 1429 +f 270 1125 1115 +f 248 455 755 +f 303 350 366 +f 321 1122 1048 +f 1249 1100 1250 +f 1232 1250 1287 +f 1292 672 846 +f 1223 1495 1470 +f 90 55 92 +f 99 63 1265 +f 274 285 1276 +f 354 67 700 +f 1029 606 750 +f 648 185 619 +f 98 384 96 +f 460 670 185 +f 1098 359 1061 +f 1396 1395 622 +f 1259 1028 1069 +f 1300 143 400 +f 1238 1009 620 +f 1285 331 67 +f 1225 380 289 +f 460 638 1234 +f 1003 711 243 +f 275 322 669 +f 965 360 89 +f 1332 1297 1326 +f 602 1486 1037 +f 520 542 518 +f 1245 277 685 +f 56 1187 1168 +f 635 948 409 +f 1242 92 50 +f 1381 1449 1349 +f 1276 307 274 +f 1451 144 257 +f 949 189 20 +f 610 592 540 +f 546 553 558 +f 181 1397 210 +f 340 378 341 +f 612 626 592 +f 289 1058 1238 +f 941 1447 643 +f 700 1206 353 +f 1212 56 1167 +f 348 130 42 +f 640 1380 1444 +f 7 1348 1534 +f 51 16 71 +f 1314 622 103 +f 417 1123 1226 +f 158 256 624 +f 971 791 571 +f 89 360 513 +f 392 880 432 +f 1125 1110 1102 +f 
1193 1087 1162 +f 470 440 1267 +f 648 77 638 +f 1524 1479 694 +f 692 765 713 +f 418 1392 918 +f 1164 1076 1335 +f 354 700 353 +f 755 1137 249 +f 1376 1437 1201 +f 731 904 703 +f 1442 81 1441 +f 677 696 697 +f 716 717 695 +f 696 676 630 +f 630 676 1283 +f 630 225 696 +f 1458 348 698 +f 698 348 178 +f 477 1497 111 +f 629 490 734 +f 697 684 178 +f 699 1313 698 +f 1313 699 1307 +f 751 1137 755 +f 1458 1307 255 +f 1160 489 1262 +f 1528 443 437 +f 1095 159 1020 +f 1465 844 1211 +f 285 580 1233 +f 1218 1495 1522 +f 1286 74 400 +f 374 928 245 +f 789 416 504 +f 478 1145 382 +f 460 487 683 +f 1201 215 1087 +f 384 639 96 +f 1515 478 1526 +f 1494 37 1538 +f 277 43 685 +f 869 881 278 +f 337 692 707 +f 727 1142 511 +f 337 786 765 +f 715 716 695 +f 1524 682 869 +f 717 420 695 +f 451 160 368 +f 1283 499 420 +f 1486 213 1037 +f 699 698 742 +f 852 846 672 +f 698 684 742 +f 1458 492 130 +f 903 884 1356 +f 476 861 1301 +f 783 861 753 +f 1448 280 1025 +f 1513 200 206 +f 509 1363 602 +f 703 1068 926 +f 765 680 713 +f 1494 1506 395 +f 158 27 680 +f 1314 52 964 +f 1305 821 444 +f 624 804 158 +f 704 734 490 +f 919 256 786 +f 927 415 469 +f 731 1050 789 +f 356 1080 164 +f 690 1494 597 +f 498 495 496 +f 676 829 628 +f 465 91 566 +f 177 1475 1484 +f 697 451 677 +f 1238 1058 67 +f 1063 194 551 +f 756 769 438 +f 460 1234 1056 +f 1063 1541 1037 +f 406 1018 1010 +f 187 1485 346 +f 21 177 1440 +f 911 618 626 +f 14 4 1429 +f 747 734 740 +f 732 723 763 +f 786 256 158 +f 252 253 637 +f 676 628 1283 +f 1228 716 727 +f 1284 717 728 +f 479 450 1142 +f 743 699 742 +f 1083 965 321 +f 930 1160 1197 +f 638 1374 1234 +f 823 848 840 +f 1018 805 1042 +f 1470 403 1223 +f 691 708 732 +f 1231 1409 1344 +f 1236 705 1065 +f 1262 554 215 +f 1143 149 1141 +f 252 418 918 +f 1181 65 211 +f 1301 861 1302 +f 278 301 190 +f 1335 1448 1025 +f 1194 363 813 +f 438 427 230 +f 1014 821 471 +f 801 792 230 +f 246 372 552 +f 1306 1099 1105 +f 1328 863 790 +f 179 125 34 +f 1322 1193 1191 +f 245 945 374 +f 213 29 1490 +f 1374 77 
1521 +f 674 567 570 +f 958 1537 6 +f 932 931 943 +f 1189 1172 1221 +f 1206 528 353 +f 1140 826 812 +f 215 1201 1197 +f 53 76 1318 +f 704 713 680 +f 162 776 625 +f 586 357 490 +f 490 713 704 +f 1477 1100 554 +f 1479 337 707 +f 714 727 716 +f 178 684 698 +f 756 742 737 +f 647 42 1008 +f 1314 1104 52 +f 134 174 405 +f 143 371 400 +f 1296 249 1334 +f 1335 1516 1164 +f 5 1185 160 +f 1223 757 1217 +f 92 441 90 +f 194 75 80 +f 1384 626 1195 +f 597 708 690 +f 1459 58 640 +f 1231 1344 379 +f 563 562 577 +f 626 1362 911 +f 756 737 769 +f 1122 321 513 +f 720 400 74 +f 1516 1296 1164 +f 185 670 29 +f 1197 1076 930 +f 1162 1084 1247 +f 1285 1058 380 +f 55 320 298 +f 1520 464 960 +f 184 442 183 +f 469 415 433 +f 1031 1013 1019 +f 1249 1232 1132 +f 433 661 1097 +f 836 855 847 +f 240 147 796 +f 162 1305 233 +f 629 747 776 +f 227 1393 853 +f 115 15 889 +f 85 1433 296 +f 608 1000 653 +f 555 783 1001 +f 703 904 1068 +f 599 546 1464 +f 505 449 503 +f 225 785 736 +f 1183 788 763 +f 611 483 1515 +f 701 686 699 +f 246 1398 300 +f 1437 710 280 +f 29 213 185 +f 297 1501 1410 +f 213 619 185 +f 298 40 651 +f 258 512 482 +f 918 1479 253 +f 445 83 355 +f 150 165 129 +f 1485 187 602 +f 1242 812 441 +f 55 966 92 +f 915 1224 901 +f 1469 1377 1463 +f 763 746 732 +f 893 675 1209 +f 629 586 490 +f 1238 620 466 +f 1184 1416 302 +f 1058 1285 67 +f 1306 36 1128 +f 11 1462 1423 +f 784 449 232 +f 466 1296 1516 +f 1142 495 511 +f 1527 224 1423 +f 792 438 230 +f 1482 1107 1465 +f 1310 555 564 +f 175 417 611 +f 1496 489 924 +f 1501 297 1433 +f 306 322 275 +f 761 794 773 +f 683 702 460 +f 382 782 996 +f 1526 175 611 +f 1050 560 789 +f 1314 103 1104 +f 1402 1433 500 +f 412 325 266 +f 1168 1148 327 +f 1077 1219 1334 +f 1316 1413 817 +f 1516 1335 289 +f 1142 496 495 +f 966 1039 1035 +f 1462 1461 1451 +f 1339 1477 1294 +f 1081 554 1100 +f 1088 229 502 +f 423 64 1098 +f 758 1050 731 +f 731 1213 738 +f 1196 1093 745 +f 840 841 811 +f 357 713 490 +f 357 66 713 +f 1104 902 1105 +f 1550 768 887 +f 1183 827 815 +f 
1190 1186 166 +f 249 766 1263 +f 511 495 309 +f 1387 1446 1436 +f 630 91 225 +f 1533 296 865 +f 695 682 715 +f 706 1134 1107 +f 889 1429 4 +f 1151 1262 489 +f 294 285 1233 +f 669 1276 275 +f 1123 1361 385 +f 298 44 55 +f 827 847 815 +f 675 827 838 +f 799 759 775 +f 1158 1448 1076 +f 1162 1087 215 +f 1140 752 1435 +f 930 1076 1219 +f 771 756 792 +f 820 320 90 +f 706 445 355 +f 1054 277 804 +f 870 1061 1480 +f 161 1155 272 +f 1419 1339 1294 +f 826 1260 831 +f 136 316 844 +f 745 781 774 +f 1416 1184 866 +f 944 250 1383 +f 1428 1546 878 +f 1159 957 411 +f 1545 816 1034 +f 702 683 710 +f 683 487 1025 +f 757 1084 1132 +f 1435 1322 1140 +f 742 718 737 +f 1436 1446 1431 +f 1459 1381 58 +f 513 805 1018 +f 859 1269 1254 +f 854 301 423 +f 1074 1053 107 +f 1307 699 686 +f 1140 1191 1299 +f 1400 658 1439 +f 283 1145 478 +f 1152 152 886 +f 1231 1539 1497 +f 375 747 759 +f 1341 448 1000 +f 666 938 330 +f 208 86 319 +f 933 376 601 +f 42 647 5 +f 1098 854 423 +f 501 480 424 +f 372 300 30 +f 126 140 157 +f 329 72 269 +f 220 571 791 +f 795 55 44 +f 5 286 1185 +f 421 977 246 +f 64 1024 1098 +f 813 1396 814 +f 137 1350 131 +f 474 650 288 +f 182 167 157 +f 489 1160 1077 +f 799 775 808 +f 138 1024 1095 +f 38 429 1473 +f 933 601 944 +f 1428 359 780 +f 33 1020 159 +f 635 833 392 +f 1438 1370 1330 +f 138 1546 780 +f 620 354 1177 +f 755 1296 248 +f 45 31 906 +f 1305 162 471 +f 1375 1530 1478 +f 1045 336 208 +f 1478 1530 106 +f 1459 1444 1365 +f 1364 229 1517 +f 999 1278 1127 +f 1423 1333 174 +f 325 412 691 +f 1480 214 1088 +f 625 776 375 +f 220 265 432 +f 655 440 454 +f 1364 245 1373 +f 521 1047 661 +f 823 840 810 +f 30 300 974 +f 811 828 800 +f 1173 416 560 +f 415 245 526 +f 1524 694 715 +f 351 1033 837 +f 804 1510 1054 +f 347 338 288 +f 151 1259 886 +f 1163 1107 1482 +f 935 961 956 +f 1522 1107 1218 +f 378 1161 341 +f 1476 658 1397 +f 1352 1327 78 +f 931 910 954 +f 830 486 54 +f 1510 927 469 +f 245 928 526 +f 620 1009 354 +f 1538 37 111 +f 770 1506 207 +f 1518 1519 1268 +f 676 696 677 +f 
944 130 492 +f 1546 1428 780 +f 1531 688 1509 +f 1072 916 689 +f 891 932 915 +f 292 616 336 +f 650 347 288 +f 552 372 916 +f 1072 388 621 +f 169 992 147 +f 211 620 1181 +f 787 774 1194 +f 210 132 181 +f 1041 1048 1122 +f 788 746 763 +f 763 723 1500 +f 903 376 623 +f 1 1312 1324 +f 1400 132 210 +f 912 1346 907 +f 297 1548 865 +f 351 724 1257 +f 950 935 923 +f 1118 1114 1103 +f 1158 280 1448 +f 860 645 526 +f 927 418 502 +f 259 268 308 +f 1095 64 159 +f 190 870 637 +f 731 738 758 +f 1397 1083 1476 +f 1485 10 281 +f 181 132 1467 +f 1195 1519 1384 +f 788 809 808 +f 799 811 800 +f 775 746 808 +f 375 759 800 +f 391 1548 528 +f 360 374 805 +f 531 265 1404 +f 1025 280 683 +f 689 916 550 +f 1474 1303 1069 +f 112 102 101 +f 125 1518 59 +f 919 918 256 +f 761 1411 1378 +f 1508 9 1505 +f 302 1277 1251 +f 445 939 1217 +f 721 834 1130 +f 671 132 54 +f 1031 254 1043 +f 767 948 432 +f 1526 95 175 +f 874 1540 1216 +f 1020 286 317 +f 770 830 1506 +f 1027 1030 922 +f 410 916 372 +f 204 409 216 +f 327 1155 1168 +f 822 964 52 +f 340 304 303 +f 220 432 880 +f 1183 763 379 +f 679 1266 1215 +f 644 1461 87 +f 199 673 45 +f 1033 616 510 +f 216 138 93 +f 181 39 661 +f 1090 833 435 +f 926 1068 1541 +f 360 860 928 +f 238 322 306 +f 8 770 412 +f 1373 245 415 +f 624 1392 1510 +f 191 1237 1511 +f 412 770 207 +f 76 1207 1318 +f 680 786 158 +f 877 828 841 +f 847 1540 1165 +f 1474 1179 1303 +f 791 945 652 +f 1072 264 292 +f 160 42 5 +f 1274 886 1275 +f 1334 1263 1077 +f 609 1342 517 +f 858 860 965 +f 722 877 1291 +f 1298 193 172 +f 1288 929 951 +f 204 200 409 +f 1457 1316 539 +f 857 848 871 +f 625 375 1550 +f 223 1298 271 +f 1300 400 1073 +f 1110 1010 1102 +f 628 499 1283 +f 1327 1415 914 +f 980 195 1354 +f 803 1336 195 +f 1361 1123 175 +f 1396 813 1044 +f 374 971 805 +f 1431 1358 1430 +f 24 23 189 +f 857 841 840 +f 679 1215 1005 +f 1061 173 1480 +f 990 276 806 +f 752 710 1435 +f 907 944 659 +f 685 43 671 +f 802 682 695 +f 199 1454 291 +f 1260 1306 902 +f 1106 824 687 +f 635 392 432 +f 342 1146 70 +f 
410 1028 550 +f 425 250 492 +f 531 767 265 +f 1018 406 513 +f 833 1090 392 +f 103 902 1104 +f 15 461 889 +f 937 1347 1282 +f 1287 83 445 +f 264 1259 151 +f 294 282 285 +f 138 780 1024 +f 1473 14 38 +f 1107 1134 1465 +f 114 958 1473 +f 836 1315 873 +f 1540 874 871 +f 848 857 840 +f 1116 142 139 +f 278 881 301 +f 1216 1540 855 +f 1510 804 624 +f 1338 1337 914 +f 1163 1218 1107 +f 224 11 1423 +f 621 421 552 +f 1064 1476 1083 +f 1090 880 392 +f 851 846 1390 +f 274 293 285 +f 1022 974 18 +f 522 41 519 +f 504 369 565 +f 644 87 86 +f 1014 887 18 +f 433 1097 469 +f 1054 39 1055 +f 1508 179 34 +f 1089 502 252 +f 934 224 1527 +f 314 423 578 +f 1399 892 361 +f 292 1045 388 +f 1445 1449 1381 +f 1299 1191 1094 +f 1153 456 1069 +f 1249 1132 1081 +f 1245 8 266 +f 28 397 276 +f 477 1409 1497 +f 1022 887 768 +f 917 492 251 +f 1229 56 1212 +f 967 952 982 +f 1540 847 855 +f 1291 901 722 +f 871 848 1540 +f 871 874 891 +f 945 1452 374 +f 1430 1401 1431 +f 178 348 160 +f 682 802 881 +f 255 251 1458 +f 1360 1349 1449 +f 664 35 49 +f 1298 1336 193 +f 1266 370 950 +f 209 247 1224 +f 31 45 673 +f 597 1539 1500 +f 277 1054 43 +f 996 1526 478 +f 315 1532 388 +f 656 662 574 +f 190 637 278 +f 429 37 1494 +f 1267 899 470 +f 184 183 221 +f 1405 876 82 +f 1472 1455 1498 +f 812 1242 752 +f 60 1295 1308 +f 1297 1332 82 +f 1509 1417 1508 +f 650 1033 510 +f 1108 1451 644 +f 502 229 1373 +f 687 824 1002 +f 1336 1298 223 +f 944 492 250 +f 552 421 246 +f 531 777 767 +f 137 35 664 +f 1547 1484 1475 +f 342 70 343 +f 252 502 418 +f 432 265 767 +f 935 950 951 +f 1296 755 249 +f 157 156 182 +f 248 211 455 +f 652 265 220 +f 1263 1334 249 +f 179 1391 125 +f 877 841 1291 +f 206 184 221 +f 767 777 878 +f 229 214 1517 +f 898 909 1120 +f 910 900 925 +f 87 1293 319 +f 343 818 778 +f 875 915 901 +f 132 671 1467 +f 1117 1131 1109 +f 1227 106 164 +f 1347 1518 1268 +f 721 1130 1111 +f 85 1514 500 +f 870 1480 1089 +f 69 658 1476 +f 791 652 220 +f 928 860 526 +f 415 927 1373 +f 1346 323 1085 +f 766 924 1263 +f 893 45 649 
+f 900 874 1216 +f 649 45 1005 +f 874 900 891 +f 1034 455 1545 +f 256 1392 624 +f 1020 1004 1095 +f 264 1072 689 +f 1532 1347 937 +f 1462 1451 257 +f 96 590 750 +f 1011 1019 1118 +f 416 789 560 +f 973 976 981 +f 878 948 767 +f 488 825 996 +f 610 606 609 +f 1550 800 768 +f 1006 1210 1129 +f 882 872 649 +f 891 931 932 +f 947 910 925 +f 959 943 931 +f 283 1515 389 +f 1103 1011 1118 +f 257 16 1333 +f 918 919 337 +f 944 1383 659 +f 673 291 202 +f 925 1120 935 +f 915 871 891 +f 446 606 1029 +f 135 322 238 +f 954 962 931 +f 818 343 70 +f 959 1112 943 +f 883 1120 1216 +f 886 1259 1152 +f 958 14 1473 +f 166 116 843 +f 1315 872 873 +f 1352 1354 271 +f 237 288 338 +f 1462 11 61 +f 347 1275 401 +f 1067 315 1045 +f 196 206 200 +f 112 110 118 +f 1037 213 1490 +f 998 993 997 +f 967 1078 959 +f 641 574 662 +f 483 611 1173 +f 3 1303 1179 +f 1028 410 1474 +f 1068 613 911 +f 1 1323 1312 +f 833 1513 1246 +f 636 307 273 +f 131 1427 1293 +f 1337 1351 914 +f 1511 1483 1492 +f 1423 174 32 +f 196 184 206 +f 1422 1446 1502 +f 419 254 263 +f 174 99 32 +f 925 900 1216 +f 947 925 946 +f 347 1274 1275 +f 1057 1547 1542 +f 1224 247 722 +f 1549 690 605 +f 28 822 397 +f 1341 1000 608 +f 645 1082 521 +f 901 1224 722 +f 662 729 1146 +f 247 209 525 +f 1112 729 943 +f 334 1150 1174 +f 194 1039 1196 +f 1027 797 1013 +f 1203 32 1038 +f 1293 61 131 +f 943 835 915 +f 915 932 943 +f 802 578 881 +f 178 451 697 +f 858 1082 645 +f 731 789 565 +f 1057 1491 1547 +f 486 8 685 +f 1349 1360 124 +f 32 99 1038 +f 1362 632 509 +f 1203 1038 867 +f 946 956 972 +f 715 694 714 +f 1040 69 1048 +f 1031 1019 263 +f 1285 1056 284 +f 358 474 288 +f 276 807 806 +f 639 384 1302 +f 873 855 836 +f 1246 206 221 +f 931 962 952 +f 43 1055 1467 +f 213 1486 619 +f 961 951 960 +f 295 969 961 +f 1283 1284 630 +f 738 1213 80 +f 295 989 991 +f 953 946 972 +f 510 616 151 +f 137 131 224 +f 190 301 854 +f 998 991 989 +f 151 1274 510 +f 891 910 931 +f 106 1530 144 +f 332 1361 1046 +f 681 343 778 +f 288 663 358 +f 469 1054 1510 +f 1520 1038 99 
+f 969 991 994 +f 951 929 960 +f 855 873 883 +f 601 903 647 +f 656 209 835 +f 923 921 1266 +f 273 556 636 +f 1000 979 994 +f 837 1033 650 +f 981 448 986 +f 401 778 819 +f 342 662 1146 +f 929 1288 867 +f 974 1022 1070 +f 910 947 954 +f 1106 270 824 +f 474 837 650 +f 956 961 969 +f 214 1480 1404 +f 969 994 972 +f 881 578 301 +f 967 982 1186 +f 204 217 615 +f 951 961 935 +f 897 1415 1405 +f 982 952 962 +f 166 1186 987 +f 993 998 985 +f 973 954 953 +f 1250 1232 1249 +f 994 979 976 +f 587 583 494 +f 960 968 961 +f 71 16 1375 +f 870 252 637 +f 976 972 994 +f 351 1147 724 +f 986 995 987 +f 295 961 968 +f 969 972 956 +f 48 63 134 +f 510 347 650 +f 829 631 628 +f 1253 1408 1535 +f 1186 1190 983 +f 954 973 962 +f 201 390 967 +f 166 1121 1190 +f 209 574 525 +f 630 566 91 +f 1018 1102 1010 +f 106 1227 17 +f 960 464 968 +f 265 1517 1404 +f 1436 1431 1401 +f 1186 201 967 +f 641 662 342 +f 987 973 986 +f 729 1112 1146 +f 976 979 981 +f 1153 1303 525 +f 16 1529 1375 +f 975 1535 968 +f 1265 464 1520 +f 1416 493 1144 +f 968 1535 295 +f 457 436 618 +f 991 969 295 +f 894 991 998 +f 1137 766 249 +f 984 145 985 +f 979 1000 981 +f 843 1121 166 +f 843 335 845 +f 201 1186 983 +f 448 981 1000 +f 985 989 1535 +f 450 496 1142 +f 381 864 832 +f 982 962 973 +f 455 65 1545 +f 144 1529 257 +f 989 985 998 +f 762 1340 356 +f 164 762 356 +f 661 433 521 +f 995 986 448 +f 1036 533 1340 +f 1520 99 1265 +f 1080 749 1227 +f 1321 1295 60 +f 69 88 1525 +f 1147 86 724 +f 373 116 832 +f 1127 1278 1126 +f 1272 627 383 +f 67 1009 1238 +f 559 600 1375 +f 894 997 653 +f 215 364 1162 +f 88 69 1040 +f 1182 259 796 +f 1346 933 907 +f 1298 711 271 +f 880 1090 254 +f 1030 226 922 +f 419 263 1042 +f 127 126 167 +f 305 113 123 +f 372 246 300 +f 282 275 1276 +f 750 344 98 +f 1332 198 1405 +f 866 862 493 +f 1013 797 1109 +f 1302 861 639 +f 1231 1500 1539 +f 670 712 29 +f 1298 172 711 +f 1025 1225 1335 +f 1064 321 1048 +f 805 571 1042 +f 971 1452 791 +f 1119 1109 1114 +f 1421 1431 1422 +f 797 1027 922 +f 1111 895 721 +f 
1384 1519 115 +f 380 1058 289 +f 617 338 367 +f 1492 524 1410 +f 1135 304 1006 +f 1436 1365 1387 +f 865 244 1475 +f 113 128 133 +f 1508 1505 179 +f 564 583 318 +f 707 694 1479 +f 1233 1144 493 +f 1129 345 1053 +f 786 337 919 +f 1225 289 1335 +f 15 942 1391 +f 69 1064 1048 +f 1097 39 469 +f 1125 1106 1110 +f 1040 365 219 +f 571 805 971 +f 762 351 837 +f 358 335 373 +f 1441 941 1493 +f 177 21 1533 +f 1475 244 1547 +f 1424 1403 1440 +f 135 1145 322 +f 113 122 128 +f 1320 655 454 +f 153 154 171 +f 685 8 1245 +f 1492 1501 1502 +f 312 293 636 +f 1361 175 1046 +f 246 1091 1398 +f 1041 1002 1040 +f 854 870 190 +f 1011 1042 263 +f 687 1010 1110 +f 76 879 491 +f 1206 391 528 +f 623 1543 345 +f 1543 376 107 +f 25 949 896 +f 318 1310 564 +f 365 980 188 +f 159 314 33 +f 1338 914 896 +f 4 1273 889 +f 311 798 890 +f 1062 1399 779 +f 1537 10 1379 +f 1367 675 1344 +f 195 1002 803 +f 202 1369 673 +f 1491 491 665 +f 23 879 76 +f 513 321 89 +f 1043 1090 1030 +f 412 266 8 +f 1209 1454 893 +f 1543 623 376 +f 678 593 517 +f 205 842 1406 +f 1542 1548 391 +f 665 693 1491 +f 1484 1547 1491 +f 1319 1369 9 +f 565 457 904 +f 188 219 365 +f 1350 137 1319 +f 163 180 196 +f 1031 263 254 +f 1019 1013 1119 +f 1457 1412 1316 +f 345 1210 853 +f 687 1110 1106 +f 496 141 498 +f 811 841 828 +f 1247 1264 1299 +f 1099 1306 1128 +f 404 1372 1536 +f 338 401 367 +f 405 1333 16 +f 151 616 264 +f 687 1002 1041 +f 764 825 1181 +f 1441 1447 941 +f 297 296 1433 +f 623 884 903 +f 376 933 1346 +f 1271 1241 1280 +f 233 586 162 +f 336 1257 208 +f 1233 580 938 +f 1162 1094 1193 +f 4 6 1273 +f 406 1010 687 +f 1002 824 803 +f 1346 912 323 +f 113 186 122 +f 166 1382 116 +f 998 997 894 +f 434 879 23 +f 528 1548 524 +f 1446 1387 1492 +f 331 1318 1207 +f 135 382 1145 +f 32 934 1527 +f 386 369 385 +f 601 1008 944 +f 1539 597 1538 +f 291 1367 1409 +f 107 1053 1543 +f 486 830 770 +f 1091 299 821 +f 822 1218 397 +f 1312 1323 396 +f 13 1372 404 +f 509 1068 911 +f 627 1199 383 +f 963 1509 688 +f 115 889 1273 +f 452 484 514 +f 71 
146 51 +f 243 1150 334 +f 1114 1118 1119 +f 156 797 922 +f 1381 1459 1445 +f 1488 1489 1338 +f 1439 1062 1400 +f 77 648 619 +f 1059 1074 1066 +f 615 1393 204 +f 853 1393 623 +f 985 1535 984 +f 498 309 495 +f 934 906 529 +f 559 1478 1456 +f 632 1384 1273 +f 1418 1363 632 +f 483 1173 267 +f 214 1404 1517 +f 252 870 1089 +f 360 805 513 +f 147 940 796 +f 1047 1082 1083 +f 1388 1424 1407 +f 1499 1498 1514 +f 815 809 788 +f 935 1120 923 +f 688 315 1067 +f 560 1050 1202 +f 1070 1474 30 +f 114 404 958 +f 280 710 683 +f 1122 406 687 +f 81 88 219 +f 1498 1499 1472 +f 69 1476 1064 +f 1062 54 1400 +f 186 311 94 +f 278 637 253 +f 54 132 1400 +f 325 691 733 +f 289 1238 1516 +f 101 161 272 +f 227 196 204 +f 1409 1504 291 +f 324 350 303 +f 800 1550 375 +f 745 1093 795 +f 40 298 863 +f 1201 1437 1158 +f 721 814 834 +f 1539 111 1497 +f 1505 62 179 +f 477 62 1409 +f 1484 1460 177 +f 395 779 361 +f 605 690 708 +f 1399 1439 856 +f 1324 1281 1271 +f 922 226 182 +f 1088 502 1089 +f 884 615 1356 +f 791 1452 945 +f 831 709 826 +f 456 1152 1069 +f 956 925 935 +f 1041 1040 1048 +f 838 1183 379 +f 129 1006 123 +f 787 758 774 +f 1439 1399 1062 +f 1364 1373 229 +f 446 1342 609 +f 816 1545 782 +f 1032 1053 1059 +f 1337 1489 643 +f 1379 10 1485 +f 1426 1379 1363 +f 77 619 346 +f 1136 1149 303 +f 57 58 1381 +f 1439 1525 856 +f 39 1097 661 +f 1083 321 1064 +f 868 672 28 +f 928 374 360 +f 17 1478 106 +f 1154 1204 147 +f 156 1116 1117 +f 1548 1542 244 +f 1447 1441 78 +f 1006 129 1210 +f 81 1442 856 +f 1051 1182 940 +f 1085 107 1346 +f 860 858 645 +f 1485 602 1379 +f 6 14 958 +f 1074 1059 1053 +f 666 667 938 +f 109 189 23 +f 1202 895 1111 +f 1222 1271 1243 +f 1122 687 1041 +f 645 433 415 +f 88 81 1525 +f 780 359 1024 +f 999 600 559 +f 1125 270 1106 +f 243 172 970 +f 155 192 970 +f 1194 895 787 +f 362 246 977 +f 1369 664 673 +f 1067 1045 208 +f 578 423 301 +f 1508 34 1509 +f 473 231 1305 +f 1404 1480 173 +f 1087 1193 1322 +f 1391 461 15 +f 51 146 48 +f 1047 1083 1397 +f 32 1366 934 +f 261 170 240 +f 
1411 1360 1430 +f 616 292 264 +f 779 395 1506 +f 915 875 871 +f 970 262 243 +f 154 1115 149 +f 1509 34 1531 +f 904 457 613 +f 267 1515 483 +f 1348 7 10 +f 155 970 172 +f 87 1461 61 +f 891 900 910 +f 977 421 937 +f 702 712 670 +f 23 53 109 +f 96 750 98 +f 916 410 550 +f 1431 1446 1422 +f 1015 1053 1032 +f 1019 1119 1118 +f 1234 284 1056 +f 284 331 1285 +f 12 114 1473 +f 649 908 882 +f 892 856 1442 +f 992 1143 1154 +f 711 205 271 +# 3000 faces, 0 coords texture + +# End of File From bbafbd90d9b79628b19b8690345c0f3fc6eed88f Mon Sep 17 00:00:00 2001 From: ahmed Date: Fri, 19 Jul 2024 11:15:40 -0400 Subject: [PATCH 34/96] apps templates --- apps/ARAP/CMakeLists.txt | 22 ++++++++++++++++++++++ apps/ARAP/arap.cu | 21 +++++++++++++++++++++ apps/CMakeLists.txt | 3 +++ apps/Heat/CMakeLists.txt | 22 ++++++++++++++++++++++ apps/Heat/heat.cu | 21 +++++++++++++++++++++ apps/SCP/CMakeLists.txt | 22 ++++++++++++++++++++++ apps/SCP/scp.cu | 21 +++++++++++++++++++++ 7 files changed, 132 insertions(+) create mode 100644 apps/ARAP/CMakeLists.txt create mode 100644 apps/ARAP/arap.cu create mode 100644 apps/Heat/CMakeLists.txt create mode 100644 apps/Heat/heat.cu create mode 100644 apps/SCP/CMakeLists.txt create mode 100644 apps/SCP/scp.cu diff --git a/apps/ARAP/CMakeLists.txt b/apps/ARAP/CMakeLists.txt new file mode 100644 index 00000000..80b46184 --- /dev/null +++ b/apps/ARAP/CMakeLists.txt @@ -0,0 +1,22 @@ +add_executable(ARAP) + +set(SOURCE_LIST + arap.cu +) + +target_sources(ARAP + PRIVATE + ${SOURCE_LIST} +) + +set_target_properties(ARAP PROPERTIES FOLDER "apps") + +set_property(TARGET ARAP PROPERTY CUDA_SEPARABLE_COMPILATION ON) + +source_group(TREE ${CMAKE_CURRENT_LIST_DIR} PREFIX "ARAP" FILES ${SOURCE_LIST}) + +target_link_libraries(ARAP + PRIVATE RXMesh +) + +#gtest_discover_tests( ARAP ) \ No newline at end of file diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu new file mode 100644 index 00000000..3f642cf5 --- /dev/null +++ b/apps/ARAP/arap.cu @@ -0,0 +1,21 @@ +#include 
"rxmesh/query.cuh" +#include "rxmesh/rxmesh_static.h" + +#include "rxmesh/matrix/sparse_matrix.cuh" + +using namespace rxmesh; + + +int main(int argc, char** argv) +{ + Log::init(); + + const uint32_t device_id = 0; + cuda_query(device_id); + + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + +#if USE_POLYSCOPE + polyscope::show(); +#endif +} \ No newline at end of file diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index e727db25..55d14245 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -9,4 +9,7 @@ add_subdirectory(XPBD ) add_subdirectory(ShortestEdgeCollapse) add_subdirectory(Remesh) add_subdirectory(SurfaceTracking) +add_subdirectory(SCP) +add_subdirectory(ARAP) +add_subdirectory(Heat) diff --git a/apps/Heat/CMakeLists.txt b/apps/Heat/CMakeLists.txt new file mode 100644 index 00000000..67c1ed07 --- /dev/null +++ b/apps/Heat/CMakeLists.txt @@ -0,0 +1,22 @@ +add_executable(Heat) + +set(SOURCE_LIST + heat.cu +) + +target_sources(Heat + PRIVATE + ${SOURCE_LIST} +) + +set_target_properties(Heat PROPERTIES FOLDER "apps") + +set_property(TARGET Heat PROPERTY CUDA_SEPARABLE_COMPILATION ON) + +source_group(TREE ${CMAKE_CURRENT_LIST_DIR} PREFIX "Heat" FILES ${SOURCE_LIST}) + +target_link_libraries(Heat + PRIVATE RXMesh +) + +#gtest_discover_tests( Heat ) \ No newline at end of file diff --git a/apps/Heat/heat.cu b/apps/Heat/heat.cu new file mode 100644 index 00000000..3f642cf5 --- /dev/null +++ b/apps/Heat/heat.cu @@ -0,0 +1,21 @@ +#include "rxmesh/query.cuh" +#include "rxmesh/rxmesh_static.h" + +#include "rxmesh/matrix/sparse_matrix.cuh" + +using namespace rxmesh; + + +int main(int argc, char** argv) +{ + Log::init(); + + const uint32_t device_id = 0; + cuda_query(device_id); + + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + +#if USE_POLYSCOPE + polyscope::show(); +#endif +} \ No newline at end of file diff --git a/apps/SCP/CMakeLists.txt b/apps/SCP/CMakeLists.txt new file mode 100644 index 00000000..91b562e8 --- /dev/null +++ 
b/apps/SCP/CMakeLists.txt @@ -0,0 +1,22 @@ +add_executable(SCP) + +set(SOURCE_LIST + scp.cu +) + +target_sources(SCP + PRIVATE + ${SOURCE_LIST} +) + +set_target_properties(SCP PROPERTIES FOLDER "apps") + +set_property(TARGET SCP PROPERTY CUDA_SEPARABLE_COMPILATION ON) + +source_group(TREE ${CMAKE_CURRENT_LIST_DIR} PREFIX "SCP" FILES ${SOURCE_LIST}) + +target_link_libraries(SCP + PRIVATE RXMesh +) + +#gtest_discover_tests( SCP ) \ No newline at end of file diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu new file mode 100644 index 00000000..3f642cf5 --- /dev/null +++ b/apps/SCP/scp.cu @@ -0,0 +1,21 @@ +#include "rxmesh/query.cuh" +#include "rxmesh/rxmesh_static.h" + +#include "rxmesh/matrix/sparse_matrix.cuh" + +using namespace rxmesh; + + +int main(int argc, char** argv) +{ + Log::init(); + + const uint32_t device_id = 0; + cuda_query(device_id); + + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + +#if USE_POLYSCOPE + polyscope::show(); +#endif +} \ No newline at end of file From 3179ee5c07c6829c0e633d55452f0d668584b70c Mon Sep 17 00:00:00 2001 From: ahmed Date: Sat, 20 Jul 2024 14:29:03 -0400 Subject: [PATCH 35/96] dense matrix fill random --- include/rxmesh/matrix/dense_matrix.cuh | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index ba4d6e80..2806fcf8 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -17,9 +17,6 @@ namespace rxmesh { template struct DenseMatrix { - static_assert(std::is_same_v || std::is_same_v, - "RXMesh::DenseMatrix supports only float or double"); - template friend class SparseMatrix; @@ -100,6 +97,26 @@ struct DenseMatrix CUDA_ERROR(cudaMemset(m_d_val, 0, bytes())); } + /** + * @brief fill in the matrix with random numbers on both host and device + * @return + */ + __host__ void fill_random(T min = T(-1), T max = T(1)) + { + std::random_device rd; + std::mt19937 
gen(rd()); + + std::uniform_int_distribution<> dis(min, max); + + + for (int i = 0; i < rows() * cols(); ++i) { + m_h_val[i] = dis(gen); + } + + CUDA_ERROR( + cudaMemcpy(m_d_val, m_h_val, bytes(), cudaMemcpyHostToDevice)); + } + /** * @brief accessing a specific value in the matrix using the row and col * index. Can be used on both host and device From bccb325b3acca94707c361dd5b240b0dc2e5889e Mon Sep 17 00:00:00 2001 From: ahmed Date: Sat, 20 Jul 2024 15:54:57 -0400 Subject: [PATCH 36/96] specialize fill_random for different types --- include/rxmesh/matrix/dense_matrix.cuh | 69 ++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index 2806fcf8..60dd7556 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -106,13 +106,76 @@ struct DenseMatrix std::random_device rd; std::mt19937 gen(rd()); - std::uniform_int_distribution<> dis(min, max); + if constexpr (std::is_same_v || + std::is_same_v) { + std::uniform_real_distribution dis(static_cast(minn), + static_cast(maxx)); + + for (int i = 0; i < rows() * cols(); ++i) { + if constexpr (std::is_same_v) { + m_h_val[i].x = dis(gen); + m_h_val[i].y = dis(gen); + } + if constexpr (std::is_same_v) { + m_h_val[i] = dis(gen); + } + } + } + + if constexpr (std::is_same_v || + std::is_same_v) { + std::uniform_real_distribution dis( + static_cast(minn), static_cast(maxx)); + + for (int i = 0; i < rows() * cols(); ++i) { + if constexpr (std::is_same_v) { + m_h_val[i].x = dis(gen); + m_h_val[i].y = dis(gen); + } + if constexpr (std::is_same_v) { + m_h_val[i] = dis(gen); + } + } + } + + if constexpr (std::is_same_v || std::is_same_v) { + std::uniform_int_distribution dis(static_cast(minn), + static_cast(maxx)); + + for (int i = 0; i < rows() * cols(); ++i) { + m_h_val[i] = dis(gen); + } + } + + if constexpr (std::is_same_v) { + std::uniform_int_distribution dis( + 
static_cast(minn), static_cast(maxx)); + + for (int i = 0; i < rows() * cols(); ++i) { + m_h_val[i] = dis(gen); + } + } - for (int i = 0; i < rows() * cols(); ++i) { - m_h_val[i] = dis(gen); + if constexpr (std::is_same_v) { + std::uniform_int_distribution dis( + static_cast(minn), static_cast(maxx)); + + for (int i = 0; i < rows() * cols(); ++i) { + m_h_val[i] = dis(gen); + } } + if constexpr (std::is_same_v) { + std::uniform_int_distribution dis( + static_cast(minn), static_cast(maxx)); + + for (int i = 0; i < rows() * cols(); ++i) { + m_h_val[i] = dis(gen); + } + } + + CUDA_ERROR( cudaMemcpy(m_d_val, m_h_val, bytes(), cudaMemcpyHostToDevice)); } From 5b89bd99aa231905d330abd5a0914820168e7290 Mon Sep 17 00:00:00 2001 From: ahmed Date: Sat, 20 Jul 2024 15:59:15 -0400 Subject: [PATCH 37/96] minor fix --- include/rxmesh/matrix/dense_matrix.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index 60dd7556..d868a2bb 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -101,7 +101,7 @@ struct DenseMatrix * @brief fill in the matrix with random numbers on both host and device * @return */ - __host__ void fill_random(T min = T(-1), T max = T(1)) + __host__ void fill_random(double minn = -1.0, double maxx = 1.0) { std::random_device rd; std::mt19937 gen(rd()); From 0b7fb78dd929466a3889f7bac01692cd6fd89c94 Mon Sep 17 00:00:00 2001 From: ahmed Date: Sun, 21 Jul 2024 12:06:38 -0400 Subject: [PATCH 38/96] dense matrix refactor --- include/rxmesh/matrix/dense_matrix.cuh | 67 ++++++++++++++++++++- include/rxmesh/matrix/sparse_matrix.cuh | 79 +++++++++++-------------- include/rxmesh/util/cuda_query.h | 20 +++++++ include/rxmesh/util/macros.h | 33 ++++++++++- include/rxmesh/util/meta.h | 31 ++++++++++ include/rxmesh/util/util.h | 38 ++++++++++++ tests/RXMesh_test/CMakeLists.txt | 1 + tests/RXMesh_test/rxmesh_test_main.cu | 1 + 
tests/RXMesh_test/test_dense_matrix.cuh | 34 +++++++++++ 9 files changed, 253 insertions(+), 51 deletions(-) create mode 100644 tests/RXMesh_test/test_dense_matrix.cuh diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index d868a2bb..47ebff7d 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -1,11 +1,15 @@ #pragma once #include +#include "cublas_v2.h" #include "cusparse.h" + #include "rxmesh/attribute.h" #include "rxmesh/context.h" #include "rxmesh/rxmesh.h" #include "rxmesh/types.h" +#include"rxmesh/util/meta.h" + namespace rxmesh { /** @@ -14,10 +18,12 @@ namespace rxmesh { * We would only support col major dense matrix for now since that's what * cusparse and cusolver wants. */ -template +template struct DenseMatrix { - template + using IndexT = int; + + template friend class SparseMatrix; DenseMatrix() @@ -42,6 +48,7 @@ struct DenseMatrix m_dendescr(NULL), m_h_val(nullptr), m_d_val(nullptr), + m_cublas_handle(nullptr), m_col_pad_bytes(0), m_col_pad_idx(0) { @@ -59,8 +66,12 @@ struct DenseMatrix m_num_cols, m_num_rows, // leading dim m_d_val, - CUDA_R_32F, + cuda_type(), CUSPARSE_ORDER_COL)); + + CUBLAS_ERROR(cublasCreate(&m_cublas_handle)); + CUBLAS_ERROR( + cublasSetPointerMode(m_cublas_handle, CUBLAS_POINTER_MODE_HOST)); } /** @@ -233,6 +244,52 @@ struct DenseMatrix return this->operator()(get_row_id(handle), col); } + /** + * @brief compute the sum of the absolute value of all elements in the + * matrix. For complex types (cuComples and cuDoubleComplex), we sum the + * absolute value of the real and absolute value of the imaginary part. The + * results are computed for the data on the device. 
Only float, double, + * cuComplex, and cuDoubleComplex are supported + * @param stream + * @return + */ + __host__ BaseTypeT abs_sum(cudaStream_t stream = NULL) + { + CUBLAS_ERROR(cublasSetStream(m_cublas_handle, stream)); + if constexpr (!std::is_same_v && !std::is_same_v && + !std::is_same_v && + !std::is_same_v) { + RXMESH_ERROR( + "DenseMatrix::abs_sum() only float, double, cuComplex, and " + "cuDoubleComplex are supported for this function!"); + return BaseTypeT(0); + } + + BaseTypeT result; + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasSasum( + m_cublas_handle, rows() * cols(), m_d_val, 1, &result)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasDasum( + m_cublas_handle, rows() * cols(), m_d_val, 1, &result)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasScasum( + m_cublas_handle, rows() * cols(), m_d_val, 1, &result)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasDzasum( + m_cublas_handle, rows() * cols(), m_d_val, 1, &result)); + } + + return result; + } + + /** * @brief return the row index corresponding to specific vertex/edge/face * handle @@ -387,6 +444,9 @@ struct DenseMatrix GPU_FREE(m_d_val); m_allocated = m_allocated & (~DEVICE); } + if ((location & LOCATION_ALL) == LOCATION_ALL) { + CUSPARSE_ERROR(cusparseDestroyDnMat(m_dendescr)); + } } private: @@ -415,6 +475,7 @@ struct DenseMatrix const Context m_context; cusparseDnMatDescr_t m_dendescr; + cublasHandle_t m_cublas_handle; locationT m_allocated; IndexT m_num_rows; IndexT m_num_cols; diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index 72a96046..6b9be1c8 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -53,9 +53,11 @@ enum class PermuteMethod * for matrix-vector multiplication and linear solver—(using cuSolver and * cuSparse as a back-end. 
*/ -template +template struct SparseMatrix { + using IndexT = int; + SparseMatrix(const RXMeshStatic& rx) : m_d_row_ptr(nullptr), m_d_col_idx(nullptr), @@ -177,7 +179,7 @@ struct SparseMatrix CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - cuda_type())); + cuda_type())); CUSPARSE_ERROR(cusparseCreate(&m_cusparse_handle)); CUSOLVER_ERROR(cusolverSpCreate(&m_cusolver_sphandle)); @@ -202,6 +204,9 @@ struct SparseMatrix cudaMemcpyDeviceToHost)); m_allocated = m_allocated | HOST; + + CUSPARSE_ERROR(cusparseSetPointerMode(m_cusparse_handle, + CUSPARSE_POINTER_MODE_HOST)); } /** @@ -451,7 +456,7 @@ struct SparseMatrix matB, &beta, matC, - cuda_type(), + cuda_type(), CUSPARSE_SPMM_ALG_DEFAULT, &m_spmm_buffer_size)); CUDA_ERROR(cudaMalloc(&m_d_cusparse_spmm_buffer, m_spmm_buffer_size)); @@ -478,8 +483,24 @@ struct SparseMatrix assert(rows() == C_mat.rows()); assert(B_mat.cols() == C_mat.cols()); - float alpha = 1.0f; - float beta = 0.0f; + BaseTypeT alpha; + BaseTypeT beta; + + if constexpr (std::is_same_v) { + alpha = make_cuComplex(1.f, 1.f); + beta = make_cuComplex(0.f, 0.f); + } + + if constexpr (std::is_same_v) { + alpha = make_cuDoubleComplex(1.0, 1.0); + beta = make_cuDoubleComplex(0.0, 0.0); + } + + if constexpr (!std::is_same_v && + !std::is_same_v) { + alpha = T(1); + beta = T(0); + } // A_mat.create_cusparse_handle(); cusparseSpMatDescr_t matA = m_spdescr; @@ -503,7 +524,7 @@ struct SparseMatrix matB, &beta, matC, - cuda_type(), + cuda_type(), CUSPARSE_SPMM_ALG_DEFAULT, m_d_cusparse_spmm_buffer)); } @@ -523,9 +544,9 @@ struct SparseMatrix cusparseDnVecDescr_t vecy = NULL; CUSPARSE_ERROR( - cusparseCreateDnVec(&vecx, m_num_cols, in_arr, cuda_type())); + cusparseCreateDnVec(&vecx, m_num_cols, in_arr, cuda_type())); CUSPARSE_ERROR( - cusparseCreateDnVec(&vecy, m_num_rows, rt_arr, cuda_type())); + cusparseCreateDnVec(&vecy, m_num_rows, rt_arr, cuda_type())); CUSPARSE_ERROR(cusparseSpMV_bufferSize(m_cusparse_handle, 
CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -534,7 +555,7 @@ struct SparseMatrix vecx, &beta, vecy, - cuda_type(), + cuda_type(), CUSPARSE_SPMV_ALG_DEFAULT, &m_spmv_buffer_size)); CUSPARSE_ERROR(cusparseDestroyDnVec(vecx)); @@ -568,9 +589,9 @@ struct SparseMatrix cusparseDnVecDescr_t vecy = NULL; CUSPARSE_ERROR( - cusparseCreateDnVec(&vecx, m_num_cols, in_arr, cuda_type())); + cusparseCreateDnVec(&vecx, m_num_cols, in_arr, cuda_type())); CUSPARSE_ERROR( - cusparseCreateDnVec(&vecy, m_num_rows, rt_arr, cuda_type())); + cusparseCreateDnVec(&vecy, m_num_rows, rt_arr, cuda_type())); CUSPARSE_ERROR(cusparseSetStream(m_cusparse_handle, stream)); @@ -586,7 +607,7 @@ struct SparseMatrix vecx, &beta, vecy, - cuda_type(), + cuda_type(), CUSPARSE_SPMV_ALG_DEFAULT, m_d_cusparse_spmv_buffer)); @@ -1339,40 +1360,6 @@ struct SparseMatrix } } - cudaDataType_t cuda_type() const - { - if (std::is_same_v) { - return CUDA_R_32F; - } else if (std::is_same_v) { - return CUDA_R_64F; - } else if (std::is_same_v) { - return CUDA_C_32F; - } else if (std::is_same_v) { - return CUDA_C_64F; - } else if (std::is_same_v) { - return CUDA_R_8I; - } else if (std::is_same_v) { - return CUDA_R_8U; - } else if (std::is_same_v) { - return CUDA_R_16I; - } else if (std::is_same_v) { - return CUDA_R_16U; - } else if (std::is_same_v || std::is_same_v) { - return CUDA_R_32I; - } else if (std::is_same_v) { - return CUDA_R_32U; - } else if (std::is_same_v) { - return CUDA_R_64I; - } else if (std::is_same_v) { - return CUDA_R_64U; - } else { - RXMESH_ERROR( - "SparseMatrix unsupported type. 
SparseMatrix can support " - "different data type but for the solver, only float, double, " - "cuComplex, and cuDoubleComplex are supported"); - } - } - int reorder_to_int(const PermuteMethod& reorder) const { switch (reorder) { diff --git a/include/rxmesh/util/cuda_query.h b/include/rxmesh/util/cuda_query.h index 98ac9b6f..26fd7782 100644 --- a/include/rxmesh/util/cuda_query.h +++ b/include/rxmesh/util/cuda_query.h @@ -113,6 +113,26 @@ inline cudaDeviceProp cuda_query(const int dev) cusolver_minor, cusolver_patch); + + int cusparse_major = -1, cusparse_minor = -1, cusparse_patch = -1; + CUSPARSE_ERROR(cusparseGetProperty(MAJOR_VERSION, &cusparse_major)); + CUSPARSE_ERROR(cusparseGetProperty(MINOR_VERSION, &cusparse_minor)); + CUSPARSE_ERROR(cusparseGetProperty(PATCH_LEVEL, &cusparse_patch)); + RXMESH_TRACE("Using cuSparse Version {}.{}.{}", + cusparse_major, + cusparse_minor, + cusparse_patch); + + int cublas_major = -1, cublas_minor = -1, cublas_patch = -1; + CUBLAS_ERROR(cublasGetProperty(MAJOR_VERSION, &cublas_major)); + CUBLAS_ERROR(cublasGetProperty(MINOR_VERSION, &cublas_minor)); + CUBLAS_ERROR(cublasGetProperty(PATCH_LEVEL, &cublas_patch)); + RXMESH_TRACE("Using cuBlas Version {}.{}.{}", + cublas_major, + cublas_minor, + cublas_patch); + + if (!dev_prop.managedMemory) { RXMESH_ERROR( "The selected device does not support CUDA unified memory"); diff --git a/include/rxmesh/util/macros.h b/include/rxmesh/util/macros.h index b4cb241a..ce45703b 100644 --- a/include/rxmesh/util/macros.h +++ b/include/rxmesh/util/macros.h @@ -68,18 +68,24 @@ constexpr int MAX_OVERLAP_CAVITIES = 4; #endif -// CUDA_ERROR +#ifndef CUDA_ERROR inline void HandleError(cudaError_t err, const char* file, int line) { // Error handling micro, wrap it around function whenever possible if (err != cudaSuccess) { Log::get_logger()->error("Line {} File {}", line, file); Log::get_logger()->error("CUDA ERROR: {}", cudaGetErrorString(err)); +#ifdef _WIN32 + system("pause"); +#else 
exit(EXIT_FAILURE); +#endif } } #define CUDA_ERROR(err) (HandleError(err, __FILE__, __LINE__)) +#endif +#ifndef CUSPARSE_ERROR inline void cusparseHandleError(cusparseStatus_t status, const char* file, const int line) @@ -97,8 +103,9 @@ inline void cusparseHandleError(cusparseStatus_t status, return; } #define CUSPARSE_ERROR(err) (cusparseHandleError(err, __FILE__, __LINE__)) +#endif - +#ifndef CUSOLVER_ERROR static inline void cusolverHandleError(cusolverStatus_t status, const char* file, const int line) @@ -139,6 +146,28 @@ static inline void cusolverHandleError(cusolverStatus_t status, return; } #define CUSOLVER_ERROR(err) (cusolverHandleError(err, __FILE__, __LINE__)) +#endif + + +#ifndef CUBLAS_ERROR +inline void cublasHandleError(cublasStatus_t status, + const char* file, + const int line) +{ + if (status != CUBLAS_STATUS_SUCCESS) { + Log::get_logger()->error("Line {} File {}", line, file); + Log::get_logger()->error("CUBLAS ERROR: {}", + cublasGetStatusString(status)); +#ifdef _WIN32 + system("pause"); +#else + exit(EXIT_FAILURE); +#endif + } + return; +} +#define CUBLAS_ERROR(err) (cublasHandleError(err, __FILE__, __LINE__)) +#endif // GPU_FREE diff --git a/include/rxmesh/util/meta.h b/include/rxmesh/util/meta.h index c42bf54a..c508e0bf 100644 --- a/include/rxmesh/util/meta.h +++ b/include/rxmesh/util/meta.h @@ -1,5 +1,8 @@ #pragma once #include + +#include + namespace rxmesh { namespace detail { @@ -48,4 +51,32 @@ struct FunctionTraits }; } // namespace detail + + +/** + * @brief Extracting base type from a type. 
Used primarily to extract the float + * and double base type of cuComplex and cuDoubleComplex types + */ +template +struct BaseType +{ + using type = T; +}; + +template <> +struct BaseType +{ + using type = float; +}; + +template <> +struct BaseType +{ + using type = double; +}; + + +template +using BaseTypeT = typename BaseType::type; + } // namespace rxmesh \ No newline at end of file diff --git a/include/rxmesh/util/util.h b/include/rxmesh/util/util.h index 97b5d55f..6a531865 100644 --- a/include/rxmesh/util/util.h +++ b/include/rxmesh/util/util.h @@ -416,4 +416,42 @@ __device__ __host__ __inline__ void align(const std::size_t byte_alignment, const uint64_t aligned = intptr + byte_alignment - remainder; ptr = reinterpret_cast(aligned); } + +/** + * @brief get cuSparse/cuSolver data type for T + */ +template +__host__ __inline__ cudaDataType_t cuda_type() +{ + if (std::is_same_v) { + return CUDA_R_32F; + } else if (std::is_same_v) { + return CUDA_R_64F; + } else if (std::is_same_v) { + return CUDA_C_32F; + } else if (std::is_same_v) { + return CUDA_C_64F; + } else if (std::is_same_v) { + return CUDA_R_8I; + } else if (std::is_same_v) { + return CUDA_R_8U; + } else if (std::is_same_v) { + return CUDA_R_16I; + } else if (std::is_same_v) { + return CUDA_R_16U; + } else if (std::is_same_v || std::is_same_v) { + return CUDA_R_32I; + } else if (std::is_same_v) { + return CUDA_R_32U; + } else if (std::is_same_v) { + return CUDA_R_64I; + } else if (std::is_same_v) { + return CUDA_R_64U; + } else { + RXMESH_ERROR( + "Unsupported type. 
Sparse/Dense Matrix in RXMesh can support " + "different data type but for the solver, only float, double, " + "cuComplex, and cuDoubleComplex are supported"); + } +} } // namespace rxmesh \ No newline at end of file diff --git a/tests/RXMesh_test/CMakeLists.txt b/tests/RXMesh_test/CMakeLists.txt index fdd88c66..713ab312 100644 --- a/tests/RXMesh_test/CMakeLists.txt +++ b/tests/RXMesh_test/CMakeLists.txt @@ -24,6 +24,7 @@ set( SOURCE_LIST test_wasted_work.cuh test_eigen.cuh test_boundary.cuh + test_dense_matrix.cuh ) target_sources( RXMesh_test diff --git a/tests/RXMesh_test/rxmesh_test_main.cu b/tests/RXMesh_test/rxmesh_test_main.cu index cf46b521..27218bfb 100644 --- a/tests/RXMesh_test/rxmesh_test_main.cu +++ b/tests/RXMesh_test/rxmesh_test_main.cu @@ -33,6 +33,7 @@ struct RXMeshTestArg #include "test_wasted_work.cuh" #include "test_eigen.cuh" #include "test_boundary.cuh" +#include "test_dense_matrix.cuh" // clang-format on int main(int argc, char** argv) diff --git a/tests/RXMesh_test/test_dense_matrix.cuh b/tests/RXMesh_test/test_dense_matrix.cuh new file mode 100644 index 00000000..06a38d2f --- /dev/null +++ b/tests/RXMesh_test/test_dense_matrix.cuh @@ -0,0 +1,34 @@ +#include "gtest/gtest.h" + +#include "rxmesh/rxmesh_static.h" + +#include "rxmesh/matrix/dense_matrix.cuh" + +TEST(RXMeshStatic, DenseMatrixASum) +{ + using namespace rxmesh; + + cuda_query(rxmesh_args.device_id); + + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + + DenseMatrix mat(rx, 10, 10); + + mat.fill_random(); + + float a_sum = mat.abs_sum(); + + float res = 0; + + for (uint32_t i = 0; i < mat.rows(); ++i) { + for (uint32_t j = 0; j < mat.cols(); ++j) { + res += std::abs(mat(i, j)); + } + } + + EXPECT_NEAR(res, a_sum, 0.001); + + mat.release(); + + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); +} \ No newline at end of file From 659963cb7bd261b0bb4d2e7f7da30d7ebb7fe698 Mon Sep 17 00:00:00 2001 From: ahmed Date: Sun, 21 Jul 2024 13:14:27 -0400 Subject: [PATCH 39/96] dense matrix 
copy_from and axpy --- include/rxmesh/attribute.h | 26 +++-- include/rxmesh/matrix/dense_matrix.cuh | 146 +++++++++++++++++++++++- tests/RXMesh_test/test_dense_matrix.cuh | 40 +++++++ 3 files changed, 203 insertions(+), 9 deletions(-) diff --git a/include/rxmesh/attribute.h b/include/rxmesh/attribute.h index 5d7c1de4..aeb4a851 100644 --- a/include/rxmesh/attribute.h +++ b/include/rxmesh/attribute.h @@ -526,12 +526,14 @@ class Attribute : public AttributeBase if ((source_flag & LOCATION_ALL) == LOCATION_ALL && (dst_flag & LOCATION_ALL) != LOCATION_ALL) { RXMESH_ERROR("Attribute::copy_from() Invalid configuration!"); + return; } if (m_num_attributes != source.get_num_attributes()) { RXMESH_ERROR( "Attribute::copy_from() number of attributes is " "different!"); + return; } if (this->is_empty() || m_rxmesh->get_num_patches() == 0) { @@ -542,13 +544,15 @@ class Attribute : public AttributeBase if ((source_flag & HOST) == HOST && (dst_flag & HOST) == HOST) { if ((source_flag & source.m_allocated) != source_flag) { RXMESH_ERROR( - "Attribute::copy() copying source is not valid" + "Attribute::copy_from() copying source is not valid" " because it was not allocated on host"); + return; } if ((dst_flag & m_allocated) != dst_flag) { RXMESH_ERROR( - "Attribute::copy() copying source is not valid" + "Attribute::copy_from() copying source is not valid" " because location (this) was not allocated on host"); + return; } for (uint32_t p = 0; p < m_rxmesh->get_num_patches(); ++p) { @@ -563,13 +567,15 @@ class Attribute : public AttributeBase if ((source_flag & DEVICE) == DEVICE && (dst_flag & DEVICE) == DEVICE) { if ((source_flag & source.m_allocated) != source_flag) { RXMESH_ERROR( - "Attribute::copy() copying source is not valid" + "Attribute::copy_from() copying source is not valid" " because it was not allocated on device"); + return; } if ((dst_flag & m_allocated) != dst_flag) { RXMESH_ERROR( - "Attribute::copy() copying source is not valid" + "Attribute::copy_from() copying 
source is not valid" " because location (this) was not allocated on device"); + return; } for (uint32_t p = 0; p < m_rxmesh->get_num_patches(); ++p) { @@ -587,13 +593,15 @@ class Attribute : public AttributeBase if ((source_flag & DEVICE) == DEVICE && (dst_flag & HOST) == HOST) { if ((source_flag & source.m_allocated) != source_flag) { RXMESH_ERROR( - "Attribute::copy() copying source is not valid" + "Attribute::copy_from() copying source is not valid" " because it was not allocated on host"); + return; } if ((dst_flag & m_allocated) != dst_flag) { RXMESH_ERROR( - "Attribute::copy() copying source is not valid" + "Attribute::copy_from() copying source is not valid" " because location (this) was not allocated on device"); + return; } @@ -612,13 +620,15 @@ class Attribute : public AttributeBase if ((source_flag & HOST) == HOST && (dst_flag & DEVICE) == DEVICE) { if ((source_flag & source.m_allocated) != source_flag) { RXMESH_ERROR( - "Attribute::copy() copying source is not valid" + "Attribute::copy_from() copying source is not valid" " because it was not allocated on device"); + return; } if ((dst_flag & m_allocated) != dst_flag) { RXMESH_ERROR( - "Attribute::copy() copying source is not valid" + "Attribute::copy_from() copying source is not valid" " because location (this) was not allocated on host"); + return; } diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index 47ebff7d..d9a6d0e7 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -8,7 +8,7 @@ #include "rxmesh/rxmesh.h" #include "rxmesh/types.h" -#include"rxmesh/util/meta.h" +#include "rxmesh/util/meta.h" namespace rxmesh { @@ -289,6 +289,68 @@ struct DenseMatrix return result; } + /** + * @brief compute the following + * Y = alpha * X + Y + * where Y is this dense matrix, X is another dense matrix that has the same + * dimensions as Y and alpha is a scalar. The results are computed for the + * data on the device. 
Only float, double, cuComplex, and cuDoubleComplex + * are supported + * @param stream + * @return + */ + __host__ void axpy(DenseMatrix& X, T alpha, cudaStream_t stream = NULL) + { + CUBLAS_ERROR(cublasSetStream(m_cublas_handle, stream)); + if constexpr (!std::is_same_v && !std::is_same_v && + !std::is_same_v && + !std::is_same_v) { + RXMESH_ERROR( + "DenseMatrix::axpy() only float, double, cuComplex, and " + "cuDoubleComplex are supported for this function!"); + return; + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasSaxpy(m_cublas_handle, + rows() * cols(), + &alpha, + X.m_d_val, + 1, + m_d_val, + 1)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasDaxpy(m_cublas_handle, + rows() * cols(), + &alpha, + X.m_d_val, + 1, + m_d_val, + 1)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasCaxpy(m_cublas_handle, + rows() * cols(), + &alpha, + X.m_d_val, + 1, + m_d_val, + 1)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasZaxpy(m_cublas_handle, + rows() * cols(), + &alpha, + X.m_d_val, + 1, + m_d_val, + 1)); + } + } /** * @brief return the row index corresponding to specific vertex/edge/face @@ -428,6 +490,88 @@ struct DenseMatrix } } + + /** + * @brief Deep copy from a source matrix. If source_flag and target_flag are + * both set to LOCATION_ALL, then we copy what is on host to host, and what + * on device to device. If sourc_flag is set to HOST (or DEVICE) and + * target_flag is set to LOCATION_ALL, then we copy source's HOST (or + * DEVICE) to both HOST and DEVICE. 
Setting source_flag to + * LOCATION_ALL while target_flag is NOT set to LOCATION_ALL is invalid + * because we don't know which source to copy from + * @param source matrix to copy from + * @param source_flag defines where we will copy from + * @param target_flag defines where we will copy to + * @param stream used to launch kernel/memcpy + */ + __host__ void copy_from(DenseMatrix& source, + locationT source_flag = LOCATION_ALL, + locationT target_flag = LOCATION_ALL, + cudaStream_t stream = NULL) + { + if (rows() != source.rows() || cols() != source.cols()) { + RXMESH_ERROR( + "DenseMatrix::copy_from() the number of rows/cols is " + "different!"); + return; + } + + if ((source_flag & LOCATION_ALL) == LOCATION_ALL && + (target_flag & LOCATION_ALL) != LOCATION_ALL) { + RXMESH_ERROR("DenseMatrix::copy_from() Invalid configuration!"); + return; + } + + if ((source_flag & m_allocated) != source_flag) { + RXMESH_ERROR( + "DenseMatrix::copy_from() copying source is not valid" + " because it was not allocated on source i.e., {}", + location_to_string(source_flag)); + return; + } + + if ((target_flag & m_allocated) != target_flag) { + RXMESH_WARN( + "DenseMatrix::copy_from() allocating target before moving to " + "{}", + location_to_string(target_flag)); + allocate(target_flag); + } + // 1) copy from HOST to HOST + if ((source_flag & HOST) == HOST && (target_flag & HOST) == HOST) { + std::memcpy(m_h_val, source.m_h_val, bytes()); + } + + // 2) copy from DEVICE to DEVICE + if ((source_flag & DEVICE) == DEVICE && + (target_flag & DEVICE) == DEVICE) { + CUDA_ERROR(cudaMemcpyAsync(m_d_val, + source.m_d_val, + bytes(), + cudaMemcpyDeviceToDevice, + stream)); + } + + // 3) copy from DEVICE to HOST + if ((source_flag & DEVICE) == DEVICE && (target_flag & HOST) == HOST) { + CUDA_ERROR(cudaMemcpyAsync(m_h_val, + source.m_d_val, + bytes(), + cudaMemcpyDeviceToHost, + stream)); + } + + + // 4) copy from HOST to DEVICE + if ((source_flag & HOST) == HOST && (target_flag & DEVICE) == 
DEVICE) { + CUDA_ERROR(cudaMemcpyAsync(m_d_val, + source.m_h_val, + bytes(), + cudaMemcpyHostToDevice, + stream)); + } + } + /** * @brief release the data on host or device */ diff --git a/tests/RXMesh_test/test_dense_matrix.cuh b/tests/RXMesh_test/test_dense_matrix.cuh index 06a38d2f..83211e48 100644 --- a/tests/RXMesh_test/test_dense_matrix.cuh +++ b/tests/RXMesh_test/test_dense_matrix.cuh @@ -30,5 +30,45 @@ TEST(RXMeshStatic, DenseMatrixASum) mat.release(); + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); +} + + +TEST(RXMeshStatic, DenseMatrixAXPY) +{ + using namespace rxmesh; + + cuda_query(rxmesh_args.device_id); + + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + + DenseMatrix Y(rx, 10, 10); + DenseMatrix X(rx, 10, 10); + + DenseMatrix Y_copy(rx, 10, 10); + DenseMatrix X_copy(rx, 10, 10); + + Y.fill_random(); + X.fill_random(); + + Y_copy.copy_from(Y, HOST, HOST); + X_copy.copy_from(X, HOST, HOST); + + Y.axpy(X, 0.5f); + + Y.move(DEVICE, HOST); + + for (uint32_t i = 0; i < Y.rows(); ++i) { + for (uint32_t j = 0; j < Y.cols(); ++j) { + EXPECT_NEAR(Y_copy(i, j) + 0.5 * X_copy(i, j), Y(i, j), 0.001); + } + } + + + X.release(); + Y.release(); + Y_copy.release(); + X_copy.release(); + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); } \ No newline at end of file From 38fb4ee3344594226ff16bc2ff9cc5fed70c4914 Mon Sep 17 00:00:00 2001 From: ahmed Date: Sun, 21 Jul 2024 17:30:25 -0400 Subject: [PATCH 40/96] dense matrix dot product --- include/rxmesh/matrix/dense_matrix.cuh | 102 +++++++++++++++++++++-- include/rxmesh/matrix/sparse_matrix.cuh | 18 ++-- tests/RXMesh_test/test_dense_matrix.cuh | 43 ++++++++++ tests/RXMesh_test/test_sparse_matrix.cuh | 2 +- 4 files changed, 144 insertions(+), 21 deletions(-) diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index d9a6d0e7..3e904fd1 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -99,13 +99,16 @@ struct DenseMatrix } 
/** - * @brief set all entries in the matrix to zeros on both host and device + * @brief set all entries in the matrix to certain value on both host and + * device */ - __host__ void set_zeros() + __host__ void set_value(T val) { - std::memset(m_h_val, 0, bytes()); - - CUDA_ERROR(cudaMemset(m_d_val, 0, bytes())); + std::fill_n(m_h_val, rows() * cols(), val); + CUDA_ERROR(cudaMemcpy(m_d_val, + m_h_val, + rows() * cols() * sizeof(T), + cudaMemcpyHostToDevice)); } /** @@ -296,8 +299,6 @@ struct DenseMatrix * dimensions as Y and alpha is a scalar. The results are computed for the * data on the device. Only float, double, cuComplex, and cuDoubleComplex * are supported - * @param stream - * @return */ __host__ void axpy(DenseMatrix& X, T alpha, cudaStream_t stream = NULL) { @@ -352,6 +353,93 @@ struct DenseMatrix } } + /** + * @brief compute the dot produce with another dense matrix. If the matrix + * is a 1D vector, it is the inner product. If the matrix represents a 2D + * matrix, then it is the sum of the element-wise multiplication. The + * results are computed for the data on the device. Only float, double, + * cuComplex, and cuDoubleComplex are supported. For complex matrices + * (cuComplex and cuDoubleComplex), it is optional to use the conjugate of + * x. 
+ */ + __host__ T dot(DenseMatrix& x, + bool use_conjugate = false, + cudaStream_t stream = NULL) + { + CUBLAS_ERROR(cublasSetStream(m_cublas_handle, stream)); + if constexpr (!std::is_same_v && !std::is_same_v && + !std::is_same_v && + !std::is_same_v) { + RXMESH_ERROR( + "DenseMatrix::dot() only float, double, cuComplex, and " + "cuDoubleComplex are supported for this function!"); + return T(0); + } + + T result; + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasSdot(m_cublas_handle, + rows() * cols(), + x.m_d_val, + 1, + m_d_val, + 1, + &result)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasDdot(m_cublas_handle, + rows() * cols(), + x.m_d_val, + 1, + m_d_val, + 1, + &result)); + } + + if constexpr (std::is_same_v) { + if (use_conjugate) { + CUBLAS_ERROR(cublasCdotc(m_cublas_handle, + rows() * cols(), + x.m_d_val, + 1, + m_d_val, + 1, + &result)); + } else { + CUBLAS_ERROR(cublasCdotu(m_cublas_handle, + rows() * cols(), + x.m_d_val, + 1, + m_d_val, + 1, + &result)); + } + } + + if constexpr (std::is_same_v) { + if (use_conjugate) { + CUBLAS_ERROR(cublasZdotc(m_cublas_handle, + rows() * cols(), + x.m_d_val, + 1, + m_d_val, + 1, + &result)); + } else { + CUBLAS_ERROR(cublasZdotu(m_cublas_handle, + rows() * cols(), + x.m_d_val, + 1, + m_d_val, + 1, + &result)); + } + } + + return result; + } + /** * @brief return the row index corresponding to specific vertex/edge/face * handle diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index 6b9be1c8..0d135017 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -210,24 +210,16 @@ struct SparseMatrix } /** - * @brief set all entries in the matrix to ones on both host and device + * @brief set all entries in the matrix to certain value on both host and + * device */ - __host__ void set_ones() + __host__ void set_value(T val) { - std::fill_n(m_h_val, m_nnz, 1); + std::fill_n(m_h_val, m_nnz, val); 
CUDA_ERROR(cudaMemcpy( m_d_val, m_h_val, m_nnz * sizeof(T), cudaMemcpyHostToDevice)); } - - /** - * @brief set all entries in the matrix to zeros on both host and device - */ - __host__ void set_zeros() - { - std::memset(m_h_val, 0, m_nnz * sizeof(T)); - - CUDA_ERROR(cudaMemset(m_d_val, 0, m_nnz * sizeof(T))); - } + /** * @brief return number of rows diff --git a/tests/RXMesh_test/test_dense_matrix.cuh b/tests/RXMesh_test/test_dense_matrix.cuh index 83211e48..1936c877 100644 --- a/tests/RXMesh_test/test_dense_matrix.cuh +++ b/tests/RXMesh_test/test_dense_matrix.cuh @@ -70,5 +70,48 @@ TEST(RXMeshStatic, DenseMatrixAXPY) Y_copy.release(); X_copy.release(); + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); +} + +TEST(RXMeshStatic, DenseMatrixDot) +{ + using namespace rxmesh; + + cuda_query(rxmesh_args.device_id); + + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + + DenseMatrix y(rx, 10, 10); + y.fill_random(); + + DenseMatrix x(rx, 10, 10); + x.fill_random(); + + cuComplex dot_res = y.dot(x); + + cuComplex res = make_cuComplex(0.f, 0.f); + + + + + for (uint32_t i = 0; i < y.rows(); ++i) { + for (uint32_t j = 0; j < y.cols(); ++j) { + // for complex number (rx, ix) and (ry+iy), the result of the + // multiplication is (rx.ry-ix.iy) + i(rx.iy + ix.ry) + + cuComplex x_val = x(i, j); + cuComplex y_val = y(i, j); + + res.x += x_val.x * y_val.x - x_val.y * y_val.y; + res.y += x_val.x * y_val.y + x_val.y * y_val.x; + } + } + + EXPECT_NEAR(res.x, dot_res.x, 0.001); + EXPECT_NEAR(res.y, dot_res.y, 0.001); + + y.release(); + x.release(); + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); } \ No newline at end of file diff --git a/tests/RXMesh_test/test_sparse_matrix.cuh b/tests/RXMesh_test/test_sparse_matrix.cuh index 25fdc2df..6b3a9385 100644 --- a/tests/RXMesh_test/test_sparse_matrix.cuh +++ b/tests/RXMesh_test/test_sparse_matrix.cuh @@ -153,7 +153,7 @@ TEST(RXMeshStatic, SparseMatrix) CUDA_ERROR(cudaMalloc((void**)&d_result, (num_vertices) * sizeof(int))); 
SparseMatrix spmat(rx); - spmat.set_ones(); + spmat.set_value(1); spmat_multi_hardwired_kernel<<>>( d_arr_ones, spmat, d_result, num_vertices); From d1f84381deb6b31f24b436cdd62456475b6e3016 Mon Sep 17 00:00:00 2001 From: ahmed Date: Sun, 21 Jul 2024 18:01:03 -0400 Subject: [PATCH 41/96] dense matrix norm2 --- include/rxmesh/matrix/dense_matrix.cuh | 45 +++++++++++++++++++++++++ tests/RXMesh_test/test_dense_matrix.cuh | 38 ++++++++++++++++++--- 2 files changed, 79 insertions(+), 4 deletions(-) diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index 3e904fd1..417c8c26 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -440,6 +440,51 @@ struct DenseMatrix return result; } + + /** + * @brief compute the norm of a dense matrix which is computed as + * sqrt(\sum (x[i]*x[i]**H) for i = 0,...,n*m where n is number of rows and + * m is number of columns, and **H denotes the conjugate if x is complex + * number. The results are computed for the data on the device. Only float, + * double, cuComplex, and cuDoubleComplex are supported. 
+ */ + __host__ BaseTypeT norm2(cudaStream_t stream = NULL) + { + CUBLAS_ERROR(cublasSetStream(m_cublas_handle, stream)); + if constexpr (!std::is_same_v && !std::is_same_v && + !std::is_same_v && + !std::is_same_v) { + RXMESH_ERROR( + "DenseMatrix::norm2() only float, double, cuComplex, and " + "cuDoubleComplex are supported for this function!"); + return T(0); + } + + BaseTypeT result; + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasSnrm2( + m_cublas_handle, rows() * cols(), m_d_val, 1, &result)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasDnrm2( + m_cublas_handle, rows() * cols(), m_d_val, 1, &result)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasScnrm2( + m_cublas_handle, rows() * cols(), m_d_val, 1, &result)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasDznrm2( + m_cublas_handle, rows() * cols(), m_d_val, 1, &result)); + } + + return result; + } + + /** * @brief return the row index corresponding to specific vertex/edge/face * handle diff --git a/tests/RXMesh_test/test_dense_matrix.cuh b/tests/RXMesh_test/test_dense_matrix.cuh index 1936c877..6ec270f3 100644 --- a/tests/RXMesh_test/test_dense_matrix.cuh +++ b/tests/RXMesh_test/test_dense_matrix.cuh @@ -82,18 +82,16 @@ TEST(RXMeshStatic, DenseMatrixDot) RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); DenseMatrix y(rx, 10, 10); - y.fill_random(); + y.fill_random(); DenseMatrix x(rx, 10, 10); - x.fill_random(); + x.fill_random(); cuComplex dot_res = y.dot(x); cuComplex res = make_cuComplex(0.f, 0.f); - - for (uint32_t i = 0; i < y.rows(); ++i) { for (uint32_t j = 0; j < y.cols(); ++j) { // for complex number (rx, ix) and (ry+iy), the result of the @@ -113,5 +111,37 @@ TEST(RXMeshStatic, DenseMatrixDot) y.release(); x.release(); + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); +} + + +TEST(RXMeshStatic, DenseMatrixNorm2) +{ + using namespace rxmesh; + + cuda_query(rxmesh_args.device_id); + + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + + 
DenseMatrix x(rx, 10, 10); + x.fill_random(); + + float norm2_res = x.norm2(); + + float res = 0.f; + + for (uint32_t i = 0; i < x.rows(); ++i) { + for (uint32_t j = 0; j < x.cols(); ++j) { + + cuComplex x_val = x(i, j); + + res += x_val.x * x_val.x + x_val.y * x_val.y; + } + } + + EXPECT_NEAR(norm2_res, std::sqrt(res), 0.001); + + x.release(); + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); } \ No newline at end of file From 57c82d67fa60e56af1a45ebf5a0a223a16c2da59 Mon Sep 17 00:00:00 2001 From: ahmed Date: Sun, 21 Jul 2024 18:29:27 -0400 Subject: [PATCH 42/96] dense matrix scaling --- include/rxmesh/matrix/dense_matrix.cuh | 53 ++++++++++++++++++++++++- tests/RXMesh_test/test_dense_matrix.cuh | 43 ++++++++++++++++++++ 2 files changed, 95 insertions(+), 1 deletion(-) diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index 417c8c26..baffd6bb 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -457,7 +457,7 @@ struct DenseMatrix RXMESH_ERROR( "DenseMatrix::norm2() only float, double, cuComplex, and " "cuDoubleComplex are supported for this function!"); - return T(0); + return BaseTypeT(0); } BaseTypeT result; @@ -485,6 +485,57 @@ struct DenseMatrix } + /** + * @brief multiply all entries in the dense matrix by a scalar (i.e., + * scaling). For complex number, the scalar could be either a complex or + * real number. The results are computed for the data on the device. Only + * float, double, cuComplex, and cuDoubleComplex are supported. 
+ */ + template + __host__ void multiply(U scalar, cudaStream_t stream = NULL) + { + CUBLAS_ERROR(cublasSetStream(m_cublas_handle, stream)); + if constexpr (!std::is_same_v && !std::is_same_v && + !std::is_same_v && + !std::is_same_v) { + RXMESH_ERROR( + "DenseMatrix::multiply() only float, double, cuComplex, and " + "cuDoubleComplex are supported for this function!"); + return T(0); + } + + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasSscal( + m_cublas_handle, rows() * cols(), &scalar, m_d_val, 1)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasDscal( + m_cublas_handle, rows() * cols(), &scalar, m_d_val, 1)); + } + + if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasCscal( + m_cublas_handle, rows() * cols(), &scalar, m_d_val, 1)); + } else { + CUBLAS_ERROR(cublasCsscal( + m_cublas_handle, rows() * cols(), &scalar, m_d_val, 1)); + } + } + + if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasZscal( + m_cublas_handle, rows() * cols(), &scalar, m_d_val, 1)); + } else { + CUBLAS_ERROR(cublasZdscal( + m_cublas_handle, rows() * cols(), &scalar, m_d_val, 1)); + } + } + } + /** * @brief return the row index corresponding to specific vertex/edge/face * handle diff --git a/tests/RXMesh_test/test_dense_matrix.cuh b/tests/RXMesh_test/test_dense_matrix.cuh index 6ec270f3..c50b105e 100644 --- a/tests/RXMesh_test/test_dense_matrix.cuh +++ b/tests/RXMesh_test/test_dense_matrix.cuh @@ -143,5 +143,48 @@ TEST(RXMeshStatic, DenseMatrixNorm2) x.release(); + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); +} + + +TEST(RXMeshStatic, DenseMatrixMulitply) +{ + using namespace rxmesh; + + cuda_query(rxmesh_args.device_id); + + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + + DenseMatrix x(rx, 10, 10); + DenseMatrix copy(rx, 10, 10); + + x.fill_random(); + + copy.copy_from(x, HOST, HOST); + + float scalar = 5.0f; + + x.multiply(scalar); + + x.move(DEVICE, HOST); + + for (uint32_t i = 0; 
i < x.rows(); ++i) { + for (uint32_t j = 0; j < x.cols(); ++j) { + + cuComplex x_val = x(i, j); + + cuComplex res = copy(i, j); + res.x *= scalar; + res.y *= scalar; + + EXPECT_NEAR(res.x, x_val.x, 0.001); + EXPECT_NEAR(res.y, x_val.y, 0.001); + } + } + + + x.release(); + copy.release(); + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); } \ No newline at end of file From 9e39a2e9ff3fecd925ea80fa8e3dcd2baf555f2b Mon Sep 17 00:00:00 2001 From: ahmed Date: Sun, 21 Jul 2024 19:15:42 -0400 Subject: [PATCH 43/96] dense matrix swap --- include/rxmesh/matrix/dense_matrix.cuh | 62 +++++++++++++++++++++++++ tests/RXMesh_test/test_dense_matrix.cuh | 39 +++++++++++++++- 2 files changed, 100 insertions(+), 1 deletion(-) diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index baffd6bb..5c65d9a0 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -312,6 +312,18 @@ struct DenseMatrix return; } + + if (rows() != X.rows() || cols() != X.cols()) { + RXMESH_ERROR( + "DenseMatrix::axpy() The input matrices size does not match. " + "This matrix size is {},{} while X size is {},{}", + rows(), + cols(), + X.rows(), + X.cols()); + return; + } + if constexpr (std::is_same_v) { CUBLAS_ERROR(cublasSaxpy(m_cublas_handle, rows() * cols(), @@ -536,6 +548,56 @@ struct DenseMatrix } } + /** + * @brief Swap the content of this dense matrix with another dense matrix. + * The results are computed for the data on the device. Only float, double, + * cuComplex, and cuDoubleComplex are supported. 
+ */ + __host__ void swap(DenseMatrix& X, cudaStream_t stream = NULL) + { + CUBLAS_ERROR(cublasSetStream(m_cublas_handle, stream)); + if constexpr (!std::is_same_v && !std::is_same_v && + !std::is_same_v && + !std::is_same_v) { + RXMESH_ERROR( + "DenseMatrix::swap() only float, double, cuComplex, and " + "cuDoubleComplex are supported for this function!"); + return; + } + + if (rows() != X.rows() || cols() != X.cols()) { + RXMESH_ERROR( + "DenseMatrix::swap() The input matrices size does not match. " + "This matrix size is {},{} while X size is {},{}", + rows(), + cols(), + X.rows(), + X.cols()); + return; + } + + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasSswap( + m_cublas_handle, rows() * cols(), m_d_val, 1, X.m_d_val, 1)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasDswap( + m_cublas_handle, rows() * cols(), m_d_val, 1, X.m_d_val, 1)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasCswap( + m_cublas_handle, rows() * cols(), m_d_val, 1, X.m_d_val, 1)); + } + + if constexpr (std::is_same_v) { + CUBLAS_ERROR(cublasZswap( + m_cublas_handle, rows() * cols(), m_d_val, 1, X.m_d_val, 1)); + } + } + /** * @brief return the row index corresponding to specific vertex/edge/face * handle diff --git a/tests/RXMesh_test/test_dense_matrix.cuh b/tests/RXMesh_test/test_dense_matrix.cuh index c50b105e..5d0bca9b 100644 --- a/tests/RXMesh_test/test_dense_matrix.cuh +++ b/tests/RXMesh_test/test_dense_matrix.cuh @@ -187,4 +187,41 @@ TEST(RXMeshStatic, DenseMatrixMulitply) copy.release(); EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); -} \ No newline at end of file +} + +TEST(RXMeshStatic, DenseMatrixSwap) +{ + using namespace rxmesh; + + cuda_query(rxmesh_args.device_id); + + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + + DenseMatrix x(rx, 10, 10); + DenseMatrix copy(rx, 10, 10); + x.fill_random(); + + copy.copy_from(x, HOST, HOST); + + DenseMatrix y(rx, 10, 10); + y.fill_random(); + + x.swap(y); + + x.move(DEVICE, HOST); + 
y.move(DEVICE, HOST); + + for (uint32_t i = 0; i < x.rows(); ++i) { + for (uint32_t j = 0; j < x.cols(); ++j) { + + EXPECT_NEAR(y(i, j), copy(i, j), 0.001); + } + } + + + x.release(); + y.release(); + copy.release(); + + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); +} From 29725b6e9534eb523317e151c26015362bf6296c Mon Sep 17 00:00:00 2001 From: ahmed Date: Sun, 21 Jul 2024 19:30:11 -0400 Subject: [PATCH 44/96] minor fixes --- include/rxmesh/matrix/sparse_matrix.cuh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index 0d135017..5dfedeb4 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -219,7 +219,7 @@ struct SparseMatrix CUDA_ERROR(cudaMemcpy( m_d_val, m_h_val, m_nnz * sizeof(T), cudaMemcpyHostToDevice)); } - + /** * @brief return number of rows @@ -430,8 +430,8 @@ struct SparseMatrix DenseMatrix& C_mat, cudaStream_t stream = 0) { - T alpha = 1.0; - T beta = 0.0; + T alpha; + T beta; cusparseSpMatDescr_t matA = m_spdescr; cusparseDnMatDescr_t matB = B_mat.m_dendescr; @@ -475,8 +475,8 @@ struct SparseMatrix assert(rows() == C_mat.rows()); assert(B_mat.cols() == C_mat.cols()); - BaseTypeT alpha; - BaseTypeT beta; + T alpha; + T beta; if constexpr (std::is_same_v) { alpha = make_cuComplex(1.f, 1.f); From 0ae5f01db00688cdabc813067d2fee585eae2c8b Mon Sep 17 00:00:00 2001 From: ahmed Date: Sun, 21 Jul 2024 22:58:00 -0400 Subject: [PATCH 45/96] export VTK --- include/rxmesh/rxmesh.cpp | 2 +- include/rxmesh/rxmesh_static.h | 199 ++++++++++++++++++++++++-- tests/RXMesh_test/CMakeLists.txt | 1 + tests/RXMesh_test/rxmesh_test_main.cu | 1 + tests/RXMesh_test/test_export.cuh | 63 ++++++++ 5 files changed, 250 insertions(+), 16 deletions(-) create mode 100644 tests/RXMesh_test/test_export.cuh diff --git a/include/rxmesh/rxmesh.cpp b/include/rxmesh/rxmesh.cpp index f132a6b0..d3add26f 100644 --- 
a/include/rxmesh/rxmesh.cpp +++ b/include/rxmesh/rxmesh.cpp @@ -22,7 +22,7 @@ RXMesh::RXMesh() m_input_max_edge_incident_faces(0), m_input_max_face_adjacent_faces(0), m_num_patches(0), - m_patch_size(256), + m_patch_size(512), m_is_input_edge_manifold(true), m_is_input_closed(true), m_h_vertex_prefix(nullptr), diff --git a/include/rxmesh/rxmesh_static.h b/include/rxmesh/rxmesh_static.h index afe5433a..361084a9 100644 --- a/include/rxmesh/rxmesh_static.h +++ b/include/rxmesh/rxmesh_static.h @@ -1033,9 +1033,9 @@ class RXMeshStatic : public RXMesh boundary_v.reset(0, LOCATION_ALL); - constexpr uint32_t blockThreads = 256; + constexpr uint32_t blockThreads = 256; - LaunchBox lb; + LaunchBox lb; prepare_launch_box( {Op::EF, Op::EV}, @@ -1253,23 +1253,115 @@ class RXMeshStatic : public RXMesh std::fstream file(fn, std::ios::out); file.precision(30); + std::vector v_list; + create_vertex_list(v_list, coords); - std::vector obj_coords(get_num_vertices()); + assert(get_num_vertices() == v_list.size()); + + for (uint32_t v = 0; v < v_list.size(); ++v) { + file << "v " << v_list[v][0] << " " << v_list[v][1] << " " + << v_list[v][2] << " \n"; + } + + std::vector f_list; + create_face_list(f_list); + + assert(f_list.size() == get_num_faces()); + + for (uint32_t f = 0; f < f_list.size(); ++f) { + file << "f "; + for (uint32_t i = 0; i < 3; ++i) { + file << f_list[f][i] + 1 << " "; + } + file << "\n"; + } + + file.close(); + } + + /** + * @brief export the mesh to a VTK file which can be visualized using + * Paraview. The VTK supports visualizing attributes on vertices and faces. + * Edge attributes are NOT supported. This function uses parameter pack such + * that the user can call it with zero, one or move attributes (again should + * be either VertexAttribute or FaceAttribute). + */ + template + void export_vtk(const std::string& filename, + const VertexAttribute& coords, + AttributesT... 
attributes) const + { + std::string fn = filename; + std::fstream file(fn, std::ios::out); + file.precision(30); + + file << "# vtk DataFile Version 3.0\n"; + file << extract_file_name(filename) << "\n"; + file << "ASCII\n"; + file << "DATASET POLYDATA\n"; + file << "POINTS " << get_num_vertices() << " float\n "; + + std::vector v_list; + create_vertex_list(v_list, coords); + + assert(get_num_vertices() == v_list.size()); + + for (uint32_t v = 0; v < v_list.size(); ++v) { + file << v_list[v][0] << " " << v_list[v][1] << " " << v_list[v][2] + << " \n"; + } + + std::vector f_list; + create_face_list(f_list); + + assert(f_list.size() == get_num_faces()); + + file << "POLYGONS 3 " << 4 * f_list.size() << "\n"; + + for (uint32_t f = 0; f < f_list.size(); ++f) { + file << "3 "; + for (uint32_t i = 0; i < 3; ++i) { + file << f_list[f][i] << " "; + } + file << "\n"; + } + bool first_v_attr = true; + bool first_f_attr = true; + + + ([&] { export_vtk(file, first_v_attr, first_f_attr, attributes); }(), + ...); + + file.close(); + } + + /** + * @brief convert given vertex attributes representing the coordinates into + * std vector + */ + template + void create_vertex_list(std::vector& v_list, + const VertexAttribute& coords) const + { + v_list.resize(get_num_vertices()); for_each_vertex( HOST, [&](const VertexHandle vh) { - uint32_t vid = linear_id(vh); - obj_coords[vid][0] = coords(vh, 0); - obj_coords[vid][1] = coords(vh, 1); - obj_coords[vid][2] = coords(vh, 2); + uint32_t vid = linear_id(vh); + v_list[vid][0] = coords(vh, 0); + v_list[vid][1] = coords(vh, 1); + v_list[vid][2] = coords(vh, 2); }, NULL, false); + } - for (uint32_t v = 0; v < obj_coords.size(); ++v) { - file << "v " << obj_coords[v][0] << " " << obj_coords[v][1] << " " - << obj_coords[v][2] << " \n"; - } + /** + * @brief convert the mesh connectivity to face list + */ + void create_face_list(std::vector& f_list) const + { + f_list.reserve(get_num_faces()); for (uint32_t p = 0; p < this->m_num_patches; ++p) { 
const uint32_t p_num_faces = this->m_h_patches_info[p].num_faces[0]; @@ -1278,7 +1370,9 @@ class RXMeshStatic : public RXMesh f, this->m_h_patches_info[p].active_mask_f) && detail::is_owned(f, this->m_h_patches_info[p].owned_mask_f)) { - file << "f "; + + glm::uvec3 face; + for (uint32_t e = 0; e < 3; ++e) { uint16_t edge = this->m_h_patches_info[p].fe[3 * f + e].id; @@ -1288,16 +1382,91 @@ class RXMeshStatic : public RXMesh uint16_t v = this->m_h_patches_info[p].ev[e_id].id; VertexHandle vh(p, v); uint32_t vid = linear_id(vh); - file << vid + 1 << " "; + face[e] = vid; } - file << std::endl; + f_list.push_back(face); } } } } - protected: + template + void export_vtk(std::fstream& file, + bool& first_v_attr, + bool& first_f_attr, + const AttributeT& attribute) const + { + using HandleT = typename AttributeT::HandleType; + + if constexpr (std::is_same_v) { + if (first_f_attr) { + file << "CELL_DATA " << get_num_faces() << "\n"; + first_f_attr = false; + } + uint32_t num_attr = attribute.get_num_attributes(); + if (num_attr == 1) { + file << "SCALARS " << attribute.get_name() << " float 1\n"; + file << "LOOKUP_TABLE default\n"; + } else if (num_attr == 2) { + file << "COLOR_SCALARS " << attribute.get_name() << " 2\n"; + } else if (num_attr == 3) { + file << "VECTORS " << attribute.get_name() << " float \n"; + } else { + RXMESH_ERROR( + "RXMeshStatic::export_vtk() The number of attributes ({}) " + "is not support. 
Only 1, 2, or 3 attributes are supported", + num_attr); + return; + } + + for_each_face( + HOST, + [&](const FaceHandle& fh) { + for (int i = 0; i < attribute.get_num_attributes(); ++i) { + file << attribute(fh, i) << " "; + } + file << "\n"; + }, + NULL, + false); + } + + + if constexpr (std::is_same_v) { + if (first_v_attr) { + file << "POINT_DATA " << get_num_vertices() << "\n"; + first_v_attr = false; + } + uint32_t num_attr = attribute.get_num_attributes(); + if (num_attr == 1) { + file << "SCALARS " << attribute.get_name() << " float 1\n"; + file << "LOOKUP_TABLE default\n"; + } else if (num_attr == 2) { + file << "COLOR_SCALARS " << attribute.get_name() << " 2\n"; + } else if (num_attr == 3) { + file << "VECTORS " << attribute.get_name() << " float \n"; + } else { + RXMESH_ERROR( + "RXMeshStatic::export_vtk() The number of attributes ({}) " + "is not support. Only 1, 2, or 3 attributes are supported", + num_attr); + return; + } + + for_each_vertex( + HOST, + [&](const VertexHandle& vh) { + for (int i = 0; i < attribute.get_num_attributes(); ++i) { + file << attribute(vh, i) << " "; + } + file << "\n"; + }, + NULL, + false); + } + } + template size_t calc_shared_memory(const Op op, const bool oriented) const { diff --git a/tests/RXMesh_test/CMakeLists.txt b/tests/RXMesh_test/CMakeLists.txt index 713ab312..cad48095 100644 --- a/tests/RXMesh_test/CMakeLists.txt +++ b/tests/RXMesh_test/CMakeLists.txt @@ -25,6 +25,7 @@ set( SOURCE_LIST test_eigen.cuh test_boundary.cuh test_dense_matrix.cuh + test_export.cuh ) target_sources( RXMesh_test diff --git a/tests/RXMesh_test/rxmesh_test_main.cu b/tests/RXMesh_test/rxmesh_test_main.cu index 27218bfb..f12ec56f 100644 --- a/tests/RXMesh_test/rxmesh_test_main.cu +++ b/tests/RXMesh_test/rxmesh_test_main.cu @@ -34,6 +34,7 @@ struct RXMeshTestArg #include "test_eigen.cuh" #include "test_boundary.cuh" #include "test_dense_matrix.cuh" +#include "test_export.cuh" // clang-format on int main(int argc, char** argv) diff --git 
a/tests/RXMesh_test/test_export.cuh b/tests/RXMesh_test/test_export.cuh new file mode 100644 index 00000000..80de621d --- /dev/null +++ b/tests/RXMesh_test/test_export.cuh @@ -0,0 +1,63 @@ +#include "gtest/gtest.h" + +#include "rxmesh/rxmesh_static.h" + + +TEST(RXMeshStatic, Export) +{ + using namespace rxmesh; + + CUDA_ERROR(cudaDeviceReset()); + + cuda_query(rxmesh_args.device_id); + + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + + auto v_attr_scalar = *rx.add_vertex_attribute("vScalar", 1); + auto v_attr_vec2 = *rx.add_vertex_attribute("vVector2", 2); + auto v_attr_vec3 = *rx.add_vertex_attribute("vVector3", 3); + + auto f_attr_scalar = *rx.add_face_attribute("fScalar", 1); + auto f_attr_vec2 = *rx.add_face_attribute("fVector2", 2); + auto f_attr_vec3 = *rx.add_face_attribute("fVector3", 3); + + rx.for_each_vertex(HOST, [&](const VertexHandle& vh) { + v_attr_scalar(vh, 0) = rand() % 100; + + for (int i = 0; i < v_attr_vec2.get_num_attributes(); ++i) { + v_attr_vec2(vh, i) = rand() % 100; + } + + for (int i = 0; i < v_attr_vec3.get_num_attributes(); ++i) { + v_attr_vec3(vh, i) = rand() % 100; + } + }); + + + rx.for_each_face(HOST, [&](const FaceHandle& fh) { + f_attr_scalar(fh, 0) = rand() % 100; + + for (int i = 0; i < f_attr_vec2.get_num_attributes(); ++i) { + f_attr_vec2(fh, i) = rand() % 100; + } + + for (int i = 0; i < f_attr_vec3.get_num_attributes(); ++i) { + f_attr_vec3(fh, i) = rand() % 100; + } + }); + + + rx.export_obj("sphere3.obj", *rx.get_input_vertex_coordinates()); + + rx.export_vtk("sphere3.vtk", + *rx.get_input_vertex_coordinates(), + v_attr_scalar, + v_attr_vec2, + v_attr_vec3, + f_attr_scalar, + f_attr_vec2, + f_attr_vec3); + + + ASSERT_EQ(cudaDeviceSynchronize(), cudaSuccess); +} \ No newline at end of file From ba82b0de8a31c2416dca776bf77c4cce40759a54 Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 22 Jul 2024 09:29:39 -0400 Subject: [PATCH 46/96] Update Readme [skip ci] --- README.md | 128 
+++++++++++++++++++++++++++++++++++------------------- 1 file changed, 84 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index bf1d8831..b5bd000b 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ * [**Structures**](#structures) * [**Computation**](#computation) * [**Viewer**](#viewer) + * [**Matrices and Vectors**](#matrices-and-vectors) - [**Replicability**](#replicability) - [**Bibtex**](#bibtex) @@ -25,16 +26,23 @@ RXMesh is a surface triangle mesh data structure and programming model for proce - *[RXMesh: A High-performance Mesh Data Structure and Programming Model on the GPU [S41051]](https://www.nvidia.com/gtc/session-catalog/?tab.scheduledorondemand=1583520458947001NJiE&search=rxmesh#/session/1633891051385001Q9SE)—NVIDIA GTC 2022* -This repository provides 1) source code to reproduce the results presented in the paper (git tag [`v0.1.0`](https://github.com/owensgroup/RXMesh/tree/v0.1.0)) and 2) ongoing development of RXMesh. For 1), all input models used in the paper can be found [here](https://ucdavis365-my.sharepoint.com/:f:/g/personal/ahmahmoud_ucdavis_edu/En-vEpIdSGBHqvCIa-MVXRQBg5g7GfM3P3RwZBHL4Hby3w?e=2EVnJd). Models were collected from [Thingi10K](https://ten-thousand-models.appspot.com/) and [Smithsonian 3D](https://3d.si.edu/explore) repository. +The library also features a sparse and dense matrix infrastructure that is tightly coupled with the mesh data structure. We expose various [cuSolver](https://docs.nvidia.com/cuda/cusolver/index.html), [cuSparse](https://docs.nvidia.com/cuda/cusparse/), and [cuBlas](https://docs.nvidia.com/cuda/cublas/) operations through the sparse and dense matrices, tailored for geometry processing applications. + +This repository provides 1) source code to reproduce the results presented in the paper (git tag [`v0.1.0`](https://github.com/owensgroup/RXMesh/tree/v0.1.0)) and 2) ongoing development of RXMesh. 
## **Compilation** -The code can be compiled on Ubuntu (GCC 9) and Windows (Visual Studio 2019) providing that CUDA (>=11.1.0) is installed. To run the executable(s), an NVIDIA GPU should be installed on the machine. +The code can be compiled on Ubuntu, Windows, and WSL provided that CUDA (>=11.1.0) is installed. To run the executable(s), an NVIDIA GPU should be installed on the machine. ### **Dependencies** - [OpenMesh](https://www.graphics.rwth-aachen.de:9000/OpenMesh/OpenMesh) to verify the applications against reference CPU implementation - [RapidJson](https://github.com/Tencent/rapidjson) to report the results in JSON file(s) - [GoogleTest](https://github.com/google/googletest) for unit tests - [spdlog](https://github.com/gabime/spdlog) for logging +- [glm](https://github.com/g-truc/glm.git) for small vector and matrix operations +- [Eigen](https://gitlab.com/libeigen/eigen) for small vector and matrix operations +- [Polyscope](https://github.com/nmwsharp/polyscope) for visualization +- [cereal](https://github.com/USCiLab/cereal.git) for serialization + All the dependencies are installed automatically! To compile the code: @@ -109,7 +117,7 @@ The goal of defining a programming model is to make it easy to write applicatio vertex_color(vh, 2) = 0.6; ``` -- **Iterators** are used during query operations to iterate over the output of the query operation. The type of iterator defines the type of mesh element iterated on e.g., `VertexIterator` iterates over vertices which is the output of `VV`, `EV`, or `FV` query operations. Since query operations are only supported on the device, iterators can be only used inside the kernel. Iterators are usually populated internally. +- **Iterators** are used during query operations to iterate over the output of the query operation. The type of iterator defines the type of mesh element iterated on e.g., `VertexIterator` iterates over vertices which is the output of `VV`, `EV`, or `FV` query operations. 
Since query operations are only supported on the device, iterators can be only used inside the GPU kernel. Iterators are usually populated internally. - Example: Iterating over faces ```c++ @@ -140,7 +148,7 @@ The goal of defining a programming model is to make it easy to write applicatio vertex_color(vh, 2) = 0.9; }); ``` - Alternatively, `for_each` operations could be written the same way as Queries operations (see below) using `for_each_dispatcher()`. This might be useful if the user would like to combine a `for_each` with queries operations in the same kernel. For more examples, checkout [`ForEach`](/tests/RXMesh_test/test_for_each.cuh) unit test. + Alternatively, `for_each` operations could be written the same way as Queries operations (see below). This might be useful if the user would like to combine a `for_each` with queries operations in the same kernel. For more examples, checkout [`ForEach`](/tests/RXMesh_test/test_for_each.cuh) unit test. - **Queries** operations supported by RXMesh with description are listed below @@ -161,16 +169,16 @@ The goal of defining a programming model is to make it easy to write applicatio ```cpp template __global__ void vertex_normal (Context context){ - auto compute_vn = [&](FaceHandle face_id, VertexIterator& fv) { + auto compute_vn = [&](const FaceHandle face_id, const VertexIterator& fv) { //This thread is assigned to face_id // get the face's three vertices coordinates - Vector<3, T> c0(coords(fv[0], 0), coords(fv[0], 1), coords(fv[0], 2)); - Vector<3, T> c1(coords(fv[1], 0), coords(fv[1], 1), coords(fv[1], 2)); - Vector<3, T> c2(coords(fv[2], 0), coords(fv[2], 1), coords(fv[2], 2)); + vec3 c0(coords(fv[0], 0), coords(fv[0], 1), coords(fv[0], 2)); + vec3 c1(coords(fv[1], 0), coords(fv[1], 1), coords(fv[1], 2)); + vec3 c2(coords(fv[2], 0), coords(fv[2], 1), coords(fv[2], 2)); //compute face normal - Vector<3, T> n = cross(c1 - c0, c2 - c0); + vec3 n = cross(c1 - c0, c2 - c0); // add the face's normal to its vertices for 
(uint32_t v = 0; v < 3; ++v) // for every vertex in this face @@ -178,13 +186,22 @@ The goal of defining a programming model is to make it easy to write applicatio atomicAdd(&normals(fv[v], i), n[i]); }; - //Query dispatcher must be called by all threads in the block. - //Dispatcher will first perform the query, store the results in shared memory, then - //run the user-defined computation i.e., compute_vn - query_block_dispatcher(context, compute_vn); + //Query must be called by all threads in the block. Thus, we create this cooperative_group + //that uses all threads in the block and pass it to the Query + auto block = cooperative_groups::this_thread_block(); + + Query query(context); + + //Query will first perform the query, store the results in shared memory. ShmemAllocator is + //passed to the function to make sure we don't over-allocate or overwrite user-allocated shared + //memory + ShmemAllocator shrd_alloc; + + //Finally, we run the user-defined computation i.e., compute_vn + query.dispatch(block, shrd_alloc, compute_vn); } ``` - To save computation, `query_block_dispatcher` could be run on a subset of the input mesh element i.e., _active set_. The user can define the active set using a lambda function that returns true if the input mesh element is in the active set. + To save computation, `query.dispatch` could be run on a subset of the input mesh element i.e., _active set_. The user can define the active set using a lambda function that returns true if the input mesh element is in the active set. - Example: defining active set ```cpp @@ -194,11 +211,11 @@ The goal of defining a programming model is to make it easy to write applicatio // .... }; - auto computation = [&](FaceHandle face_id, VertexIterator& fv) { + auto computation = [&](const FaceHandle face_id, const VertexIterator& fv) { // .... 
}; - query_block_dispatcher(context, computation, active_set); + query.dispatch(context, computation, active_set); } ``` @@ -242,13 +259,10 @@ Starting v0.2.1, RXMesh integrates [Polyscope](https://polyscope.run) as a mesh > cd build > cmake -DUSE_POLYSCOPE=True ../ ``` -By default, the parameter is set to True on Windows and False on Linux machines. RXMesh implements the necessary functionalities to pass attributes to Polyscope—thanks to its [data adaptors](https://polyscope.run/data_adaptors/). However, this needs attributes to be moved to the host first before passing it to Polyscope. For more information about Polyscope's different visualization options, please checkout Polyscope's [Surface Mesh documentation](https://polyscope.run/structures/surface_mesh/basics/). +By default, the parameter is set to True. RXMesh implements the necessary functionalities to pass attributes to Polyscope—thanks to its [data adaptors](https://polyscope.run/data_adaptors/). However, this needs attributes to be moved to the host first before passing it to Polyscope. For more information about Polyscope's different visualization options, please checkout Polyscope's [Surface Mesh documentation](https://polyscope.run/structures/surface_mesh/basics/). - Example: [render vertex color](./tests/Polyscope_test/test_polyscope.cu) ```cpp - //initialize polyscope - polyscope::init(); - RXMeshStatic rx("dragon.obj"); //vertex color attribute @@ -273,36 +287,62 @@ By default, the parameter is set to True on Windows and False on Linux machines.

+### **Matrices and Vectors** +- **Large Matrices:** RXMesh has built-in support for large sparse and dense matrices built on top of [cuSparse](https://docs.nvidia.com/cuda/cusparse/) and [cuBlas](https://docs.nvidia.com/cuda/cublas/), respectively. For example, attributes can be converted to dense matrices as follows -## **Replicability** -This repo was awarded the [replicability stamp](http://www.replicabilitystamp.org#https-github-com-owensgroup-rxmesh) by the Graphics Replicability Stamp Initiative (GRSI) :tada: +```cpp + +RXMeshStatic rx("input.obj"); + +//Input mesh coordinates as VertexAttribute +std::shared_ptr> x = rx.get_input_vertex_coordinates(); + +//Convert the attributes to a (#vertices x 3) dense matrix +std::shared_ptr> x_mat = x->to_matrix(); + +//do something with x_mat +//.... -The scripts used to generate the data shown in the paper can be found under -* [Figure 6](https://github.com/owensgroup/RXMesh/blob/main/tests/RXMesh_test/benchmark.sh) -* [Figure 8 (a)](https://github.com/owensgroup/RXMesh/blob/main/apps/MCF/benchmark.sh) -* [Figure 8 (b)](https://github.com/owensgroup/RXMesh/blob/main/apps/Geodesic/benchmark.sh) -* [Figure 8 (c)](https://github.com/owensgroup/RXMesh/blob/main/apps/Filtering/benchmark.sh) -* [Figure 8 (d)](https://github.com/owensgroup/RXMesh/blob/main/apps/VertexNormal/benchmark.sh) +//Populate the VertexAttribute coordinates back with the content of the dense matrix +x->from_matrix(x_mat.get()); -Each script should be run from the script's containing directory after compiling the code in the `build/` directory. The only input parameter needed is the path to the input OBJ files. The resulting JSON files will be written to the `output/` directory. +``` +Dense matrices can be accessed using the usual row and column indices or via the mesh element handle (Vertex/Edge/FaceHandle) as a row index. This allows for easy access to the correct row associated with a specific vertex, edge, or face. 
Dense matrices support various operations such as absolute sum, AXPY, dot products, norm2, scaling, and swapping. + +RXMesh supports sparse matrices, where the sparsity pattern matches the query operations. For example, it is often necessary to build a sparse matrix of size #V x #V with non-zero values at (i, j) only if the vertex corresponding to row i is connected by an edge to the vertex corresponding to column j. Currently, we only support the VV sparsity pattern, but we are working on expanding to all other types of queries. + +The sparse matrix can be used to solve a linear system via Cholesky, LU, or QR factorization (relying on [cuSolver](https://docs.nvidia.com/cuda/cusolver/index.html)). The solver offers two APIs. The high-level API reorders the input sparse matrix (to reduce non-zero fill-in after matrix factorization) and allocates the additional memory needed to solve the system. Repeated calls to this API will reorder the matrix and allocate/deallocate the temporary memory with each call. For scenarios where the matrix remains unchanged but multiple right-hand sides need to be solved, users can utilize the low-level API, which splits the solve method into pre_solve() and solve(). The former reorders the matrix and allocates temporary memory only once. The low-level API is currently only supported for Cholesky-based factorization. Check out the MCF application for an example of how to set up and use the solver. + +Similar to dense matrices, sparse matrices also support accessing the matrix using the VertexHandle and multiplication by dense matrices. + +- **Small Matrices:** +It is often necessary to perform operations on small matrices as part of geometry processing applications, such as computing the SVD of a 3x3 matrix or normalizing a 1x3 vector. For this purpose, RXMesh attributes can be converted into glm or Eigen matrices, as demonstrated in the vertex_normal example above. 
Both glm and Eigen support small matrix operations inside the GPU kernel. + + + +## **Replicability** +This repo was awarded the [replicability stamp](http://www.replicabilitystamp.org#https-github-com-owensgroup-rxmesh) by the Graphics Replicability Stamp Initiative (GRSI) :tada:. Visit git tag [`v0.1.0`](https://github.com/owensgroup/RXMesh/tree/v0.1.0) for more information about replicability scripts. ## **Bibtex** ``` @article{Mahmoud:2021:RAG, - author = {Mahmoud, Ahmed H. and Porumbescu, Serban D. and Owens, John D.}, - title = {{RXM}esh: A {GPU} Mesh Data Structure}, - journal = {ACM Transactions on Graphics}, - year = 2021, - volume = 40, - number = 4, - month = aug, - issue_date = {August 2021}, - articleno = 104, - numpages = 16, - pages = {104:1--104:16}, - url = {https://escholarship.org/uc/item/8r5848vp}, - full_talk = {https://youtu.be/Se_cNAol4hY}, - short_talk = {https://youtu.be/V_SHMXnCVws}, - doi = {10.1145/3450626.3459748} + author = {Ahmed H. Mahmoud and Serban D. Porumbescu and John D. 
Owens}, + title = {{RXM}esh: A {GPU} Mesh Data Structure}, + journal = {ACM Transactions on Graphics}, + year = 2021, + volume = 40, + number = 4, + month = aug, + issue_date = {August 2021}, + articleno = 104, + numpages = 16, + pages = {104:1--104:16}, + url = {https://escholarship.org/uc/item/8r5848vp}, + full_talk = {https://youtu.be/Se_cNAol4hY}, + short_talk = {https://youtu.be/V_SHMXnCVws}, + doi = {10.1145/3450626.3459748}, + acmauthorize = {https://dl.acm.org/doi/10.1145/3450626.3459748?cid=81100458295}, + acceptance = {149/444 (33.6\%)}, + ucdcite = {a140} } ``` From 88450d4a370da07d573d82edc4750ecbf9e673d9 Mon Sep 17 00:00:00 2001 From: ahmed Date: Mon, 22 Jul 2024 21:04:41 -0400 Subject: [PATCH 47/96] post-merge bug fix --- include/rxmesh/attribute.h | 3 +-- include/rxmesh/patch_info.h | 8 +++++--- include/rxmesh/rxmesh_dynamic.cu | 7 ++++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/rxmesh/attribute.h b/include/rxmesh/attribute.h index aeb4a851..84b0cffe 100644 --- a/include/rxmesh/attribute.h +++ b/include/rxmesh/attribute.h @@ -386,8 +386,7 @@ class Attribute : public AttributeBase */ void reset(const T value, locationT location, cudaStream_t stream = NULL) { - if (((location & DEVICE) == DEVICE) && is_host_allocated()) { - + if (((location & DEVICE) == DEVICE) && is_device_allocated()) { const int threads = 256; detail::template memset_attribute <<get_num_patches(), threads, 0, stream>>>( diff --git a/include/rxmesh/patch_info.h b/include/rxmesh/patch_info.h index f3304484..047962ee 100644 --- a/include/rxmesh/patch_info.h +++ b/include/rxmesh/patch_info.h @@ -12,7 +12,6 @@ #include "rxmesh/lp_hashtable.cuh" - #ifdef __CUDA_ARCH__ #include "rxmesh/kernels/util.cuh" #endif @@ -145,8 +144,11 @@ struct ALIGN(16) PatchInfo const LPPair* table = nullptr, const LPPair* stash = nullptr) const { - if (table == nullptr && stash == nullptr) { - assert(!is_owned(typename HandleT::LocalT(key))); + // if (table == nullptr && stash == 
nullptr) { + // assert(!is_owned(typename HandleT::LocalT(key))); + // } + if (is_owned(typename HandleT::LocalT(key))) { + return HandleT(patch_id, key); } LPPair lp = get_lp().find(key, table, stash); diff --git a/include/rxmesh/rxmesh_dynamic.cu b/include/rxmesh/rxmesh_dynamic.cu index f655216c..c16e6fe3 100644 --- a/include/rxmesh/rxmesh_dynamic.cu +++ b/include/rxmesh/rxmesh_dynamic.cu @@ -2036,9 +2036,8 @@ __global__ static void check_ribbon_faces(const Context context, // printf( // "\n T=%u, p = %u, #F=%u, #F_owned= %u, " // "#E=%u, #E_owned= %u, #V=%u, #V_owned= - // %u, " "fvh_global=%u, %u, vh=%u, %u, s_vf - // =%u, " - // "%u", + // %u, " "f= %u, fvh_global=%u, %u, v_id= + // %u, " "vh=%u,%u, s_vf =%u, %u", // threadIdx.x, // patch_id, // patch_info.num_faces[0], @@ -2047,8 +2046,10 @@ __global__ static void check_ribbon_faces(const Context context, // patch_info.get_num_owned(), // patch_info.num_vertices[0], // patch_info.get_num_owned(), + // f, // fvh_global.patch_id(), // fvh_global.local_id(), + // v_id, // vh.patch_id(), // vh.local_id(), // s_vf_offset[v_id], From df4f711a52a2f7b7bf832c792bdf81a970f361db Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 25 Jul 2024 00:35:07 -0400 Subject: [PATCH 48/96] Zero-copy conversion from RXMesh Sparse/DenseMatrxi to Eigen Sparse/DenseMatrix --- include/rxmesh/matrix/dense_matrix.cuh | 15 ++++ include/rxmesh/matrix/sparse_matrix.cuh | 16 ++++ tests/RXMesh_test/test_dense_matrix.cuh | 36 ++++++++ tests/RXMesh_test/test_sparse_matrix.cuh | 110 +++++++++++++++++++---- 4 files changed, 160 insertions(+), 17 deletions(-) diff --git a/include/rxmesh/matrix/dense_matrix.cuh b/include/rxmesh/matrix/dense_matrix.cuh index 5c65d9a0..8519ca12 100644 --- a/include/rxmesh/matrix/dense_matrix.cuh +++ b/include/rxmesh/matrix/dense_matrix.cuh @@ -10,6 +10,8 @@ #include "rxmesh/util/meta.h" +#include + namespace rxmesh { /** @@ -26,6 +28,9 @@ struct DenseMatrix template friend class SparseMatrix; + using EigenDenseMatrix = 
Eigen::Map< + Eigen::Matrix>; + DenseMatrix() : m_allocated(LOCATION_NONE), m_num_rows(0), @@ -818,6 +823,16 @@ struct DenseMatrix } } + /** + * @brief Convert/map the dense matrix to Eigen dense matrix. This is a + * zero-copy conversion so Eigen dense matrix will point to the same memory + * as the host-side of this DenseMatrix + */ + __host__ EigenDenseMatrix to_eigen() + { + return EigenDenseMatrix(m_h_val, rows(), cols()); + } + /** * @brief release the data on host or device */ diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index 5dfedeb4..bbacf75d 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -18,6 +18,8 @@ #include "rxmesh/launch_box.h" +#include + namespace rxmesh { /** @@ -58,6 +60,9 @@ struct SparseMatrix { using IndexT = int; + using EigenSparseMatrix = + Eigen::Map>; + SparseMatrix(const RXMeshStatic& rx) : m_d_row_ptr(nullptr), m_d_col_idx(nullptr), @@ -631,6 +636,17 @@ struct SparseMatrix } + /** + * @brief Convert/map this sparse matrix to Eigen sparse matrix. 
This is a + * zero-copy conversion so Eigen sparse matrix will point to the same memory + * as the host-side of this SparseMatrix + */ + __host__ EigenSparseMatrix to_eigen() + { + return EigenSparseMatrix( + rows(), cols(), non_zeros(), m_h_row_ptr, m_h_col_idx, m_h_val); + } + /** * @brief solve the AX=B for X where X and B are all dense matrix and we * would solve it in a column wise manner diff --git a/tests/RXMesh_test/test_dense_matrix.cuh b/tests/RXMesh_test/test_dense_matrix.cuh index 5d0bca9b..8e75ea17 100644 --- a/tests/RXMesh_test/test_dense_matrix.cuh +++ b/tests/RXMesh_test/test_dense_matrix.cuh @@ -4,6 +4,42 @@ #include "rxmesh/matrix/dense_matrix.cuh" +TEST(RXMeshStatic, DenseMatrixToEigen) +{ + using namespace rxmesh; + + cuda_query(rxmesh_args.device_id); + + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + + DenseMatrix rx_mat(rx, 10, 10); + DenseMatrix rx_mat_copy(rx, 10, 10); + + rx_mat.fill_random(); + rx_mat_copy.copy_from(rx_mat, HOST, HOST); + + auto eigen_mat = rx_mat.to_eigen(); + + // ensure that the content of Eigen matrix is the same as the RXMesh + // DenseMatrix + for (uint32_t i = 0; i < rx_mat.rows(); ++i) { + for (uint32_t j = 0; j < rx_mat.cols(); ++j) { + EXPECT_NEAR(rx_mat(i, j), eigen_mat(i, j), 0.0000001); + } + } + + // ensure operations done on the Eigen matrix is reflected on RXMesh + // DenseMatrix + const float scalar = 5.f; + eigen_mat *= scalar; + + for (uint32_t i = 0; i < rx_mat.rows(); ++i) { + for (uint32_t j = 0; j < rx_mat.cols(); ++j) { + EXPECT_NEAR(rx_mat_copy(i, j), rx_mat(i, j) / scalar, 0.0000001); + } + } +} + TEST(RXMeshStatic, DenseMatrixASum) { using namespace rxmesh; diff --git a/tests/RXMesh_test/test_sparse_matrix.cuh b/tests/RXMesh_test/test_sparse_matrix.cuh index 6b3a9385..1c37b8bd 100644 --- a/tests/RXMesh_test/test_sparse_matrix.cuh +++ b/tests/RXMesh_test/test_sparse_matrix.cuh @@ -6,6 +6,8 @@ #include "rxmesh/query.cuh" #include "rxmesh/rxmesh_static.h" +#include + template __global__ 
static void sparse_mat_test(const rxmesh::Context context, IndexT* vet_degree) @@ -123,13 +125,15 @@ __global__ static void simple_A_X_B_setup(const rxmesh::Context context, query.dispatch(block, shrd_alloc, mat_setup); } -/* Check the access of the sparse matrix in CSR format in device */ + TEST(RXMeshStatic, SparseMatrix) { + // Test accessing of the sparse matrix in CSR format in device + using namespace rxmesh; // Select device - cuda_query(0); + cuda_query(rxmesh_args.device_id); // generate rxmesh obj std::string obj_path = STRINGIFY(INPUT_DIR) "dragon.obj"; @@ -191,15 +195,15 @@ TEST(RXMeshStatic, SparseMatrix) spmat.release(); } -/* First replace the sparse matrix entry with the edge length and then do spmv - * with an all one array and check the result - */ TEST(RXMeshStatic, SparseMatrixEdgeLen) { + // First replace the sparse matrix entry with the edge length and then do + // spmv with an all one array and check the result + // using namespace rxmesh; // Select device - cuda_query(0); + cuda_query(rxmesh_args.device_id); // generate rxmesh obj RXMeshStatic rx(rxmesh_args.obj_file_name); @@ -262,16 +266,15 @@ TEST(RXMeshStatic, SparseMatrixEdgeLen) spmat.release(); } -/* set up a simple AX=B system where A is a sparse matrix, B and C are dense - * matrix. Solve it using the warpped up cusolver API and check the final AX - * with B using warpped up cusparse API. - */ TEST(RXMeshStatic, SparseMatrixSimpleSolve) { + // set up a simple AX=B system where A is a sparse matrix, B and C are dense + // matrix. 
+ using namespace rxmesh; // Select device - cuda_query(0); + cuda_query(rxmesh_args.device_id); // generate rxmesh obj std::string obj_path = rxmesh_args.obj_file_name; @@ -301,14 +304,9 @@ TEST(RXMeshStatic, SparseMatrixSimpleSolve) A_mat.solve(B_mat, X_mat, Solver::CHOL, PermuteMethod::NSTDIS); - // timing begins for spmm - GPUTimer timer; - timer.start(); A_mat.multiply(X_mat, ret_mat); - timer.stop(); - RXMESH_TRACE("SPMM_rxmesh() took {} (ms) ", timer.elapsed_millis()); std::vector> h_ret_mat(num_vertices); CUDA_ERROR(cudaMemcpy(h_ret_mat.data(), @@ -326,7 +324,12 @@ TEST(RXMeshStatic, SparseMatrixSimpleSolve) EXPECT_NEAR(h_ret_mat[i][j], h_B_mat[i][j], 1e-3); } } + + A_mat.release(); + X_mat.release(); + B_mat.release(); + ret_mat.release(); } TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) @@ -334,7 +337,7 @@ TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) using namespace rxmesh; // Select device - cuda_query(0); + cuda_query(rxmesh_args.device_id); // generate rxmesh obj std::string obj_path = rxmesh_args.obj_file_name; @@ -389,5 +392,78 @@ TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) EXPECT_NEAR(h_ret_mat[i][j], h_B_mat[i][j], 1e-3); } } + + A_mat.release(); + X_mat.release(); + B_mat.release(); + ret_mat.release(); +} + +TEST(RXMeshStatic, SparseMatrixToEigen) +{ + using namespace rxmesh; + + // Select device + cuda_query(rxmesh_args.device_id); + + // generate rxmesh obj + std::string obj_path = rxmesh_args.obj_file_name; + RXMeshStatic rx(obj_path); + + uint32_t num_vertices = rx.get_num_vertices(); + + const uint32_t threads = 256; + const uint32_t blocks = DIVIDE_UP(num_vertices, threads); + + auto coords = rx.get_input_vertex_coordinates(); + SparseMatrix A_mat(rx); + DenseMatrix X_mat(rx, num_vertices, 3); + DenseMatrix B_mat(rx, num_vertices, 3); + + + float time_step = 1.f; + + LaunchBox launch_box; + rx.prepare_launch_box( + {Op::VV}, launch_box, (void*)simple_A_X_B_setup); + + simple_A_X_B_setup<<>>( + rx.get_context(), *coords, 
A_mat, X_mat, B_mat, time_step); + + A_mat.solve(B_mat, X_mat, Solver::CHOL, PermuteMethod::NSTDIS); + + + DenseMatrix X_copy(rx, num_vertices, 3); + X_copy.copy_from(X_mat, DEVICE, HOST); + + A_mat.move(DEVICE, HOST); + B_mat.move(DEVICE, HOST); + + auto A_eigen = A_mat.to_eigen(); + auto X_eigen = X_mat.to_eigen(); + auto B_eigen = B_mat.to_eigen(); + + // Note: there is a bug with Eigen if we use the default reordering + // which is Eigen::AMDOrdering + // (https://gitlab.com/libeigen/eigen/-/issues/2839) + Eigen::SimplicialLDLT, + Eigen::UpLoType::Lower, + Eigen::COLAMDOrdering> + eigen_solver; + + eigen_solver.compute(A_eigen); + X_eigen = eigen_solver.solve(B_eigen); + + for (uint32_t i = 0; i < X_copy.rows(); ++i) { + for (uint32_t j = 0; j < X_copy.cols(); ++j) { + EXPECT_NEAR(X_eigen(i, j), X_copy(i, j), 0.0000001); + } + } + A_mat.release(); + X_mat.release(); + B_mat.release(); + X_copy.release(); } \ No newline at end of file From cee864fe0616adeb2aca90d6516a2ab7decdf267 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Fri, 26 Jul 2024 01:20:23 +0530 Subject: [PATCH 49/96] weight calculation --- apps/ARAP/arap.cu | 115 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 3f642cf5..13312770 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -5,6 +5,101 @@ using namespace rxmesh; +template +__device__ __forceinline__ T +edge_cotan_weight(const rxmesh::VertexHandle& p_id, + const rxmesh::VertexHandle& r_id, + const rxmesh::VertexHandle& q_id, + const rxmesh::VertexHandle& s_id, + const rxmesh::VertexAttribute& X) +{ + // Get the edge weight between the two vertices p-r where + // q and s composes the diamond around p-r + + const vec3 p(X(p_id, 0), X(p_id, 1), X(p_id, 2)); + const vec3 r(X(r_id, 0), X(r_id, 1), X(r_id, 2)); + const vec3 q(X(q_id, 0), X(q_id, 1), X(q_id, 2)); + const vec3 s(X(s_id, 0), X(s_id, 1), X(s_id, 2)); + + //cotans[(v1, v2)] =np.dot(e1, e2) 
/ np.linalg.norm(np.cross(e1, e2)) + + T weight = 0; + if (q_id.is_valid()) + weight += dot((p - q), (r - q)) / length(cross(p - q, r - q)); + if (s_id.is_valid()) + weight += dot((p - s), (r - s)) / length(cross(p - s, r - s)); + weight /= 2; + return weight; +} + + + +template +__global__ static void compute_edge_weights(const rxmesh::Context context, + rxmesh::VertexAttribute coords, + rxmesh::SparseMatrix A_mat) +{ + + auto vn_lambda = [&](VertexHandle vertex_id, VertexIterator& vv) + { + VertexHandle q_id = vv.back(); + + for (uint32_t v = 0; v < vv.size(); ++v) + { + VertexHandle r_id = vv[v]; + T e_weight = 0; + VertexHandle s_id = (v == vv.size() - 1) ? vv[0] : vv[v + 1]; + e_weight = edge_cotan_weight(vertex_id, r_id, q_id, s_id, coords); + A_mat(vertex_id, vv[v]) = e_weight; + } + + }; + + auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, vn_lambda); +} + +template +__global__ static void compute_edge_weights_evd(const rxmesh::Context context, + rxmesh::VertexAttribute coords, + rxmesh::SparseMatrix A_mat) +{ + + auto vn_lambda = [&](EdgeHandle edge_id, VertexIterator& vv) { + T e_weight = 0; + e_weight = edge_cotan_weight(vv[0], vv[2], vv[1], vv[3], coords); + A_mat(vv[0], vv[2]) = e_weight; + + }; + + auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, vn_lambda); +} + +template +__global__ static void edge_weight_values( + const rxmesh::Context context, + rxmesh::EdgeAttribute edge_weights, + rxmesh::SparseMatrix A_mat) +{ + + auto vn_lambda = [&](EdgeHandle edge_id, VertexIterator& vv) { + edge_weights(edge_id, 0) = A_mat(vv[0], vv[1]); + }; + + auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, vn_lambda); +} + + + + int main(int argc, char** argv) { @@ -15,6 +110,26 @@ int 
main(int argc, char** argv) RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + //compute wij + //auto weights = rx.add_edge_attribute("edgeWeights", 1); + auto vertex_pos = *rx.get_input_vertex_coordinates(); + SparseMatrix weights(rx); + + constexpr uint32_t CUDABlockSize = 256; + rxmesh::LaunchBox launch_box; + rx.prepare_launch_box({rxmesh::Op::EVDiamond}, + launch_box, + (void*)compute_edge_weights_evd); + + compute_edge_weights_evd + <<>>( + rx.get_context(), vertex_pos, weights); + + + + #if USE_POLYSCOPE polyscope::show(); #endif From e71278198ae856ca32c4fce1ca50bdd073883d4b Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Fri, 26 Jul 2024 19:57:11 +0530 Subject: [PATCH 50/96] visualise contangent weights --- apps/ARAP/arap.cu | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 13312770..c57bfbc0 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -23,7 +23,7 @@ edge_cotan_weight(const rxmesh::VertexHandle& p_id, //cotans[(v1, v2)] =np.dot(e1, e2) / np.linalg.norm(np.cross(e1, e2)) - T weight = 0; + float weight = 0; if (q_id.is_valid()) weight += dot((p - q), (r - q)) / length(cross(p - q, r - q)); if (s_id.is_valid()) @@ -108,13 +108,15 @@ int main(int argc, char** argv) const uint32_t device_id = 0; cuda_query(device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + //RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "dragon.obj"); //compute wij - //auto weights = rx.add_edge_attribute("edgeWeights", 1); + auto weights = rx.add_edge_attribute("edgeWeights", 1); auto vertex_pos = *rx.get_input_vertex_coordinates(); - SparseMatrix weights(rx); - + SparseMatrix weight_matrix(rx); + + //obtain cotangent weight matrix constexpr uint32_t CUDABlockSize = 256; rxmesh::LaunchBox launch_box; rx.prepare_launch_box({rxmesh::Op::EVDiamond}, @@ -125,8 +127,23 @@ int main(int argc, char** argv) <<>>( - 
rx.get_context(), vertex_pos, weights); - + rx.get_context(), vertex_pos, weight_matrix); + + //visualise edge weights + rxmesh::LaunchBox launch_box2; + rx.prepare_launch_box( + {rxmesh::Op::EV}, + launch_box2, + (void*)edge_weight_values); + + edge_weight_values + <<>>(rx.get_context(), *weights, weight_matrix ); + + weights->move(DEVICE, HOST); + rx.get_polyscope_mesh()->addEdgeScalarQuantity("edgeWeights", *weights); + // From cff965c6265606d7e746b62c77df5650f343e276 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Fri, 26 Jul 2024 23:15:19 +0530 Subject: [PATCH 51/96] successfully compiling rotation matrix calculation function --- apps/ARAP/arap.cu | 92 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 5 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index c57bfbc0..e563c16b 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -3,6 +3,8 @@ #include "rxmesh/matrix/sparse_matrix.cuh" +#include "eigen/Dense" + using namespace rxmesh; template @@ -87,8 +89,8 @@ __global__ static void edge_weight_values( rxmesh::SparseMatrix A_mat) { - auto vn_lambda = [&](EdgeHandle edge_id, VertexIterator& vv) { - edge_weights(edge_id, 0) = A_mat(vv[0], vv[1]); + auto vn_lambda = [&](EdgeHandle edge_id, VertexIterator& ev) { + edge_weights(edge_id, 0) = A_mat(ev[0], ev[1]); }; auto block = cooperative_groups::this_thread_block(); @@ -97,6 +99,73 @@ __global__ static void edge_weight_values( query.dispatch(block, shrd_alloc, vn_lambda); } +template +__global__ static void calculate_rotation_matrix(const rxmesh::Context context, + rxmesh::VertexAttribute ref_coords, + rxmesh::VertexAttribute current_coords, + rxmesh::VertexAttribute rotationVector, + rxmesh::SparseMatrix weight_mat) +{ + + auto vn_lambda = [&](VertexHandle v_id, VertexIterator& vv) { + + + // pi + + Eigen::MatrixXf pi = Eigen::MatrixXf::Identity(3, vv.size()); + for (int j = 0; j < vv.size(); j++) + { + pi(0, j) = ref_coords(v_id, 0) - ref_coords(vv[j], 0); + 
pi(1, j) = ref_coords(v_id, 1) - ref_coords(vv[j], 1); + pi(2, j) = ref_coords(v_id, 2) - ref_coords(vv[j], 2); + } + + // Di + //Eigen::Sparse eigen_weight_mat = weight_mat; + Eigen::VectorXf weight_vector; + weight_vector.resize(vv.size()); + + for (int v = 0; v < vv.size();v++) + { + weight_vector(v) = weight_mat(v_id, vv[v]); + } + Eigen::MatrixXf diagonal_mat = weight_vector.asDiagonal(); + + // pi'T + Eigen::MatrixXf pi_dash = Eigen::MatrixXf::Identity(3, vv.size()); + for (int j = 0; j < vv.size(); j++) { + pi_dash(0, j) = current_coords(v_id, 0) - current_coords(vv[j], 0); + pi_dash(1, j) = current_coords(v_id, 1) - current_coords(vv[j], 1); + pi_dash(2, j) = current_coords(v_id, 2) - current_coords(vv[j], 2); + } + + // calculate covariance matrix S = piDiPiTdash + + Eigen::MatrixXf S = pi * diagonal_mat * pi_dash.transpose(); + + // perform svd on S (eigen) + + + // R =VU + Eigen::MatrixXf R = S.jacobiSvd().matrixU() * S.jacobiSvd().matrixV(); + + // Matrix R to vector attribute R + for (int i=0;i<3;i++) { + for (int j = 0; j < 3; j++) + rotationVector(v_id, i * 3 + j) = R(i, j); + } + + + }; + + auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, vn_lambda); +} + + + @@ -113,7 +182,11 @@ int main(int argc, char** argv) //compute wij auto weights = rx.add_edge_attribute("edgeWeights", 1); - auto vertex_pos = *rx.get_input_vertex_coordinates(); + + + auto ref_vertex_pos = *rx.get_input_vertex_coordinates(); // stays same across computation + auto changed_vertex_pos = rx.add_vertex_attribute("P", 3); // changes per iteration + SparseMatrix weight_matrix(rx); //obtain cotangent weight matrix @@ -127,7 +200,7 @@ int main(int argc, char** argv) <<>>( - rx.get_context(), vertex_pos, weight_matrix); + rx.get_context(), ref_vertex_pos, weight_matrix); //visualise edge weights rxmesh::LaunchBox launch_box2; @@ -142,9 +215,18 @@ int main(int argc, char** argv) 
launch_box2.smem_bytes_dyn>>>(rx.get_context(), *weights, weight_matrix ); weights->move(DEVICE, HOST); - rx.get_polyscope_mesh()->addEdgeScalarQuantity("edgeWeights", *weights); + + + //pi and p'i + + //rx.get_polyscope_mesh()->addEdgeScalarQuantity("edgeWeights", *weights); // + //calculate rotation matrix + auto rot_mat = rx.add_vertex_attribute("RotationMatrix", 9); + + + #if USE_POLYSCOPE From 16a38d8bc5163b54c8e83ef3a1f640611e3d2fdf Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Fri, 26 Jul 2024 23:44:27 +0530 Subject: [PATCH 52/96] add U sign adjustment step --- apps/ARAP/arap.cu | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index e563c16b..6ef41b98 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -108,8 +108,6 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex { auto vn_lambda = [&](VertexHandle v_id, VertexIterator& vv) { - - // pi Eigen::MatrixXf pi = Eigen::MatrixXf::Identity(3, vv.size()); @@ -121,7 +119,6 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex } // Di - //Eigen::Sparse eigen_weight_mat = weight_mat; Eigen::VectorXf weight_vector; weight_vector.resize(vv.size()); @@ -147,15 +144,20 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex // R =VU - Eigen::MatrixXf R = S.jacobiSvd().matrixU() * S.jacobiSvd().matrixV(); + Eigen::MatrixXf V = S.jacobiSvd().matrixV(); + Eigen::MatrixXf U = S.jacobiSvd().matrixU().eval(); + + float smallest_singular_value = + S.jacobiSvd().singularValues().minCoeff(); + U.col(smallest_singular_value)= U.col(smallest_singular_value) * -1; + + Eigen::MatrixXf R = V * U; // Matrix R to vector attribute R for (int i=0;i<3;i++) { for (int j = 0; j < 3; j++) rotationVector(v_id, i * 3 + j) = R(i, j); } - - }; auto block = cooperative_groups::this_thread_block(); From ebf7515f45903bc85c7c3a683afb95172027c110 Mon Sep 17 00:00:00 2001 From: Sachin Kishan 
Date: Sat, 27 Jul 2024 00:47:26 +0530 Subject: [PATCH 53/96] error svd --- apps/ARAP/arap.cu | 55 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 6ef41b98..97cb8783 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -3,7 +3,8 @@ #include "rxmesh/matrix/sparse_matrix.cuh" -#include "eigen/Dense" +#include "Eigen/Dense" + using namespace rxmesh; @@ -99,6 +100,27 @@ __global__ static void edge_weight_values( query.dispatch(block, shrd_alloc, vn_lambda); } + + +//////// + +__host__ __device__ Eigen::Matrix3f calculateSVD(Eigen::Matrix3f S) +{ + Eigen::JacobiSVD svd(S); + + + Eigen::MatrixXf V = svd.matrixV(); + Eigen::MatrixXf U = svd.matrixU().eval(); + + float smallest_singular_value = svd.singularValues().minCoeff(); + + U.col(smallest_singular_value) = U.col(smallest_singular_value) * -1; + + Eigen::MatrixXf R = V * U; + + return R; +} + template __global__ static void calculate_rotation_matrix(const rxmesh::Context context, rxmesh::VertexAttribute ref_coords, @@ -138,12 +160,18 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex // calculate covariance matrix S = piDiPiTdash - Eigen::MatrixXf S = pi * diagonal_mat * pi_dash.transpose(); + Eigen::Matrix3f S = pi * diagonal_mat * pi_dash.transpose(); // perform svd on S (eigen) // R =VU + + + Eigen::JacobiSVD svd(S); + + + /* Eigen::MatrixXf V = S.jacobiSvd().matrixV(); Eigen::MatrixXf U = S.jacobiSvd().matrixU().eval(); @@ -158,6 +186,7 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex for (int j = 0; j < 3; j++) rotationVector(v_id, i * 3 + j) = R(i, j); } + */ }; auto block = cooperative_groups::this_thread_block(); @@ -203,7 +232,7 @@ int main(int argc, char** argv) launch_box.num_threads, launch_box.smem_bytes_dyn>>>( rx.get_context(), ref_vertex_pos, weight_matrix); - + //visualise edge weights rxmesh::LaunchBox launch_box2; 
rx.prepare_launch_box( @@ -225,7 +254,25 @@ int main(int argc, char** argv) // //calculate rotation matrix - auto rot_mat = rx.add_vertex_attribute("RotationMatrix", 9); + auto rot_mat = *rx.add_vertex_attribute("RotationMatrix", 9); + + rxmesh::LaunchBox rotation_launch_box; + + + rx.prepare_launch_box({rxmesh::Op::VV}, + rotation_launch_box, + (void*)calculate_rotation_matrix); + /* + calculate_rotation_matrix + <<>>(rx.get_context(), + ref_vertex_pos, + *changed_vertex_pos, + rot_mat, + weight_matrix); + */ + From 6cb34b4994c926439e0833d8f52932cf1fc3b1f5 Mon Sep 17 00:00:00 2001 From: ahmed Date: Fri, 26 Jul 2024 22:41:28 -0400 Subject: [PATCH 54/96] svd --- include/rxmesh/util/svd3_cuda.h | 1137 +++++++++++++++++++++++++ tests/RXMesh_test/CMakeLists.txt | 1 + tests/RXMesh_test/rxmesh_test_main.cu | 4 + tests/RXMesh_test/test_svd.cuh | 96 +++ 4 files changed, 1238 insertions(+) create mode 100644 include/rxmesh/util/svd3_cuda.h create mode 100644 tests/RXMesh_test/test_svd.cuh diff --git a/include/rxmesh/util/svd3_cuda.h b/include/rxmesh/util/svd3_cuda.h new file mode 100644 index 00000000..a9123c8e --- /dev/null +++ b/include/rxmesh/util/svd3_cuda.h @@ -0,0 +1,1137 @@ +/************************************************************************** +** +** svd3 +** +** Quick singular value decomposition as described by: +** A. McAdams, A. Selle, R. Tamstorf, J. Teran and E. 
Sifakis, +** Computing the Singular Value Decomposition of 3x3 matrices +** with minimal branching and elementary floating point operations, +** University of Wisconsin - Madison technical report TR1690, May 2011 +** +** Identical GPU version +** Implementated by: Kui Wu +** kwu@cs.utah.edu +** +** May 2018 +** +**************************************************************************/ + +#pragma once + +#include +#include "math.h" // CUDA math library + +#include + +namespace rxmesh { + +#define gone 1065353216 +#define gsine_pi_over_eight 1053028117 +#define gcosine_pi_over_eight 1064076127 +#define gone_half 0.5f +#define gsmall_number 1.e-12f +#define gtiny_number 1.e-20f +#define gfour_gamma_squared 5.8284273147583007813f + +__device__ __forceinline__ void svd( + // input A + float a11, + float a12, + float a13, + float a21, + float a22, + float a23, + float a31, + float a32, + float a33, + // output U + float& u11, + float& u12, + float& u13, + float& u21, + float& u22, + float& u23, + float& u31, + float& u32, + float& u33, + // output S + float& s11, + // float &s12, float &s13, float &s21, + float& s22, + // float &s23, float &s31, float &s32, + float& s33, + // output V + float& v11, + float& v12, + float& v13, + float& v21, + float& v22, + float& v23, + float& v31, + float& v32, + float& v33) +{ + union un + { + float f; + unsigned int ui; + }; + + un Sa11, Sa21, Sa31, Sa12, Sa22, Sa32, Sa13, Sa23, Sa33; + un Su11, Su21, Su31, Su12, Su22, Su32, Su13, Su23, Su33; + un Sv11, Sv21, Sv31, Sv12, Sv22, Sv32, Sv13, Sv23, Sv33; + un Sc, Ss, Sch, Ssh; + un Stmp1, Stmp2, Stmp3, Stmp4, Stmp5; + un Ss11, Ss21, Ss31, Ss22, Ss32, Ss33; + un Sqvs, Sqvvx, Sqvvy, Sqvvz; + + Sa11.f = a11; + Sa12.f = a12; + Sa13.f = a13; + Sa21.f = a21; + Sa22.f = a22; + Sa23.f = a23; + Sa31.f = a31; + Sa32.f = a32; + Sa33.f = a33; + + // ########################################################### + // Compute normal equations matrix + // 
########################################################### + + Ss11.f = Sa11.f * Sa11.f; + Stmp1.f = Sa21.f * Sa21.f; + Ss11.f = __fadd_rn(Stmp1.f, Ss11.f); + Stmp1.f = Sa31.f * Sa31.f; + Ss11.f = __fadd_rn(Stmp1.f, Ss11.f); + + Ss21.f = Sa12.f * Sa11.f; + Stmp1.f = Sa22.f * Sa21.f; + Ss21.f = __fadd_rn(Stmp1.f, Ss21.f); + Stmp1.f = Sa32.f * Sa31.f; + Ss21.f = __fadd_rn(Stmp1.f, Ss21.f); + + Ss31.f = Sa13.f * Sa11.f; + Stmp1.f = Sa23.f * Sa21.f; + Ss31.f = __fadd_rn(Stmp1.f, Ss31.f); + Stmp1.f = Sa33.f * Sa31.f; + Ss31.f = __fadd_rn(Stmp1.f, Ss31.f); + + Ss22.f = Sa12.f * Sa12.f; + Stmp1.f = Sa22.f * Sa22.f; + Ss22.f = __fadd_rn(Stmp1.f, Ss22.f); + Stmp1.f = Sa32.f * Sa32.f; + Ss22.f = __fadd_rn(Stmp1.f, Ss22.f); + + Ss32.f = Sa13.f * Sa12.f; + Stmp1.f = Sa23.f * Sa22.f; + Ss32.f = __fadd_rn(Stmp1.f, Ss32.f); + Stmp1.f = Sa33.f * Sa32.f; + Ss32.f = __fadd_rn(Stmp1.f, Ss32.f); + + Ss33.f = Sa13.f * Sa13.f; + Stmp1.f = Sa23.f * Sa23.f; + Ss33.f = __fadd_rn(Stmp1.f, Ss33.f); + Stmp1.f = Sa33.f * Sa33.f; + Ss33.f = __fadd_rn(Stmp1.f, Ss33.f); + + Sqvs.f = 1.f; + Sqvvx.f = 0.f; + Sqvvy.f = 0.f; + Sqvvz.f = 0.f; + + // ########################################################### + // Solve symmetric eigenproblem using Jacobi iteration + // ########################################################### + for (int i = 0; i < 4; i++) { + Ssh.f = Ss21.f * 0.5f; + Stmp5.f = __fsub_rn(Ss11.f, Ss22.f); + + Stmp2.f = Ssh.f * Ssh.f; + Stmp1.ui = (Stmp2.f >= gtiny_number) ? 0xffffffff : 0; + Ssh.ui = Stmp1.ui & Ssh.ui; + Sch.ui = Stmp1.ui & Stmp5.ui; + Stmp2.ui = ~Stmp1.ui & gone; + Sch.ui = Sch.ui | Stmp2.ui; + + Stmp1.f = Ssh.f * Ssh.f; + Stmp2.f = Sch.f * Sch.f; + Stmp3.f = __fadd_rn(Stmp1.f, Stmp2.f); + Stmp4.f = __frsqrt_rn(Stmp3.f); + + Ssh.f = Stmp4.f * Ssh.f; + Sch.f = Stmp4.f * Sch.f; + Stmp1.f = gfour_gamma_squared * Stmp1.f; + Stmp1.ui = (Stmp2.f <= Stmp1.f) ? 
0xffffffff : 0; + + Stmp2.ui = gsine_pi_over_eight & Stmp1.ui; + Ssh.ui = ~Stmp1.ui & Ssh.ui; + Ssh.ui = Ssh.ui | Stmp2.ui; + Stmp2.ui = gcosine_pi_over_eight & Stmp1.ui; + Sch.ui = ~Stmp1.ui & Sch.ui; + Sch.ui = Sch.ui | Stmp2.ui; + + Stmp1.f = Ssh.f * Ssh.f; + Stmp2.f = Sch.f * Sch.f; + Sc.f = __fsub_rn(Stmp2.f, Stmp1.f); + Ss.f = Sch.f * Ssh.f; + Ss.f = __fadd_rn(Ss.f, Ss.f); + +#ifdef DEBUG_JACOBI_CONJUGATE + printf("GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", + Ss.f, + Sc.f, + Ssh.f, + Sch.f); +#endif + // ########################################################### + // Perform the actual Givens conjugation + // ########################################################### + + Stmp3.f = __fadd_rn(Stmp1.f, Stmp2.f); + Ss33.f = Ss33.f * Stmp3.f; + Ss31.f = Ss31.f * Stmp3.f; + Ss32.f = Ss32.f * Stmp3.f; + Ss33.f = Ss33.f * Stmp3.f; + + Stmp1.f = Ss.f * Ss31.f; + Stmp2.f = Ss.f * Ss32.f; + Ss31.f = Sc.f * Ss31.f; + Ss32.f = Sc.f * Ss32.f; + Ss31.f = __fadd_rn(Stmp2.f, Ss31.f); + Ss32.f = __fsub_rn(Ss32.f, Stmp1.f); + + Stmp2.f = Ss.f * Ss.f; + Stmp1.f = Ss22.f * Stmp2.f; + Stmp3.f = Ss11.f * Stmp2.f; + Stmp4.f = Sc.f * Sc.f; + Ss11.f = Ss11.f * Stmp4.f; + Ss22.f = Ss22.f * Stmp4.f; + Ss11.f = __fadd_rn(Ss11.f, Stmp1.f); + Ss22.f = __fadd_rn(Ss22.f, Stmp3.f); + Stmp4.f = __fsub_rn(Stmp4.f, Stmp2.f); + Stmp2.f = __fadd_rn(Ss21.f, Ss21.f); + Ss21.f = Ss21.f * Stmp4.f; + Stmp4.f = Sc.f * Ss.f; + Stmp2.f = Stmp2.f * Stmp4.f; + Stmp5.f = Stmp5.f * Stmp4.f; + Ss11.f = __fadd_rn(Ss11.f, Stmp2.f); + Ss21.f = __fsub_rn(Ss21.f, Stmp5.f); + Ss22.f = __fsub_rn(Ss22.f, Stmp2.f); + +#ifdef DEBUG_JACOBI_CONJUGATE + printf("%.20g\n", Ss11.f); + printf("%.20g %.20g\n", Ss21.f, Ss22.f); + printf("%.20g %.20g %.20g\n", Ss31.f, Ss32.f, Ss33.f); +#endif + + // ########################################################### + // Compute the cumulative rotation, in quaternion form + // ########################################################### + + Stmp1.f = Ssh.f * Sqvvx.f; + Stmp2.f = Ssh.f 
* Sqvvy.f; + Stmp3.f = Ssh.f * Sqvvz.f; + Ssh.f = Ssh.f * Sqvs.f; + + Sqvs.f = Sch.f * Sqvs.f; + Sqvvx.f = Sch.f * Sqvvx.f; + Sqvvy.f = Sch.f * Sqvvy.f; + Sqvvz.f = Sch.f * Sqvvz.f; + + Sqvvz.f = __fadd_rn(Sqvvz.f, Ssh.f); + Sqvs.f = __fsub_rn(Sqvs.f, Stmp3.f); + Sqvvx.f = __fadd_rn(Sqvvx.f, Stmp2.f); + Sqvvy.f = __fsub_rn(Sqvvy.f, Stmp1.f); + +#ifdef DEBUG_JACOBI_CONJUGATE + printf("GPU q %.20g %.20g %.20g %.20g\n", + Sqvvx.f, + Sqvvy.f, + Sqvvz.f, + Sqvs.f); +#endif + + ////////////////////////////////////////////////////////////////////////// + // (1->3) + ////////////////////////////////////////////////////////////////////////// + Ssh.f = Ss32.f * 0.5f; + Stmp5.f = __fsub_rn(Ss22.f, Ss33.f); + + Stmp2.f = Ssh.f * Ssh.f; + Stmp1.ui = (Stmp2.f >= gtiny_number) ? 0xffffffff : 0; + Ssh.ui = Stmp1.ui & Ssh.ui; + Sch.ui = Stmp1.ui & Stmp5.ui; + Stmp2.ui = ~Stmp1.ui & gone; + Sch.ui = Sch.ui | Stmp2.ui; + + Stmp1.f = Ssh.f * Ssh.f; + Stmp2.f = Sch.f * Sch.f; + Stmp3.f = __fadd_rn(Stmp1.f, Stmp2.f); + Stmp4.f = __frsqrt_rn(Stmp3.f); + + Ssh.f = Stmp4.f * Ssh.f; + Sch.f = Stmp4.f * Sch.f; + Stmp1.f = gfour_gamma_squared * Stmp1.f; + Stmp1.ui = (Stmp2.f <= Stmp1.f) ? 
0xffffffff : 0; + + Stmp2.ui = gsine_pi_over_eight & Stmp1.ui; + Ssh.ui = ~Stmp1.ui & Ssh.ui; + Ssh.ui = Ssh.ui | Stmp2.ui; + Stmp2.ui = gcosine_pi_over_eight & Stmp1.ui; + Sch.ui = ~Stmp1.ui & Sch.ui; + Sch.ui = Sch.ui | Stmp2.ui; + + Stmp1.f = Ssh.f * Ssh.f; + Stmp2.f = Sch.f * Sch.f; + Sc.f = __fsub_rn(Stmp2.f, Stmp1.f); + Ss.f = Sch.f * Ssh.f; + Ss.f = __fadd_rn(Ss.f, Ss.f); + + // ########################################################### + // Perform the actual Givens conjugation + // ########################################################### + + Stmp3.f = __fadd_rn(Stmp1.f, Stmp2.f); + Ss11.f = Ss11.f * Stmp3.f; + Ss21.f = Ss21.f * Stmp3.f; + Ss31.f = Ss31.f * Stmp3.f; + Ss11.f = Ss11.f * Stmp3.f; + + Stmp1.f = Ss.f * Ss21.f; + Stmp2.f = Ss.f * Ss31.f; + Ss21.f = Sc.f * Ss21.f; + Ss31.f = Sc.f * Ss31.f; + Ss21.f = __fadd_rn(Stmp2.f, Ss21.f); + Ss31.f = __fsub_rn(Ss31.f, Stmp1.f); + + Stmp2.f = Ss.f * Ss.f; + Stmp1.f = Ss33.f * Stmp2.f; + Stmp3.f = Ss22.f * Stmp2.f; + Stmp4.f = Sc.f * Sc.f; + Ss22.f = Ss22.f * Stmp4.f; + Ss33.f = Ss33.f * Stmp4.f; + Ss22.f = __fadd_rn(Ss22.f, Stmp1.f); + Ss33.f = __fadd_rn(Ss33.f, Stmp3.f); + Stmp4.f = __fsub_rn(Stmp4.f, Stmp2.f); + Stmp2.f = __fadd_rn(Ss32.f, Ss32.f); + Ss32.f = Ss32.f * Stmp4.f; + Stmp4.f = Sc.f * Ss.f; + Stmp2.f = Stmp2.f * Stmp4.f; + Stmp5.f = Stmp5.f * Stmp4.f; + Ss22.f = __fadd_rn(Ss22.f, Stmp2.f); + Ss32.f = __fsub_rn(Ss32.f, Stmp5.f); + Ss33.f = __fsub_rn(Ss33.f, Stmp2.f); + + + // ########################################################### + // Compute the cumulative rotation, in quaternion form + // ########################################################### + + Stmp1.f = Ssh.f * Sqvvx.f; + Stmp2.f = Ssh.f * Sqvvy.f; + Stmp3.f = Ssh.f * Sqvvz.f; + Ssh.f = Ssh.f * Sqvs.f; + + Sqvs.f = Sch.f * Sqvs.f; + Sqvvx.f = Sch.f * Sqvvx.f; + Sqvvy.f = Sch.f * Sqvvy.f; + Sqvvz.f = Sch.f * Sqvvz.f; + + Sqvvx.f = __fadd_rn(Sqvvx.f, Ssh.f); + Sqvs.f = __fsub_rn(Sqvs.f, Stmp1.f); + Sqvvy.f = __fadd_rn(Sqvvy.f, 
Stmp3.f); + Sqvvz.f = __fsub_rn(Sqvvz.f, Stmp2.f); + +#if 1 + ////////////////////////////////////////////////////////////////////////// + // 1 -> 2 + ////////////////////////////////////////////////////////////////////////// + + Ssh.f = Ss31.f * 0.5f; + Stmp5.f = __fsub_rn(Ss33.f, Ss11.f); + + Stmp2.f = Ssh.f * Ssh.f; + Stmp1.ui = (Stmp2.f >= gtiny_number) ? 0xffffffff : 0; + Ssh.ui = Stmp1.ui & Ssh.ui; + Sch.ui = Stmp1.ui & Stmp5.ui; + Stmp2.ui = ~Stmp1.ui & gone; + Sch.ui = Sch.ui | Stmp2.ui; + + Stmp1.f = Ssh.f * Ssh.f; + Stmp2.f = Sch.f * Sch.f; + Stmp3.f = __fadd_rn(Stmp1.f, Stmp2.f); + Stmp4.f = __frsqrt_rn(Stmp3.f); + + Ssh.f = Stmp4.f * Ssh.f; + Sch.f = Stmp4.f * Sch.f; + Stmp1.f = gfour_gamma_squared * Stmp1.f; + Stmp1.ui = (Stmp2.f <= Stmp1.f) ? 0xffffffff : 0; + + Stmp2.ui = gsine_pi_over_eight & Stmp1.ui; + Ssh.ui = ~Stmp1.ui & Ssh.ui; + Ssh.ui = Ssh.ui | Stmp2.ui; + Stmp2.ui = gcosine_pi_over_eight & Stmp1.ui; + Sch.ui = ~Stmp1.ui & Sch.ui; + Sch.ui = Sch.ui | Stmp2.ui; + + Stmp1.f = Ssh.f * Ssh.f; + Stmp2.f = Sch.f * Sch.f; + Sc.f = __fsub_rn(Stmp2.f, Stmp1.f); + Ss.f = Sch.f * Ssh.f; + Ss.f = __fadd_rn(Ss.f, Ss.f); + + + // ########################################################### + // Perform the actual Givens conjugation + // ########################################################### + + Stmp3.f = __fadd_rn(Stmp1.f, Stmp2.f); + Ss22.f = Ss22.f * Stmp3.f; + Ss32.f = Ss32.f * Stmp3.f; + Ss21.f = Ss21.f * Stmp3.f; + Ss22.f = Ss22.f * Stmp3.f; + + Stmp1.f = Ss.f * Ss32.f; + Stmp2.f = Ss.f * Ss21.f; + Ss32.f = Sc.f * Ss32.f; + Ss21.f = Sc.f * Ss21.f; + Ss32.f = __fadd_rn(Stmp2.f, Ss32.f); + Ss21.f = __fsub_rn(Ss21.f, Stmp1.f); + + Stmp2.f = Ss.f * Ss.f; + Stmp1.f = Ss11.f * Stmp2.f; + Stmp3.f = Ss33.f * Stmp2.f; + Stmp4.f = Sc.f * Sc.f; + Ss33.f = Ss33.f * Stmp4.f; + Ss11.f = Ss11.f * Stmp4.f; + Ss33.f = __fadd_rn(Ss33.f, Stmp1.f); + Ss11.f = __fadd_rn(Ss11.f, Stmp3.f); + Stmp4.f = __fsub_rn(Stmp4.f, Stmp2.f); + Stmp2.f = __fadd_rn(Ss31.f, Ss31.f); 
+ Ss31.f = Ss31.f * Stmp4.f; + Stmp4.f = Sc.f * Ss.f; + Stmp2.f = Stmp2.f * Stmp4.f; + Stmp5.f = Stmp5.f * Stmp4.f; + Ss33.f = __fadd_rn(Ss33.f, Stmp2.f); + Ss31.f = __fsub_rn(Ss31.f, Stmp5.f); + Ss11.f = __fsub_rn(Ss11.f, Stmp2.f); + + + // ########################################################### + // Compute the cumulative rotation, in quaternion form + // ########################################################### + + Stmp1.f = Ssh.f * Sqvvx.f; + Stmp2.f = Ssh.f * Sqvvy.f; + Stmp3.f = Ssh.f * Sqvvz.f; + Ssh.f = Ssh.f * Sqvs.f; + + Sqvs.f = Sch.f * Sqvs.f; + Sqvvx.f = Sch.f * Sqvvx.f; + Sqvvy.f = Sch.f * Sqvvy.f; + Sqvvz.f = Sch.f * Sqvvz.f; + + Sqvvy.f = __fadd_rn(Sqvvy.f, Ssh.f); + Sqvs.f = __fsub_rn(Sqvs.f, Stmp2.f); + Sqvvz.f = __fadd_rn(Sqvvz.f, Stmp1.f); + Sqvvx.f = __fsub_rn(Sqvvx.f, Stmp3.f); +#endif + } + + // ########################################################### + // Normalize quaternion for matrix V + // ########################################################### + + Stmp2.f = Sqvs.f * Sqvs.f; + Stmp1.f = Sqvvx.f * Sqvvx.f; + Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f); + Stmp1.f = Sqvvy.f * Sqvvy.f; + Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f); + Stmp1.f = Sqvvz.f * Sqvvz.f; + Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f); + + Stmp1.f = __frsqrt_rn(Stmp2.f); + Stmp4.f = Stmp1.f * 0.5f; + Stmp3.f = Stmp1.f * Stmp4.f; + Stmp3.f = Stmp1.f * Stmp3.f; + Stmp3.f = Stmp2.f * Stmp3.f; + Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f); + Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f); + + Sqvs.f = Sqvs.f * Stmp1.f; + Sqvvx.f = Sqvvx.f * Stmp1.f; + Sqvvy.f = Sqvvy.f * Stmp1.f; + Sqvvz.f = Sqvvz.f * Stmp1.f; + + // ########################################################### + // Transform quaternion to matrix V + // ########################################################### + + Stmp1.f = Sqvvx.f * Sqvvx.f; + Stmp2.f = Sqvvy.f * Sqvvy.f; + Stmp3.f = Sqvvz.f * Sqvvz.f; + Sv11.f = Sqvs.f * Sqvs.f; + Sv22.f = __fsub_rn(Sv11.f, Stmp1.f); + Sv33.f = __fsub_rn(Sv22.f, Stmp2.f); + Sv33.f = 
__fadd_rn(Sv33.f, Stmp3.f); + Sv22.f = __fadd_rn(Sv22.f, Stmp2.f); + Sv22.f = __fsub_rn(Sv22.f, Stmp3.f); + Sv11.f = __fadd_rn(Sv11.f, Stmp1.f); + Sv11.f = __fsub_rn(Sv11.f, Stmp2.f); + Sv11.f = __fsub_rn(Sv11.f, Stmp3.f); + Stmp1.f = __fadd_rn(Sqvvx.f, Sqvvx.f); + Stmp2.f = __fadd_rn(Sqvvy.f, Sqvvy.f); + Stmp3.f = __fadd_rn(Sqvvz.f, Sqvvz.f); + Sv32.f = Sqvs.f * Stmp1.f; + Sv13.f = Sqvs.f * Stmp2.f; + Sv21.f = Sqvs.f * Stmp3.f; + Stmp1.f = Sqvvy.f * Stmp1.f; + Stmp2.f = Sqvvz.f * Stmp2.f; + Stmp3.f = Sqvvx.f * Stmp3.f; + Sv12.f = __fsub_rn(Stmp1.f, Sv21.f); + Sv23.f = __fsub_rn(Stmp2.f, Sv32.f); + Sv31.f = __fsub_rn(Stmp3.f, Sv13.f); + Sv21.f = __fadd_rn(Stmp1.f, Sv21.f); + Sv32.f = __fadd_rn(Stmp2.f, Sv32.f); + Sv13.f = __fadd_rn(Stmp3.f, Sv13.f); + + /// ########################################################### + // Multiply (from the right) with V + // ########################################################### + + Stmp2.f = Sa12.f; + Stmp3.f = Sa13.f; + Sa12.f = Sv12.f * Sa11.f; + Sa13.f = Sv13.f * Sa11.f; + Sa11.f = Sv11.f * Sa11.f; + Stmp1.f = Sv21.f * Stmp2.f; + Sa11.f = __fadd_rn(Sa11.f, Stmp1.f); + Stmp1.f = Sv31.f * Stmp3.f; + Sa11.f = __fadd_rn(Sa11.f, Stmp1.f); + Stmp1.f = Sv22.f * Stmp2.f; + Sa12.f = __fadd_rn(Sa12.f, Stmp1.f); + Stmp1.f = Sv32.f * Stmp3.f; + Sa12.f = __fadd_rn(Sa12.f, Stmp1.f); + Stmp1.f = Sv23.f * Stmp2.f; + Sa13.f = __fadd_rn(Sa13.f, Stmp1.f); + Stmp1.f = Sv33.f * Stmp3.f; + Sa13.f = __fadd_rn(Sa13.f, Stmp1.f); + + Stmp2.f = Sa22.f; + Stmp3.f = Sa23.f; + Sa22.f = Sv12.f * Sa21.f; + Sa23.f = Sv13.f * Sa21.f; + Sa21.f = Sv11.f * Sa21.f; + Stmp1.f = Sv21.f * Stmp2.f; + Sa21.f = __fadd_rn(Sa21.f, Stmp1.f); + Stmp1.f = Sv31.f * Stmp3.f; + Sa21.f = __fadd_rn(Sa21.f, Stmp1.f); + Stmp1.f = Sv22.f * Stmp2.f; + Sa22.f = __fadd_rn(Sa22.f, Stmp1.f); + Stmp1.f = Sv32.f * Stmp3.f; + Sa22.f = __fadd_rn(Sa22.f, Stmp1.f); + Stmp1.f = Sv23.f * Stmp2.f; + Sa23.f = __fadd_rn(Sa23.f, Stmp1.f); + Stmp1.f = Sv33.f * Stmp3.f; + Sa23.f = 
__fadd_rn(Sa23.f, Stmp1.f); + + Stmp2.f = Sa32.f; + Stmp3.f = Sa33.f; + Sa32.f = Sv12.f * Sa31.f; + Sa33.f = Sv13.f * Sa31.f; + Sa31.f = Sv11.f * Sa31.f; + Stmp1.f = Sv21.f * Stmp2.f; + Sa31.f = __fadd_rn(Sa31.f, Stmp1.f); + Stmp1.f = Sv31.f * Stmp3.f; + Sa31.f = __fadd_rn(Sa31.f, Stmp1.f); + Stmp1.f = Sv22.f * Stmp2.f; + Sa32.f = __fadd_rn(Sa32.f, Stmp1.f); + Stmp1.f = Sv32.f * Stmp3.f; + Sa32.f = __fadd_rn(Sa32.f, Stmp1.f); + Stmp1.f = Sv23.f * Stmp2.f; + Sa33.f = __fadd_rn(Sa33.f, Stmp1.f); + Stmp1.f = Sv33.f * Stmp3.f; + Sa33.f = __fadd_rn(Sa33.f, Stmp1.f); + + // ########################################################### + // Permute columns such that the singular values are sorted + // ########################################################### + + Stmp1.f = Sa11.f * Sa11.f; + Stmp4.f = Sa21.f * Sa21.f; + Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f); + Stmp4.f = Sa31.f * Sa31.f; + Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f); + + Stmp2.f = Sa12.f * Sa12.f; + Stmp4.f = Sa22.f * Sa22.f; + Stmp2.f = __fadd_rn(Stmp2.f, Stmp4.f); + Stmp4.f = Sa32.f * Sa32.f; + Stmp2.f = __fadd_rn(Stmp2.f, Stmp4.f); + + Stmp3.f = Sa13.f * Sa13.f; + Stmp4.f = Sa23.f * Sa23.f; + Stmp3.f = __fadd_rn(Stmp3.f, Stmp4.f); + Stmp4.f = Sa33.f * Sa33.f; + Stmp3.f = __fadd_rn(Stmp3.f, Stmp4.f); + + // Swap columns 1-2 if necessary + + Stmp4.ui = (Stmp1.f < Stmp2.f) ? 
0xffffffff : 0; + Stmp5.ui = Sa11.ui ^ Sa12.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sa11.ui = Sa11.ui ^ Stmp5.ui; + Sa12.ui = Sa12.ui ^ Stmp5.ui; + + Stmp5.ui = Sa21.ui ^ Sa22.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sa21.ui = Sa21.ui ^ Stmp5.ui; + Sa22.ui = Sa22.ui ^ Stmp5.ui; + + Stmp5.ui = Sa31.ui ^ Sa32.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sa31.ui = Sa31.ui ^ Stmp5.ui; + Sa32.ui = Sa32.ui ^ Stmp5.ui; + + Stmp5.ui = Sv11.ui ^ Sv12.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sv11.ui = Sv11.ui ^ Stmp5.ui; + Sv12.ui = Sv12.ui ^ Stmp5.ui; + + Stmp5.ui = Sv21.ui ^ Sv22.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sv21.ui = Sv21.ui ^ Stmp5.ui; + Sv22.ui = Sv22.ui ^ Stmp5.ui; + + Stmp5.ui = Sv31.ui ^ Sv32.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sv31.ui = Sv31.ui ^ Stmp5.ui; + Sv32.ui = Sv32.ui ^ Stmp5.ui; + + Stmp5.ui = Stmp1.ui ^ Stmp2.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Stmp1.ui = Stmp1.ui ^ Stmp5.ui; + Stmp2.ui = Stmp2.ui ^ Stmp5.ui; + + // If columns 1-2 have been swapped, negate 2nd column of A and V so that V + // is still a rotation + + Stmp5.f = -2.f; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Stmp4.f = 1.f; + Stmp4.f = __fadd_rn(Stmp4.f, Stmp5.f); + + Sa12.f = Sa12.f * Stmp4.f; + Sa22.f = Sa22.f * Stmp4.f; + Sa32.f = Sa32.f * Stmp4.f; + + Sv12.f = Sv12.f * Stmp4.f; + Sv22.f = Sv22.f * Stmp4.f; + Sv32.f = Sv32.f * Stmp4.f; + + // Swap columns 1-3 if necessary + + Stmp4.ui = (Stmp1.f < Stmp3.f) ? 
0xffffffff : 0; + Stmp5.ui = Sa11.ui ^ Sa13.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sa11.ui = Sa11.ui ^ Stmp5.ui; + Sa13.ui = Sa13.ui ^ Stmp5.ui; + + Stmp5.ui = Sa21.ui ^ Sa23.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sa21.ui = Sa21.ui ^ Stmp5.ui; + Sa23.ui = Sa23.ui ^ Stmp5.ui; + + Stmp5.ui = Sa31.ui ^ Sa33.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sa31.ui = Sa31.ui ^ Stmp5.ui; + Sa33.ui = Sa33.ui ^ Stmp5.ui; + + Stmp5.ui = Sv11.ui ^ Sv13.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sv11.ui = Sv11.ui ^ Stmp5.ui; + Sv13.ui = Sv13.ui ^ Stmp5.ui; + + Stmp5.ui = Sv21.ui ^ Sv23.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sv21.ui = Sv21.ui ^ Stmp5.ui; + Sv23.ui = Sv23.ui ^ Stmp5.ui; + + Stmp5.ui = Sv31.ui ^ Sv33.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sv31.ui = Sv31.ui ^ Stmp5.ui; + Sv33.ui = Sv33.ui ^ Stmp5.ui; + + Stmp5.ui = Stmp1.ui ^ Stmp3.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Stmp1.ui = Stmp1.ui ^ Stmp5.ui; + Stmp3.ui = Stmp3.ui ^ Stmp5.ui; + + // If columns 1-3 have been swapped, negate 1st column of A and V so that V + // is still a rotation + + Stmp5.f = -2.f; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Stmp4.f = 1.f; + Stmp4.f = __fadd_rn(Stmp4.f, Stmp5.f); + + Sa11.f = Sa11.f * Stmp4.f; + Sa21.f = Sa21.f * Stmp4.f; + Sa31.f = Sa31.f * Stmp4.f; + + Sv11.f = Sv11.f * Stmp4.f; + Sv21.f = Sv21.f * Stmp4.f; + Sv31.f = Sv31.f * Stmp4.f; + + // Swap columns 2-3 if necessary + + Stmp4.ui = (Stmp2.f < Stmp3.f) ? 
0xffffffff : 0; + Stmp5.ui = Sa12.ui ^ Sa13.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sa12.ui = Sa12.ui ^ Stmp5.ui; + Sa13.ui = Sa13.ui ^ Stmp5.ui; + + Stmp5.ui = Sa22.ui ^ Sa23.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sa22.ui = Sa22.ui ^ Stmp5.ui; + Sa23.ui = Sa23.ui ^ Stmp5.ui; + + Stmp5.ui = Sa32.ui ^ Sa33.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sa32.ui = Sa32.ui ^ Stmp5.ui; + Sa33.ui = Sa33.ui ^ Stmp5.ui; + + Stmp5.ui = Sv12.ui ^ Sv13.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sv12.ui = Sv12.ui ^ Stmp5.ui; + Sv13.ui = Sv13.ui ^ Stmp5.ui; + + Stmp5.ui = Sv22.ui ^ Sv23.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sv22.ui = Sv22.ui ^ Stmp5.ui; + Sv23.ui = Sv23.ui ^ Stmp5.ui; + + Stmp5.ui = Sv32.ui ^ Sv33.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Sv32.ui = Sv32.ui ^ Stmp5.ui; + Sv33.ui = Sv33.ui ^ Stmp5.ui; + + Stmp5.ui = Stmp2.ui ^ Stmp3.ui; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Stmp2.ui = Stmp2.ui ^ Stmp5.ui; + Stmp3.ui = Stmp3.ui ^ Stmp5.ui; + + // If columns 2-3 have been swapped, negate 3rd column of A and V so that V + // is still a rotation + + Stmp5.f = -2.f; + Stmp5.ui = Stmp5.ui & Stmp4.ui; + Stmp4.f = 1.f; + Stmp4.f = __fadd_rn(Stmp4.f, Stmp5.f); + + Sa13.f = Sa13.f * Stmp4.f; + Sa23.f = Sa23.f * Stmp4.f; + Sa33.f = Sa33.f * Stmp4.f; + + Sv13.f = Sv13.f * Stmp4.f; + Sv23.f = Sv23.f * Stmp4.f; + Sv33.f = Sv33.f * Stmp4.f; + + // ########################################################### + // Construct QR factorization of A*V (=U*D) using Givens rotations + // ########################################################### + + Su11.f = 1.f; + Su12.f = 0.f; + Su13.f = 0.f; + Su21.f = 0.f; + Su22.f = 1.f; + Su23.f = 0.f; + Su31.f = 0.f; + Su32.f = 0.f; + Su33.f = 1.f; + + Ssh.f = Sa21.f * Sa21.f; + Ssh.ui = (Ssh.f >= gsmall_number) ? 0xffffffff : 0; + Ssh.ui = Ssh.ui & Sa21.ui; + + Stmp5.f = 0.f; + Sch.f = __fsub_rn(Stmp5.f, Sa11.f); + Sch.f = max(Sch.f, Sa11.f); + Sch.f = max(Sch.f, gsmall_number); + Stmp5.ui = (Sa11.f >= Stmp5.f) ? 
0xffffffff : 0; + + Stmp1.f = Sch.f * Sch.f; + Stmp2.f = Ssh.f * Ssh.f; + Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f); + Stmp1.f = __frsqrt_rn(Stmp2.f); + + Stmp4.f = Stmp1.f * 0.5f; + Stmp3.f = Stmp1.f * Stmp4.f; + Stmp3.f = Stmp1.f * Stmp3.f; + Stmp3.f = Stmp2.f * Stmp3.f; + Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f); + Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f); + Stmp1.f = Stmp1.f * Stmp2.f; + + Sch.f = __fadd_rn(Sch.f, Stmp1.f); + + Stmp1.ui = ~Stmp5.ui & Ssh.ui; + Stmp2.ui = ~Stmp5.ui & Sch.ui; + Sch.ui = Stmp5.ui & Sch.ui; + Ssh.ui = Stmp5.ui & Ssh.ui; + Sch.ui = Sch.ui | Stmp1.ui; + Ssh.ui = Ssh.ui | Stmp2.ui; + + Stmp1.f = Sch.f * Sch.f; + Stmp2.f = Ssh.f * Ssh.f; + Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f); + Stmp1.f = __frsqrt_rn(Stmp2.f); + + Stmp4.f = Stmp1.f * 0.5f; + Stmp3.f = Stmp1.f * Stmp4.f; + Stmp3.f = Stmp1.f * Stmp3.f; + Stmp3.f = Stmp2.f * Stmp3.f; + Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f); + Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f); + + Sch.f = Sch.f * Stmp1.f; + Ssh.f = Ssh.f * Stmp1.f; + + Sc.f = Sch.f * Sch.f; + Ss.f = Ssh.f * Ssh.f; + Sc.f = __fsub_rn(Sc.f, Ss.f); + Ss.f = Ssh.f * Sch.f; + Ss.f = __fadd_rn(Ss.f, Ss.f); + + // ########################################################### + // Rotate matrix A + // ########################################################### + + Stmp1.f = Ss.f * Sa11.f; + Stmp2.f = Ss.f * Sa21.f; + Sa11.f = Sc.f * Sa11.f; + Sa21.f = Sc.f * Sa21.f; + Sa11.f = __fadd_rn(Sa11.f, Stmp2.f); + Sa21.f = __fsub_rn(Sa21.f, Stmp1.f); + + Stmp1.f = Ss.f * Sa12.f; + Stmp2.f = Ss.f * Sa22.f; + Sa12.f = Sc.f * Sa12.f; + Sa22.f = Sc.f * Sa22.f; + Sa12.f = __fadd_rn(Sa12.f, Stmp2.f); + Sa22.f = __fsub_rn(Sa22.f, Stmp1.f); + + Stmp1.f = Ss.f * Sa13.f; + Stmp2.f = Ss.f * Sa23.f; + Sa13.f = Sc.f * Sa13.f; + Sa23.f = Sc.f * Sa23.f; + Sa13.f = __fadd_rn(Sa13.f, Stmp2.f); + Sa23.f = __fsub_rn(Sa23.f, Stmp1.f); + + // ########################################################### + // Update matrix U + // 
########################################################### + + Stmp1.f = Ss.f * Su11.f; + Stmp2.f = Ss.f * Su12.f; + Su11.f = Sc.f * Su11.f; + Su12.f = Sc.f * Su12.f; + Su11.f = __fadd_rn(Su11.f, Stmp2.f); + Su12.f = __fsub_rn(Su12.f, Stmp1.f); + + Stmp1.f = Ss.f * Su21.f; + Stmp2.f = Ss.f * Su22.f; + Su21.f = Sc.f * Su21.f; + Su22.f = Sc.f * Su22.f; + Su21.f = __fadd_rn(Su21.f, Stmp2.f); + Su22.f = __fsub_rn(Su22.f, Stmp1.f); + + Stmp1.f = Ss.f * Su31.f; + Stmp2.f = Ss.f * Su32.f; + Su31.f = Sc.f * Su31.f; + Su32.f = Sc.f * Su32.f; + Su31.f = __fadd_rn(Su31.f, Stmp2.f); + Su32.f = __fsub_rn(Su32.f, Stmp1.f); + + // Second Givens rotation + + Ssh.f = Sa31.f * Sa31.f; + Ssh.ui = (Ssh.f >= gsmall_number) ? 0xffffffff : 0; + Ssh.ui = Ssh.ui & Sa31.ui; + + Stmp5.f = 0.f; + Sch.f = __fsub_rn(Stmp5.f, Sa11.f); + Sch.f = max(Sch.f, Sa11.f); + Sch.f = max(Sch.f, gsmall_number); + Stmp5.ui = (Sa11.f >= Stmp5.f) ? 0xffffffff : 0; + + Stmp1.f = Sch.f * Sch.f; + Stmp2.f = Ssh.f * Ssh.f; + Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f); + Stmp1.f = __frsqrt_rn(Stmp2.f); + + Stmp4.f = Stmp1.f * 0.5f; /* float literal keeps this rsqrt refinement step in single precision, like the other refinement steps in this routine */ + Stmp3.f = Stmp1.f * Stmp4.f; + Stmp3.f = Stmp1.f * Stmp3.f; + Stmp3.f = Stmp2.f * Stmp3.f; + Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f); + Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f); + Stmp1.f = Stmp1.f * Stmp2.f; + + Sch.f = __fadd_rn(Sch.f, Stmp1.f); + + Stmp1.ui = ~Stmp5.ui & Ssh.ui; + Stmp2.ui = ~Stmp5.ui & Sch.ui; + Sch.ui = Stmp5.ui & Sch.ui; + Ssh.ui = Stmp5.ui & Ssh.ui; + Sch.ui = Sch.ui | Stmp1.ui; + Ssh.ui = Ssh.ui | Stmp2.ui; + + Stmp1.f = Sch.f * Sch.f; + Stmp2.f = Ssh.f * Ssh.f; + Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f); + Stmp1.f = __frsqrt_rn(Stmp2.f); + + Stmp4.f = Stmp1.f * 0.5f; + Stmp3.f = Stmp1.f * Stmp4.f; + Stmp3.f = Stmp1.f * Stmp3.f; + Stmp3.f = Stmp2.f * Stmp3.f; + Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f); + Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f); + + Sch.f = Sch.f * Stmp1.f; + Ssh.f = Ssh.f * Stmp1.f; + + Sc.f = Sch.f * Sch.f; + Ss.f = Ssh.f * Ssh.f; + Sc.f = __fsub_rn(Sc.f,
Ss.f); + Ss.f = Ssh.f * Sch.f; + Ss.f = __fadd_rn(Ss.f, Ss.f); + + // ########################################################### + // Rotate matrix A + // ########################################################### + + Stmp1.f = Ss.f * Sa11.f; + Stmp2.f = Ss.f * Sa31.f; + Sa11.f = Sc.f * Sa11.f; + Sa31.f = Sc.f * Sa31.f; + Sa11.f = __fadd_rn(Sa11.f, Stmp2.f); + Sa31.f = __fsub_rn(Sa31.f, Stmp1.f); + + Stmp1.f = Ss.f * Sa12.f; + Stmp2.f = Ss.f * Sa32.f; + Sa12.f = Sc.f * Sa12.f; + Sa32.f = Sc.f * Sa32.f; + Sa12.f = __fadd_rn(Sa12.f, Stmp2.f); + Sa32.f = __fsub_rn(Sa32.f, Stmp1.f); + + Stmp1.f = Ss.f * Sa13.f; + Stmp2.f = Ss.f * Sa33.f; + Sa13.f = Sc.f * Sa13.f; + Sa33.f = Sc.f * Sa33.f; + Sa13.f = __fadd_rn(Sa13.f, Stmp2.f); + Sa33.f = __fsub_rn(Sa33.f, Stmp1.f); + + // ########################################################### + // Update matrix U + // ########################################################### + + Stmp1.f = Ss.f * Su11.f; + Stmp2.f = Ss.f * Su13.f; + Su11.f = Sc.f * Su11.f; + Su13.f = Sc.f * Su13.f; + Su11.f = __fadd_rn(Su11.f, Stmp2.f); + Su13.f = __fsub_rn(Su13.f, Stmp1.f); + + Stmp1.f = Ss.f * Su21.f; + Stmp2.f = Ss.f * Su23.f; + Su21.f = Sc.f * Su21.f; + Su23.f = Sc.f * Su23.f; + Su21.f = __fadd_rn(Su21.f, Stmp2.f); + Su23.f = __fsub_rn(Su23.f, Stmp1.f); + + Stmp1.f = Ss.f * Su31.f; + Stmp2.f = Ss.f * Su33.f; + Su31.f = Sc.f * Su31.f; + Su33.f = Sc.f * Su33.f; + Su31.f = __fadd_rn(Su31.f, Stmp2.f); + Su33.f = __fsub_rn(Su33.f, Stmp1.f); + + // Third Givens Rotation + + Ssh.f = Sa32.f * Sa32.f; + Ssh.ui = (Ssh.f >= gsmall_number) ? 0xffffffff : 0; + Ssh.ui = Ssh.ui & Sa32.ui; + + Stmp5.f = 0.f; + Sch.f = __fsub_rn(Stmp5.f, Sa22.f); + Sch.f = max(Sch.f, Sa22.f); + Sch.f = max(Sch.f, gsmall_number); + Stmp5.ui = (Sa22.f >= Stmp5.f) ? 
0xffffffff : 0; + + Stmp1.f = Sch.f * Sch.f; + Stmp2.f = Ssh.f * Ssh.f; + Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f); + Stmp1.f = __frsqrt_rn(Stmp2.f); + + Stmp4.f = Stmp1.f * 0.5f; + Stmp3.f = Stmp1.f * Stmp4.f; + Stmp3.f = Stmp1.f * Stmp3.f; + Stmp3.f = Stmp2.f * Stmp3.f; + Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f); + Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f); + Stmp1.f = Stmp1.f * Stmp2.f; + + Sch.f = __fadd_rn(Sch.f, Stmp1.f); + + Stmp1.ui = ~Stmp5.ui & Ssh.ui; + Stmp2.ui = ~Stmp5.ui & Sch.ui; + Sch.ui = Stmp5.ui & Sch.ui; + Ssh.ui = Stmp5.ui & Ssh.ui; + Sch.ui = Sch.ui | Stmp1.ui; + Ssh.ui = Ssh.ui | Stmp2.ui; + + Stmp1.f = Sch.f * Sch.f; + Stmp2.f = Ssh.f * Ssh.f; + Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f); + Stmp1.f = __frsqrt_rn(Stmp2.f); + + Stmp4.f = Stmp1.f * 0.5f; + Stmp3.f = Stmp1.f * Stmp4.f; + Stmp3.f = Stmp1.f * Stmp3.f; + Stmp3.f = Stmp2.f * Stmp3.f; + Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f); + Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f); + + Sch.f = Sch.f * Stmp1.f; + Ssh.f = Ssh.f * Stmp1.f; + + Sc.f = Sch.f * Sch.f; + Ss.f = Ssh.f * Ssh.f; + Sc.f = __fsub_rn(Sc.f, Ss.f); + Ss.f = Ssh.f * Sch.f; + Ss.f = __fadd_rn(Ss.f, Ss.f); + + // ########################################################### + // Rotate matrix A + // ########################################################### + + Stmp1.f = Ss.f * Sa21.f; + Stmp2.f = Ss.f * Sa31.f; + Sa21.f = Sc.f * Sa21.f; + Sa31.f = Sc.f * Sa31.f; + Sa21.f = __fadd_rn(Sa21.f, Stmp2.f); + Sa31.f = __fsub_rn(Sa31.f, Stmp1.f); + + Stmp1.f = Ss.f * Sa22.f; + Stmp2.f = Ss.f * Sa32.f; + Sa22.f = Sc.f * Sa22.f; + Sa32.f = Sc.f * Sa32.f; + Sa22.f = __fadd_rn(Sa22.f, Stmp2.f); + Sa32.f = __fsub_rn(Sa32.f, Stmp1.f); + + Stmp1.f = Ss.f * Sa23.f; + Stmp2.f = Ss.f * Sa33.f; + Sa23.f = Sc.f * Sa23.f; + Sa33.f = Sc.f * Sa33.f; + Sa23.f = __fadd_rn(Sa23.f, Stmp2.f); + Sa33.f = __fsub_rn(Sa33.f, Stmp1.f); + + // ########################################################### + // Update matrix U + // 
########################################################### + + Stmp1.f = Ss.f * Su12.f; + Stmp2.f = Ss.f * Su13.f; + Su12.f = Sc.f * Su12.f; + Su13.f = Sc.f * Su13.f; + Su12.f = __fadd_rn(Su12.f, Stmp2.f); + Su13.f = __fsub_rn(Su13.f, Stmp1.f); + + Stmp1.f = Ss.f * Su22.f; + Stmp2.f = Ss.f * Su23.f; + Su22.f = Sc.f * Su22.f; + Su23.f = Sc.f * Su23.f; + Su22.f = __fadd_rn(Su22.f, Stmp2.f); + Su23.f = __fsub_rn(Su23.f, Stmp1.f); + + Stmp1.f = Ss.f * Su32.f; + Stmp2.f = Ss.f * Su33.f; + Su32.f = Sc.f * Su32.f; + Su33.f = Sc.f * Su33.f; + Su32.f = __fadd_rn(Su32.f, Stmp2.f); + Su33.f = __fsub_rn(Su33.f, Stmp1.f); + + v11 = Sv11.f; + v12 = Sv12.f; + v13 = Sv13.f; + v21 = Sv21.f; + v22 = Sv22.f; + v23 = Sv23.f; + v31 = Sv31.f; + v32 = Sv32.f; + v33 = Sv33.f; + + u11 = Su11.f; + u12 = Su12.f; + u13 = Su13.f; + u21 = Su21.f; + u22 = Su22.f; + u23 = Su23.f; + u31 = Su31.f; + u32 = Su32.f; + u33 = Su33.f; + + s11 = Sa11.f; + // s12 = Sa12.f; s13 = Sa13.f; s21 = Sa21.f; + s22 = Sa22.f; + // s23 = Sa23.f; s31 = Sa31.f; s32 = Sa32.f; + s33 = Sa33.f; +} + +/* Eigen convenience overload of svd (device-only). Zeroes S, then forwards the nine coefficients of mat, the nine coefficients of U, S(0)..S(2), and the nine coefficients of V, each matrix in row-by-row order, to the scalar svd overload above. The scalar routine stores only s11, s22 and s33 (its off-diagonal stores are commented out), so S receives the three diagonal values. NOTE(review): the argument grouping (A, then U, then S, then V) is assumed to match the scalar overload's parameter list, which is declared outside this view - confirm. The accompanying test reconstructs the input as U * S.asDiagonal() * V.transpose(). */ __device__ __forceinline__ void svd(const Eigen::Matrix3f& mat, + Eigen::Matrix3f& U, + Eigen::Vector3f& S, + Eigen::Matrix3f& V) +{ + S.setZero(); + + svd(mat(0, 0), + mat(0, 1), + mat(0, 2), + mat(1, 0), + mat(1, 1), + mat(1, 2), + mat(2, 0), + mat(2, 1), + mat(2, 2), + + U(0, 0), + U(0, 1), + U(0, 2), + U(1, 0), + U(1, 1), + U(1, 2), + U(2, 0), + U(2, 1), + U(2, 2), + + S(0), + S(1), + S(2), + + V(0, 0), + V(0, 1), + V(0, 2), + V(1, 0), + V(1, 1), + V(1, 2), + V(2, 0), + V(2, 1), + V(2, 2)); +} + +} // namespace rxmesh \ No newline at end of file diff --git a/tests/RXMesh_test/CMakeLists.txt b/tests/RXMesh_test/CMakeLists.txt index cad48095..1c85e361 100644 --- a/tests/RXMesh_test/CMakeLists.txt +++ b/tests/RXMesh_test/CMakeLists.txt @@ -26,6 +26,7 @@ set( SOURCE_LIST test_boundary.cuh test_dense_matrix.cuh test_export.cuh + test_svd.cuh ) target_sources( RXMesh_test diff --git a/tests/RXMesh_test/rxmesh_test_main.cu
b/tests/RXMesh_test/rxmesh_test_main.cu index f12ec56f..7bbad04b 100644 --- a/tests/RXMesh_test/rxmesh_test_main.cu +++ b/tests/RXMesh_test/rxmesh_test_main.cu @@ -2,6 +2,7 @@ #include "gtest/gtest.h" #include "rxmesh/util/log.h" #include "rxmesh/util/report.h" +#include "rxmesh/util/cuda_query.h" using dataT = float; @@ -35,6 +36,7 @@ struct RXMeshTestArg #include "test_boundary.cuh" #include "test_dense_matrix.cuh" #include "test_export.cuh" +#include "test_svd.cuh" // clang-format on int main(int argc, char** argv) @@ -87,5 +89,7 @@ int main(int argc, char** argv) RXMESH_TRACE("num_run= {}", rxmesh_args.num_run); RXMESH_TRACE("device_id= {}", rxmesh_args.device_id); + cuda_query(rxmesh_args.device_id); + return RUN_ALL_TESTS(); } diff --git a/tests/RXMesh_test/test_svd.cuh b/tests/RXMesh_test/test_svd.cuh new file mode 100644 index 00000000..5f0a1053 --- /dev/null +++ b/tests/RXMesh_test/test_svd.cuh @@ -0,0 +1,96 @@ +#include "gtest/gtest.h" + +#include + +#include "rxmesh/rxmesh_static.h" + +#include "rxmesh/matrix/dense_matrix.cuh" + +#include "rxmesh/util/svd3_cuda.h" + +#include + +template +__global__ static void test_svd(const rxmesh::Context context, + const rxmesh::VertexAttribute in_mat, + rxmesh::VertexAttribute out_mat) +{ + using namespace rxmesh; + + auto compute_svd = [&](VertexHandle& vh) { + // input matrix + Eigen::Matrix3f mat; + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + mat(i, j) = in_mat(vh, i * 3 + j); + } + } + Eigen::Matrix3f U; // left singular vectors + Eigen::Matrix3f V; // right singular vectors + Eigen::Vector3f S; // singular values + + svd(mat, U, S, V); + + + // reconstructed matrix from SVD + Eigen::Matrix3f recon = U * S.asDiagonal() * V.transpose(); + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + out_mat(vh, i * 3 + j) = recon(i, j); + } + } + }; + + for_each(context, compute_svd); +} + + +TEST(Util, SVD) +{ + using namespace rxmesh; + + std::string obj_path = STRINGIFY(INPUT_DIR) 
"sphere3.obj"; + + RXMeshStatic rx(obj_path); + + // every vertex is assigned to a 3x3 matrix i.e., 9 attributes + auto in_mat = *rx.add_vertex_attribute("vAttrIn", 9); + + auto out_mat = *rx.add_vertex_attribute("vAttrOut", 9); + + rx.for_each_vertex(HOST, [&](VertexHandle vh) { + for (int i = 0; i < 9; ++i) { + in_mat(vh, i) = (float(std::rand()) / float(RAND_MAX)); + } + }); + + in_mat.move(HOST, DEVICE); + + out_mat.reset(0.f, DEVICE); + + constexpr uint32_t blockThreads = 256; + + LaunchBox launch_box; + + rx.prepare_launch_box({}, launch_box, (void*)test_svd); + + test_svd + <<>>(rx.get_context(), in_mat, out_mat); + + CUDA_ERROR(cudaDeviceSynchronize()); + + out_mat.move(DEVICE, HOST); + + rx.for_each_vertex( + HOST, + [&](VertexHandle vh) { + for (int i = 0; i < 9; ++i) { + EXPECT_LT(std::abs(in_mat(vh, i) - out_mat(vh, i)), 0.01); + } + }, + NULL, + false); +} From 6b5dd6293175548f1f5103fce52812d41678e49f Mon Sep 17 00:00:00 2001 From: ahmed Date: Fri, 26 Jul 2024 22:41:40 -0400 Subject: [PATCH 55/96] minor cleanup --- tests/RXMesh_test/test_attribute.cuh | 10 ---------- tests/RXMesh_test/test_boundary.cuh | 2 -- tests/RXMesh_test/test_dense_matrix.cuh | 14 -------------- tests/RXMesh_test/test_dynamic.cuh | 6 ++---- tests/RXMesh_test/test_eigen.cuh | 4 +--- tests/RXMesh_test/test_ev_diamond.h | 6 +----- tests/RXMesh_test/test_export.cuh | 2 -- tests/RXMesh_test/test_for_each.cuh | 4 ---- tests/RXMesh_test/test_higher_queries.h | 3 --- tests/RXMesh_test/test_multi_queries.cuh | 3 --- tests/RXMesh_test/test_patch_slicing.cuh | 3 +-- tests/RXMesh_test/test_queries.h | 7 ++----- tests/RXMesh_test/test_queries_oriented.h | 6 ------ tests/RXMesh_test/test_sparse_matrix.cuh | 17 ++--------------- tests/RXMesh_test/test_validate.h | 2 -- 15 files changed, 9 insertions(+), 80 deletions(-) diff --git a/tests/RXMesh_test/test_attribute.cuh b/tests/RXMesh_test/test_attribute.cuh index 2b9bfc67..7116294d 100644 --- a/tests/RXMesh_test/test_attribute.cuh +++ 
b/tests/RXMesh_test/test_attribute.cuh @@ -58,8 +58,6 @@ TEST(Attribute, Norm2) CUDA_ERROR(cudaDeviceReset()); - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); auto attr = rx.add_vertex_attribute("v", 3, rxmesh::DEVICE); @@ -84,8 +82,6 @@ TEST(Attribute, Dot) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); auto v1_attr = rx.add_vertex_attribute("v1", 3, rxmesh::DEVICE); @@ -111,8 +107,6 @@ TEST(Attribute, Reduce) CUDA_ERROR(cudaDeviceReset()); - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); auto attr = rx.add_edge_attribute("e", 3, rxmesh::DEVICE); @@ -147,8 +141,6 @@ TEST(Attribute, CopyFrom) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); auto f_device = rx.add_face_attribute("d", 3, DEVICE); @@ -169,8 +161,6 @@ TEST(Attribute, AddingAndRemoving) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); std::string attr_name = "v_attr"; diff --git a/tests/RXMesh_test/test_boundary.cuh b/tests/RXMesh_test/test_boundary.cuh index c69bb89e..6330eb60 100644 --- a/tests/RXMesh_test/test_boundary.cuh +++ b/tests/RXMesh_test/test_boundary.cuh @@ -6,8 +6,6 @@ TEST(RXMeshStatic, BoundaryVertex) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "bunnyhead.obj"); auto v_bd = *rx.add_vertex_attribute("vBoundary", 1); diff --git a/tests/RXMesh_test/test_dense_matrix.cuh b/tests/RXMesh_test/test_dense_matrix.cuh index 8e75ea17..a7a5333c 100644 --- a/tests/RXMesh_test/test_dense_matrix.cuh +++ b/tests/RXMesh_test/test_dense_matrix.cuh @@ -8,8 +8,6 @@ TEST(RXMeshStatic, DenseMatrixToEigen) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); DenseMatrix rx_mat(rx, 10, 10); @@ -44,8 
+42,6 @@ TEST(RXMeshStatic, DenseMatrixASum) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); DenseMatrix mat(rx, 10, 10); @@ -74,8 +70,6 @@ TEST(RXMeshStatic, DenseMatrixAXPY) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); DenseMatrix Y(rx, 10, 10); @@ -113,8 +107,6 @@ TEST(RXMeshStatic, DenseMatrixDot) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); DenseMatrix y(rx, 10, 10); @@ -155,8 +147,6 @@ TEST(RXMeshStatic, DenseMatrixNorm2) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); DenseMatrix x(rx, 10, 10); @@ -187,8 +177,6 @@ TEST(RXMeshStatic, DenseMatrixMulitply) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); DenseMatrix x(rx, 10, 10); @@ -229,8 +217,6 @@ TEST(RXMeshStatic, DenseMatrixSwap) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); DenseMatrix x(rx, 10, 10); diff --git a/tests/RXMesh_test/test_dynamic.cuh b/tests/RXMesh_test/test_dynamic.cuh index 357d7977..7d67001f 100644 --- a/tests/RXMesh_test/test_dynamic.cuh +++ b/tests/RXMesh_test/test_dynamic.cuh @@ -265,8 +265,7 @@ inline void set_edge_tag(rxmesh::RXMeshDynamic& rx, TEST(RXMeshDynamic, RandomFlips) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - + RXMeshDynamic rx(STRINGIFY(INPUT_DIR) "sphere3.obj", STRINGIFY(INPUT_DIR) "sphere3_patches"); @@ -384,8 +383,7 @@ TEST(RXMeshDynamic, RandomFlips) TEST(RXMeshDynamic, RandomCollapse) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - + RXMeshDynamic rx(STRINGIFY(INPUT_DIR) "sphere3.obj", STRINGIFY(INPUT_DIR) "sphere3_patches"); diff --git a/tests/RXMesh_test/test_eigen.cuh b/tests/RXMesh_test/test_eigen.cuh 
index 804b1c6e..055093bb 100644 --- a/tests/RXMesh_test/test_eigen.cuh +++ b/tests/RXMesh_test/test_eigen.cuh @@ -30,8 +30,6 @@ TEST(Attribute, Eigen) { using namespace rxmesh; - cuda_query(0); - std::string obj_path = STRINGIFY(INPUT_DIR) "dragon.obj"; RXMeshStatic rx(obj_path); @@ -59,7 +57,7 @@ TEST(Attribute, Eigen) launch_box.num_threads, launch_box.smem_bytes_dyn>>>(rx.get_context(), in_attr, out_attr); - cudaDeviceSynchronize(); + CUDA_ERROR(cudaDeviceSynchronize()); out_attr.move(DEVICE, HOST); diff --git a/tests/RXMesh_test/test_ev_diamond.h b/tests/RXMesh_test/test_ev_diamond.h index bc61a08f..3808bc46 100644 --- a/tests/RXMesh_test/test_ev_diamond.h +++ b/tests/RXMesh_test/test_ev_diamond.h @@ -9,11 +9,7 @@ TEST(RXMeshStatic, EVDiamond) { using namespace rxmesh; - - // Select device - cuda_query(rxmesh_args.device_id); - - // RXMesh + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "plane_5.obj"); // input/output container diff --git a/tests/RXMesh_test/test_export.cuh b/tests/RXMesh_test/test_export.cuh index 80de621d..e05786d4 100644 --- a/tests/RXMesh_test/test_export.cuh +++ b/tests/RXMesh_test/test_export.cuh @@ -9,8 +9,6 @@ TEST(RXMeshStatic, Export) CUDA_ERROR(cudaDeviceReset()); - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); auto v_attr_scalar = *rx.add_vertex_attribute("vScalar", 1); diff --git a/tests/RXMesh_test/test_for_each.cuh b/tests/RXMesh_test/test_for_each.cuh index 7128e315..a70efafd 100644 --- a/tests/RXMesh_test/test_for_each.cuh +++ b/tests/RXMesh_test/test_for_each.cuh @@ -10,8 +10,6 @@ TEST(RXMeshStatic, ForEach) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "cube.obj"); std::atomic_uint32_t num_v = 0; @@ -48,8 +46,6 @@ TEST(RXMeshStatic, ForEachOnDevice) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - constexpr uint32_t blockThreads = 256; LaunchBox launch_box; diff --git a/tests/RXMesh_test/test_higher_queries.h 
b/tests/RXMesh_test/test_higher_queries.h index 034a92c3..6542dd7d 100644 --- a/tests/RXMesh_test/test_higher_queries.h +++ b/tests/RXMesh_test/test_higher_queries.h @@ -9,14 +9,11 @@ TEST(RXMeshStatic, DISABLED_HigherQueries) { using namespace rxmesh; - // Select device - cuda_query(rxmesh_args.device_id); std::vector> Verts; std::vector> Faces; ASSERT_TRUE(import_obj(STRINGIFY(INPUT_DIR) "sphere3.obj", Verts, Faces)); - // RXMesh RXMeshStatic rx(Faces); diff --git a/tests/RXMesh_test/test_multi_queries.cuh b/tests/RXMesh_test/test_multi_queries.cuh index 08a73d41..dd126e80 100644 --- a/tests/RXMesh_test/test_multi_queries.cuh +++ b/tests/RXMesh_test/test_multi_queries.cuh @@ -108,9 +108,6 @@ TEST(RXMeshStatic, MultiQueries) using namespace rxmesh; constexpr uint32_t blockThreads = 320; - // Select device - cuda_query(rxmesh_args.device_id); - RXMeshStatic rx(rxmesh_args.obj_file_name); const auto coords = rx.get_input_vertex_coordinates(); diff --git a/tests/RXMesh_test/test_patch_slicing.cuh b/tests/RXMesh_test/test_patch_slicing.cuh index c16f921c..32e2bbf0 100644 --- a/tests/RXMesh_test/test_patch_slicing.cuh +++ b/tests/RXMesh_test/test_patch_slicing.cuh @@ -16,8 +16,7 @@ __global__ static void set_patch_should_slice(rxmesh::Context context, int p) TEST(RXMeshDynamic, PatchSlicing) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - + RXMeshDynamic rx(rxmesh_args.obj_file_name, STRINGIFY(OUTPUT_DIR) + extract_file_name(rxmesh_args.obj_file_name) + diff --git a/tests/RXMesh_test/test_queries.h b/tests/RXMesh_test/test_queries.h index 422c2d8b..a48f4e57 100644 --- a/tests/RXMesh_test/test_queries.h +++ b/tests/RXMesh_test/test_queries.h @@ -96,16 +96,13 @@ TEST(RXMeshStatic, Queries) using namespace rxmesh; bool oriented = false; - - // Select device - cuda_query(rxmesh_args.device_id); - + std::vector> Verts; std::vector> Faces; ASSERT_TRUE(import_obj(rxmesh_args.obj_file_name, Verts, Faces)); - // RXMesh + RXMeshStatic rx(Faces); diff --git 
a/tests/RXMesh_test/test_queries_oriented.h b/tests/RXMesh_test/test_queries_oriented.h index 1ade6d07..4a51f542 100644 --- a/tests/RXMesh_test/test_queries_oriented.h +++ b/tests/RXMesh_test/test_queries_oriented.h @@ -15,9 +15,6 @@ TEST(RXMeshStatic, Oriented_VV_Open) { using namespace rxmesh; - // Select device - cuda_query(rxmesh_args.device_id); - std::vector> Verts; std::vector> Faces; @@ -135,9 +132,6 @@ TEST(RXMeshStatic, Oriented_VV_Closed) { using namespace rxmesh; - // Select device - cuda_query(rxmesh_args.device_id); - std::vector> Verts; std::vector> Faces; diff --git a/tests/RXMesh_test/test_sparse_matrix.cuh b/tests/RXMesh_test/test_sparse_matrix.cuh index 1c37b8bd..eb79652e 100644 --- a/tests/RXMesh_test/test_sparse_matrix.cuh +++ b/tests/RXMesh_test/test_sparse_matrix.cuh @@ -132,9 +132,6 @@ TEST(RXMeshStatic, SparseMatrix) using namespace rxmesh; - // Select device - cuda_query(rxmesh_args.device_id); - // generate rxmesh obj std::string obj_path = STRINGIFY(INPUT_DIR) "dragon.obj"; RXMeshStatic rx(obj_path); @@ -202,8 +199,6 @@ TEST(RXMeshStatic, SparseMatrixEdgeLen) // using namespace rxmesh; - // Select device - cuda_query(rxmesh_args.device_id); // generate rxmesh obj RXMeshStatic rx(rxmesh_args.obj_file_name); @@ -272,10 +267,7 @@ TEST(RXMeshStatic, SparseMatrixSimpleSolve) // matrix. 
using namespace rxmesh; - - // Select device - cuda_query(rxmesh_args.device_id); - + // generate rxmesh obj std::string obj_path = rxmesh_args.obj_file_name; RXMeshStatic rx(obj_path); @@ -335,9 +327,7 @@ TEST(RXMeshStatic, SparseMatrixSimpleSolve) TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) { using namespace rxmesh; - - // Select device - cuda_query(rxmesh_args.device_id); + // generate rxmesh obj std::string obj_path = rxmesh_args.obj_file_name; @@ -403,9 +393,6 @@ TEST(RXMeshStatic, SparseMatrixToEigen) { using namespace rxmesh; - // Select device - cuda_query(rxmesh_args.device_id); - // generate rxmesh obj std::string obj_path = rxmesh_args.obj_file_name; RXMeshStatic rx(obj_path); diff --git a/tests/RXMesh_test/test_validate.h b/tests/RXMesh_test/test_validate.h index 3066f89f..9715807e 100644 --- a/tests/RXMesh_test/test_validate.h +++ b/tests/RXMesh_test/test_validate.h @@ -5,8 +5,6 @@ TEST(RXMeshDynamic, Validate) { using namespace rxmesh; - cuda_query(rxmesh_args.device_id); - RXMeshDynamic rxmesh(STRINGIFY(INPUT_DIR) "dragon.obj"); EXPECT_TRUE(rxmesh.validate()); From 7fb410ddc202a0440656155bc909946d4ea0da2c Mon Sep 17 00:00:00 2001 From: ahmed Date: Fri, 26 Jul 2024 22:49:37 -0400 Subject: [PATCH 56/96] fix svd --- apps/ARAP/arap.cu | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 97cb8783..5a5c2dfc 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -5,6 +5,7 @@ #include "Eigen/Dense" +#include "rxmesh/util/svd3_cuda.h" using namespace rxmesh; @@ -167,8 +168,13 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex // R =VU + Eigen::Matrix3f U; // left singular vectors + Eigen::Matrix3f V; // right singular vectors + Eigen::Vector3f sing_val; // singular values - Eigen::JacobiSVD svd(S); + svd(S, U, sing_val, V); + + //Eigen::JacobiSVD svd(S); /* From 1a1318333ed163c4205bc5aa88b60eb8c24e258c Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Mon, 
29 Jul 2024 12:16:07 +0530 Subject: [PATCH 57/96] complete rotation function --- apps/ARAP/arap.cu | 56 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 5a5c2dfc..057265e2 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -130,9 +130,10 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex rxmesh::SparseMatrix weight_mat) { - auto vn_lambda = [&](VertexHandle v_id, VertexIterator& vv) { + auto vn_lambda = [&](VertexHandle v_id, VertexIterator& vv) + { // pi - + /* Eigen::MatrixXf pi = Eigen::MatrixXf::Identity(3, vv.size()); for (int j = 0; j < vv.size(); j++) { @@ -158,10 +159,30 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex pi_dash(1, j) = current_coords(v_id, 1) - current_coords(vv[j], 1); pi_dash(2, j) = current_coords(v_id, 2) - current_coords(vv[j], 2); } - + */ // calculate covariance matrix S = piDiPiTdash - Eigen::Matrix3f S = pi * diagonal_mat * pi_dash.transpose(); + Eigen::Matrix3f S; //= pi * diagonal_mat * pi_dash.transpose(); + + for (int j=0;j pi_vector = { + ref_coords(v_id, 0) - ref_coords(vv[j], 0), + ref_coords(v_id, 1) - ref_coords(vv[j], 1), + ref_coords(v_id, 2) - ref_coords(vv[j], 2)}; + + + Eigen::Vector pi_dash_vector = { + current_coords(v_id, 0) - current_coords(vv[j], 0), + current_coords(v_id, 1) - current_coords(vv[j], 1), + current_coords(v_id, 2) - current_coords(vv[j], 2)}; + + S = S + w * pi_vector * pi_dash_vector.transpose(); + + + } // perform svd on S (eigen) @@ -173,7 +194,21 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex Eigen::Vector3f sing_val; // singular values svd(S, U, sing_val, V); + + const float smallest_singular_value = sing_val.minCoeff(); + + U.col(smallest_singular_value) = U.col(smallest_singular_value) * -1; + + Eigen::MatrixXf R = V * U.transpose(); + + + // Matrix R to vector attribute R + for (int i = 
0; i < 3; i++) + for (int j = 0; j < 3; j++) + rotationVector(v_id, i * 3 + j) = R(i, j); + + //Eigen::JacobiSVD svd(S); @@ -181,17 +216,10 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex Eigen::MatrixXf V = S.jacobiSvd().matrixV(); Eigen::MatrixXf U = S.jacobiSvd().matrixU().eval(); - float smallest_singular_value = - S.jacobiSvd().singularValues().minCoeff(); - - U.col(smallest_singular_value)= U.col(smallest_singular_value) * -1; + - Eigen::MatrixXf R = V * U; - // Matrix R to vector attribute R - for (int i=0;i<3;i++) { - for (int j = 0; j < 3; j++) - rotationVector(v_id, i * 3 + j) = R(i, j); - } + + */ }; From d8e0cb9d0da24a3ff926022eda4263ff01ce1b18 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Mon, 29 Jul 2024 13:36:07 +0530 Subject: [PATCH 58/96] set up input for application --- apps/ARAP/arap.cu | 77 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 6 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 057265e2..d0176bd1 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -130,6 +130,8 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex rxmesh::SparseMatrix weight_mat) { + + auto vn_lambda = [&](VertexHandle v_id, VertexIterator& vv) { // pi @@ -161,7 +163,9 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex } */ // calculate covariance matrix S = piDiPiTdash + + Eigen::Matrix3f S; //= pi * diagonal_mat * pi_dash.transpose(); for (int j=0;j(block, shrd_alloc, vn_lambda); } +template +__global__ static void test_input( + const rxmesh::Context context, + rxmesh::VertexAttribute ref_coords, + rxmesh::VertexAttribute current_coords + ) +{ + //above a specific z up value, shift x by 1 + auto vn_lambda = [&](VertexHandle v_id, VertexIterator& vv) { + + current_coords(v_id, 0) = ref_coords(v_id, 0); + current_coords(v_id, 1) = ref_coords(v_id, 1); + current_coords(v_id, 2) = ref_coords(v_id, 2); + + if 
(current_coords(v_id,1)>0.1) { + current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; + } + + }; + + + auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, vn_lambda); +} @@ -245,17 +284,41 @@ int main(int argc, char** argv) //RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); RXMeshStatic rx(STRINGIFY(INPUT_DIR) "dragon.obj"); + + auto ref_vertex_pos = + *rx.get_input_vertex_coordinates(); // stays same across computation + auto changed_vertex_pos = + rx.add_vertex_attribute("P", 3); // changes per iteration + + + //input + constexpr uint32_t CUDABlockSize = 256; + rxmesh::LaunchBox input_launch_box; + rx.prepare_launch_box( + {rxmesh::Op::VV}, + input_launch_box, + (void*)test_input); + + test_input<<>>( + rx.get_context(), ref_vertex_pos, *changed_vertex_pos); + + changed_vertex_pos->move(DEVICE, HOST); + + //process + + + /* //compute wij auto weights = rx.add_edge_attribute("edgeWeights", 1); - auto ref_vertex_pos = *rx.get_input_vertex_coordinates(); // stays same across computation - auto changed_vertex_pos = rx.add_vertex_attribute("P", 3); // changes per iteration + SparseMatrix weight_matrix(rx); //obtain cotangent weight matrix - constexpr uint32_t CUDABlockSize = 256; rxmesh::LaunchBox launch_box; rx.prepare_launch_box({rxmesh::Op::EVDiamond}, launch_box, @@ -296,7 +359,7 @@ int main(int argc, char** argv) rx.prepare_launch_box({rxmesh::Op::VV}, rotation_launch_box, (void*)calculate_rotation_matrix); - /* + calculate_rotation_matrix <<move(DEVICE, HOST); + */ + rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); #if USE_POLYSCOPE From 5f61bcb85504caeb6fa11a099f97754f0eb26177 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Mon, 29 Jul 2024 20:15:33 +0530 Subject: [PATCH 59/96] successfully compiling arap solver --- apps/ARAP/arap.cu | 250 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 192 insertions(+), 58 deletions(-) diff 
--git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index d0176bd1..cbb9a91c 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -129,44 +129,9 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex rxmesh::VertexAttribute rotationVector, rxmesh::SparseMatrix weight_mat) { - - - auto vn_lambda = [&](VertexHandle v_id, VertexIterator& vv) { - // pi - /* - Eigen::MatrixXf pi = Eigen::MatrixXf::Identity(3, vv.size()); - for (int j = 0; j < vv.size(); j++) - { - pi(0, j) = ref_coords(v_id, 0) - ref_coords(vv[j], 0); - pi(1, j) = ref_coords(v_id, 1) - ref_coords(vv[j], 1); - pi(2, j) = ref_coords(v_id, 2) - ref_coords(vv[j], 2); - } - - // Di - Eigen::VectorXf weight_vector; - weight_vector.resize(vv.size()); - - for (int v = 0; v < vv.size();v++) - { - weight_vector(v) = weight_mat(v_id, vv[v]); - } - Eigen::MatrixXf diagonal_mat = weight_vector.asDiagonal(); - - // pi'T - Eigen::MatrixXf pi_dash = Eigen::MatrixXf::Identity(3, vv.size()); - for (int j = 0; j < vv.size(); j++) { - pi_dash(0, j) = current_coords(v_id, 0) - current_coords(vv[j], 0); - pi_dash(1, j) = current_coords(v_id, 1) - current_coords(vv[j], 1); - pi_dash(2, j) = current_coords(v_id, 2) - current_coords(vv[j], 2); - } - */ - // calculate covariance matrix S = piDiPiTdash - - - - Eigen::Matrix3f S; //= pi * diagonal_mat * pi_dash.transpose(); + Eigen::Matrix3f S; for (int j=0;j new_coords = {current_coords(v_id, 0), + current_coords(v_id, 1), + current_coords(v_id, 2)}; + new_coords = new_coords.transpose() * R; + + current_coords(v_id, 0) = new_coords[0]; + current_coords(v_id, 1) = new_coords[1]; + current_coords(v_id, 2) = new_coords[2]; + */ }; auto block = cooperative_groups::this_thread_block(); @@ -246,7 +212,8 @@ template __global__ static void test_input( const rxmesh::Context context, rxmesh::VertexAttribute ref_coords, - rxmesh::VertexAttribute current_coords + rxmesh::VertexAttribute current_coords, + rxmesh::VertexAttribute constrained ) { @@ -259,6 +226,10 @@ 
__global__ static void test_input( if (current_coords(v_id,1)>0.1) { current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; + constrained(v_id, 0) = 1; + } + else { + constrained(v_id, 0) = 0; } }; @@ -271,6 +242,98 @@ __global__ static void test_input( } +/* compute all entries of bMatrix parallely */ +template +__global__ static void calculate_b( + const rxmesh::Context context, + rxmesh::VertexAttribute original_coords, // [num_coord, 3] + rxmesh::VertexAttribute rot_mat, // [num_coord, 9] + rxmesh::SparseMatrix weight_mat, // [num_coord, num_coord] + rxmesh::DenseMatrix bMatrix) // [num_coord, 3] +{ + auto init_lambda = [&](VertexHandle v_id, VertexIterator& vv) { + + // variable to store ith entry of bMatrix + Eigen::Vector3f bi(0.0f, 0.0f, 0.0f); + + // get rotation matrix for ith vertex + Eigen::Matrix3f Ri = Eigen::Matrix3f::Zero(3, 3); + + for (int i = 0; i < 3; i++) + { + for (int j = 0; j < 3; j++) + Ri(i, j) = rot_mat(v_id, i * 3 + j); + } + Eigen::VectorXf w; + w.resize(vv.size()); + + for (int v = 0; v < vv.size(); v++) { + w(v) = weight_mat(v_id, vv[v]); + } + + for (int nei_index = 0; nei_index < vv.size(); nei_index++) { + // get rotation matrix for neightbor j + Eigen::Matrix3f Rj = Eigen::Matrix3f::Zero(3, 3); + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + Rj(i, j) = rot_mat(vv[nei_index], i * 3 + j); + + // find rotation addition + Eigen::Matrix3f rot_add = Ri + Rj; + // find coord difference + Eigen::Vector3f vert_diff = + { + original_coords(v_id, 0) - original_coords(vv[nei_index], 0), + original_coords(v_id, 1) - original_coords(vv[nei_index], 1), + original_coords(v_id, 2) - original_coords(vv[nei_index], 2) + }; + + // update bi + bi = bi + 0.5 * w[nei_index] * rot_add * vert_diff; + } + + bMatrix(v_id, 0) = bi[0]; + bMatrix(v_id, 1) = bi[1]; + bMatrix(v_id, 2) = bi[2]; + }; + + auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, 
init_lambda); +} + +/* compute system matrix rows parallely (L from eq9) */ +template +__global__ static void calculate_system_matrix( + const rxmesh::Context context, + rxmesh::SparseMatrix weight_mat, // [num_coord, num_coord] + rxmesh::SparseMatrix L, // [num_coord, num_coord] + rxmesh::VertexAttribute constrained) + +{ + auto init_lambda = [&](VertexHandle v_id, VertexIterator& vv) { + + if (constrained(v_id, 0)==0) { + for (int nei_index = 0; nei_index < vv.size(); nei_index++) + { + L(v_id, v_id) += weight_mat(v_id, vv[nei_index]); + L(v_id, vv[nei_index]) -= weight_mat(v_id, vv[nei_index]); + } + } + else { + for (int nei_index = 0; nei_index < vv.size(); nei_index++) { + L(v_id, vv[nei_index]) = 0; + } + L(v_id, v_id) = 1; + } + }; + + auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, init_lambda); +} @@ -292,6 +355,9 @@ int main(int argc, char** argv) //input + auto constraints = *rx.add_vertex_attribute("FixedVertices", 1); + + constexpr uint32_t CUDABlockSize = 256; rxmesh::LaunchBox input_launch_box; rx.prepare_launch_box( @@ -302,20 +368,16 @@ int main(int argc, char** argv) test_input<<>>( - rx.get_context(), ref_vertex_pos, *changed_vertex_pos); + rx.get_context(), ref_vertex_pos, *changed_vertex_pos, constraints); changed_vertex_pos->move(DEVICE, HOST); - + rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); //process - /* + //compute wij auto weights = rx.add_edge_attribute("edgeWeights", 1); - - - - SparseMatrix weight_matrix(rx); //obtain cotangent weight matrix @@ -329,7 +391,7 @@ int main(int argc, char** argv) launch_box.num_threads, launch_box.smem_bytes_dyn>>>( rx.get_context(), ref_vertex_pos, weight_matrix); - + /* //visualise edge weights rxmesh::LaunchBox launch_box2; rx.prepare_launch_box( @@ -346,9 +408,10 @@ int main(int argc, char** argv) //pi and p'i - //rx.get_polyscope_mesh()->addEdgeScalarQuantity("edgeWeights", 
*weights); - // + */ + + // //calculate rotation matrix auto rot_mat = *rx.add_vertex_attribute("RotationMatrix", 9); @@ -371,10 +434,81 @@ int main(int argc, char** argv) changed_vertex_pos->move(DEVICE, HOST); + ///position calculation + /** Calculate bMatrix */ + uint32_t num_vertices = rx.get_num_vertices(); + + + //Eigen::MatrixXf bMatrix = Eigen::MatrixXf::Zero(num_vertices, 3); + + DenseMatrix bMatrix(rx, num_vertices, 3); + + //DenseMatrix B_mat(rx, num_vertices, 3); + + // call function to calculate bMatrix entries parallely + rxmesh::LaunchBox launch_box_bMatrix; + rx.prepare_launch_box({rxmesh::Op::VV}, + launch_box_bMatrix, + (void*)calculate_b); + + calculate_b<<>>( + rx.get_context(), *changed_vertex_pos, rot_mat, weight_matrix, bMatrix); + + // Calculate System Matrix L + //Eigen::MatrixXf systemMatrix = Eigen::MatrixXf::Zero(num_vertices, num_vertices); + SparseMatrix systemMatrix(rx); + + + // VertexAttribute that will store + + // call function to calculate L Matrix entries parallely + rxmesh::LaunchBox launch_box_L; + rx.prepare_launch_box({rxmesh::Op::VV}, + launch_box_L, + (void*)calculate_system_matrix); + + calculate_system_matrix + <<>>( + rx.get_context(), weight_matrix, systemMatrix, constraints); + + // incorporating constraints. Keep the static and user modified vertices the + // same + // TODO: check with Ahmed if the following code is correct + // (do I need to move matrices from GPU to CPU to run following code?) 
+ /* + for (int ids : constraints) { + systemMatrix.row(ids).setZero(); + systemMatrix(ids, ids) = 1; + } */ - rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); + + + // solve eq9 by Cholesky factorization + auto coords = rx.get_input_vertex_coordinates(); + std::shared_ptr> X_mat = coords->to_matrix(); + + // Solving using CHOL + systemMatrix.pre_solve(PermuteMethod::NSTDIS); + systemMatrix.solve(bMatrix, *X_mat); + + + // move the results to the host + X_mat->move(rxmesh::DEVICE, rxmesh::HOST); + + // copy the results to attributes + coords->from_matrix(X_mat.get()); + + + // visualize new position + rx.get_polyscope_mesh()->updateVertexPositions(*coords); + + //rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); #if USE_POLYSCOPE From 818c48d676df92cc49cc7a403b13295de518cd57 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Mon, 29 Jul 2024 20:30:55 +0530 Subject: [PATCH 60/96] ensure constraints on both moved and fixed points on the mesh --- apps/ARAP/arap.cu | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index cbb9a91c..18a94c62 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -229,7 +229,10 @@ __global__ static void test_input( constrained(v_id, 0) = 1; } else { - constrained(v_id, 0) = 0; + if (current_coords(v_id, 1) < 0.025) + constrained(v_id, 0) = 1; + else + constrained(v_id, 0) = 0; } }; @@ -344,8 +347,8 @@ int main(int argc, char** argv) const uint32_t device_id = 0; cuda_query(device_id); - //RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "dragon.obj"); + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + //RXMeshStatic rx(STRINGIFY(INPUT_DIR) "dragon.obj"); auto ref_vertex_pos = From 9e6ca941b317d8e306096310c3147440c8a6195c Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Mon, 29 Jul 2024 21:59:19 +0530 Subject: [PATCH 61/96] testing on a cube --- apps/ARAP/arap.cu | 43 
+++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 18a94c62..c59beb38 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -149,8 +149,6 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex current_coords(v_id, 2) - current_coords(vv[j], 2)}; S = S + w * pi_vector * pi_dash_vector.transpose(); - - } // perform svd on S (eigen) @@ -166,10 +164,14 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex const float smallest_singular_value = sing_val.minCoeff(); - U.col(smallest_singular_value) = U.col(smallest_singular_value) * -1; Eigen::Matrix3f R = V * U.transpose(); - + + if (R.determinant() < 0) { + U.col(smallest_singular_value) = + U.col(smallest_singular_value) * -1; + R = V * U.transpose(); + } // Matrix R to vector attribute R @@ -184,13 +186,8 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex /* Eigen::MatrixXf V = S.jacobiSvd().matrixV(); Eigen::MatrixXf U = S.jacobiSvd().matrixU().eval(); - - - - - */ - /* + Eigen::Vector3 new_coords = {current_coords(v_id, 0), current_coords(v_id, 1), current_coords(v_id, 2)}; @@ -199,7 +196,7 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex current_coords(v_id, 0) = new_coords[0]; current_coords(v_id, 1) = new_coords[1]; current_coords(v_id, 2) = new_coords[2]; - */ + }; auto block = cooperative_groups::this_thread_block(); @@ -347,7 +344,7 @@ int main(int argc, char** argv) const uint32_t device_id = 0; cuda_query(device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "cube.obj"); //RXMeshStatic rx(STRINGIFY(INPUT_DIR) "dragon.obj"); @@ -358,7 +355,7 @@ int main(int argc, char** argv) //input - auto constraints = *rx.add_vertex_attribute("FixedVertices", 1); + auto constraints = rx.add_vertex_attribute("FixedVertices", 1); constexpr uint32_t 
CUDABlockSize = 256; @@ -371,9 +368,10 @@ int main(int argc, char** argv) test_input<<>>( - rx.get_context(), ref_vertex_pos, *changed_vertex_pos, constraints); + rx.get_context(), ref_vertex_pos, *changed_vertex_pos, *constraints); changed_vertex_pos->move(DEVICE, HOST); + constraints->move(DEVICE, HOST); rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); //process @@ -436,10 +434,10 @@ int main(int argc, char** argv) weight_matrix); changed_vertex_pos->move(DEVICE, HOST); - + /* ///position calculation - /** Calculate bMatrix */ + /** Calculate bMatrix uint32_t num_vertices = rx.get_num_vertices(); @@ -477,7 +475,7 @@ int main(int argc, char** argv) <<>>( - rx.get_context(), weight_matrix, systemMatrix, constraints); + rx.get_context(), weight_matrix, systemMatrix, *constraints); // incorporating constraints. Keep the static and user modified vertices the // same @@ -497,8 +495,8 @@ int main(int argc, char** argv) std::shared_ptr> X_mat = coords->to_matrix(); // Solving using CHOL - systemMatrix.pre_solve(PermuteMethod::NSTDIS); - systemMatrix.solve(bMatrix, *X_mat); + //systemMatrix.pre_solve(PermuteMethod::NSTDIS); + //systemMatrix.solve(bMatrix, *X_mat); // move the results to the host @@ -509,11 +507,12 @@ int main(int argc, char** argv) // visualize new position - rx.get_polyscope_mesh()->updateVertexPositions(*coords); - - //rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); + //rx.get_polyscope_mesh()->updateVertexPositions(*coords); + rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); + rx.get_polyscope_mesh()->addVertexScalarQuantity("fixedVertices", *constraints); + #if USE_POLYSCOPE polyscope::show(); #endif From bbf9fe0015a09d271b931f9eb0fcabeb915e9d43 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Mon, 29 Jul 2024 23:10:35 +0530 Subject: [PATCH 62/96] initialise S, fix rotation multiplication order, restore weights to cotangent value --- apps/ARAP/arap.cu | 17 ++++++++++------- 1 file changed, 
10 insertions(+), 7 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index c59beb38..90a31004 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -55,6 +55,7 @@ __global__ static void compute_edge_weights(const rxmesh::Context context, VertexHandle s_id = (v == vv.size() - 1) ? vv[0] : vv[v + 1]; e_weight = edge_cotan_weight(vertex_id, r_id, q_id, s_id, coords); A_mat(vertex_id, vv[v]) = e_weight; + A_mat(vertex_id, vv[v]) = 1; } }; @@ -74,7 +75,8 @@ __global__ static void compute_edge_weights_evd(const rxmesh::Context conte auto vn_lambda = [&](EdgeHandle edge_id, VertexIterator& vv) { T e_weight = 0; e_weight = edge_cotan_weight(vv[0], vv[2], vv[1], vv[3], coords); - A_mat(vv[0], vv[2]) = e_weight; + A_mat(vv[0], vv[2]) = e_weight; + //A_mat(vv[0], vv[2]) = 1; }; @@ -131,7 +133,7 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex { auto vn_lambda = [&](VertexHandle v_id, VertexIterator& vv) { - Eigen::Matrix3f S; + Eigen::Matrix3f S=Eigen::Matrix3f::Zero(); for (int j=0;j new_coords = {current_coords(v_id, 0), current_coords(v_id, 1), current_coords(v_id, 2)}; - new_coords = new_coords.transpose() * R; + new_coords = R*new_coords; current_coords(v_id, 0) = new_coords[0]; current_coords(v_id, 1) = new_coords[1]; @@ -221,12 +223,12 @@ __global__ static void test_input( current_coords(v_id, 1) = ref_coords(v_id, 1); current_coords(v_id, 2) = ref_coords(v_id, 2); - if (current_coords(v_id,1)>0.1) { + if (current_coords(v_id,1)>0.25) { current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; constrained(v_id, 0) = 1; } else { - if (current_coords(v_id, 1) < 0.025) + if (current_coords(v_id, 0) < 0.025) constrained(v_id, 0) = 1; else constrained(v_id, 0) = 0; @@ -371,7 +373,7 @@ int main(int argc, char** argv) rx.get_context(), ref_vertex_pos, *changed_vertex_pos, *constraints); changed_vertex_pos->move(DEVICE, HOST); - constraints->move(DEVICE, HOST); + constraints->move(DEVICE, HOST); 
rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); //process @@ -437,7 +439,7 @@ int main(int argc, char** argv) /* ///position calculation - /** Calculate bMatrix + // Calculate bMatrix uint32_t num_vertices = rx.get_num_vertices(); @@ -515,5 +517,6 @@ int main(int argc, char** argv) #if USE_POLYSCOPE polyscope::show(); + polyscope::shutdown(); #endif } \ No newline at end of file From e389d554b67d238ec06941c5018eff0ede4772f6 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Mon, 29 Jul 2024 23:37:38 +0530 Subject: [PATCH 63/96] swap cholesky with QR solver --- apps/ARAP/arap.cu | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 90a31004..f8871331 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -190,6 +190,8 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex Eigen::MatrixXf U = S.jacobiSvd().matrixU().eval(); */ + //Apply rotation + /* Eigen::Vector3 new_coords = {current_coords(v_id, 0), current_coords(v_id, 1), current_coords(v_id, 2)}; @@ -198,7 +200,7 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex current_coords(v_id, 0) = new_coords[0]; current_coords(v_id, 1) = new_coords[1]; current_coords(v_id, 2) = new_coords[2]; - + */ }; auto block = cooperative_groups::this_thread_block(); @@ -224,12 +226,15 @@ __global__ static void test_input( current_coords(v_id, 2) = ref_coords(v_id, 2); if (current_coords(v_id,1)>0.25) { - current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; - constrained(v_id, 0) = 1; + //current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; + constrained(v_id, 0) = 0; } else { - if (current_coords(v_id, 0) < 0.025) + if (current_coords(v_id, 0) < 0.025) { constrained(v_id, 0) = 1; + current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; + + } else constrained(v_id, 0) = 0; } @@ -435,8 +440,8 @@ int main(int argc, char** argv) rot_mat, weight_matrix); - 
changed_vertex_pos->move(DEVICE, HOST); - /* + //changed_vertex_pos->move(DEVICE, HOST); + ///position calculation // Calculate bMatrix @@ -500,6 +505,7 @@ int main(int argc, char** argv) //systemMatrix.pre_solve(PermuteMethod::NSTDIS); //systemMatrix.solve(bMatrix, *X_mat); + systemMatrix.solve(bMatrix, *X_mat, Solver::QR, PermuteMethod::NSTDIS); // move the results to the host X_mat->move(rxmesh::DEVICE, rxmesh::HOST); @@ -509,9 +515,9 @@ int main(int argc, char** argv) // visualize new position - //rx.get_polyscope_mesh()->updateVertexPositions(*coords); + rx.get_polyscope_mesh()->updateVertexPositions(*coords); - rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); + //rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); rx.get_polyscope_mesh()->addVertexScalarQuantity("fixedVertices", *constraints); From 23c30c291301ad0f41b41aa24b325bf3aa792d60 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Tue, 30 Jul 2024 00:13:40 +0530 Subject: [PATCH 64/96] incorporate constraints into b matrix --- apps/ARAP/arap.cu | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index f8871331..1b4e7191 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -226,13 +226,13 @@ __global__ static void test_input( current_coords(v_id, 2) = ref_coords(v_id, 2); if (current_coords(v_id,1)>0.25) { - //current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; - constrained(v_id, 0) = 0; + current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; + constrained(v_id, 0) = 1; } else { if (current_coords(v_id, 0) < 0.025) { constrained(v_id, 0) = 1; - current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; + //current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; } else @@ -256,7 +256,8 @@ __global__ static void calculate_b( rxmesh::VertexAttribute original_coords, // [num_coord, 3] rxmesh::VertexAttribute rot_mat, // [num_coord, 9] rxmesh::SparseMatrix weight_mat, 
// [num_coord, num_coord] - rxmesh::DenseMatrix bMatrix) // [num_coord, 3] + rxmesh::DenseMatrix bMatrix, // [num_coord, 3] + rxmesh::VertexAttribute constrained) { auto init_lambda = [&](VertexHandle v_id, VertexIterator& vv) { @@ -271,8 +272,9 @@ __global__ static void calculate_b( for (int j = 0; j < 3; j++) Ri(i, j) = rot_mat(v_id, i * 3 + j); } - Eigen::VectorXf w; - w.resize(vv.size()); + Eigen::VectorXf w = Eigen::VectorXf::Zero(vv.size()); + //w.resize(vv.size()); + for (int v = 0; v < vv.size(); v++) { w(v) = weight_mat(v_id, vv[v]); @@ -298,10 +300,20 @@ __global__ static void calculate_b( // update bi bi = bi + 0.5 * w[nei_index] * rot_add * vert_diff; } - - bMatrix(v_id, 0) = bi[0]; + + + if (constrained(v_id, 0) == 0) { + bMatrix(v_id, 0) = bi[0]; bMatrix(v_id, 1) = bi[1]; bMatrix(v_id, 2) = bi[2]; + } + else + { + bMatrix(v_id, 0) = original_coords(v_id,0); + bMatrix(v_id, 1) = original_coords(v_id, 1); + bMatrix(v_id, 2) = original_coords(v_id, 2); + } + }; auto block = cooperative_groups::this_thread_block(); @@ -351,7 +363,7 @@ int main(int argc, char** argv) const uint32_t device_id = 0; cuda_query(device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "cube.obj"); + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); //RXMeshStatic rx(STRINGIFY(INPUT_DIR) "dragon.obj"); @@ -463,7 +475,7 @@ int main(int argc, char** argv) calculate_b<<>>( - rx.get_context(), *changed_vertex_pos, rot_mat, weight_matrix, bMatrix); + rx.get_context(), *changed_vertex_pos, rot_mat, weight_matrix, bMatrix, *constraints); // Calculate System Matrix L //Eigen::MatrixXf systemMatrix = Eigen::MatrixXf::Zero(num_vertices, num_vertices); @@ -499,7 +511,7 @@ int main(int argc, char** argv) // solve eq9 by Cholesky factorization auto coords = rx.get_input_vertex_coordinates(); - std::shared_ptr> X_mat = coords->to_matrix(); + std::shared_ptr> X_mat = changed_vertex_pos->to_matrix(); // Solving using CHOL //systemMatrix.pre_solve(PermuteMethod::NSTDIS); From 
643b7187cfa89044e4beba016b8fe5d414adf116 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Tue, 30 Jul 2024 02:15:26 +0530 Subject: [PATCH 65/96] reorganised code --- apps/ARAP/arap.cu | 212 +++++++++++++++++++--------------------------- 1 file changed, 89 insertions(+), 123 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 1b4e7191..33f8af49 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -225,13 +225,13 @@ __global__ static void test_input( current_coords(v_id, 1) = ref_coords(v_id, 1); current_coords(v_id, 2) = ref_coords(v_id, 2); - if (current_coords(v_id,1)>0.25) { + if (current_coords(v_id,1)>0.75) { current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; constrained(v_id, 0) = 1; } else { - if (current_coords(v_id, 0) < 0.025) { - constrained(v_id, 0) = 1; + if (current_coords(v_id, 1) < 0.025) { + constrained(v_id, 0) = 2; //current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; } @@ -272,15 +272,9 @@ __global__ static void calculate_b( for (int j = 0; j < 3; j++) Ri(i, j) = rot_mat(v_id, i * 3 + j); } - Eigen::VectorXf w = Eigen::VectorXf::Zero(vv.size()); - //w.resize(vv.size()); - - - for (int v = 0; v < vv.size(); v++) { - w(v) = weight_mat(v_id, vv[v]); - } - for (int nei_index = 0; nei_index < vv.size(); nei_index++) { + for (int nei_index = 0; nei_index < vv.size(); nei_index++) + { // get rotation matrix for neightbor j Eigen::Matrix3f Rj = Eigen::Matrix3f::Zero(3, 3); for (int i = 0; i < 3; i++) @@ -298,14 +292,13 @@ __global__ static void calculate_b( }; // update bi - bi = bi + 0.5 * w[nei_index] * rot_add * vert_diff; + bi = bi + 0.5 * weight_mat(v_id, vv[nei_index]) * rot_add * vert_diff; } - - + if (constrained(v_id, 0) == 0) { - bMatrix(v_id, 0) = bi[0]; - bMatrix(v_id, 1) = bi[1]; - bMatrix(v_id, 2) = bi[2]; + bMatrix(v_id, 0) = bi[0]; + bMatrix(v_id, 1) = bi[1]; + bMatrix(v_id, 2) = bi[2]; } else { @@ -313,7 +306,6 @@ __global__ static void calculate_b( bMatrix(v_id, 1) = original_coords(v_id, 1); 
bMatrix(v_id, 2) = original_coords(v_id, 2); } - }; auto block = cooperative_groups::this_thread_block(); @@ -333,6 +325,10 @@ __global__ static void calculate_system_matrix( { auto init_lambda = [&](VertexHandle v_id, VertexIterator& vv) { + L(v_id, v_id) = 0; + for (int nei_index = 0; nei_index < vv.size(); nei_index++) + L(v_id, vv[nei_index]) = 0; + if (constrained(v_id, 0)==0) { for (int nei_index = 0; nei_index < vv.size(); nei_index++) { @@ -373,10 +369,9 @@ int main(int argc, char** argv) rx.add_vertex_attribute("P", 3); // changes per iteration + //input auto constraints = rx.add_vertex_attribute("FixedVertices", 1); - - constexpr uint32_t CUDABlockSize = 256; rxmesh::LaunchBox input_launch_box; rx.prepare_launch_box( @@ -396,7 +391,7 @@ int main(int argc, char** argv) - //compute wij + //compute weights auto weights = rx.add_edge_attribute("edgeWeights", 1); SparseMatrix weight_matrix(rx); @@ -411,60 +406,33 @@ int main(int argc, char** argv) launch_box.num_threads, launch_box.smem_bytes_dyn>>>( rx.get_context(), ref_vertex_pos, weight_matrix); - /* - //visualise edge weights - rxmesh::LaunchBox launch_box2; - rx.prepare_launch_box( - {rxmesh::Op::EV}, - launch_box2, - (void*)edge_weight_values); - - edge_weight_values - <<>>(rx.get_context(), *weights, weight_matrix ); - - weights->move(DEVICE, HOST); + // Calculate System + SparseMatrix systemMatrix(rx); + // call function to calculate L Matrix entries parallely + rxmesh::LaunchBox launch_box_L; + rx.prepare_launch_box( + {rxmesh::Op::VV}, + launch_box_L, + (void*)calculate_system_matrix); - //pi and p'i - //rx.get_polyscope_mesh()->addEdgeScalarQuantity("edgeWeights", *weights); - */ + calculate_system_matrix + <<>>( + rx.get_context(), weight_matrix, systemMatrix, *constraints); - // + systemMatrix.move(DEVICE, HOST); - //calculate rotation matrix - auto rot_mat = *rx.add_vertex_attribute("RotationMatrix", 9); + auto rot_mat = *rx.add_vertex_attribute("RotationMatrix", 9); rxmesh::LaunchBox 
rotation_launch_box; + rx.prepare_launch_box( + {rxmesh::Op::VV}, + rotation_launch_box, + (void*)calculate_rotation_matrix); - - rx.prepare_launch_box({rxmesh::Op::VV}, - rotation_launch_box, - (void*)calculate_rotation_matrix); - - calculate_rotation_matrix - <<>>(rx.get_context(), - ref_vertex_pos, - *changed_vertex_pos, - rot_mat, - weight_matrix); - - //changed_vertex_pos->move(DEVICE, HOST); - - ///position calculation - - // Calculate bMatrix - uint32_t num_vertices = rx.get_num_vertices(); - - - //Eigen::MatrixXf bMatrix = Eigen::MatrixXf::Zero(num_vertices, 3); - - DenseMatrix bMatrix(rx, num_vertices, 3); - - //DenseMatrix B_mat(rx, num_vertices, 3); + DenseMatrix bMatrix(rx, rx.get_num_vertices(), 3); // call function to calculate bMatrix entries parallely rxmesh::LaunchBox launch_box_bMatrix; @@ -472,69 +440,67 @@ int main(int argc, char** argv) launch_box_bMatrix, (void*)calculate_b); - calculate_b<<>>( - rx.get_context(), *changed_vertex_pos, rot_mat, weight_matrix, bMatrix, *constraints); - // Calculate System Matrix L - //Eigen::MatrixXf systemMatrix = Eigen::MatrixXf::Zero(num_vertices, num_vertices); - SparseMatrix systemMatrix(rx); - - // VertexAttribute that will store + DenseMatrix X_mat(rx, rx.get_num_vertices(), 3); - // call function to calculate L Matrix entries parallely - rxmesh::LaunchBox launch_box_L; - rx.prepare_launch_box({rxmesh::Op::VV}, - launch_box_L, - (void*)calculate_system_matrix); - - calculate_system_matrix - <<>>( - rx.get_context(), weight_matrix, systemMatrix, *constraints); - // incorporating constraints. Keep the static and user modified vertices the - // same - // TODO: check with Ahmed if the following code is correct - // (do I need to move matrices from GPU to CPU to run following code?) - /* - for (int ids : constraints) { - systemMatrix.row(ids).setZero(); - systemMatrix(ids, ids) = 1; - } - */ + //how many times will arap algorithm run? 
+ int iterations = 1; + for (int i=0;i + <<>>(rx.get_context(), + ref_vertex_pos, + *changed_vertex_pos, + rot_mat, + weight_matrix); + + //changed_vertex_pos->move(DEVICE, HOST); + calculate_b + <<>>(rx.get_context(), + ref_vertex_pos, + rot_mat, + weight_matrix, + bMatrix, + *constraints); + + bMatrix.move(DEVICE, HOST); + systemMatrix.solve(bMatrix, X_mat, Solver::LU, PermuteMethod::NSTDIS); + + changed_vertex_pos->from_matrix(&X_mat); + + + rx.get_polyscope_mesh()->updateVertexPositions(changed_vertex_pos); + } + // visualize new position + + rx.get_polyscope_mesh()->addVertexScalarQuantity("fixedVertices", + *constraints); + +#if USE_POLYSCOPE + polyscope::show(); + polyscope::shutdown(); +#endif +} - - // solve eq9 by Cholesky factorization - auto coords = rx.get_input_vertex_coordinates(); - std::shared_ptr> X_mat = changed_vertex_pos->to_matrix(); - - // Solving using CHOL - //systemMatrix.pre_solve(PermuteMethod::NSTDIS); - //systemMatrix.solve(bMatrix, *X_mat); +//just in case - systemMatrix.solve(bMatrix, *X_mat, Solver::QR, PermuteMethod::NSTDIS); - - // move the results to the host - X_mat->move(rxmesh::DEVICE, rxmesh::HOST); - - // copy the results to attributes - coords->from_matrix(X_mat.get()); +// Solving using CHOL +// systemMatrix.pre_solve(PermuteMethod::NSTDIS); +// systemMatrix.solve(bMatrix, *X_mat); +// systemMatrix.solve(bMatrix, *X_mat, Solver::QR, PermuteMethod::NSTDIS); - // visualize new position - rx.get_polyscope_mesh()->updateVertexPositions(*coords); - //rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); +// move the results to the host +// X_mat->move(rxmesh::DEVICE, rxmesh::HOST); - rx.get_polyscope_mesh()->addVertexScalarQuantity("fixedVertices", *constraints); - -#if USE_POLYSCOPE - polyscope::show(); - polyscope::shutdown(); -#endif -} \ No newline at end of file +// copy the results to attributes +// changed_vertex_pos->from_matrix(X_mat.get()); \ No newline at end of file From 
9c00008e215bf50988d093a196eb391cfddb2fcf Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Tue, 30 Jul 2024 11:47:27 +0530 Subject: [PATCH 66/96] better input --- apps/ARAP/arap.cu | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 33f8af49..8347e741 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -76,7 +76,7 @@ __global__ static void compute_edge_weights_evd(const rxmesh::Context conte T e_weight = 0; e_weight = edge_cotan_weight(vv[0], vv[2], vv[1], vv[3], coords); A_mat(vv[0], vv[2]) = e_weight; - //A_mat(vv[0], vv[2]) = 1; + A_mat(vv[0], vv[2]) = 1; }; @@ -225,12 +225,12 @@ __global__ static void test_input( current_coords(v_id, 1) = ref_coords(v_id, 1); current_coords(v_id, 2) = ref_coords(v_id, 2); - if (current_coords(v_id,1)>0.75) { - current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; + if (current_coords(v_id,1)>1.25) { + current_coords(v_id, 1) = current_coords(v_id, 1) + 0.25; constrained(v_id, 0) = 1; } else { - if (current_coords(v_id, 1) < 0.025) { + if (current_coords(v_id, 1) < -0.34) { constrained(v_id, 0) = 2; //current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; @@ -442,7 +442,8 @@ int main(int argc, char** argv) - DenseMatrix X_mat(rx, rx.get_num_vertices(), 3); + //DenseMatrix X_mat(rx, rx.get_num_vertices(), 3); + std::shared_ptr> X_mat = changed_vertex_pos->to_matrix(); //how many times will arap algorithm run? 
@@ -471,15 +472,15 @@ int main(int argc, char** argv) *constraints); bMatrix.move(DEVICE, HOST); - systemMatrix.solve(bMatrix, X_mat, Solver::LU, PermuteMethod::NSTDIS); + systemMatrix.solve(bMatrix, *X_mat, Solver::LU, PermuteMethod::NSTDIS); - changed_vertex_pos->from_matrix(&X_mat); + changed_vertex_pos->from_matrix(X_mat.get()); - rx.get_polyscope_mesh()->updateVertexPositions(changed_vertex_pos); } // visualize new position - + //rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); + rx.get_polyscope_mesh()->addVertexScalarQuantity("fixedVertices", *constraints); From ca38374f4d9ac2ca49ba8c92e73f9c6e94648cfb Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Tue, 30 Jul 2024 21:28:13 +0530 Subject: [PATCH 67/96] ensure symmetric weight matrix is symmetric. Fix b matrix entries for fixed points --- apps/ARAP/arap.cu | 53 +++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 8347e741..f21149e1 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -76,7 +76,10 @@ __global__ static void compute_edge_weights_evd(const rxmesh::Context conte T e_weight = 0; e_weight = edge_cotan_weight(vv[0], vv[2], vv[1], vv[3], coords); A_mat(vv[0], vv[2]) = e_weight; - A_mat(vv[0], vv[2]) = 1; + A_mat(vv[2], vv[0]) = e_weight; + + //A_mat(vv[0], vv[2]) = 1; + //A_mat(vv[2], vv[0]) = 1; }; @@ -180,23 +183,14 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) rotationVector(v_id, i * 3 + j) = R(i, j); - - - //Eigen::JacobiSVD svd(S); - - - /* - Eigen::MatrixXf V = S.jacobiSvd().matrixV(); - Eigen::MatrixXf U = S.jacobiSvd().matrixU().eval(); - */ //Apply rotation - /* + Eigen::Vector3 new_coords = {current_coords(v_id, 0), current_coords(v_id, 1), current_coords(v_id, 2)}; - new_coords = R*new_coords; - + /* new_coords = R * new_coords; + current_coords(v_id, 0) = new_coords[0]; 
current_coords(v_id, 1) = new_coords[1]; current_coords(v_id, 2) = new_coords[2]; @@ -226,7 +220,7 @@ __global__ static void test_input( current_coords(v_id, 2) = ref_coords(v_id, 2); if (current_coords(v_id,1)>1.25) { - current_coords(v_id, 1) = current_coords(v_id, 1) + 0.25; + current_coords(v_id, 1) = current_coords(v_id, 1) + 0.65; constrained(v_id, 0) = 1; } else { @@ -254,6 +248,7 @@ template __global__ static void calculate_b( const rxmesh::Context context, rxmesh::VertexAttribute original_coords, // [num_coord, 3] + rxmesh::VertexAttribute changed_coords, // [num_coord, 3] rxmesh::VertexAttribute rot_mat, // [num_coord, 9] rxmesh::SparseMatrix weight_mat, // [num_coord, num_coord] rxmesh::DenseMatrix bMatrix, // [num_coord, 3] @@ -302,9 +297,9 @@ __global__ static void calculate_b( } else { - bMatrix(v_id, 0) = original_coords(v_id,0); - bMatrix(v_id, 1) = original_coords(v_id, 1); - bMatrix(v_id, 2) = original_coords(v_id, 2); + bMatrix(v_id, 0) = changed_coords(v_id,0); + bMatrix(v_id, 1) = changed_coords(v_id, 1); + bMatrix(v_id, 2) = changed_coords(v_id, 2); } }; @@ -329,15 +324,18 @@ __global__ static void calculate_system_matrix( for (int nei_index = 0; nei_index < vv.size(); nei_index++) L(v_id, vv[nei_index]) = 0; - if (constrained(v_id, 0)==0) { + if (constrained(v_id, 0)==0) + { for (int nei_index = 0; nei_index < vv.size(); nei_index++) { L(v_id, v_id) += weight_mat(v_id, vv[nei_index]); L(v_id, vv[nei_index]) -= weight_mat(v_id, vv[nei_index]); } } - else { - for (int nei_index = 0; nei_index < vv.size(); nei_index++) { + else + { + for (int nei_index = 0; nei_index < vv.size(); nei_index++) + { L(v_id, vv[nei_index]) = 0; } L(v_id, v_id) = 1; @@ -359,7 +357,7 @@ int main(int argc, char** argv) const uint32_t device_id = 0; cuda_query(device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere1.obj"); //RXMeshStatic rx(STRINGIFY(INPUT_DIR) "dragon.obj"); @@ -447,7 +445,7 @@ int main(int 
argc, char** argv) //how many times will arap algorithm run? - int iterations = 1; + int iterations = 0; for (int i=0;imove(DEVICE, HOST); + changed_vertex_pos->move(DEVICE, HOST); calculate_b <<>>(rx.get_context(), ref_vertex_pos, + *changed_vertex_pos, rot_mat, weight_matrix, bMatrix, *constraints); + bMatrix.move(DEVICE, HOST); + + X_mat = changed_vertex_pos->to_matrix(); systemMatrix.solve(bMatrix, *X_mat, Solver::LU, PermuteMethod::NSTDIS); - + //systemMatrix.solve(bMatrix, *X_mat, Solver::QR, PermuteMethod::NSTDIS); + X_mat->move(DEVICE, HOST); changed_vertex_pos->from_matrix(X_mat.get()); } // visualize new position - //rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); + rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); rx.get_polyscope_mesh()->addVertexScalarQuantity("fixedVertices", *constraints); From 8ea2c619ceac1d4a15ac09b072b841a89d511b0d Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Tue, 30 Jul 2024 22:48:41 +0530 Subject: [PATCH 68/96] working with QR solver --- apps/ARAP/arap.cu | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index f21149e1..ae4a394e 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -33,7 +33,7 @@ edge_cotan_weight(const rxmesh::VertexHandle& p_id, if (s_id.is_valid()) weight += dot((p - s), (r - s)) / length(cross(p - s, r - s)); weight /= 2; - return weight; + return std::max(0.f, weight); } @@ -219,8 +219,10 @@ __global__ static void test_input( current_coords(v_id, 1) = ref_coords(v_id, 1); current_coords(v_id, 2) = ref_coords(v_id, 2); - if (current_coords(v_id,1)>1.25) { - current_coords(v_id, 1) = current_coords(v_id, 1) + 0.65; + if (current_coords(v_id,1)>1.35) { + current_coords(v_id, 1) = current_coords(v_id, 1) + 0.55; + current_coords(v_id, 0) = current_coords(v_id, 0) + 0.75; + current_coords(v_id, 2) = current_coords(v_id, 2) + 0.35; constrained(v_id, 0) = 1; } else { @@ -297,9 
+299,9 @@ __global__ static void calculate_b( } else { - bMatrix(v_id, 0) = changed_coords(v_id,0); - bMatrix(v_id, 1) = changed_coords(v_id, 1); - bMatrix(v_id, 2) = changed_coords(v_id, 2); + bMatrix(v_id, 0) = original_coords(v_id,0); + bMatrix(v_id, 1) = original_coords(v_id, 1); + bMatrix(v_id, 2) = original_coords(v_id, 2); } }; @@ -357,7 +359,7 @@ int main(int argc, char** argv) const uint32_t device_id = 0; cuda_query(device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere1.obj"); + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); //RXMeshStatic rx(STRINGIFY(INPUT_DIR) "dragon.obj"); @@ -445,7 +447,7 @@ int main(int argc, char** argv) //how many times will arap algorithm run? - int iterations = 0; + int iterations = 10; for (int i=0;imove(DEVICE, HOST); + //changed_vertex_pos->move(DEVICE, HOST); calculate_b <<to_matrix(); - systemMatrix.solve(bMatrix, *X_mat, Solver::LU, PermuteMethod::NSTDIS); - //systemMatrix.solve(bMatrix, *X_mat, Solver::QR, PermuteMethod::NSTDIS); + //systemMatrix.solve(bMatrix, *X_mat, Solver::LU, PermuteMethod::NSTDIS); + systemMatrix.solve(bMatrix, *X_mat, Solver::QR, PermuteMethod::NSTDIS); X_mat->move(DEVICE, HOST); changed_vertex_pos->from_matrix(X_mat.get()); - - } // visualize new position rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); From 660cbb4e4adbe12094ba7e64b8ef7d80e09f6a9d Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Tue, 30 Jul 2024 22:52:53 +0530 Subject: [PATCH 69/96] fix b matrix calculation for fixed vertices --- apps/ARAP/arap.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index ae4a394e..0a27c57b 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -220,9 +220,9 @@ __global__ static void test_input( current_coords(v_id, 2) = ref_coords(v_id, 2); if (current_coords(v_id,1)>1.35) { - current_coords(v_id, 1) = current_coords(v_id, 1) + 0.55; - current_coords(v_id, 0) = current_coords(v_id, 0) + 0.75; - 
current_coords(v_id, 2) = current_coords(v_id, 2) + 0.35; + current_coords(v_id, 1) = current_coords(v_id, 1) + 1.55; + current_coords(v_id, 0) = current_coords(v_id, 0) + 1.75; + current_coords(v_id, 2) = current_coords(v_id, 2) + 2.35; constrained(v_id, 0) = 1; } else { @@ -299,9 +299,9 @@ __global__ static void calculate_b( } else { - bMatrix(v_id, 0) = original_coords(v_id,0); - bMatrix(v_id, 1) = original_coords(v_id, 1); - bMatrix(v_id, 2) = original_coords(v_id, 2); + bMatrix(v_id, 0) = changed_coords(v_id,0); + bMatrix(v_id, 1) = changed_coords(v_id, 1); + bMatrix(v_id, 2) = changed_coords(v_id, 2); } }; From 5b70700c0681cd90e11de9580ed6b2f24e85f0b2 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Wed, 31 Jul 2024 01:45:18 +0530 Subject: [PATCH 70/96] working implementation with spot --- apps/ARAP/arap.cu | 141 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 127 insertions(+), 14 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 0a27c57b..4db06828 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -7,6 +7,8 @@ #include "rxmesh/util/svd3_cuda.h" +#include "polyscope/polyscope.h" + using namespace rxmesh; template @@ -204,11 +206,11 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex } template -__global__ static void test_input( - const rxmesh::Context context, - rxmesh::VertexAttribute ref_coords, - rxmesh::VertexAttribute current_coords, - rxmesh::VertexAttribute constrained +__global__ static void test_input(const rxmesh::Context context, + rxmesh::VertexAttribute ref_coords, + rxmesh::VertexAttribute current_coords, + rxmesh::VertexAttribute constrained, + Eigen::Vector3f displacement ) { @@ -218,11 +220,11 @@ __global__ static void test_input( current_coords(v_id, 0) = ref_coords(v_id, 0); current_coords(v_id, 1) = ref_coords(v_id, 1); current_coords(v_id, 2) = ref_coords(v_id, 2); - + /* if (current_coords(v_id,1)>1.35) { - current_coords(v_id, 1) = current_coords(v_id, 1) + 1.55; 
- current_coords(v_id, 0) = current_coords(v_id, 0) + 1.75; - current_coords(v_id, 2) = current_coords(v_id, 2) + 2.35; + current_coords(v_id, 1) = current_coords(v_id, 1) + displacement.x(); + current_coords(v_id, 0) =current_coords(v_id, 0) + displacement.z(); + current_coords(v_id, 2) =current_coords(v_id, 2) + displacement.y(); constrained(v_id, 0) = 1; } else { @@ -234,6 +236,36 @@ __global__ static void test_input( else constrained(v_id, 0) = 0; } + */ + + + if (current_coords(v_id, 0) < -0.15) + { + //current_coords(v_id, 0) =current_coords(v_id, 0) + displacement.x(); + //current_coords(v_id, 1) =current_coords(v_id, 1) + displacement.y(); + //current_coords(v_id, 2) =current_coords(v_id, 2) + displacement.z(); + constrained(v_id, 0) = 2; + } + + else if (current_coords(v_id, 1) < -0.65) { + current_coords(v_id, 0) = + current_coords(v_id, 0) + displacement.x(); + current_coords(v_id, 2) = + current_coords(v_id, 2) + displacement.z(); + constrained(v_id, 0) = 1; + } + else { + if (current_coords(v_id, 0) < -0.2) { + constrained(v_id, 0) = 0; + // current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; + + } else + constrained(v_id, 0) = 0; + } + + if (current_coords(v_id,1)>-0.15) { + constrained(v_id, 0) = 2; + } }; @@ -359,8 +391,11 @@ int main(int argc, char** argv) const uint32_t device_id = 0; cuda_query(device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); - //RXMeshStatic rx(STRINGIFY(INPUT_DIR) "dragon.obj"); + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "spot_low_resolution.obj"); + //RXMeshStatic rx(STRINGIFY(INPUT_DIR) "bumpycube.obj"); + + + Eigen::Vector3f displacement(1, 1, 1); auto ref_vertex_pos = @@ -370,6 +405,7 @@ int main(int argc, char** argv) + //input auto constraints = rx.add_vertex_attribute("FixedVertices", 1); constexpr uint32_t CUDABlockSize = 256; @@ -382,7 +418,7 @@ int main(int argc, char** argv) test_input<<>>( - rx.get_context(), ref_vertex_pos, *changed_vertex_pos, *constraints); + rx.get_context(), ref_vertex_pos, 
*changed_vertex_pos, *constraints, displacement); changed_vertex_pos->move(DEVICE, HOST); constraints->move(DEVICE, HOST); @@ -447,7 +483,9 @@ int main(int argc, char** argv) //how many times will arap algorithm run? - int iterations = 10; + int iterations = 1; + + /* for (int i=0;imove(DEVICE, HOST); changed_vertex_pos->from_matrix(X_mat.get()); + + } // visualize new position rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); + */ rx.get_polyscope_mesh()->addVertexScalarQuantity("fixedVertices", *constraints); + + float t = 0; + int flag = 0; + Eigen::Vector3f start = Eigen::Vector3f(-0.5, 0.0, 0.0); + Eigen::Vector3f end = Eigen::Vector3f(0.5, 0.0, 0.0); + + auto polyscope_callback = [&]() mutable { + //input step + + if (flag == 1) { + t -= 0.03; + if (t < 0) + flag = 0; + } else if (flag == 0) { + t += 0.03; + if (t > 1.0) + flag = 1; + } + displacement = (1 - t) * start + + (t) * end; + + //displacement = Eigen::Vector3f(0, 0, 0); + + test_input + <<>>(rx.get_context(), + ref_vertex_pos, + *changed_vertex_pos, + *constraints, + displacement); + + + + //process step + for (int i = 0; i < iterations; i++) { + // rotation part + // calculate rotation matrix + calculate_rotation_matrix + <<>>(rx.get_context(), + ref_vertex_pos, + *changed_vertex_pos, + rot_mat, + weight_matrix); + calculate_b + <<>>(rx.get_context(), + ref_vertex_pos, + *changed_vertex_pos, + rot_mat, + weight_matrix, + bMatrix, + *constraints); + + X_mat = changed_vertex_pos->to_matrix(); + systemMatrix.solve(bMatrix, *X_mat, Solver::QR, PermuteMethod::NSTDIS); + + } + X_mat->move(DEVICE, HOST); + changed_vertex_pos->from_matrix(X_mat.get()); + + //update step + #if USE_POLYSCOPE + //x->move(DEVICE, HOST); + rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); + #endif + + }; + #if USE_POLYSCOPE + polyscope::state::userCallback = polyscope_callback; polyscope::show(); - polyscope::shutdown(); #endif } From af23a2052b333ec6fd80c86c4c3e31fb63579b26 Mon Sep 17 
00:00:00 2001 From: ahmed Date: Wed, 31 Jul 2024 22:14:50 -0400 Subject: [PATCH 71/96] refactor remeshing app --- apps/Remesh/CMakeLists.txt | 6 +- apps/Remesh/collapse.cuh | 597 +++++++++++++++++ apps/Remesh/flip.cuh | 488 ++++++++++++++ apps/Remesh/link_condition.cuh | 124 ++++ apps/Remesh/remesh_kernels.cuh | 1099 -------------------------------- apps/Remesh/remesh_rxmesh.cuh | 585 ++--------------- apps/Remesh/smoothing.cuh | 120 ++++ apps/Remesh/split.cuh | 348 ++++++++++ apps/Remesh/util.cuh | 68 ++ include/rxmesh/util/timer.h | 39 ++ 10 files changed, 1854 insertions(+), 1620 deletions(-) create mode 100644 apps/Remesh/collapse.cuh create mode 100644 apps/Remesh/flip.cuh delete mode 100644 apps/Remesh/remesh_kernels.cuh create mode 100644 apps/Remesh/smoothing.cuh create mode 100644 apps/Remesh/split.cuh create mode 100644 apps/Remesh/util.cuh diff --git a/apps/Remesh/CMakeLists.txt b/apps/Remesh/CMakeLists.txt index 360a795c..fcb70b2f 100644 --- a/apps/Remesh/CMakeLists.txt +++ b/apps/Remesh/CMakeLists.txt @@ -3,7 +3,11 @@ add_executable(Remesh) set(SOURCE_LIST remesh.cu remesh_rxmesh.cuh - remesh_kernels.cuh + util.cuh + split.cuh + collapse.cuh + flip.cuh + smoothing.cuh link_condition.cuh ) diff --git a/apps/Remesh/collapse.cuh b/apps/Remesh/collapse.cuh new file mode 100644 index 00000000..66f3899e --- /dev/null +++ b/apps/Remesh/collapse.cuh @@ -0,0 +1,597 @@ +#pragma once +#include + +#include "rxmesh/cavity_manager.cuh" +#include "rxmesh/rxmesh_dynamic.h" + +#include "util.cuh" + + +template +__global__ static void __launch_bounds__(blockThreads) + edge_collapse(rxmesh::Context context, + const rxmesh::VertexAttribute coords, + rxmesh::EdgeAttribute edge_status, + const T low_edge_len_sq, + const T high_edge_len_sq, + int* d_buffer) +{ + + using namespace rxmesh; + auto block = cooperative_groups::this_thread_block(); + ShmemAllocator shrd_alloc; + CavityManager cavity( + block, context, shrd_alloc, true); + + const uint32_t pid = cavity.patch_id(); 
+ + if (pid == INVALID32) { + return; + } + + // a bitmask that indicates which edge we want to flip + // we also use it to mark updated edges (for edge_status) + Bitmask edge_mask(cavity.patch_info().edges_capacity[0], shrd_alloc); + edge_mask.reset(block); + + uint32_t shmem_before = shrd_alloc.get_allocated_size_bytes(); + + // we use this bitmask to mark the other end of to-be-collapse edge during + // checking for the link condition + Bitmask v0_mask(cavity.patch_info().num_vertices[0], shrd_alloc); + Bitmask v1_mask(cavity.patch_info().num_vertices[0], shrd_alloc); + + // Precompute EV + Query query(context, pid); + query.prologue(block, shrd_alloc); + block.sync(); + + // 1. mark edge that we want to collapse based on the edge length + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + assert(eh.local_id() < cavity.patch_info().num_edges[0]); + + // iter[0] and iter[2] are the edge two vertices + // iter[1] and iter[3] are the two opposite vertices + // 0 + // / | \ + // 3 | 1 + // \ | / + // 2 + + + if (edge_status(eh) == UNSEEN /*&& edge_link(eh) == 2*/) { + const VertexIterator iter = + query.template get_iterator(eh.local_id()); + + assert(iter.size() == 4); + + const VertexHandle v0 = iter[0]; + const VertexHandle v1 = iter[2]; + + const VertexHandle v2 = iter[1]; + const VertexHandle v3 = iter[3]; + + // don't collapse boundary edges + if (v2.is_valid() && v3.is_valid()) { + + // degenerate cases + if (v0 == v1 || v0 == v2 || v0 == v3 || v1 == v2 || v1 == v3 || + v2 == v3) { + return; + } + const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + const T edge_len_sq = glm::distance2(p0, p1); + + if (edge_len_sq < low_edge_len_sq) { + edge_mask.set(eh.local_id(), true); + } + } + } + }); + block.sync(); + + + // 2. check link condition + link_condition( + block, cavity.patch_info(), query, edge_mask, v0_mask, v1_mask, 0, 2); + block.sync(); + + + // 3. 
create cavity for the surviving edges + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + if (edge_mask(eh.local_id())) { + cavity.create(eh); + } else { + edge_status(eh) = OKAY; + } + }); + block.sync(); + + shrd_alloc.dealloc(shrd_alloc.get_allocated_size_bytes() - shmem_before); + + // create the cavity + if (cavity.prologue(block, shrd_alloc, coords, edge_status)) { + + edge_mask.reset(block); + block.sync(); + + cavity.for_each_cavity(block, [&](uint16_t c, uint16_t size) { + //::atomicAdd(&s_num_collapses, 1); + const EdgeHandle src = cavity.template get_creator(c); + + VertexHandle v0, v1; + + cavity.get_vertices(src, v0, v1); + + const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + + const vec3 new_p((p0[0] + p1[0]) * T(0.5), + (p0[1] + p1[1]) * T(0.5), + (p0[2] + p1[2]) * T(0.5)); + + // check if we will create a long edge + bool long_edge = false; + + for (uint16_t i = 0; i < size; ++i) { + + + const VertexHandle vvv = cavity.get_cavity_vertex(c, i); + + const vec3 vp( + coords(vvv, 0), coords(vvv, 1), coords(vvv, 2)); + + const T edge_len_sq = glm::distance2(vp, new_p); + + if (edge_len_sq >= low_edge_len_sq) { + long_edge = true; + break; + } + } + + if (long_edge) { + // roll back + cavity.recover(src); + + // mark this edge as OKAY because 1) if all cavities in this + // patch are successful, then we want to indicate that this + // edge is okay and should not be attempted again + // 2) if we have to rollback all changes in this patch, we still + // don't want to attempt this edge since we know that it creates + // short edges + edge_status(src) = OKAY; + } else { + + const VertexHandle new_v = cavity.add_vertex(); + + if (new_v.is_valid()) { + + coords(new_v, 0) = new_p[0]; + coords(new_v, 1) = new_p[1]; + coords(new_v, 2) = new_p[2]; + + DEdgeHandle e0 = + cavity.add_edge(new_v, cavity.get_cavity_vertex(c, 0)); + + if (e0.is_valid()) { + edge_mask.set(e0.local_id(), 
true); + + const DEdgeHandle e_init = e0; + + for (uint16_t i = 0; i < size; ++i) { + const DEdgeHandle e = cavity.get_cavity_edge(c, i); + + // edge_mask.set(e.local_id(), true); + + const VertexHandle v_end = + cavity.get_cavity_vertex(c, (i + 1) % size); + + const DEdgeHandle e1 = + (i == size - 1) ? + e_init.get_flip_dedge() : + cavity.add_edge( + cavity.get_cavity_vertex(c, i + 1), + new_v); + + if (!e1.is_valid()) { + break; + } + + if (i != size - 1) { + edge_mask.set(e1.local_id(), true); + } + + const FaceHandle new_f = cavity.add_face(e0, e, e1); + + if (!new_f.is_valid()) { + break; + } + e0 = e1.get_flip_dedge(); + } + } + } + } + }); + } + block.sync(); + + cavity.epilogue(block); + block.sync(); + + if (cavity.is_successful()) { + // if (threadIdx.x == 0) { + // ::atomicAdd(d_buffer, s_num_collapses); + //} + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + if (edge_mask(eh.local_id()) || cavity.is_recovered(eh)) { + edge_status(eh) = ADDED; + } + }); + } +} + +template +__global__ static void __launch_bounds__(blockThreads) + edge_collapse_1(rxmesh::Context context, + const rxmesh::VertexAttribute coords, + rxmesh::EdgeAttribute edge_status, + const T low_edge_len_sq, + const T high_edge_len_sq, + int* d_buffer) +{ + + using namespace rxmesh; + auto block = cooperative_groups::this_thread_block(); + ShmemAllocator shrd_alloc; + CavityManager cavity( + block, context, shrd_alloc, true); + + const uint32_t pid = cavity.patch_id(); + + + //__shared__ int s_num_collapses; + // if (threadIdx.x == 0) { + // s_num_collapses = 0; + //} + + if (pid == INVALID32) { + return; + } + + + Bitmask is_updated(cavity.patch_info().edges_capacity[0], shrd_alloc); + + uint32_t shmem_before = shrd_alloc.get_allocated_size_bytes(); + + + // for each edge we want to flip, we its id in one of its opposite vertices + // along with the other opposite vertex + uint16_t* v_info = + shrd_alloc.alloc(2 * cavity.patch_info().num_vertices[0]); + fill_n( + v_info, 2 * 
cavity.patch_info().num_vertices[0], uint16_t(INVALID16)); + + // a bitmask that indicates which edge we want to flip + Bitmask e_collapse(cavity.patch_info().num_edges[0], shrd_alloc); + e_collapse.reset(block); + block.sync(); + + auto should_collapse = [&](const EdgeHandle& eh, + const VertexIterator& iter) { + if (edge_status(eh) == UNSEEN) { + + assert(iter.size() == 4); + + const VertexHandle v0 = iter[0]; + const VertexHandle v1 = iter[2]; + + const VertexHandle v2 = iter[1]; + const VertexHandle v3 = iter[3]; + + // don't collapse boundary edges + if (v0.is_valid() && v1.is_valid() && v2.is_valid() && + v3.is_valid()) { + + // degenerate cases + if (v0 == v1 || v0 == v2 || v0 == v3 || v1 == v2 || v1 == v3 || + v2 == v3) { + edge_status(eh) = OKAY; + return; + } + + const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + const T edge_len_sq = glm::distance2(p0, p1); + + if (edge_len_sq < low_edge_len_sq) { + + const uint16_t c0(iter.local(0)), c1(iter.local(2)); + + uint16_t ret = ::atomicCAS(v_info + 2 * c0, INVALID16, c1); + if (ret == INVALID16) { + v_info[2 * c0 + 1] = eh.local_id(); + e_collapse.set(eh.local_id(), true); + } else { + ret = ::atomicCAS(v_info + 2 * c1, INVALID16, c0); + if (ret == INVALID16) { + v_info[2 * c1 + 1] = eh.local_id(); + e_collapse.set(eh.local_id(), true); + } + } + } + } + } + }; + + // 1. 
mark edge that we want to collapse based on the edge lenght + Query query(context, cavity.patch_id()); + query.dispatch(block, shrd_alloc, should_collapse); + block.sync(); + + + auto check_edges = [&](const VertexHandle& vh, const VertexIterator& iter) { + uint16_t opposite_v = v_info[2 * vh.local_id()]; + if (opposite_v != INVALID16) { + int num_shared_v = 0; + + const VertexIterator opp_iter = + query.template get_iterator(opposite_v); + + for (uint16_t v = 0; v < iter.size(); ++v) { + + for (uint16_t ov = 0; ov < opp_iter.size(); ++ov) { + if (iter.local(v) == opp_iter.local(ov)) { + num_shared_v++; + break; + } + } + } + + if (num_shared_v > 2) { + e_collapse.reset(v_info[2 * vh.local_id() + 1], true); + } + } + }; + // 2. make sure that the two vertices opposite to a flipped edge are not + // connected + query.dispatch( + block, + shrd_alloc, + check_edges, + [](VertexHandle) { return true; }, + false, + true); + block.sync(); + + + // 3. create cavity for the surviving edges + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + if (e_collapse(eh.local_id())) { + cavity.create(eh); + } else { + edge_status(eh) = OKAY; + } + }); + block.sync(); + + shrd_alloc.dealloc(shrd_alloc.get_allocated_size_bytes() - shmem_before); + + // create the cavity + if (cavity.prologue(block, shrd_alloc, coords, edge_status)) { + + is_updated.reset(block); + block.sync(); + + cavity.for_each_cavity(block, [&](uint16_t c, uint16_t size) { + //::atomicAdd(&s_num_collapses, 1); + const EdgeHandle src = cavity.template get_creator(c); + + VertexHandle v0, v1; + + cavity.get_vertices(src, v0, v1); + + const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + + const vec3 new_p((p0[0] + p1[0]) * T(0.5), + (p0[1] + p1[1]) * T(0.5), + (p0[2] + p1[2]) * T(0.5)); + + // check if we will create a long edge + bool long_edge = false; + + for (uint16_t i = 0; i < size; ++i) { + const VertexHandle vvv = 
cavity.get_cavity_vertex(c, i); + + const vec3 vp( + coords(vvv, 0), coords(vvv, 1), coords(vvv, 2)); + + const T edge_len_sq = glm::distance2(vp, new_p); + if (edge_len_sq > high_edge_len_sq) { + long_edge = true; + break; + } + } + + if (long_edge) { + // roll back + cavity.recover(src); + + // mark this edge as OKAY because 1) if all cavities in this + // patch are successful, then we want to indicate that this + // edge is okay and should not be attempted again + // 2) if we have to rollback all changes in this patch, we still + // don't want to attempt this edge since we know that it creates + // short edges + edge_status(src) = OKAY; + } else { + + const VertexHandle new_v = cavity.add_vertex(); + + if (new_v.is_valid()) { + + coords(new_v, 0) = new_p[0]; + coords(new_v, 1) = new_p[1]; + coords(new_v, 2) = new_p[2]; + + DEdgeHandle e0 = + cavity.add_edge(new_v, cavity.get_cavity_vertex(c, 0)); + + if (e0.is_valid()) { + is_updated.set(e0.local_id(), true); + + const DEdgeHandle e_init = e0; + + for (uint16_t i = 0; i < size; ++i) { + const DEdgeHandle e = cavity.get_cavity_edge(c, i); + + // is_updated.set(e.local_id(), true); + + const VertexHandle v_end = + cavity.get_cavity_vertex(c, (i + 1) % size); + + const DEdgeHandle e1 = + (i == size - 1) ? 
+ e_init.get_flip_dedge() : + cavity.add_edge( + cavity.get_cavity_vertex(c, i + 1), + new_v); + + if (!e1.is_valid()) { + break; + } + + if (i != size - 1) { + is_updated.set(e1.local_id(), true); + } + + const FaceHandle new_f = cavity.add_face(e0, e, e1); + + if (!new_f.is_valid()) { + break; + } + e0 = e1.get_flip_dedge(); + } + } + } + } + }); + } + block.sync(); + + cavity.epilogue(block); + block.sync(); + + if (cavity.is_successful()) { + // if (threadIdx.x == 0) { + // ::atomicAdd(d_buffer, s_num_collapses); + // } + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + if (is_updated(eh.local_id()) || cavity.is_recovered(eh)) { + edge_status(eh) = ADDED; + } + }); + } +} + + +template +inline void collapse_short_edges(rxmesh::RXMeshDynamic& rx, + rxmesh::VertexAttribute* coords, + rxmesh::EdgeAttribute* edge_status, + rxmesh::EdgeAttribute* edge_link, + const T low_edge_len_sq, + const T high_edge_len_sq, + rxmesh::Timers timers, + int* d_buffer) +{ + using namespace rxmesh; + + constexpr uint32_t blockThreads = 512; + + edge_status->reset(UNSEEN, DEVICE); + + int prv_remaining_work = rx.get_num_edges(); + + + int num_outer_iter = 0; + int num_inner_iter = 0; + // int num_collapses = 0; + + timers.start("CollapseTotal"); + while (true) { + num_outer_iter++; + rx.reset_scheduler(); + while (!rx.is_queue_empty()) { + // RXMESH_INFO(" Queue size = {}", + // rx.get_context().m_patch_scheduler.size()); + num_inner_iter++; + + // link_condition(rx, edge_link); + + LaunchBox launch_box; + rx.update_launch_box( + {Op::EVDiamond, Op::VV}, + launch_box, + (void*)edge_collapse_1, + true, + false, + false, + false, + [&](uint32_t v, uint32_t e, uint32_t f) { + return detail::mask_num_bytes(e) + + 2 * v * sizeof(uint16_t) + + 2 * ShmemAllocator::default_alignment; + // 2 * detail::mask_num_bytes(v) + + // 3 * ShmemAllocator::default_alignment; + }); + + // CUDA_ERROR(cudaMemset(d_buffer, 0, sizeof(int))); + + timers.start("Collapse"); + edge_collapse_1 + 
<<>>(rx.get_context(), + *coords, + *edge_status, + //*edge_link, + low_edge_len_sq, + high_edge_len_sq, + d_buffer); + timers.stop("Collapse"); + + timers.start("CollapseCleanup"); + rx.cleanup(); + timers.stop("CollapseCleanup"); + + timers.start("CollapseSlice"); + rx.slice_patches(*coords, *edge_status /*, *edge_link */); + timers.stop("CollapseSlice"); + + timers.start("CollapseCleanup"); + rx.cleanup(); + timers.stop("CollapseCleanup"); + } + + int remaining_work = is_done(rx, edge_status, d_buffer); + + if (remaining_work == 0 || prv_remaining_work == remaining_work) { + break; + } + prv_remaining_work = remaining_work; + } + timers.stop("CollapseTotal"); + + // RXMESH_INFO("total num_collapses {}", num_collapses); + RXMESH_INFO("num_outer_iter {}", num_outer_iter); + RXMESH_INFO("num_inner_iter {}", num_inner_iter); + RXMESH_INFO("Collapse total time {} (ms)", + timers.elapsed_millis("CollapseTotal")); + RXMESH_INFO("Collapse time {} (ms)", timers.elapsed_millis("Collapse")); + RXMESH_INFO("Collapse slice time {} (ms)", + timers.elapsed_millis("CollapseSlice")); + RXMESH_INFO("Collapse cleanup time {} (ms)", + timers.elapsed_millis("CollapseCleanup")); +} \ No newline at end of file diff --git a/apps/Remesh/flip.cuh b/apps/Remesh/flip.cuh new file mode 100644 index 00000000..15c6ebbb --- /dev/null +++ b/apps/Remesh/flip.cuh @@ -0,0 +1,488 @@ +#pragma once +#include + +#include "rxmesh/cavity_manager.cuh" +#include "rxmesh/query.cuh" +#include "rxmesh/rxmesh_dynamic.h" + +#include "util.cuh" + + +template +__global__ static void __launch_bounds__(blockThreads) + compute_valence(rxmesh::Context context, + const rxmesh::VertexAttribute v_valence) +{ + using namespace rxmesh; + + auto block = cooperative_groups::this_thread_block(); + + ShmemAllocator shrd_alloc; + + Query query(context); + query.compute_vertex_valence(block, shrd_alloc); + block.sync(); + + for_each_vertex(query.get_patch_info(), [&](VertexHandle vh) { + v_valence(vh) = 
query.vertex_valence(vh); + }); +} + + +template +__global__ static void __launch_bounds__(blockThreads) + edge_flip(rxmesh::Context context, + const rxmesh::VertexAttribute coords, + const rxmesh::VertexAttribute v_valence, + rxmesh::EdgeAttribute edge_status, + int* d_buffer) +{ + using namespace rxmesh; + + auto block = cooperative_groups::this_thread_block(); + + ShmemAllocator shrd_alloc; + + CavityManager cavity( + block, context, shrd_alloc, false, false); + + + if (cavity.patch_id() == INVALID32) { + return; + } + + // a bitmask that indicates which edge we want to flip + // we also used it to mark the new edges + Bitmask edge_mask(cavity.patch_info().edges_capacity[0], shrd_alloc); + edge_mask.reset(block); + + uint32_t shmem_before = shrd_alloc.get_allocated_size_bytes(); + + // we use this bitmask to mark the other end of to-be-collapse edge during + // checking for the link condition + Bitmask v0_mask(cavity.patch_info().num_vertices[0], shrd_alloc); + Bitmask v1_mask(cavity.patch_info().num_vertices[0], shrd_alloc); + + // precompute EVDiamond + Query query(context, cavity.patch_id()); + query.prologue(block, shrd_alloc); + block.sync(); + + + // 1. 
mark edge that we want to flip + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + assert(eh.local_id() < cavity.patch_info().num_edges[0]); + + const VertexIterator iter = + query.template get_iterator(eh.local_id()); + + // only if the edge is not seen before and its not a boundary edge + if (edge_status(eh) == UNSEEN && iter[1].is_valid() && + iter[3].is_valid()) { + + if (iter[0] == iter[1] || iter[0] == iter[2] || + iter[0] == iter[3] || iter[1] == iter[2] || + iter[1] == iter[3] || iter[2] == iter[3]) { + return; + } + + // iter[0] and iter[2] are the edge two vertices + // iter[1] and iter[3] are the two opposite vertices + + + // since we only deal with closed meshes without boundaries + constexpr int target_valence = 6; + + + const int valence_a = v_valence(iter[0]); + const int valence_b = v_valence(iter[2]); + const int valence_c = v_valence(iter[1]); + const int valence_d = v_valence(iter[3]); + + // clang-format off + const int deviation_pre = + (valence_a - target_valence) * (valence_a - target_valence) + + (valence_b - target_valence) * (valence_b - target_valence) + + (valence_c - target_valence) * (valence_c - target_valence) + + (valence_d - target_valence) * (valence_d - target_valence); + // clang-format on + + // clang-format off + const int deviation_post = + (valence_a - 1 - target_valence)*(valence_a - 1 - target_valence) + + (valence_b - 1 - target_valence)*(valence_b - 1 - target_valence) + + (valence_c + 1 - target_valence)*(valence_c + 1 - target_valence) + + (valence_d + 1 - target_valence)*(valence_d + 1 - target_valence); + // clang-format on + + if (deviation_pre > deviation_post) { + edge_mask.set(eh.local_id(), true); + } + } + }); + block.sync(); + + // 2. check link condition + link_condition( + block, cavity.patch_info(), query, edge_mask, v0_mask, v1_mask, 0, 2); + block.sync(); + + + // 3. 
create cavity for the surviving edges + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + if (edge_mask(eh.local_id())) { + cavity.create(eh); + } else { + edge_status(eh) = OKAY; + } + }); + block.sync(); + + shrd_alloc.dealloc(shrd_alloc.get_allocated_size_bytes() - shmem_before); + + if (cavity.prologue(block, shrd_alloc, coords, edge_status)) { + + edge_mask.reset(block); + block.sync(); + + cavity.for_each_cavity(block, [&](uint16_t c, uint16_t size) { + assert(size == 4); + + DEdgeHandle new_edge = cavity.add_edge( + cavity.get_cavity_vertex(c, 1), cavity.get_cavity_vertex(c, 3)); + + + if (new_edge.is_valid()) { + edge_mask.set(new_edge.local_id(), true); + cavity.add_face(cavity.get_cavity_edge(c, 0), + new_edge, + cavity.get_cavity_edge(c, 3)); + + + cavity.add_face(cavity.get_cavity_edge(c, 1), + cavity.get_cavity_edge(c, 2), + new_edge.get_flip_dedge()); + } + }); + } + block.sync(); + + cavity.epilogue(block); + block.sync(); + + if (cavity.is_successful()) { + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + if (edge_mask(eh.local_id())) { + edge_status(eh) = ADDED; + } + }); + } +} + +template +__global__ static void __launch_bounds__(blockThreads) + edge_flip_1(rxmesh::Context context, + const rxmesh::VertexAttribute coords, + const rxmesh::VertexAttribute v_valence, + rxmesh::EdgeAttribute edge_status, + int* d_buffer) +{ + using namespace rxmesh; + + auto block = cooperative_groups::this_thread_block(); + + ShmemAllocator shrd_alloc; + + CavityManager cavity( + block, context, shrd_alloc, false, false); + + + //__shared__ int s_num_flips; + // if (threadIdx.x == 0) { + // s_num_flips = 0; + //} + if (cavity.patch_id() == INVALID32) { + return; + } + + Bitmask is_updated(cavity.patch_info().edges_capacity[0], shrd_alloc); + + uint32_t shmem_before = shrd_alloc.get_allocated_size_bytes(); + + // for each edge we want to flip, we its id in one of its opposite vertices + // along with the other opposite vertex + uint16_t* v_info = + 
shrd_alloc.alloc(2 * cavity.patch_info().num_vertices[0]); + fill_n( + v_info, 2 * cavity.patch_info().num_vertices[0], uint16_t(INVALID16)); + + // a bitmask that indicates which edge we want to flip + Bitmask e_flip(cavity.patch_info().num_edges[0], shrd_alloc); + e_flip.reset(block); + + + auto should_flip = [&](const EdgeHandle& eh, const VertexIterator& iter) { + // iter[0] and iter[2] are the edge two vertices + // iter[1] and iter[3] are the two opposite vertices + + + // we use the local index since we are only interested in the + // valence which computed on the local index space + if (edge_status(eh) == UNSEEN) { + if (iter[1].is_valid() && iter[3].is_valid() && + iter[0].is_valid() && iter[2].is_valid()) { + + if (iter[0] == iter[1] || iter[0] == iter[2] || + iter[0] == iter[3] || iter[1] == iter[2] || + iter[1] == iter[3] || iter[2] == iter[3]) { + edge_status(eh) = OKAY; + return; + } + + // since we only deal with closed meshes without boundaries + constexpr int target_valence = 6; + + + const int valence_a = v_valence(iter[0]); + const int valence_b = v_valence(iter[2]); + const int valence_c = v_valence(iter[1]); + const int valence_d = v_valence(iter[3]); + + // clang-format off + const int deviation_pre = + (valence_a - target_valence) * (valence_a - target_valence) + + (valence_b - target_valence) * (valence_b - target_valence) + + (valence_c - target_valence) * (valence_c - target_valence) + + (valence_d - target_valence) * (valence_d - target_valence); + // clang-format on + + // clang-format off + const int deviation_post = + (valence_a - 1 - target_valence)*(valence_a - 1 - target_valence) + + (valence_b - 1 - target_valence)*(valence_b - 1 - target_valence) + + (valence_c + 1 - target_valence)*(valence_c + 1 - target_valence) + + (valence_d + 1 - target_valence)*(valence_d + 1 - target_valence); + // clang-format on + + if (deviation_pre > deviation_post) { + uint16_t v_c(iter.local(1)), v_d(iter.local(3)); + + bool added = false; + + if 
(iter[1].patch_id() == cavity.patch_id()) { + uint16_t ret = + ::atomicCAS(v_info + 2 * v_c, INVALID16, v_d); + if (ret == INVALID16) { + added = true; + v_info[2 * v_c + 1] = eh.local_id(); + e_flip.set(eh.local_id(), true); + } + } + + if (iter[3].patch_id() == cavity.patch_id() && !added) { + uint16_t ret = + ::atomicCAS(v_info + 2 * v_d, INVALID16, v_c); + if (ret == INVALID16) { + v_info[2 * v_d + 1] = eh.local_id(); + e_flip.set(eh.local_id(), true); + } + } + } + } else { + edge_status(eh) = OKAY; + } + } + }; + + // 1. mark edge that we want to flip + Query query(context, cavity.patch_id()); + query.dispatch(block, shrd_alloc, should_flip); + block.sync(); + + + // 2. make sure that the two vertices opposite to a flipped edge are not + // connected + auto check_edges = [&](const VertexHandle& vh, const VertexIterator& iter) { + uint16_t opposite_v = v_info[2 * vh.local_id()]; + if (opposite_v != INVALID16) { + bool is_valid = true; + for (uint16_t v = 0; v < iter.size(); ++v) { + if (iter.local(v) == opposite_v) { + is_valid = false; + break; + } + } + if (!is_valid) { + e_flip.reset(v_info[2 * vh.local_id() + 1], true); + } + } + }; + query.dispatch(block, shrd_alloc, check_edges); + block.sync(); + + // 3. 
create cavity for the surviving edges + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + if (e_flip(eh.local_id())) { + cavity.create(eh); + } else { + edge_status(eh) = OKAY; + } + }); + block.sync(); + + shrd_alloc.dealloc(shrd_alloc.get_allocated_size_bytes() - shmem_before); + + if (cavity.prologue(block, shrd_alloc, coords, edge_status)) { + + is_updated.reset(block); + block.sync(); + + cavity.for_each_cavity(block, [&](uint16_t c, uint16_t size) { + //::atomicAdd(&s_num_flips, 1); + assert(size == 4); + + DEdgeHandle new_edge = cavity.add_edge( + cavity.get_cavity_vertex(c, 1), cavity.get_cavity_vertex(c, 3)); + + + if (new_edge.is_valid()) { + is_updated.set(new_edge.local_id(), true); + cavity.add_face(cavity.get_cavity_edge(c, 0), + new_edge, + cavity.get_cavity_edge(c, 3)); + + + cavity.add_face(cavity.get_cavity_edge(c, 1), + cavity.get_cavity_edge(c, 2), + new_edge.get_flip_dedge()); + } + }); + } + + cavity.epilogue(block); + block.sync(); + + if (cavity.is_successful()) { + // if (threadIdx.x == 0) { + // ::atomicAdd(d_buffer, s_num_flips); + // } + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + if (is_updated(eh.local_id())) { + edge_status(eh) = ADDED; + } + }); + } +} + + +template +inline void equalize_valences(rxmesh::RXMeshDynamic& rx, + rxmesh::VertexAttribute* coords, + rxmesh::VertexAttribute* v_valence, + rxmesh::EdgeAttribute* edge_status, + rxmesh::EdgeAttribute* edge_link, + rxmesh::Timers timers, + int* d_buffer) +{ + + using namespace rxmesh; + + constexpr uint32_t blockThreads = 512; + + edge_status->reset(UNSEEN, DEVICE); + + int prv_remaining_work = rx.get_num_edges(); + + // int num_flips = 0; + int num_outer_iter = 0; + int num_inner_iter = 0; + + timers.start("FlipTotal"); + while (true) { + num_outer_iter++; + rx.reset_scheduler(); + while (!rx.is_queue_empty()) { + // RXMESH_INFO(" Queue size = {}", + // rx.get_context().m_patch_scheduler.size()); + num_inner_iter++; + LaunchBox launch_box; + + 
rx.update_launch_box({}, + launch_box, + (void*)compute_valence, + false, + false, + true); + + timers.start("Flip"); + compute_valence + <<>>(rx.get_context(), *v_valence); + + // link_condition(rx, edge_link); + + rx.update_launch_box( + {Op::EVDiamond, Op::VV}, + launch_box, + //(void*)edge_flip, + (void*)edge_flip_1, + true, + false, + false, + false, + [&](uint32_t v, uint32_t e, uint32_t f) { + return detail::mask_num_bytes(e) + + 2 * v * sizeof(uint16_t) + + 2 * ShmemAllocator::default_alignment; + // 2 * detail::mask_num_bytes(v) + + // 3 * ShmemAllocator::default_alignment; + }); + + // CUDA_ERROR(cudaMemset(d_buffer, 0, sizeof(int))); + + edge_flip_1 + <<>>(rx.get_context(), + *coords, + *v_valence, + *edge_status, + //*edge_link, + d_buffer); + timers.stop("Flip"); + + timers.start("FlipCleanup"); + rx.cleanup(); + timers.stop("FlipCleanup"); + + timers.start("FlipSlice"); + rx.slice_patches(*coords, *edge_status /*,edge_link*/); + timers.stop("FlipSlice"); + + timers.start("FlipCleanup"); + rx.cleanup(); + timers.stop("FlipCleanup"); + } + + int remaining_work = is_done(rx, edge_status, d_buffer); + + if (remaining_work == 0 || prv_remaining_work == remaining_work) { + break; + } + prv_remaining_work = remaining_work; + // RXMESH_INFO("num_flips {}, time {}", + // num_flips, + // app_time + slice_time + cleanup_time); + } + timers.stop("FlipTotal"); + + // RXMESH_INFO("total num_flips {}", num_flips); + RXMESH_INFO("num_outer_iter {}", num_outer_iter); + RXMESH_INFO("num_inner_iter {}", num_inner_iter); + RXMESH_INFO("Flip total time {} (ms)", timers.elapsed_millis("FlipTotal")); + RXMESH_INFO("Flip time {} (ms)", timers.elapsed_millis("Flip")); + RXMESH_INFO("Flip slice time {} (ms)", timers.elapsed_millis("FlipSlice")); + RXMESH_INFO("Flip cleanup time {} (ms)", + timers.elapsed_millis("FlipCleanup")); +} diff --git a/apps/Remesh/link_condition.cuh b/apps/Remesh/link_condition.cuh index ec75f9ba..726e40f5 100644 --- a/apps/Remesh/link_condition.cuh 
+++ b/apps/Remesh/link_condition.cuh @@ -88,3 +88,127 @@ __inline__ __device__ void link_condition( } } } + +template +__global__ static void __launch_bounds__(blockThreads) + edge_link_condition(const rxmesh::Context context, + rxmesh::EdgeAttribute edge_link) +{ + using namespace rxmesh; + + auto block = cooperative_groups::this_thread_block(); + + ShmemAllocator shrd_alloc; + + Query query(context); + const PatchInfo& patch_info = query.get_patch_info(); + + Bitmask v0_mask(patch_info.num_vertices[0], shrd_alloc); + Bitmask v1_mask(patch_info.num_vertices[0], shrd_alloc); + + query.prologue(block, shrd_alloc); + block.sync(); + + __shared__ int s_num_shared_one_ring; + + + const uint16_t num_edges = patch_info.num_edges[0]; + + for (uint16_t e = 0; e < num_edges; ++e) { + + if (patch_info.is_owned(LocalEdgeT(e))) { + // the edge two end vertices + const VertexIterator iter = + query.template get_iterator(e); + + const uint16_t v0 = iter.local(0); + const uint16_t v1 = iter.local(1); + + if (threadIdx.x == 0) { + s_num_shared_one_ring = 0; + } + + v0_mask.reset(block); + v1_mask.reset(block); + block.sync(); + + // each thread will be assigned to an edge (including not-owned one) + // and mark in v0_mask/v1_mask if one of its two ends are v0/v1 + for_each_edge( + patch_info, + [&](EdgeHandle eh) { + if (eh.local_id() == e && + eh.patch_id() == patch_info.patch_id) { + return; + } + const VertexIterator v_iter = + query.template get_iterator( + eh.local_id()); + + const uint16_t vv0 = v_iter.local(0); + const uint16_t vv1 = v_iter.local(1); + + + if (vv0 == v0) { + v0_mask.set(vv1, true); + } + if (vv0 == v1) { + v1_mask.set(vv1, true); + } + + if (vv1 == v0) { + v0_mask.set(vv0, true); + } + if (vv1 == v1) { + v1_mask.set(vv0, true); + } + }, + true); + block.sync(); + + for (int v = threadIdx.x; v < v0_mask.size(); v += blockThreads) { + if (v0_mask(v) && v1_mask(v)) { + ::atomicAdd(&s_num_shared_one_ring, 1); + } + } + + block.sync(); + if (threadIdx.x == 0) { 
+ edge_link(EdgeHandle(patch_info.patch_id, e)) = + s_num_shared_one_ring; + } + } + } +} +void link_condition(rxmesh::RXMeshDynamic& rx, + rxmesh::EdgeAttribute* edge_link) +{ + using namespace rxmesh; + + constexpr uint32_t blockThreads = 384; + + edge_link->reset(0, DEVICE); + + LaunchBox launch_box; + rx.update_launch_box({Op::EV}, + launch_box, + (void*)edge_link_condition, + false, + false, + false, + false, + [&](uint32_t v, uint32_t e, uint32_t f) { + return 2 * detail::mask_num_bytes(v) + + 2 * ShmemAllocator::default_alignment; + }); + + GPUTimer app_timer; + app_timer.start(); + edge_link_condition + <<>>(rx.get_context(), *edge_link); + app_timer.stop(); + + RXMESH_INFO("Link Condition time {} (ms)", app_timer.elapsed_millis()); +} \ No newline at end of file diff --git a/apps/Remesh/remesh_kernels.cuh b/apps/Remesh/remesh_kernels.cuh deleted file mode 100644 index bd05dab9..00000000 --- a/apps/Remesh/remesh_kernels.cuh +++ /dev/null @@ -1,1099 +0,0 @@ -#include "rxmesh/cavity_manager.cuh" -#include "rxmesh/query.cuh" - -#include "link_condition.cuh" - -#include "rxmesh/kernels/debug.cuh" - - -template -__global__ static void stats_kernel(const rxmesh::Context context, - const rxmesh::VertexAttribute coords, - rxmesh::EdgeAttribute edge_len, - rxmesh::VertexAttribute vertex_valence) -{ - using namespace rxmesh; - - auto block = cooperative_groups::this_thread_block(); - - ShmemAllocator shrd_alloc; - - auto compute_edge_len = [&](const EdgeHandle eh, const VertexIterator& ev) { - const vec3 v0(coords(ev[0], 0), coords(ev[0], 1), coords(ev[0], 2)); - const vec3 v1(coords(ev[1], 0), coords(ev[1], 1), coords(ev[1], 2)); - - T len = glm::distance(v0, v1); - - edge_len(eh) = len; - }; - - Query query(context); - query.compute_vertex_valence(block, shrd_alloc); - query.dispatch(block, shrd_alloc, compute_edge_len); - - for_each_vertex(query.get_patch_info(), [&](const VertexHandle vh) { - vertex_valence(vh) = query.vertex_valence(vh); - }); -} - - -template 
-__global__ static void __launch_bounds__(blockThreads) - edge_split(rxmesh::Context context, - const rxmesh::VertexAttribute coords, - rxmesh::EdgeAttribute edge_status, - const T high_edge_len_sq, - int* d_buffer) -{ - // EV for calc edge len - - using namespace rxmesh; - - auto block = cooperative_groups::this_thread_block(); - - ShmemAllocator shrd_alloc; - - CavityManager cavity( - block, context, shrd_alloc, true); - - - //__shared__ int s_num_splits; - // if (threadIdx.x == 0) { - // s_num_splits = 0; - //} - if (cavity.patch_id() == INVALID32) { - return; - } - Bitmask is_updated(cavity.patch_info().edges_capacity[0], shrd_alloc); - - uint32_t shmem_before = shrd_alloc.get_allocated_size_bytes(); - - auto should_split = [&](const EdgeHandle& eh, const VertexIterator& iter) { - // iter[0] and iter[2] are the edge two vertices - // iter[1] and iter[3] are the two opposite vertices - // 0 - // / | \ - // 3 | 1 - // \ | / - // 2 - assert(iter.size() == 4); - - if (edge_status(eh) == UNSEEN) { - const VertexHandle va = iter[0]; - const VertexHandle vb = iter[2]; - - const VertexHandle vc = iter[1]; - const VertexHandle vd = iter[3]; - - // don't split boundary edges - if (vc.is_valid() && vd.is_valid()) { - // degenerate cases - if (va == vb || vb == vc || vc == va || va == vd || vb == vd || - vc == vd) { - edge_status(eh) = OKAY; - return; - } - const vec3 pa(coords(va, 0), coords(va, 1), coords(va, 2)); - const vec3 pb(coords(vb, 0), coords(vb, 1), coords(vb, 2)); - - const T edge_len = glm::distance2(pa, pb); - - if (edge_len > high_edge_len_sq) { - cavity.create(eh); - } else { - edge_status(eh) = OKAY; - } - } - } - }; - - Query query(context, cavity.patch_id()); - query.dispatch(block, shrd_alloc, should_split); - block.sync(); - - shrd_alloc.dealloc(shrd_alloc.get_allocated_size_bytes() - shmem_before); - - - if (cavity.prologue(block, shrd_alloc, coords, edge_status)) { - - is_updated.reset(block); - block.sync(); - - cavity.for_each_cavity(block, 
[&](uint16_t c, uint16_t size) { - assert(size == 4); - - const VertexHandle v0 = cavity.get_cavity_vertex(c, 0); - const VertexHandle v1 = cavity.get_cavity_vertex(c, 2); - - const VertexHandle new_v = cavity.add_vertex(); - - if (new_v.is_valid()) { - - coords(new_v, 0) = (coords(v0, 0) + coords(v1, 0)) * T(0.5); - coords(new_v, 1) = (coords(v0, 1) + coords(v1, 1)) * T(0.5); - coords(new_v, 2) = (coords(v0, 2) + coords(v1, 2)) * T(0.5); - - DEdgeHandle e0 = - cavity.add_edge(new_v, cavity.get_cavity_vertex(c, 0)); - const DEdgeHandle e_init = e0; - - if (e0.is_valid()) { - is_updated.set(e0.local_id(), true); - //::atomicAdd(&s_num_splits, 1); - for (uint16_t i = 0; i < size; ++i) { - const DEdgeHandle e = cavity.get_cavity_edge(c, i); - - // is_updated.set(e.local_id(), true); - - const DEdgeHandle e1 = - (i == size - 1) ? - e_init.get_flip_dedge() : - cavity.add_edge( - cavity.get_cavity_vertex(c, i + 1), new_v); - if (!e1.is_valid()) { - break; - } - if (i != size - 1) { - is_updated.set(e1.local_id(), true); - } - - const FaceHandle f = cavity.add_face(e0, e, e1); - if (!f.is_valid()) { - break; - } - e0 = e1.get_flip_dedge(); - } - } - } - }); - } - - cavity.epilogue(block); - block.sync(); - - if (cavity.is_successful()) { - // if (threadIdx.x == 0) { - // ::atomicAdd(d_buffer, s_num_splits); - //} - for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { - if (is_updated(eh.local_id())) { - edge_status(eh) = ADDED; - } - }); - } -} - - -template -__global__ static void __launch_bounds__(blockThreads) - compute_valence(rxmesh::Context context, - const rxmesh::VertexAttribute v_valence) -{ - using namespace rxmesh; - - auto block = cooperative_groups::this_thread_block(); - - ShmemAllocator shrd_alloc; - - Query query(context); - query.compute_vertex_valence(block, shrd_alloc); - block.sync(); - - for_each_vertex(query.get_patch_info(), [&](VertexHandle vh) { - v_valence(vh) = query.vertex_valence(vh); - }); -} - - -template -__global__ static void 
__launch_bounds__(blockThreads) - edge_collapse(rxmesh::Context context, - const rxmesh::VertexAttribute coords, - rxmesh::EdgeAttribute edge_status, - const T low_edge_len_sq, - const T high_edge_len_sq, - int* d_buffer) -{ - - using namespace rxmesh; - auto block = cooperative_groups::this_thread_block(); - ShmemAllocator shrd_alloc; - CavityManager cavity( - block, context, shrd_alloc, true); - - const uint32_t pid = cavity.patch_id(); - - if (pid == INVALID32) { - return; - } - - // a bitmask that indicates which edge we want to flip - // we also use it to mark updated edges (for edge_status) - Bitmask edge_mask(cavity.patch_info().edges_capacity[0], shrd_alloc); - edge_mask.reset(block); - - uint32_t shmem_before = shrd_alloc.get_allocated_size_bytes(); - - // we use this bitmask to mark the other end of to-be-collapse edge during - // checking for the link condition - Bitmask v0_mask(cavity.patch_info().num_vertices[0], shrd_alloc); - Bitmask v1_mask(cavity.patch_info().num_vertices[0], shrd_alloc); - - // Precompute EV - Query query(context, pid); - query.prologue(block, shrd_alloc); - block.sync(); - - // 1. 
mark edge that we want to collapse based on the edge length - for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { - assert(eh.local_id() < cavity.patch_info().num_edges[0]); - - // iter[0] and iter[2] are the edge two vertices - // iter[1] and iter[3] are the two opposite vertices - // 0 - // / | \ - // 3 | 1 - // \ | / - // 2 - - - if (edge_status(eh) == UNSEEN) { - const VertexIterator iter = - query.template get_iterator(eh.local_id()); - - assert(iter.size() == 4); - - const VertexHandle v0 = iter[0]; - const VertexHandle v1 = iter[2]; - - const VertexHandle v2 = iter[1]; - const VertexHandle v3 = iter[3]; - - // don't collapse boundary edges - if (v2.is_valid() && v3.is_valid()) { - - // degenerate cases - if (v0 == v1 || v0 == v2 || v0 == v3 || v1 == v2 || v1 == v3 || - v2 == v3) { - return; - } - const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); - const T edge_len_sq = glm::distance2(p0, p1); - - if (edge_len_sq < low_edge_len_sq) { - edge_mask.set(eh.local_id(), true); - } - } - } - }); - block.sync(); - - - // 2. check link condition - link_condition( - block, cavity.patch_info(), query, edge_mask, v0_mask, v1_mask, 0, 2); - block.sync(); - - - // 3. 
create cavity for the surviving edges - for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { - if (edge_mask(eh.local_id())) { - cavity.create(eh); - } else { - edge_status(eh) = OKAY; - } - }); - block.sync(); - - shrd_alloc.dealloc(shrd_alloc.get_allocated_size_bytes() - shmem_before); - - // create the cavity - if (cavity.prologue(block, shrd_alloc, coords, edge_status)) { - - edge_mask.reset(block); - block.sync(); - - cavity.for_each_cavity(block, [&](uint16_t c, uint16_t size) { - //::atomicAdd(&s_num_collapses, 1); - const EdgeHandle src = cavity.template get_creator(c); - - VertexHandle v0, v1; - - cavity.get_vertices(src, v0, v1); - - const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); - - const vec3 new_p((p0[0] + p1[0]) * T(0.5), - (p0[1] + p1[1]) * T(0.5), - (p0[2] + p1[2]) * T(0.5)); - - // check if we will create a long edge - bool long_edge = false; - - for (uint16_t i = 0; i < size; ++i) { - - - const VertexHandle vvv = cavity.get_cavity_vertex(c, i); - - const vec3 vp( - coords(vvv, 0), coords(vvv, 1), coords(vvv, 2)); - - const T edge_len_sq = glm::distance2(vp, new_p); - - if (edge_len_sq >= low_edge_len_sq) { - long_edge = true; - break; - } - } - - if (long_edge) { - // roll back - cavity.recover(src); - - // mark this edge as OKAY because 1) if all cavities in this - // patch are successful, then we want to indicate that this - // edge is okay and should not be attempted again - // 2) if we have to rollback all changes in this patch, we still - // don't want to attempt this edge since we know that it creates - // short edges - edge_status(src) = OKAY; - } else { - - const VertexHandle new_v = cavity.add_vertex(); - - if (new_v.is_valid()) { - - coords(new_v, 0) = new_p[0]; - coords(new_v, 1) = new_p[1]; - coords(new_v, 2) = new_p[2]; - - DEdgeHandle e0 = - cavity.add_edge(new_v, cavity.get_cavity_vertex(c, 0)); - - if (e0.is_valid()) { - edge_mask.set(e0.local_id(), 
true); - - const DEdgeHandle e_init = e0; - - for (uint16_t i = 0; i < size; ++i) { - const DEdgeHandle e = cavity.get_cavity_edge(c, i); - - // edge_mask.set(e.local_id(), true); - - const VertexHandle v_end = - cavity.get_cavity_vertex(c, (i + 1) % size); - - const DEdgeHandle e1 = - (i == size - 1) ? - e_init.get_flip_dedge() : - cavity.add_edge( - cavity.get_cavity_vertex(c, i + 1), - new_v); - - if (!e1.is_valid()) { - break; - } - - if (i != size - 1) { - edge_mask.set(e1.local_id(), true); - } - - const FaceHandle new_f = cavity.add_face(e0, e, e1); - - if (!new_f.is_valid()) { - break; - } - e0 = e1.get_flip_dedge(); - } - } - } - } - }); - } - block.sync(); - - cavity.epilogue(block); - block.sync(); - - if (cavity.is_successful()) { - // if (threadIdx.x == 0) { - // ::atomicAdd(d_buffer, s_num_collapses); - //} - for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { - if (edge_mask(eh.local_id()) || cavity.is_recovered(eh)) { - edge_status(eh) = ADDED; - } - }); - } -} - -template -__global__ static void __launch_bounds__(blockThreads) - edge_collapse_1(rxmesh::Context context, - const rxmesh::VertexAttribute coords, - rxmesh::EdgeAttribute edge_status, - const T low_edge_len_sq, - const T high_edge_len_sq, - int* d_buffer) -{ - - using namespace rxmesh; - auto block = cooperative_groups::this_thread_block(); - ShmemAllocator shrd_alloc; - CavityManager cavity( - block, context, shrd_alloc, true); - - const uint32_t pid = cavity.patch_id(); - - - //__shared__ int s_num_collapses; - // if (threadIdx.x == 0) { - // s_num_collapses = 0; - //} - - if (pid == INVALID32) { - return; - } - - - Bitmask is_updated(cavity.patch_info().edges_capacity[0], shrd_alloc); - - uint32_t shmem_before = shrd_alloc.get_allocated_size_bytes(); - - - // for each edge we want to flip, we its id in one of its opposite vertices - // along with the other opposite vertex - uint16_t* v_info = - shrd_alloc.alloc(2 * cavity.patch_info().num_vertices[0]); - fill_n( - v_info, 2 * 
cavity.patch_info().num_vertices[0], uint16_t(INVALID16)); - - // a bitmask that indicates which edge we want to flip - Bitmask e_collapse(cavity.patch_info().num_edges[0], shrd_alloc); - e_collapse.reset(block); - block.sync(); - - auto should_collapse = [&](const EdgeHandle& eh, - const VertexIterator& iter) { - if (edge_status(eh) == UNSEEN) { - - const VertexHandle v0(iter[0]), v1(iter[1]); - - const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); - const T edge_len_sq = glm::distance2(p0, p1); - - if (edge_len_sq < low_edge_len_sq) { - - const uint16_t c0(iter.local(0)), c1(iter.local(1)); - - uint16_t ret = ::atomicCAS(v_info + 2 * c0, INVALID16, c1); - if (ret == INVALID16) { - v_info[2 * c0 + 1] = eh.local_id(); - e_collapse.set(eh.local_id(), true); - } else { - ret = ::atomicCAS(v_info + 2 * c1, INVALID16, c0); - if (ret == INVALID16) { - v_info[2 * c1 + 1] = eh.local_id(); - e_collapse.set(eh.local_id(), true); - } - } - } - } - }; - - // 1. mark edge that we want to collapse based on the edge lenght - Query query(context, cavity.patch_id()); - query.dispatch(block, shrd_alloc, should_collapse); - block.sync(); - - - auto check_edges = [&](const VertexHandle& vh, const VertexIterator& iter) { - uint16_t opposite_v = v_info[2 * vh.local_id()]; - if (opposite_v != INVALID16) { - int num_shared_v = 0; - - const VertexIterator opp_iter = - query.template get_iterator(opposite_v); - - for (uint16_t v = 0; v < iter.size(); ++v) { - - for (uint16_t ov = 0; ov < opp_iter.size(); ++ov) { - if (iter.local(v) == opp_iter.local(ov)) { - num_shared_v++; - break; - } - } - } - - if (num_shared_v > 2) { - e_collapse.reset(v_info[2 * vh.local_id() + 1], true); - } - } - }; - // 2. make sure that the two vertices opposite to a flipped edge are not - // connected - query.dispatch( - block, - shrd_alloc, - check_edges, - [](VertexHandle) { return true; }, - false, - true); - block.sync(); - - - // 3. 
create cavity for the surviving edges - for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { - if (e_collapse(eh.local_id())) { - cavity.create(eh); - } else { - edge_status(eh) = OKAY; - } - }); - block.sync(); - - shrd_alloc.dealloc(shrd_alloc.get_allocated_size_bytes() - shmem_before); - - // create the cavity - if (cavity.prologue(block, shrd_alloc, coords, edge_status)) { - - is_updated.reset(block); - block.sync(); - - cavity.for_each_cavity(block, [&](uint16_t c, uint16_t size) { - //::atomicAdd(&s_num_collapses, 1); - const EdgeHandle src = cavity.template get_creator(c); - - VertexHandle v0, v1; - - cavity.get_vertices(src, v0, v1); - - const vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); - const vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); - - const vec3 new_p((p0[0] + p1[0]) * T(0.5), - (p0[1] + p1[1]) * T(0.5), - (p0[2] + p1[2]) * T(0.5)); - - // check if we will create a long edge - bool long_edge = false; - - for (uint16_t i = 0; i < size; ++i) { - const VertexHandle vvv = cavity.get_cavity_vertex(c, i); - - const vec3 vp( - coords(vvv, 0), coords(vvv, 1), coords(vvv, 2)); - - const T edge_len_sq = glm::distance2(vp, new_p); - if (edge_len_sq > high_edge_len_sq) { - long_edge = true; - break; - } - } - - if (long_edge) { - // roll back - cavity.recover(src); - - // mark this edge as OKAY because 1) if all cavities in this - // patch are successful, then we want to indicate that this - // edge is okay and should not be attempted again - // 2) if we have to rollback all changes in this patch, we still - // don't want to attempt this edge since we know that it creates - // short edges - edge_status(src) = OKAY; - } else { - - const VertexHandle new_v = cavity.add_vertex(); - - if (new_v.is_valid()) { - - coords(new_v, 0) = new_p[0]; - coords(new_v, 1) = new_p[1]; - coords(new_v, 2) = new_p[2]; - - DEdgeHandle e0 = - cavity.add_edge(new_v, cavity.get_cavity_vertex(c, 0)); - - if (e0.is_valid()) { - is_updated.set(e0.local_id(), true); - 
- const DEdgeHandle e_init = e0; - - for (uint16_t i = 0; i < size; ++i) { - const DEdgeHandle e = cavity.get_cavity_edge(c, i); - - // is_updated.set(e.local_id(), true); - - const VertexHandle v_end = - cavity.get_cavity_vertex(c, (i + 1) % size); - - const DEdgeHandle e1 = - (i == size - 1) ? - e_init.get_flip_dedge() : - cavity.add_edge( - cavity.get_cavity_vertex(c, i + 1), - new_v); - - if (!e1.is_valid()) { - break; - } - - if (i != size - 1) { - is_updated.set(e1.local_id(), true); - } - - const FaceHandle new_f = cavity.add_face(e0, e, e1); - - if (!new_f.is_valid()) { - break; - } - e0 = e1.get_flip_dedge(); - } - } - } - } - }); - } - block.sync(); - - cavity.epilogue(block); - block.sync(); - - if (cavity.is_successful()) { - // if (threadIdx.x == 0) { - // ::atomicAdd(d_buffer, s_num_collapses); - // } - for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { - if (is_updated(eh.local_id()) || cavity.is_recovered(eh)) { - edge_status(eh) = ADDED; - } - }); - } -} - - -template -__global__ static void __launch_bounds__(blockThreads) - edge_flip(rxmesh::Context context, - const rxmesh::VertexAttribute coords, - const rxmesh::VertexAttribute v_valence, - rxmesh::EdgeAttribute edge_status, - int* d_buffer) -{ - using namespace rxmesh; - - auto block = cooperative_groups::this_thread_block(); - - ShmemAllocator shrd_alloc; - - CavityManager cavity( - block, context, shrd_alloc, false, false); - - - if (cavity.patch_id() == INVALID32) { - return; - } - - // a bitmask that indicates which edge we want to flip - // we also used it to mark the new edges - Bitmask edge_mask(cavity.patch_info().edges_capacity[0], shrd_alloc); - edge_mask.reset(block); - - uint32_t shmem_before = shrd_alloc.get_allocated_size_bytes(); - - // we use this bitmask to mark the other end of to-be-collapse edge during - // checking for the link condition - Bitmask v0_mask(cavity.patch_info().num_vertices[0], shrd_alloc); - Bitmask v1_mask(cavity.patch_info().num_vertices[0], 
shrd_alloc); - - // precompute EVDiamond - Query query(context, cavity.patch_id()); - query.prologue(block, shrd_alloc); - block.sync(); - - - // 1. mark edge that we want to flip - for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { - assert(eh.local_id() < cavity.patch_info().num_edges[0]); - - const VertexIterator iter = - query.template get_iterator(eh.local_id()); - - // only if the edge is not seen before and its not a boundary edge - if (edge_status(eh) == UNSEEN && iter[1].is_valid() && - iter[3].is_valid()) { - - if (iter[0] == iter[1] || iter[0] == iter[2] || - iter[0] == iter[3] || iter[1] == iter[2] || - iter[1] == iter[3] || iter[2] == iter[3]) { - return; - } - - // iter[0] and iter[2] are the edge two vertices - // iter[1] and iter[3] are the two opposite vertices - - - // since we only deal with closed meshes without boundaries - constexpr int target_valence = 6; - - - const int valence_a = v_valence(iter[0]); - const int valence_b = v_valence(iter[2]); - const int valence_c = v_valence(iter[1]); - const int valence_d = v_valence(iter[3]); - - // clang-format off - const int deviation_pre = - (valence_a - target_valence) * (valence_a - target_valence) + - (valence_b - target_valence) * (valence_b - target_valence) + - (valence_c - target_valence) * (valence_c - target_valence) + - (valence_d - target_valence) * (valence_d - target_valence); - // clang-format on - - // clang-format off - const int deviation_post = - (valence_a - 1 - target_valence)*(valence_a - 1 - target_valence) + - (valence_b - 1 - target_valence)*(valence_b - 1 - target_valence) + - (valence_c + 1 - target_valence)*(valence_c + 1 - target_valence) + - (valence_d + 1 - target_valence)*(valence_d + 1 - target_valence); - // clang-format on - - if (deviation_pre > deviation_post) { - edge_mask.set(eh.local_id(), true); - } - } - }); - block.sync(); - - // 2. 
check link condition - link_condition( - block, cavity.patch_info(), query, edge_mask, v0_mask, v1_mask, 0, 2); - block.sync(); - - - // 3. create cavity for the surviving edges - for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { - if (edge_mask(eh.local_id())) { - cavity.create(eh); - } else { - edge_status(eh) = OKAY; - } - }); - block.sync(); - - shrd_alloc.dealloc(shrd_alloc.get_allocated_size_bytes() - shmem_before); - - if (cavity.prologue(block, shrd_alloc, coords, edge_status)) { - - edge_mask.reset(block); - block.sync(); - - cavity.for_each_cavity(block, [&](uint16_t c, uint16_t size) { - assert(size == 4); - - DEdgeHandle new_edge = cavity.add_edge( - cavity.get_cavity_vertex(c, 1), cavity.get_cavity_vertex(c, 3)); - - - if (new_edge.is_valid()) { - edge_mask.set(new_edge.local_id(), true); - cavity.add_face(cavity.get_cavity_edge(c, 0), - new_edge, - cavity.get_cavity_edge(c, 3)); - - - cavity.add_face(cavity.get_cavity_edge(c, 1), - cavity.get_cavity_edge(c, 2), - new_edge.get_flip_dedge()); - } - }); - } - block.sync(); - - cavity.epilogue(block); - block.sync(); - - if (cavity.is_successful()) { - for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { - if (edge_mask(eh.local_id())) { - edge_status(eh) = ADDED; - } - }); - } -} - -template -__global__ static void __launch_bounds__(blockThreads) - edge_flip_1(rxmesh::Context context, - const rxmesh::VertexAttribute coords, - const rxmesh::VertexAttribute v_valence, - rxmesh::EdgeAttribute edge_status, - int* d_buffer) -{ - using namespace rxmesh; - - auto block = cooperative_groups::this_thread_block(); - - ShmemAllocator shrd_alloc; - - CavityManager cavity( - block, context, shrd_alloc, false, false); - - - //__shared__ int s_num_flips; - // if (threadIdx.x == 0) { - // s_num_flips = 0; - //} - if (cavity.patch_id() == INVALID32) { - return; - } - - Bitmask is_updated(cavity.patch_info().edges_capacity[0], shrd_alloc); - - uint32_t shmem_before = shrd_alloc.get_allocated_size_bytes(); - - // 
for each edge we want to flip, we its id in one of its opposite vertices - // along with the other opposite vertex - uint16_t* v_info = - shrd_alloc.alloc(2 * cavity.patch_info().num_vertices[0]); - fill_n( - v_info, 2 * cavity.patch_info().num_vertices[0], uint16_t(INVALID16)); - - // a bitmask that indicates which edge we want to flip - Bitmask e_flip(cavity.patch_info().num_edges[0], shrd_alloc); - e_flip.reset(block); - - - auto should_flip = [&](const EdgeHandle& eh, const VertexIterator& iter) { - // iter[0] and iter[2] are the edge two vertices - // iter[1] and iter[3] are the two opposite vertices - - - // we use the local index since we are only interested in the - // valence which computed on the local index space - if (edge_status(eh) == UNSEEN) { - if (iter[1].is_valid() && iter[3].is_valid()) { - - // since we only deal with closed meshes without boundaries - constexpr int target_valence = 6; - - - const int valence_a = v_valence(iter[0]); - const int valence_b = v_valence(iter[2]); - const int valence_c = v_valence(iter[1]); - const int valence_d = v_valence(iter[3]); - - // clang-format off - const int deviation_pre = - (valence_a - target_valence) * (valence_a - target_valence) + - (valence_b - target_valence) * (valence_b - target_valence) + - (valence_c - target_valence) * (valence_c - target_valence) + - (valence_d - target_valence) * (valence_d - target_valence); - // clang-format on - - // clang-format off - const int deviation_post = - (valence_a - 1 - target_valence)*(valence_a - 1 - target_valence) + - (valence_b - 1 - target_valence)*(valence_b - 1 - target_valence) + - (valence_c + 1 - target_valence)*(valence_c + 1 - target_valence) + - (valence_d + 1 - target_valence)*(valence_d + 1 - target_valence); - // clang-format on - - if (deviation_pre > deviation_post) { - uint16_t v_c(iter.local(1)), v_d(iter.local(3)); - - bool added = false; - - if (iter[1].patch_id() == cavity.patch_id()) { - uint16_t ret = - ::atomicCAS(v_info + 2 * v_c, 
INVALID16, v_d); - if (ret == INVALID16) { - added = true; - v_info[2 * v_c + 1] = eh.local_id(); - e_flip.set(eh.local_id(), true); - } - } - - if (iter[3].patch_id() == cavity.patch_id() && !added) { - uint16_t ret = - ::atomicCAS(v_info + 2 * v_d, INVALID16, v_c); - if (ret == INVALID16) { - v_info[2 * v_d + 1] = eh.local_id(); - e_flip.set(eh.local_id(), true); - } - } - } - } else { - edge_status(eh) = OKAY; - } - } - }; - - // 1. mark edge that we want to flip - Query query(context, cavity.patch_id()); - query.dispatch(block, shrd_alloc, should_flip); - block.sync(); - - - // 2. make sure that the two vertices opposite to a flipped edge are not - // connected - auto check_edges = [&](const VertexHandle& vh, const VertexIterator& iter) { - uint16_t opposite_v = v_info[2 * vh.local_id()]; - if (opposite_v != INVALID16) { - bool is_valid = true; - for (uint16_t v = 0; v < iter.size(); ++v) { - if (iter.local(v) == opposite_v) { - is_valid = false; - break; - } - } - if (!is_valid) { - e_flip.reset(v_info[2 * vh.local_id() + 1], true); - } - } - }; - query.dispatch(block, shrd_alloc, check_edges); - block.sync(); - - // 3. 
create cavity for the surviving edges - for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { - if (e_flip(eh.local_id())) { - cavity.create(eh); - } else { - edge_status(eh) = OKAY; - } - }); - block.sync(); - - shrd_alloc.dealloc(shrd_alloc.get_allocated_size_bytes() - shmem_before); - - if (cavity.prologue(block, shrd_alloc, coords, edge_status)) { - - is_updated.reset(block); - block.sync(); - - cavity.for_each_cavity(block, [&](uint16_t c, uint16_t size) { - //::atomicAdd(&s_num_flips, 1); - assert(size == 4); - - DEdgeHandle new_edge = cavity.add_edge( - cavity.get_cavity_vertex(c, 1), cavity.get_cavity_vertex(c, 3)); - - - if (new_edge.is_valid()) { - is_updated.set(new_edge.local_id(), true); - cavity.add_face(cavity.get_cavity_edge(c, 0), - new_edge, - cavity.get_cavity_edge(c, 3)); - - - cavity.add_face(cavity.get_cavity_edge(c, 1), - cavity.get_cavity_edge(c, 2), - new_edge.get_flip_dedge()); - } - }); - } - - cavity.epilogue(block); - block.sync(); - - if (cavity.is_successful()) { - // if (threadIdx.x == 0) { - // ::atomicAdd(d_buffer, s_num_flips); - // } - for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { - if (is_updated(eh.local_id())) { - edge_status(eh) = ADDED; - } - }); - } -} - -template -__global__ static void __launch_bounds__(blockThreads) - vertex_smoothing(const rxmesh::Context context, - const rxmesh::VertexAttribute coords, - rxmesh::VertexAttribute new_coords) -{ - // VV to compute vertex sum and normal - using namespace rxmesh; - auto block = cooperative_groups::this_thread_block(); - - auto smooth = [&](VertexHandle v_id, VertexIterator& iter) { - if (iter.size() == 0) { - return; - } - - const vec3 v(coords(v_id, 0), coords(v_id, 1), coords(v_id, 2)); - - // compute both vertex normal and the new position - // the new position is the average of the one-ring - // while we iterate on the one ring to compute this new position, we - // also compute the vertex normal - // finally, we project the new position on the tangent plane 
of the - // vertex (old position) - - // this is the last vertex in the one-ring (before r_id) - VertexHandle q_id = iter.back(); - vec3 q(coords(q_id, 0), coords(q_id, 1), coords(q_id, 2)); - - vec3 new_v(0.0, 0.0, 0.0); - vec3 v_normal(0.0, 0.0, 0.0); - - T w = 0.0; - - for (uint32_t i = 0; i < iter.size(); ++i) { - // the current one ring vertex - const VertexHandle r_id = iter[i]; - - const vec3 r(coords(r_id, 0), coords(r_id, 1), coords(r_id, 2)); - - vec3 c = glm::cross(q - v, r - v); - - const T area = glm::length(c) / T(2.0); - w += area; - - if (glm::length2(c) > 1e-6) { - c = glm::normalize(c); - } - - const vec3 n = c * area; - - v_normal += n; - - new_v += r; - - q_id = r_id; - q = r; - } - new_v /= T(iter.size()); - - assert(w > 0); - - v_normal /= w; - - if (glm::length2(v_normal) < 1e-6) { - new_v = v; - } else { - v_normal = glm::normalize(v_normal); - - new_v = new_v + (glm::dot(v_normal, (v - new_v)) * v_normal); - } - - assert(!isnan(new_v[0])); - assert(!isnan(new_v[1])); - assert(!isnan(new_v[2])); - - new_coords(v_id, 0) = new_v[0]; - new_coords(v_id, 1) = new_v[1]; - new_coords(v_id, 2) = new_v[2]; - }; - - Query query(context); - ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, smooth, true); -} \ No newline at end of file diff --git a/apps/Remesh/remesh_rxmesh.cuh b/apps/Remesh/remesh_rxmesh.cuh index d5fb238a..7cfe9cd8 100644 --- a/apps/Remesh/remesh_rxmesh.cuh +++ b/apps/Remesh/remesh_rxmesh.cuh @@ -1,24 +1,20 @@ +#pragma once #include #include "rxmesh/rxmesh_dynamic.h" #include "rxmesh/util/report.h" #include "rxmesh/util/util.h" -int ps_iddd = 0; +#include "smoothing.cuh" -using EdgeStatus = int8_t; -enum : EdgeStatus -{ - UNSEEN = 0, // means we have not tested it before for e.g., split/flip/col - OKAY = 1, // means we have tested it and it is okay to skip - UPDATE = 2, // means we should update it i.e., we have tested it before - ADDED = 3, // means it has been added to during the split/flip/collapse -}; +#include 
"collapse.cuh" +#include "flip.cuh" +#include "split.cuh" -#include "remesh_kernels.cuh" +#include "util.cuh" -float split_time_ms, collapse_time_ms, flip_time_ms, smoothing_time_ms; +int ps_iddd = 0; struct Stats { @@ -170,34 +166,15 @@ inline void compute_stats(rxmesh::RXMeshDynamic& rx, float len = (*edge_len)(eh); stats.avg_edge_len += len; stats.max_edge_len = std::max(stats.max_edge_len, len); - stats.min_edge_len = std::min(stats.min_edge_len, len); + if (len > std::numeric_limits::epsilon()) { + stats.min_edge_len = std::min(stats.min_edge_len, len); + } }, NULL, false); stats.avg_edge_len /= rx.get_num_edges(); } -int is_done(const rxmesh::RXMeshDynamic& rx, - const rxmesh::EdgeAttribute* edge_status, - int* d_buffer) -{ - using namespace rxmesh; - - // if there is at least one edge that is UNSEEN, then we are not done yet - CUDA_ERROR(cudaMemset(d_buffer, 0, sizeof(int))); - - rx.for_each_edge( - DEVICE, - [edge_status = *edge_status, d_buffer] __device__(const EdgeHandle eh) { - if (edge_status(eh) == UNSEEN || edge_status(eh) == UPDATE) { - ::atomicAdd(d_buffer, 1); - } - }); - - CUDA_ERROR(cudaDeviceSynchronize()); - return d_buffer[0]; -} - template void screen_shot(rxmesh::RXMeshDynamic& rx, rxmesh::VertexAttribute* coords, @@ -225,473 +202,6 @@ void screen_shot(rxmesh::RXMeshDynamic& rx, #endif } -template -inline void split_long_edges(rxmesh::RXMeshDynamic& rx, - rxmesh::VertexAttribute* coords, - rxmesh::EdgeAttribute* edge_status, - const T high_edge_len_sq, - int* d_buffer) -{ - using namespace rxmesh; - - constexpr uint32_t blockThreads = 512; - - - edge_status->reset(UNSEEN, DEVICE); - - int prv_remaining_work = rx.get_num_edges(); - - - float app_time = 0; - float slice_time = 0; - float cleanup_time = 0; - // int num_splits = 0; - int num_outer_iter = 0; - int num_inner_iter = 0; - - GPUTimer timer; - timer.start(); - - while (true) { - num_outer_iter++; - rx.reset_scheduler(); - - while (!rx.is_queue_empty()) { - num_inner_iter++; - - // 
RXMESH_INFO(" Queue size = {}", - // rx.get_context().m_patch_scheduler.size()); - - LaunchBox launch_box; - rx.update_launch_box({Op::EVDiamond}, - launch_box, - (void*)edge_split, - true, - false, - false, - false, - [&](uint32_t v, uint32_t e, uint32_t f) { - return detail::mask_num_bytes(e) + - ShmemAllocator::default_alignment; - }); - - // CUDA_ERROR(cudaMemset(d_buffer, 0, sizeof(int))); - - - GPUTimer app_timer; - app_timer.start(); - edge_split - <<>>(rx.get_context(), - *coords, - *edge_status, - high_edge_len_sq, - d_buffer); - app_timer.stop(); - - GPUTimer cleanup_timer; - cleanup_timer.start(); - rx.cleanup(); - cleanup_timer.stop(); - - GPUTimer slice_timer; - slice_timer.start(); - rx.slice_patches(*coords, *edge_status); - slice_timer.stop(); - - GPUTimer cleanup_timer2; - cleanup_timer2.start(); - rx.cleanup(); - cleanup_timer2.stop(); - - app_time += app_timer.elapsed_millis(); - slice_time += slice_timer.elapsed_millis(); - cleanup_time += cleanup_timer.elapsed_millis(); - cleanup_time += cleanup_timer2.elapsed_millis(); - CUDA_ERROR(cudaDeviceSynchronize()); - // int dd; - // CUDA_ERROR( - // cudaMemcpy(&dd, d_buffer, sizeof(int), - // cudaMemcpyDeviceToHost)); - // num_splits += dd; - - // rx.update_host(); - // EXPECT_TRUE(rx.validate()); - - // screen_shot(rx, coords, "Split"); - - // stats(rx); - // bool show = false; - // if (show) { - // rx.update_host(); - // RXMESH_INFO(" "); - // RXMESH_INFO("#Vertices {}", rx.get_num_vertices()); - // RXMESH_INFO("#Edges {}", rx.get_num_edges()); - // RXMESH_INFO("#Faces {}", rx.get_num_faces()); - // RXMESH_INFO("#Patches {}", rx.get_num_patches()); - // // stats(rx); - // coords->move(DEVICE, HOST); - // edge_status->move(DEVICE, HOST); - // rx.update_polyscope(); - // auto ps_mesh = rx.get_polyscope_mesh(); - // ps_mesh->updateVertexPositions(*coords); - // ps_mesh->setEnabled(false); - // - // ps_mesh->addEdgeScalarQuantity("EdgeStatus", *edge_status); - // - // rx.render_vertex_patch(); - // 
rx.render_edge_patch(); - // rx.render_face_patch()->setEnabled(false); - // - // rx.render_patch(0); - // - // polyscope::show(); - //} - } - - int remaining_work = is_done(rx, edge_status, d_buffer); - - if (remaining_work == 0 || prv_remaining_work == remaining_work) { - break; - } - prv_remaining_work = remaining_work; - // RXMESH_INFO("num_splits {}, time {}", - // num_splits, - // app_time + slice_time + cleanup_time); - } - timer.stop(); - - // RXMESH_INFO("total num_splits {}", num_splits); - RXMESH_INFO("num_outer_iter {}", num_outer_iter); - RXMESH_INFO("num_inner_iter {}", num_inner_iter); - RXMESH_INFO("Split total time {} (ms)", timer.elapsed_millis()); - RXMESH_INFO("Split time {} (ms)", app_time); - RXMESH_INFO("Split slice timer {} (ms)", slice_time); - RXMESH_INFO("Split cleanup timer {} (ms)", cleanup_time); - - split_time_ms += timer.elapsed_millis(); -} - -template -inline void collapse_short_edges(rxmesh::RXMeshDynamic& rx, - rxmesh::VertexAttribute* coords, - rxmesh::EdgeAttribute* edge_status, - const T low_edge_len_sq, - const T high_edge_len_sq, - int* d_buffer) -{ - using namespace rxmesh; - - constexpr uint32_t blockThreads = 512; - - edge_status->reset(UNSEEN, DEVICE); - - int prv_remaining_work = rx.get_num_edges(); - - float app_time = 0; - float slice_time = 0; - float cleanup_time = 0; - int num_outer_iter = 0; - int num_inner_iter = 0; - // int num_collapses = 0; - - GPUTimer timer; - timer.start(); - while (true) { - num_outer_iter++; - rx.reset_scheduler(); - while (!rx.is_queue_empty()) { - // RXMESH_INFO(" Queue size = {}", - // rx.get_context().m_patch_scheduler.size()); - num_inner_iter++; - - LaunchBox launch_box; - rx.update_launch_box( - //{Op::EVDiamond}, - {Op::EV, Op::VV}, - launch_box, - (void*)edge_collapse_1, - true, - false, - false, - false, - [&](uint32_t v, uint32_t e, uint32_t f) { - return detail::mask_num_bytes(e) + - 2 * v * sizeof(uint16_t) + - 2 * ShmemAllocator::default_alignment; - // 2 * 
detail::mask_num_bytes(v) + - // 3 * ShmemAllocator::default_alignment; - }); - - // CUDA_ERROR(cudaMemset(d_buffer, 0, sizeof(int))); - - GPUTimer app_timer; - app_timer.start(); - edge_collapse_1 - <<>>(rx.get_context(), - *coords, - *edge_status, - low_edge_len_sq, - high_edge_len_sq, - d_buffer); - app_timer.stop(); - - GPUTimer cleanup_timer; - cleanup_timer.start(); - rx.cleanup(); - cleanup_timer.stop(); - - GPUTimer slice_timer; - slice_timer.start(); - rx.slice_patches(*coords, *edge_status); - slice_timer.stop(); - - GPUTimer cleanup_timer2; - cleanup_timer2.start(); - rx.cleanup(); - cleanup_timer2.stop(); - - - app_time += app_timer.elapsed_millis(); - slice_time += slice_timer.elapsed_millis(); - cleanup_time += cleanup_timer.elapsed_millis(); - cleanup_time += cleanup_timer2.elapsed_millis(); - - // int dd; - // CUDA_ERROR( - // cudaMemcpy(&dd, d_buffer, sizeof(int), - // cudaMemcpyDeviceToHost)); - // num_collapses += dd; - - // screen_shot(rx, coords, "Collapse"); - - // stats(rx); - // bool show = false; - // if (show) { - // rx.update_host(); - // RXMESH_INFO(" "); - // RXMESH_INFO("#Vertices {}", rx.get_num_vertices()); - // RXMESH_INFO("#Edges {}", rx.get_num_edges()); - // RXMESH_INFO("#Faces {}", rx.get_num_faces()); - // RXMESH_INFO("#Patches {}", rx.get_num_patches()); - // // stats(rx); - // coords->move(DEVICE, HOST); - // edge_status->move(DEVICE, HOST); - // rx.update_polyscope(); - // auto ps_mesh = rx.get_polyscope_mesh(); - // ps_mesh->updateVertexPositions(*coords); - // ps_mesh->setEnabled(false); - // - // ps_mesh->addEdgeScalarQuantity("EdgeStatus", *edge_status); - // - // rx.render_vertex_patch(); - // rx.render_edge_patch(); - // rx.render_face_patch()->setEnabled(false); - // - // polyscope::show(); - //} - } - - int remaining_work = is_done(rx, edge_status, d_buffer); - - if (remaining_work == 0 || prv_remaining_work == remaining_work) { - break; - } - prv_remaining_work = remaining_work; - // RXMESH_INFO("num_collapses {}, 
time {}", - // num_collapses, - // app_time + slice_time + cleanup_time); - } - timer.stop(); - // RXMESH_INFO("total num_collapses {}", num_collapses); - RXMESH_INFO("num_outer_iter {}", num_outer_iter); - RXMESH_INFO("num_inner_iter {}", num_inner_iter); - RXMESH_INFO("Collapse total time {} (ms)", timer.elapsed_millis()); - RXMESH_INFO("Collapse time {} (ms)", app_time); - RXMESH_INFO("Collapse slice timer {} (ms)", slice_time); - RXMESH_INFO("Collapse cleanup timer {} (ms)", cleanup_time); - - collapse_time_ms += timer.elapsed_millis(); -} - -template -inline void equalize_valences(rxmesh::RXMeshDynamic& rx, - rxmesh::VertexAttribute* coords, - rxmesh::VertexAttribute* v_valence, - rxmesh::EdgeAttribute* edge_status, - int* d_buffer) -{ - using namespace rxmesh; - - constexpr uint32_t blockThreads = 512; - - edge_status->reset(UNSEEN, DEVICE); - - int prv_remaining_work = rx.get_num_edges(); - - float app_time = 0; - float slice_time = 0; - float cleanup_time = 0; - - // int num_flips = 0; - int num_outer_iter = 0; - int num_inner_iter = 0; - - GPUTimer timer; - timer.start(); - while (true) { - num_outer_iter++; - rx.reset_scheduler(); - while (!rx.is_queue_empty()) { - // RXMESH_INFO(" Queue size = {}", - // rx.get_context().m_patch_scheduler.size()); - num_inner_iter++; - LaunchBox launch_box; - - rx.update_launch_box({}, - launch_box, - (void*)compute_valence, - false, - false, - true); - GPUTimer app_timer; - app_timer.start(); - compute_valence - <<>>(rx.get_context(), *v_valence); - - rx.update_launch_box( - //{Op::EVDiamond}, - {Op::EVDiamond, Op::VV}, - launch_box, - (void*)edge_flip_1, - true, - false, - false, - false, - [&](uint32_t v, uint32_t e, uint32_t f) { - return detail::mask_num_bytes(e) + - 2 * v * sizeof(uint16_t) + - 2 * ShmemAllocator::default_alignment; - // 2 * detail::mask_num_bytes(v) + - // 3 * ShmemAllocator::default_alignment; - }); - - // CUDA_ERROR(cudaMemset(d_buffer, 0, sizeof(int))); - - edge_flip_1<<>>( - rx.get_context(), 
*coords, *v_valence, *edge_status, d_buffer); - app_timer.stop(); - - GPUTimer cleanup_timer; - cleanup_timer.start(); - rx.cleanup(); - cleanup_timer.stop(); - - GPUTimer slice_timer; - slice_timer.start(); - rx.slice_patches(*coords, *edge_status); - slice_timer.stop(); - - GPUTimer cleanup_timer2; - cleanup_timer2.start(); - rx.cleanup(); - cleanup_timer2.stop(); - - app_time += app_timer.elapsed_millis(); - slice_time += slice_timer.elapsed_millis(); - cleanup_time += cleanup_timer.elapsed_millis(); - cleanup_time += cleanup_timer2.elapsed_millis(); - - // int dd; - // CUDA_ERROR( - // cudaMemcpy(&dd, d_buffer, sizeof(int), - // cudaMemcpyDeviceToHost)); - // num_flips += dd; - - // screen_shot(rx, coords, "Flip"); - - // stats(rx); - // bool show = false; - // if (show) { - // rx.update_host(); - // RXMESH_INFO(" "); - // RXMESH_INFO("#Vertices {}", rx.get_num_vertices()); - // RXMESH_INFO("#Edges {}", rx.get_num_edges()); - // RXMESH_INFO("#Faces {}", rx.get_num_faces()); - // RXMESH_INFO("#Patches {}", rx.get_num_patches()); - // // stats(rx); - // coords->move(DEVICE, HOST); - // edge_status->move(DEVICE, HOST); - // rx.update_polyscope(); - // auto ps_mesh = rx.get_polyscope_mesh(); - // ps_mesh->updateVertexPositions(*coords); - // ps_mesh->setEnabled(false); - // - // ps_mesh->addEdgeScalarQuantity("EdgeStatus", *edge_status); - // - // rx.render_vertex_patch(); - // rx.render_edge_patch(); - // rx.render_face_patch()->setEnabled(false); - // - // polyscope::show(); - //} - } - - int remaining_work = is_done(rx, edge_status, d_buffer); - - if (remaining_work == 0 || prv_remaining_work == remaining_work) { - break; - } - prv_remaining_work = remaining_work; - // RXMESH_INFO("num_flips {}, time {}", - // num_flips, - // app_time + slice_time + cleanup_time); - } - timer.stop(); - // RXMESH_INFO("total num_flips {}", num_flips); - RXMESH_INFO("num_outer_iter {}", num_outer_iter); - RXMESH_INFO("num_inner_iter {}", num_inner_iter); - RXMESH_INFO("Flip total 
time {} (ms)", timer.elapsed_millis()); - RXMESH_INFO("Flip time {} (ms)", app_time); - RXMESH_INFO("Flip slice timer {} (ms)", slice_time); - RXMESH_INFO("Flip cleanup timer {} (ms)", cleanup_time); - - flip_time_ms += timer.elapsed_millis(); -} - -template -inline void tangential_relaxation(rxmesh::RXMeshDynamic& rx, - rxmesh::VertexAttribute* coords, - rxmesh::VertexAttribute* new_coords) -{ - using namespace rxmesh; - - constexpr uint32_t blockThreads = 384; - - LaunchBox launch_box; - rx.update_launch_box({Op::VV}, - launch_box, - (void*)vertex_smoothing, - false, - true); - - GPUTimer app_timer; - app_timer.start(); - vertex_smoothing - <<>>(rx.get_context(), *coords, *new_coords); - app_timer.stop(); - smoothing_time_ms += app_timer.elapsed_millis(); - RXMESH_INFO("Relax time {} (ms)", app_timer.elapsed_millis()); -} inline void remesh_rxmesh(rxmesh::RXMeshDynamic& rx) { @@ -725,7 +235,35 @@ inline void remesh_rxmesh(rxmesh::RXMeshDynamic& rx) int* d_buffer; CUDA_ERROR(cudaMallocManaged((void**)&d_buffer, sizeof(int))); + auto edge_link = rx.add_edge_attribute("edgeLink", 1); + + // edge_link->move(DEVICE, HOST); + // rx.get_polyscope_mesh()->addEdgeScalarQuantity("edgeLink", *edge_link); + // compute stats + + Timers timers; + + timers.add("Total"); + + timers.add("SplitTotal"); + timers.add("Split"); + timers.add("SplitCleanup"); + timers.add("SplitSlice"); + + timers.add("CollapseTotal"); + timers.add("Collapse"); + timers.add("CollapseCleanup"); + timers.add("CollapseSlice"); + + timers.add("FlipTotal"); + timers.add("Flip"); + timers.add("FlipCleanup"); + timers.add("FlipSlice"); + + timers.add("SmoothTotal"); + + Stats stats; compute_stats( rx, coords.get(), edge_len.get(), vertex_valence.get(), stats); @@ -756,42 +294,47 @@ inline void remesh_rxmesh(rxmesh::RXMeshDynamic& rx) // stats(rx); - split_time_ms = 0; - collapse_time_ms = 0; - flip_time_ms = 0; - smoothing_time_ms = 0; - - GPUTimer timer; - timer.start(); - + timers.start("Total"); for 
(uint32_t iter = 0; iter < Arg.num_iter; ++iter) { RXMESH_INFO(" Edge Split -- iter {}", iter); - split_long_edges( - rx, coords.get(), edge_status.get(), high_edge_len_sq, d_buffer); + split_long_edges(rx, + coords.get(), + edge_status.get(), + high_edge_len_sq, + low_edge_len_sq, + timers, + d_buffer); RXMESH_INFO(" Edge Collapse -- iter {}", iter); collapse_short_edges(rx, coords.get(), edge_status.get(), + edge_link.get(), low_edge_len_sq, high_edge_len_sq, + timers, d_buffer); RXMESH_INFO(" Edge Flip -- iter {}", iter); - equalize_valences( - rx, coords.get(), v_valence.get(), edge_status.get(), d_buffer); + equalize_valences(rx, + coords.get(), + v_valence.get(), + edge_status.get(), + edge_link.get(), + timers, + d_buffer); RXMESH_INFO(" Vertex Smoothing -- iter {}", iter); - tangential_relaxation(rx, coords.get(), new_coords.get()); + tangential_relaxation(rx, coords.get(), new_coords.get(), timers); std::swap(new_coords, coords); } - timer.stop(); + timers.stop("Total"); CUDA_ERROR(cudaDeviceSynchronize()); CUDA_ERROR(cudaGetLastError()); - RXMESH_INFO("remesh_rxmesh() took {} (ms)", timer.elapsed_millis()); + RXMESH_INFO("remesh_rxmesh() took {} (ms)", timers.elapsed_millis("Total")); rx.update_host(); coords->move(DEVICE, HOST); @@ -802,7 +345,7 @@ inline void remesh_rxmesh(rxmesh::RXMeshDynamic& rx) RXMESH_INFO("Output mesh #Faces {}", rx.get_num_faces()); RXMESH_INFO("Output mesh #Patches {}", rx.get_num_patches()); - report.add_member("total_remesh_time", timer.elapsed_millis()); + report.add_member("total_remesh_time", timers.elapsed_millis("Total")); report.model_data(Arg.obj_file_name + "_after", rx, "model_after"); compute_stats( @@ -832,10 +375,12 @@ inline void remesh_rxmesh(rxmesh::RXMeshDynamic& rx) vertex_valence->get_memory_mg()); - report.add_member("split_time_ms", split_time_ms); - report.add_member("collapse_time_ms", collapse_time_ms); - report.add_member("flip_time_ms", flip_time_ms); - report.add_member("smoothing_time_ms", 
smoothing_time_ms); + report.add_member("split_time_ms", timers.elapsed_millis("SplitTotal")); + report.add_member("collapse_time_ms", + timers.elapsed_millis("CollapseTotal")); + report.add_member("flip_time_ms", timers.elapsed_millis("FlipTotal")); + report.add_member("smoothing_time_ms", + timers.elapsed_millis("SmoothTotal")); EXPECT_TRUE(rx.validate()); // rx.export_obj("remesh.obj", *coords); diff --git a/apps/Remesh/smoothing.cuh b/apps/Remesh/smoothing.cuh new file mode 100644 index 00000000..726a8f7a --- /dev/null +++ b/apps/Remesh/smoothing.cuh @@ -0,0 +1,120 @@ +#pragma once +#include + +#include "rxmesh/cavity_manager.cuh" +#include "rxmesh/rxmesh_dynamic.h" + +#include "util.cuh" + + +template +__global__ static void __launch_bounds__(blockThreads) + vertex_smoothing(const rxmesh::Context context, + const rxmesh::VertexAttribute coords, + rxmesh::VertexAttribute new_coords) +{ + // VV to compute vertex sum and normal + using namespace rxmesh; + auto block = cooperative_groups::this_thread_block(); + + auto smooth = [&](VertexHandle v_id, VertexIterator& iter) { + if (iter.size() == 0) { + return; + } + + const vec3 v(coords(v_id, 0), coords(v_id, 1), coords(v_id, 2)); + + // compute both vertex normal and the new position + // the new position is the average of the one-ring + // while we iterate on the one ring to compute this new position, we + // also compute the vertex normal + // finally, we project the new position on the tangent plane of the + // vertex (old position) + + // this is the last vertex in the one-ring (before r_id) + VertexHandle q_id = iter.back(); + vec3 q(coords(q_id, 0), coords(q_id, 1), coords(q_id, 2)); + + vec3 new_v(0.0, 0.0, 0.0); + vec3 v_normal(0.0, 0.0, 0.0); + + T w = 0.0; + + for (uint32_t i = 0; i < iter.size(); ++i) { + // the current one ring vertex + const VertexHandle r_id = iter[i]; + + const vec3 r(coords(r_id, 0), coords(r_id, 1), coords(r_id, 2)); + + vec3 c = glm::cross(q - v, r - v); + + const T area = 
glm::length(c) / T(2.0); + w += area; + + if (glm::length2(c) > 1e-6) { + c = glm::normalize(c); + } + + const vec3 n = c * area; + + v_normal += n; + + new_v += r; + + q_id = r_id; + q = r; + } + new_v /= T(iter.size()); + + assert(w > 0); + + v_normal /= w; + + if (glm::length2(v_normal) < 1e-6) { + new_v = v; + } else { + v_normal = glm::normalize(v_normal); + + new_v = new_v + (glm::dot(v_normal, (v - new_v)) * v_normal); + } + + assert(!isnan(new_v[0])); + assert(!isnan(new_v[1])); + assert(!isnan(new_v[2])); + + new_coords(v_id, 0) = new_v[0]; + new_coords(v_id, 1) = new_v[1]; + new_coords(v_id, 2) = new_v[2]; + }; + + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, smooth, true); +} + +template +inline void tangential_relaxation(rxmesh::RXMeshDynamic& rx, + rxmesh::VertexAttribute* coords, + rxmesh::VertexAttribute* new_coords, + rxmesh::Timers timers) +{ + using namespace rxmesh; + + constexpr uint32_t blockThreads = 384; + + LaunchBox launch_box; + rx.update_launch_box({Op::VV}, + launch_box, + (void*)vertex_smoothing, + false, + true); + + timers.start("SmoothTotal"); + vertex_smoothing + <<>>(rx.get_context(), *coords, *new_coords); + timers.stop("SmoothTotal"); + + RXMESH_INFO("Relax time {} (ms)", timers.elapsed_millis("SmoothTotal")); +} \ No newline at end of file diff --git a/apps/Remesh/split.cuh b/apps/Remesh/split.cuh new file mode 100644 index 00000000..c1fda94b --- /dev/null +++ b/apps/Remesh/split.cuh @@ -0,0 +1,348 @@ +#pragma once +#include + +#include "rxmesh/cavity_manager.cuh" +#include "rxmesh/rxmesh_dynamic.h" + +#include "util.cuh" + +template +__global__ static void edge_split(rxmesh::Context context, + const rxmesh::VertexAttribute coords, + rxmesh::EdgeAttribute edge_status, + const T high_edge_len_sq, + const T low_edge_len_sq, + int* d_buffer) +{ + // EV for calc edge len + + using namespace rxmesh; + + auto block = cooperative_groups::this_thread_block(); + + ShmemAllocator shrd_alloc; + + 
CavityManager cavity( + block, context, shrd_alloc, true); + + + //__shared__ int s_num_splits; + // if (threadIdx.x == 0) { + // s_num_splits = 0; + //} + if (cavity.patch_id() == INVALID32) { + return; + } + Bitmask is_updated(cavity.patch_info().edges_capacity[0], shrd_alloc); + + uint32_t shmem_before = shrd_alloc.get_allocated_size_bytes(); + + auto should_split = [&](const EdgeHandle& eh, const VertexIterator& iter) { + // iter[0] and iter[2] are the edge two vertices + // iter[1] and iter[3] are the two opposite vertices + // 0 + // / | \ + // 3 | 1 + // \ | / + // 2 + assert(iter.size() == 4); + + if (edge_status(eh) == UNSEEN) { + const VertexHandle va = iter[0]; + const VertexHandle vb = iter[2]; + + const VertexHandle vc = iter[1]; + const VertexHandle vd = iter[3]; + + // don't split boundary edges + if (vc.is_valid() && vd.is_valid() && va.is_valid() && + vb.is_valid()) { + // degenerate cases + if (va == vb || vb == vc || vc == va || va == vd || vb == vd || + vc == vd) { + edge_status(eh) = OKAY; + return; + } + const vec3 pa(coords(va, 0), coords(va, 1), coords(va, 2)); + const vec3 pb(coords(vb, 0), coords(vb, 1), coords(vb, 2)); + + const T edge_len = glm::distance2(pa, pb); + + if (edge_len > high_edge_len_sq) { + + vec3 p_new = (pa + pb) * T(0.5); + + vec3 pc(coords(vc, 0), coords(vc, 1), coords(vc, 2)); + vec3 pd(coords(vd, 0), coords(vd, 1), coords(vd, 2)); + + T min_new_edge_len = std::numeric_limits::max(); + + min_new_edge_len = + std::max(min_new_edge_len, glm::distance2(p_new, pa)); + min_new_edge_len = + std::max(min_new_edge_len, glm::distance2(p_new, pb)); + min_new_edge_len = + std::max(min_new_edge_len, glm::distance2(p_new, pc)); + min_new_edge_len = + std::max(min_new_edge_len, glm::distance2(p_new, pd)); + + if (min_new_edge_len >= low_edge_len_sq) { + // printf("\n to_split = %u, %u", + // eh.patch_id(), + // eh.local_id()); + cavity.create(eh); + } else { + edge_status(eh) = OKAY; + } + } else { + edge_status(eh) = OKAY; + } + } 
+ } + }; + + Query query(context, cavity.patch_id()); + query.dispatch(block, shrd_alloc, should_split); + block.sync(); + + shrd_alloc.dealloc(shrd_alloc.get_allocated_size_bytes() - shmem_before); + + if (cavity.prologue(block, shrd_alloc, coords, edge_status)) { + + if (threadIdx.x == 0) { + printf("\n Patch %u cavity.prologue", cavity.patch_id()); + } + + is_updated.reset(block); + block.sync(); + + cavity.for_each_cavity(block, [&](uint16_t c, uint16_t size) { + assert(size == 4); + + const VertexHandle v0 = cavity.get_cavity_vertex(c, 0); + const VertexHandle v1 = cavity.get_cavity_vertex(c, 2); + + const VertexHandle new_v = cavity.add_vertex(); + + const EdgeHandle src = cavity.template get_creator(c); + // printf("\n src = %u, %u", src.patch_id(), src.local_id()); + + if (new_v.is_valid()) { + + coords(new_v, 0) = (coords(v0, 0) + coords(v1, 0)) * T(0.5); + coords(new_v, 1) = (coords(v0, 1) + coords(v1, 1)) * T(0.5); + coords(new_v, 2) = (coords(v0, 2) + coords(v1, 2)) * T(0.5); + + DEdgeHandle e0 = + cavity.add_edge(new_v, cavity.get_cavity_vertex(c, 0)); + const DEdgeHandle e_init = e0; + + if (e0.is_valid()) { + is_updated.set(e0.local_id(), true); + //::atomicAdd(&s_num_splits, 1); + for (uint16_t i = 0; i < size; ++i) { + const DEdgeHandle e = cavity.get_cavity_edge(c, i); + + // is_updated.set(e.local_id(), true); + + const DEdgeHandle e1 = + (i == size - 1) ? 
+ e_init.get_flip_dedge() : + cavity.add_edge( + cavity.get_cavity_vertex(c, i + 1), new_v); + if (!e1.is_valid()) { + break; + } + + is_updated.set(e1.local_id(), true); + + const FaceHandle f = cavity.add_face(e0, e, e1); + if (!f.is_valid()) { + break; + } + e0 = e1.get_flip_dedge(); + } + } + } + }); + } + + + cavity.epilogue(block); + block.sync(); + + + if (cavity.is_successful()) { + if (threadIdx.x == 0) { + printf("\n ** Patch %u passed", cavity.patch_id()); + } + // if (threadIdx.x == 0) { + // ::atomicAdd(d_buffer, s_num_splits); + //} + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + if (is_updated(eh.local_id())) { + edge_status(eh) = ADDED; + } + }); + } + + + //{ + // block.sync(); + // if (threadIdx.x == 0) { + // printf("\n done patch =%u, successful= %d", + // cavity.patch_id(), + // int(cavity.is_successful())); + // + // const PatchInfo& pi = cavity.patch_info(); + // for (uint16_t e = 0; e < pi.num_edges[0]; e++) { + // LocalEdgeT le(e); + // if (!pi.is_owned(le) && !pi.is_deleted(le)) { + // EdgeHandle eh = pi.template find(e); + // if (!eh.is_valid()) { + // printf( + // "\n $$$ B=%u, T= %u, found an edge - patch= %u, " + // "edge = %u", + // blockIdx.x, + // threadIdx.x, + // pi.patch_id, + // e); + // } + // } + // } + // } + // block.sync(); + //} + +} + + +template +inline void split_long_edges(rxmesh::RXMeshDynamic& rx, + rxmesh::VertexAttribute* coords, + rxmesh::EdgeAttribute* edge_status, + const T high_edge_len_sq, + const T low_edge_len_sq, + rxmesh::Timers timers, + int* d_buffer) +{ + using namespace rxmesh; + + constexpr uint32_t blockThreads = 512; + + + edge_status->reset(UNSEEN, DEVICE); + + int prv_remaining_work = rx.get_num_edges(); + + int num_outer_iter = 0; + int num_inner_iter = 0; + + timers.start("SplitTotal"); + + while (true) { + num_outer_iter++; + rx.reset_scheduler(); + + while (!rx.is_queue_empty()) { + num_inner_iter++; + + // RXMESH_INFO(" Queue size = {}", + // 
rx.get_context().m_patch_scheduler.size()); + + LaunchBox launch_box; + rx.prepare_launch_box({Op::EVDiamond}, + launch_box, + (void*)edge_split, + true, + false, + false, + false, + [&](uint32_t v, uint32_t e, uint32_t f) { + return detail::mask_num_bytes(e) + + ShmemAllocator::default_alignment; + }); + + timers.start("Split"); + edge_split + <<>>(rx.get_context(), + *coords, + *edge_status, + high_edge_len_sq, + low_edge_len_sq, + d_buffer); + CUDA_ERROR(cudaDeviceSynchronize()); + timers.stop("Split"); + + timers.start("SplitCleanup"); + rx.cleanup(); + CUDA_ERROR(cudaDeviceSynchronize()); + timers.stop("SplitCleanup"); + + timers.start("SplitSlice"); + rx.slice_patches(*coords, *edge_status); + CUDA_ERROR(cudaDeviceSynchronize()); + timers.stop("SplitSlice"); + + timers.start("SplitCleanup"); + rx.cleanup(); + CUDA_ERROR(cudaDeviceSynchronize()); + timers.stop("SplitCleanup"); + + bool show = true; + if (show) { + + rx.update_host(); + EXPECT_TRUE(rx.validate()); + + // screen_shot(rx, coords, "Split"); + // stats(rx); + + RXMESH_INFO(" "); + RXMESH_INFO("#Vertices {}", rx.get_num_vertices()); + RXMESH_INFO("#Edges {}", rx.get_num_edges()); + RXMESH_INFO("#Faces {}", rx.get_num_faces()); + RXMESH_INFO("#Patches {}", rx.get_num_patches()); + + coords->move(DEVICE, HOST); + edge_status->move(DEVICE, HOST); + rx.update_polyscope(); + auto ps_mesh = rx.get_polyscope_mesh(); + ps_mesh->updateVertexPositions(*coords); + ps_mesh->setEnabled(false); + + ps_mesh->addEdgeScalarQuantity("EdgeStatus", *edge_status); + + rx.render_vertex_patch(); + rx.render_edge_patch(); + rx.render_face_patch()->setEnabled(false); + + polyscope::show(); + } + } + + int remaining_work = is_done(rx, edge_status, d_buffer); + + if (remaining_work == 0 || prv_remaining_work == remaining_work) { + break; + } + prv_remaining_work = remaining_work; + // RXMESH_INFO("num_splits {}, time {}", + // num_splits, + // app_time + slice_time + cleanup_time); + } + timers.stop("SplitTotal"); + + // 
RXMESH_INFO("total num_splits {}", num_splits); + RXMESH_INFO("num_outer_iter {}", num_outer_iter); + RXMESH_INFO("num_inner_iter {}", num_inner_iter); + RXMESH_INFO("Split total time {} (ms)", + timers.elapsed_millis("SplitTotal")); + RXMESH_INFO("Split time {} (ms)", timers.elapsed_millis("Split")); + RXMESH_INFO("Split slice time {} (ms)", + timers.elapsed_millis("SplitSlice")); + RXMESH_INFO("Split cleanup time {} (ms)", + timers.elapsed_millis("SplitCleanup")); +} diff --git a/apps/Remesh/util.cuh b/apps/Remesh/util.cuh new file mode 100644 index 00000000..9b066a03 --- /dev/null +++ b/apps/Remesh/util.cuh @@ -0,0 +1,68 @@ +#pragma once + +#include "rxmesh/kernels/debug.cuh" +#include "rxmesh/query.cuh" + +#include "link_condition.cuh" + + +using EdgeStatus = int8_t; +enum : EdgeStatus +{ + UNSEEN = 0, // means we have not tested it before for e.g., split/flip/col + OKAY = 1, // means we have tested it and it is okay to skip + UPDATE = 2, // means we should update it i.e., we have tested it before + ADDED = 3, // means it has been added to during the split/flip/collapse +}; + + +template +__global__ static void stats_kernel(const rxmesh::Context context, + const rxmesh::VertexAttribute coords, + rxmesh::EdgeAttribute edge_len, + rxmesh::VertexAttribute vertex_valence) +{ + using namespace rxmesh; + + auto block = cooperative_groups::this_thread_block(); + + ShmemAllocator shrd_alloc; + + auto compute_edge_len = [&](const EdgeHandle eh, const VertexIterator& ev) { + const vec3 v0(coords(ev[0], 0), coords(ev[0], 1), coords(ev[0], 2)); + const vec3 v1(coords(ev[1], 0), coords(ev[1], 1), coords(ev[1], 2)); + + T len = glm::distance(v0, v1); + + edge_len(eh) = len; + }; + + Query query(context); + query.compute_vertex_valence(block, shrd_alloc); + query.dispatch(block, shrd_alloc, compute_edge_len); + + for_each_vertex(query.get_patch_info(), [&](const VertexHandle vh) { + vertex_valence(vh) = query.vertex_valence(vh); + }); +} + +int is_done(const 
rxmesh::RXMeshDynamic& rx, + const rxmesh::EdgeAttribute* edge_status, + int* d_buffer) +{ + using namespace rxmesh; + + // if there is at least one edge that is UNSEEN, then we are not done yet + CUDA_ERROR(cudaMemset(d_buffer, 0, sizeof(int))); + + rx.for_each_edge( + DEVICE, + [edge_status = *edge_status, d_buffer] __device__(const EdgeHandle eh) { + if (edge_status(eh) == UNSEEN || edge_status(eh) == UPDATE) { + ::atomicAdd(d_buffer, 1); + } + }); + + CUDA_ERROR(cudaDeviceSynchronize()); + return d_buffer[0]; +} diff --git a/include/rxmesh/util/timer.h b/include/rxmesh/util/timer.h index c5c06adb..7f2c4e98 100644 --- a/include/rxmesh/util/timer.h +++ b/include/rxmesh/util/timer.h @@ -1,8 +1,10 @@ #pragma once #include +#include #include "rxmesh/util/macros.h" + namespace rxmesh { struct GPUTimer @@ -65,4 +67,41 @@ struct CPUTimer std::chrono::high_resolution_clock::time_point m_start; std::chrono::high_resolution_clock::time_point m_stop; }; + +template +struct Timers +{ + Timers() = default; + ~Timers() = default; + + void add(std::string name) + { + m_timers.insert(std::make_pair(name, std::make_shared())); + m_total_time.insert(std::make_pair(name, 0)); + } + + void start(std::string name) + { + m_timers.at(name)->start(); + } + + void stop(std::string name) + { + m_timers.at(name)->stop(); + + float new_time = + m_total_time.at(name) + m_timers.at(name)->elapsed_millis(); + + m_total_time.insert_or_assign(name, new_time); + + } + + float elapsed_millis(std::string name) + { + return m_total_time.at(name); + } + + std::unordered_map> m_timers; + std::unordered_map m_total_time; +}; } // namespace rxmesh \ No newline at end of file From 5fd1bd32fc1aba696a4ce28a263ecc97a7871d6d Mon Sep 17 00:00:00 2001 From: ahmed Date: Wed, 31 Jul 2024 22:28:42 -0400 Subject: [PATCH 72/96] refactor remeshing app --- apps/Remesh/split.cuh | 25 ++++++++++++------------- include/rxmesh/rxmesh.cpp | 2 +- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git 
a/apps/Remesh/split.cuh b/apps/Remesh/split.cuh index c1fda94b..3c51bfd4 100644 --- a/apps/Remesh/split.cuh +++ b/apps/Remesh/split.cuh @@ -213,7 +213,6 @@ __global__ static void edge_split(rxmesh::Context context, // } // block.sync(); //} - } @@ -251,17 +250,17 @@ inline void split_long_edges(rxmesh::RXMeshDynamic& rx, // rx.get_context().m_patch_scheduler.size()); LaunchBox launch_box; - rx.prepare_launch_box({Op::EVDiamond}, - launch_box, - (void*)edge_split, - true, - false, - false, - false, - [&](uint32_t v, uint32_t e, uint32_t f) { - return detail::mask_num_bytes(e) + - ShmemAllocator::default_alignment; - }); + rx.update_launch_box({Op::EVDiamond}, + launch_box, + (void*)edge_split, + true, + false, + false, + false, + [&](uint32_t v, uint32_t e, uint32_t f) { + return detail::mask_num_bytes(e) + + ShmemAllocator::default_alignment; + }); timers.start("Split"); edge_split @@ -291,7 +290,7 @@ inline void split_long_edges(rxmesh::RXMeshDynamic& rx, CUDA_ERROR(cudaDeviceSynchronize()); timers.stop("SplitCleanup"); - bool show = true; + bool show = false; if (show) { rx.update_host(); diff --git a/include/rxmesh/rxmesh.cpp b/include/rxmesh/rxmesh.cpp index d3add26f..f132a6b0 100644 --- a/include/rxmesh/rxmesh.cpp +++ b/include/rxmesh/rxmesh.cpp @@ -22,7 +22,7 @@ RXMesh::RXMesh() m_input_max_edge_incident_faces(0), m_input_max_face_adjacent_faces(0), m_num_patches(0), - m_patch_size(512), + m_patch_size(256), m_is_input_edge_manifold(true), m_is_input_closed(true), m_h_vertex_prefix(nullptr), From a38012ff071b409fe2b6f3c180ae25fc998d4e76 Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 1 Aug 2024 10:03:39 -0400 Subject: [PATCH 73/96] bug fix in `identify_boundary_vertices` --- include/rxmesh/kernels/boundary.cuh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/rxmesh/kernels/boundary.cuh b/include/rxmesh/kernels/boundary.cuh index 44af7fce..0d255da4 100644 --- a/include/rxmesh/kernels/boundary.cuh +++ b/include/rxmesh/kernels/boundary.cuh @@ -19,6 
+19,8 @@ __global__ void identify_boundary_vertices(const Context context, ShmemAllocator shrd_alloc; Bitmask bd_e(query.get_patch_info().num_edges[0], shrd_alloc); + bd_e.reset(block); + block.sync(); auto boundary_edges = [&](EdgeHandle& e_id, const FaceIterator& iter) { if (iter.size() < 2) { From eb069e5d5958e416c2d3b93189a0409afa067ad7 Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 1 Aug 2024 20:43:48 -0400 Subject: [PATCH 74/96] SKIP --- apps/Remesh/collapse.cuh | 14 +++++++------- apps/Remesh/flip.cuh | 8 ++++---- apps/Remesh/split.cuh | 6 +++--- apps/Remesh/util.cuh | 2 +- apps/SurfaceTracking/collapser.cuh | 6 +++--- apps/SurfaceTracking/flipper.cuh | 4 ++-- apps/SurfaceTracking/splitter.cuh | 2 +- apps/SurfaceTracking/tracking_rxmesh.cuh | 2 +- 8 files changed, 22 insertions(+), 22 deletions(-) diff --git a/apps/Remesh/collapse.cuh b/apps/Remesh/collapse.cuh index 66f3899e..8340b1f1 100644 --- a/apps/Remesh/collapse.cuh +++ b/apps/Remesh/collapse.cuh @@ -103,7 +103,7 @@ __global__ static void __launch_bounds__(blockThreads) if (edge_mask(eh.local_id())) { cavity.create(eh); } else { - edge_status(eh) = OKAY; + edge_status(eh) = SKIP; } }); block.sync(); @@ -154,13 +154,13 @@ __global__ static void __launch_bounds__(blockThreads) // roll back cavity.recover(src); - // mark this edge as OKAY because 1) if all cavities in this + // mark this edge as SKIP because 1) if all cavities in this // patch are successful, then we want to indicate that this // edge is okay and should not be attempted again // 2) if we have to rollback all changes in this patch, we still // don't want to attempt this edge since we know that it creates // short edges - edge_status(src) = OKAY; + edge_status(src) = SKIP; } else { const VertexHandle new_v = cavity.add_vertex(); @@ -296,7 +296,7 @@ __global__ static void __launch_bounds__(blockThreads) // degenerate cases if (v0 == v1 || v0 == v2 || v0 == v3 || v1 == v2 || v1 == v3 || v2 == v3) { - edge_status(eh) = OKAY; + 
edge_status(eh) = SKIP; return; } @@ -370,7 +370,7 @@ __global__ static void __launch_bounds__(blockThreads) if (e_collapse(eh.local_id())) { cavity.create(eh); } else { - edge_status(eh) = OKAY; + edge_status(eh) = SKIP; } }); block.sync(); @@ -418,13 +418,13 @@ __global__ static void __launch_bounds__(blockThreads) // roll back cavity.recover(src); - // mark this edge as OKAY because 1) if all cavities in this + // mark this edge as SKIP because 1) if all cavities in this // patch are successful, then we want to indicate that this // edge is okay and should not be attempted again // 2) if we have to rollback all changes in this patch, we still // don't want to attempt this edge since we know that it creates // short edges - edge_status(src) = OKAY; + edge_status(src) = SKIP; } else { const VertexHandle new_v = cavity.add_vertex(); diff --git a/apps/Remesh/flip.cuh b/apps/Remesh/flip.cuh index 15c6ebbb..e67b6d6f 100644 --- a/apps/Remesh/flip.cuh +++ b/apps/Remesh/flip.cuh @@ -133,7 +133,7 @@ __global__ static void __launch_bounds__(blockThreads) if (edge_mask(eh.local_id())) { cavity.create(eh); } else { - edge_status(eh) = OKAY; + edge_status(eh) = SKIP; } }); block.sync(); @@ -235,7 +235,7 @@ __global__ static void __launch_bounds__(blockThreads) if (iter[0] == iter[1] || iter[0] == iter[2] || iter[0] == iter[3] || iter[1] == iter[2] || iter[1] == iter[3] || iter[2] == iter[3]) { - edge_status(eh) = OKAY; + edge_status(eh) = SKIP; return; } @@ -289,7 +289,7 @@ __global__ static void __launch_bounds__(blockThreads) } } } else { - edge_status(eh) = OKAY; + edge_status(eh) = SKIP; } } }; @@ -325,7 +325,7 @@ __global__ static void __launch_bounds__(blockThreads) if (e_flip(eh.local_id())) { cavity.create(eh); } else { - edge_status(eh) = OKAY; + edge_status(eh) = SKIP; } }); block.sync(); diff --git a/apps/Remesh/split.cuh b/apps/Remesh/split.cuh index 3c51bfd4..4f471984 100644 --- a/apps/Remesh/split.cuh +++ b/apps/Remesh/split.cuh @@ -60,7 +60,7 @@ __global__ 
static void edge_split(rxmesh::Context context, // degenerate cases if (va == vb || vb == vc || vc == va || va == vd || vb == vd || vc == vd) { - edge_status(eh) = OKAY; + edge_status(eh) = SKIP; return; } const vec3 pa(coords(va, 0), coords(va, 1), coords(va, 2)); @@ -92,10 +92,10 @@ __global__ static void edge_split(rxmesh::Context context, // eh.local_id()); cavity.create(eh); } else { - edge_status(eh) = OKAY; + edge_status(eh) = SKIP; } } else { - edge_status(eh) = OKAY; + edge_status(eh) = SKIP; } } } diff --git a/apps/Remesh/util.cuh b/apps/Remesh/util.cuh index 9b066a03..c19cbc11 100644 --- a/apps/Remesh/util.cuh +++ b/apps/Remesh/util.cuh @@ -10,7 +10,7 @@ using EdgeStatus = int8_t; enum : EdgeStatus { UNSEEN = 0, // means we have not tested it before for e.g., split/flip/col - OKAY = 1, // means we have tested it and it is okay to skip + SKIP = 1, // means we have tested it and it is okay to skip UPDATE = 2, // means we should update it i.e., we have tested it before ADDED = 3, // means it has been added to during the split/flip/collapse }; diff --git a/apps/SurfaceTracking/collapser.cuh b/apps/SurfaceTracking/collapser.cuh index 1589888a..4d2dba6f 100644 --- a/apps/SurfaceTracking/collapser.cuh +++ b/apps/SurfaceTracking/collapser.cuh @@ -162,7 +162,7 @@ __global__ static void __launch_bounds__(blockThreads) if (edge_mask(eh.local_id())) { cavity.create(eh); } else { - edge_status(eh) = OKAY; + edge_status(eh) = SKIP; } }); block.sync(); @@ -238,13 +238,13 @@ __global__ static void __launch_bounds__(blockThreads) if (is_bad) { // roll back cavity.recover(src); - // mark this edge as OKAY because 1) if all cavities in this + // mark this edge as SKIP because 1) if all cavities in this // patch are successful, then we want to indicate that this // edge is okay and should not be attempted again // 2) if we have to rollback all changes in this patch, we still // don't want to attempt this edge since we know that it creates // short edges - edge_status(src) = 
OKAY; + edge_status(src) = SKIP; } else { const VertexHandle new_v = cavity.add_vertex(); diff --git a/apps/SurfaceTracking/flipper.cuh b/apps/SurfaceTracking/flipper.cuh index 560b2ed9..583f3090 100644 --- a/apps/SurfaceTracking/flipper.cuh +++ b/apps/SurfaceTracking/flipper.cuh @@ -162,7 +162,7 @@ __global__ static void __launch_bounds__(blockThreads) // check if ah or bh is boundary if (is_vertex_bd(ah) == 1 || is_vertex_bd(bh) == 1) { - edge_status(eh) = OKAY; + edge_status(eh) = SKIP; return; } @@ -318,7 +318,7 @@ __global__ static void __launch_bounds__(blockThreads) if (edge_mask(eh.local_id())) { cavity.create(eh); } else { - edge_status(eh) = OKAY; + edge_status(eh) = SKIP; } }); block.sync(); diff --git a/apps/SurfaceTracking/splitter.cuh b/apps/SurfaceTracking/splitter.cuh index 49805781..fc0cb418 100644 --- a/apps/SurfaceTracking/splitter.cuh +++ b/apps/SurfaceTracking/splitter.cuh @@ -165,7 +165,7 @@ __global__ static void __launch_bounds__(blockThreads) cavity.create(eh); } } else { - edge_status(eh) = OKAY; + edge_status(eh) = SKIP; } } }; diff --git a/apps/SurfaceTracking/tracking_rxmesh.cuh b/apps/SurfaceTracking/tracking_rxmesh.cuh index b708e8af..a62ac64b 100644 --- a/apps/SurfaceTracking/tracking_rxmesh.cuh +++ b/apps/SurfaceTracking/tracking_rxmesh.cuh @@ -13,7 +13,7 @@ using EdgeStatus = int8_t; enum : EdgeStatus { UNSEEN = 0, // means we have not tested it before for e.g., split/flip/col - OKAY = 1, // means we have tested it and it is okay to skip + SKIP = 1, // means we have tested it and it is okay to skip UPDATE = 2, // means we should update it i.e., we have tested it before ADDED = 3, // means it has been added to during the split/flip/collapse }; From a2168ad015d798d8775ac45e740b28c1d7534c6b Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 1 Aug 2024 20:44:01 -0400 Subject: [PATCH 75/96] Pass abstraction --- include/rxmesh/pass.h | 112 +++++++++++++++++++++++++++++++++ include/rxmesh/rxmesh_static.h | 54 +++++++++++++++- 2 files 
changed, 165 insertions(+), 1 deletion(-) create mode 100644 include/rxmesh/pass.h diff --git a/include/rxmesh/pass.h b/include/rxmesh/pass.h new file mode 100644 index 00000000..83bf9b9f --- /dev/null +++ b/include/rxmesh/pass.h @@ -0,0 +1,112 @@ +#pragma once +#include + +#include "rxmesh/rxmesh_dynamic.h" +#include "rxmesh/util/macros.h" + +namespace rxmesh { + +enum class Status : uint8_t +{ + UNSEEN = 0, // means we have not tested/touched it before + SKIP = 1, // means we have tested it and it is okay to skip + UPDATE = 2, // means we should update it i.e., we have tested it before + ADDED = 3, // means it has been added to during the split/flip/collapse +}; + +/** + * @brief record a pass over all mesh elements of certain type + */ +template +struct Pass +{ + using HandleType = HandleT; + + __host__ Pass(RXMeshDynamic& rx) + { + CUDA_ERROR(cudaMalloc((void**)&m_d_counter, sizeof(uint32_t))); + m_status = *rx.add_attribute("rx:status", 1); + m_status.reset(Status::UNSEEN, DEVICE); + }; + + + __device__ __host__ Pass() = default; + __device__ __host__ Pass(const Pass& other) = default; + __device__ __host__ Pass(Pass&&) = default; + __device__ __host__ Pass& operator=(const Pass&) = default; + __device__ __host__ Pass& operator=(Pass&&) = default; + __device__ __host__ ~Pass() = default; + + __host__ uint32_t remaining_items(const RXMeshDynamic& rx) + { + using namespace rxmesh; + + // if there is at least one edge that is UNSEEN, then we are not done + // yet + CUDA_ERROR(cudaMemset(m_d_counter, 0, sizeof(uint32_t))); + + rx.for_each( + DEVICE, + [status = *m_status, + m_d_counter = d_counter] __device__(const HandleT eh) mutable { + if (status(eh) == Status::UNSEEN || + status(eh) == Status::UPDATE) { + ::atomicAdd(d_counter, 1); + } + }); + + uint32_t h_counter; + + CUDA_ERROR(cudaMemcpy( + &h_counter, m_d_counter, sizeof(uint32_t), cudaMemcpyDeviceToHost)); + + return h_counter; + } + + /** + * @brief reset the status UNSEEN + * @return + */ + __host__ 
void reset() + { + m_status.reset(Status::UNSEEN, DEVICE); + } + + /** + * @brief set the status of a given mesh element + * @return + */ + __host__ __device__ void set(const HandleT& h, Status status) + { + m_status(h) = status; + } + + /** + * @brief get the status of a given mesh element + */ + __host__ __device__ Status get(const HandleT& h) + { + return m_status(h); + } + + /** + * @brief return status attribute + */ + __host__ __device__ Attribute& get_status_attribute() + { + return m_status; + } + + /** + * @brief free internal memory + */ + __host__ void release() + { + GPU_FREE(m_d_counter); + } + + Attribute m_status; + uint32_t* m_d_counter; +}; + +} // namespace rxmesh \ No newline at end of file diff --git a/include/rxmesh/rxmesh_static.h b/include/rxmesh/rxmesh_static.h index 361084a9..d8e46667 100644 --- a/include/rxmesh/rxmesh_static.h +++ b/include/rxmesh/rxmesh_static.h @@ -612,6 +612,30 @@ class RXMeshStatic : public RXMesh } } + + /** + * @brief same as for_each_vertex/edge/face where the type is defined via + * template parameter + */ + template + void for_each(locationT location, + LambdaT apply, + cudaStream_t stream = NULL, + bool with_omp = true) + { + if constexpr (std::is_same_v) { + for_each_vertex(location, apply, stream, with_omp); + } + + if constexpr (std::is_same_v) { + for_each_edge(location, apply, stream, with_omp); + } + + if constexpr (std::is_same_v) { + for_each_face(location, apply, stream, with_omp); + } + } + /** * @brief populate the launch_box with grid size and dynamic shared memory * needed for kernel launch @@ -982,6 +1006,34 @@ class RXMeshStatic : public RXMesh return ret; } + /** + * @brief similar to add_vertex/edge/face_attribute where the mesh element + * type is defined via template parameter + * @return + */ + template + std::shared_ptr> add_attribute( + const std::string& name, + uint32_t num_attributes, + locationT location = LOCATION_ALL, + layoutT layout = SoA) + { + if constexpr (std::is_same_v) { + 
return add_vertex_attribute( + name, num_attributes, location, layout); + } + + if constexpr (std::is_same_v) { + return add_edge_attribute( + name, num_attributes, location, layout); + } + + if constexpr (std::is_same_v) { + return add_face_attribute( + name, num_attributes, location, layout); + } + } + /** * @brief Checks if an attribute exists given its name * @param name the attribute name @@ -1284,7 +1336,7 @@ class RXMeshStatic : public RXMesh * Paraview. The VTK supports visualizing attributes on vertices and faces. * Edge attributes are NOT supported. This function uses parameter pack such * that the user can call it with zero, one or move attributes (again should - * be either VertexAttribute or FaceAttribute). + * be either VertexAttribute or FaceAttribute). */ template void export_vtk(const std::string& filename, From 02eb715c8b68c0b249ca81206e0f9296f555a3db Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 1 Aug 2024 23:19:39 -0400 Subject: [PATCH 76/96] cleanup --- apps/Remesh/split.cuh | 80 +++++++++++++------------------------------ 1 file changed, 23 insertions(+), 57 deletions(-) diff --git a/apps/Remesh/split.cuh b/apps/Remesh/split.cuh index 4f471984..0df19a47 100644 --- a/apps/Remesh/split.cuh +++ b/apps/Remesh/split.cuh @@ -26,10 +26,6 @@ __global__ static void edge_split(rxmesh::Context context, block, context, shrd_alloc, true); - //__shared__ int s_num_splits; - // if (threadIdx.x == 0) { - // s_num_splits = 0; - //} if (cavity.patch_id() == INVALID32) { return; } @@ -78,18 +74,15 @@ __global__ static void edge_split(rxmesh::Context context, T min_new_edge_len = std::numeric_limits::max(); min_new_edge_len = - std::max(min_new_edge_len, glm::distance2(p_new, pa)); + std::min(min_new_edge_len, glm::distance2(p_new, pa)); min_new_edge_len = - std::max(min_new_edge_len, glm::distance2(p_new, pb)); + std::min(min_new_edge_len, glm::distance2(p_new, pb)); min_new_edge_len = - std::max(min_new_edge_len, glm::distance2(p_new, pc)); + 
std::min(min_new_edge_len, glm::distance2(p_new, pc)); min_new_edge_len = - std::max(min_new_edge_len, glm::distance2(p_new, pd)); + std::min(min_new_edge_len, glm::distance2(p_new, pd)); if (min_new_edge_len >= low_edge_len_sq) { - // printf("\n to_split = %u, %u", - // eh.patch_id(), - // eh.local_id()); cavity.create(eh); } else { edge_status(eh) = SKIP; @@ -109,10 +102,6 @@ __global__ static void edge_split(rxmesh::Context context, if (cavity.prologue(block, shrd_alloc, coords, edge_status)) { - if (threadIdx.x == 0) { - printf("\n Patch %u cavity.prologue", cavity.patch_id()); - } - is_updated.reset(block); block.sync(); @@ -133,6 +122,21 @@ __global__ static void edge_split(rxmesh::Context context, coords(new_v, 1) = (coords(v0, 1) + coords(v1, 1)) * T(0.5); coords(new_v, 2) = (coords(v0, 2) + coords(v1, 2)) * T(0.5); +#ifndef NDEBUG + // sanity check: we don't introduce small edges + const vec3 p_new( + coords(new_v, 0), coords(new_v, 1), coords(new_v, 2)); + + for (int i = 0; i < 4; ++i) { + + const VertexHandle v = cavity.get_cavity_vertex(c, i); + + const vec3 p(coords(v, 0), coords(v, 1), coords(v, 2)); + + assert(glm::distance2(p_new, p) >= low_edge_len_sq); + } +#endif + DEdgeHandle e0 = cavity.add_edge(new_v, cavity.get_cavity_vertex(c, 0)); const DEdgeHandle e_init = e0; @@ -171,48 +175,13 @@ __global__ static void edge_split(rxmesh::Context context, cavity.epilogue(block); block.sync(); - if (cavity.is_successful()) { - if (threadIdx.x == 0) { - printf("\n ** Patch %u passed", cavity.patch_id()); - } - // if (threadIdx.x == 0) { - // ::atomicAdd(d_buffer, s_num_splits); - //} for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { if (is_updated(eh.local_id())) { edge_status(eh) = ADDED; } }); } - - - //{ - // block.sync(); - // if (threadIdx.x == 0) { - // printf("\n done patch =%u, successful= %d", - // cavity.patch_id(), - // int(cavity.is_successful())); - // - // const PatchInfo& pi = cavity.patch_info(); - // for (uint16_t e = 0; e < 
pi.num_edges[0]; e++) { - // LocalEdgeT le(e); - // if (!pi.is_owned(le) && !pi.is_deleted(le)) { - // EdgeHandle eh = pi.template find(e); - // if (!eh.is_valid()) { - // printf( - // "\n $$$ B=%u, T= %u, found an edge - patch= %u, " - // "edge = %u", - // blockIdx.x, - // threadIdx.x, - // pi.patch_id, - // e); - // } - // } - // } - // } - // block.sync(); - //} } @@ -272,22 +241,19 @@ inline void split_long_edges(rxmesh::RXMeshDynamic& rx, high_edge_len_sq, low_edge_len_sq, d_buffer); - CUDA_ERROR(cudaDeviceSynchronize()); + timers.stop("Split"); timers.start("SplitCleanup"); - rx.cleanup(); - CUDA_ERROR(cudaDeviceSynchronize()); + rx.cleanup(); timers.stop("SplitCleanup"); timers.start("SplitSlice"); - rx.slice_patches(*coords, *edge_status); - CUDA_ERROR(cudaDeviceSynchronize()); + rx.slice_patches(*coords, *edge_status); timers.stop("SplitSlice"); timers.start("SplitCleanup"); - rx.cleanup(); - CUDA_ERROR(cudaDeviceSynchronize()); + rx.cleanup(); timers.stop("SplitCleanup"); bool show = false; From 07204adb4a6884179f8e737982575a3e1047ae02 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Fri, 2 Aug 2024 21:12:29 +0530 Subject: [PATCH 77/96] clean up code --- apps/ARAP/arap.cu | 141 ++-------------------------------------------- 1 file changed, 5 insertions(+), 136 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 4db06828..0cc69f6d 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -38,36 +38,6 @@ edge_cotan_weight(const rxmesh::VertexHandle& p_id, return std::max(0.f, weight); } - - -template -__global__ static void compute_edge_weights(const rxmesh::Context context, - rxmesh::VertexAttribute coords, - rxmesh::SparseMatrix A_mat) -{ - - auto vn_lambda = [&](VertexHandle vertex_id, VertexIterator& vv) - { - VertexHandle q_id = vv.back(); - - for (uint32_t v = 0; v < vv.size(); ++v) - { - VertexHandle r_id = vv[v]; - T e_weight = 0; - VertexHandle s_id = (v == vv.size() - 1) ? 
vv[0] : vv[v + 1]; - e_weight = edge_cotan_weight(vertex_id, r_id, q_id, s_id, coords); - A_mat(vertex_id, vv[v]) = e_weight; - A_mat(vertex_id, vv[v]) = 1; - } - - }; - - auto block = cooperative_groups::this_thread_block(); - Query query(context); - ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, vn_lambda); -} - template __global__ static void compute_edge_weights_evd(const rxmesh::Context context, rxmesh::VertexAttribute coords, @@ -79,9 +49,6 @@ __global__ static void compute_edge_weights_evd(const rxmesh::Context conte e_weight = edge_cotan_weight(vv[0], vv[2], vv[1], vv[3], coords); A_mat(vv[0], vv[2]) = e_weight; A_mat(vv[2], vv[0]) = e_weight; - - //A_mat(vv[0], vv[2]) = 1; - //A_mat(vv[2], vv[0]) = 1; }; @@ -108,10 +75,6 @@ __global__ static void edge_weight_values( query.dispatch(block, shrd_alloc, vn_lambda); } - - -//////// - __host__ __device__ Eigen::Matrix3f calculateSVD(Eigen::Matrix3f S) { Eigen::JacobiSVD svd(S); @@ -158,11 +121,6 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex S = S + w * pi_vector * pi_dash_vector.transpose(); } - // perform svd on S (eigen) - - - // R =VU - Eigen::Matrix3f U; // left singular vectors Eigen::Matrix3f V; // right singular vectors Eigen::Vector3f sing_val; // singular values @@ -191,12 +149,6 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex Eigen::Vector3 new_coords = {current_coords(v_id, 0), current_coords(v_id, 1), current_coords(v_id, 2)}; - /* new_coords = R * new_coords; - - current_coords(v_id, 0) = new_coords[0]; - current_coords(v_id, 1) = new_coords[1]; - current_coords(v_id, 2) = new_coords[2]; - */ }; auto block = cooperative_groups::this_thread_block(); @@ -220,30 +172,11 @@ __global__ static void test_input(const rxmesh::Context context, current_coords(v_id, 0) = ref_coords(v_id, 0); current_coords(v_id, 1) = ref_coords(v_id, 1); current_coords(v_id, 2) = ref_coords(v_id, 2); - /* - if (current_coords(v_id,1)>1.35) { - 
current_coords(v_id, 1) = current_coords(v_id, 1) + displacement.x(); - current_coords(v_id, 0) =current_coords(v_id, 0) + displacement.z(); - current_coords(v_id, 2) =current_coords(v_id, 2) + displacement.y(); - constrained(v_id, 0) = 1; - } - else { - if (current_coords(v_id, 1) < -0.34) { - constrained(v_id, 0) = 2; - //current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; - - } - else - constrained(v_id, 0) = 0; - } - */ + //Spot specific input if (current_coords(v_id, 0) < -0.15) { - //current_coords(v_id, 0) =current_coords(v_id, 0) + displacement.x(); - //current_coords(v_id, 1) =current_coords(v_id, 1) + displacement.y(); - //current_coords(v_id, 2) =current_coords(v_id, 2) + displacement.z(); constrained(v_id, 0) = 2; } @@ -257,8 +190,6 @@ __global__ static void test_input(const rxmesh::Context context, else { if (current_coords(v_id, 0) < -0.2) { constrained(v_id, 0) = 0; - // current_coords(v_id, 0) = current_coords(v_id, 0) + 0.25; - } else constrained(v_id, 0) = 0; } @@ -347,8 +278,8 @@ __global__ static void calculate_b( template __global__ static void calculate_system_matrix( const rxmesh::Context context, - rxmesh::SparseMatrix weight_mat, // [num_coord, num_coord] - rxmesh::SparseMatrix L, // [num_coord, num_coord] + rxmesh::SparseMatrix weight_mat, + rxmesh::SparseMatrix L, rxmesh::VertexAttribute constrained) { @@ -392,8 +323,6 @@ int main(int argc, char** argv) cuda_query(device_id); RXMeshStatic rx(STRINGIFY(INPUT_DIR) "spot_low_resolution.obj"); - //RXMeshStatic rx(STRINGIFY(INPUT_DIR) "bumpycube.obj"); - Eigen::Vector3f displacement(1, 1, 1); @@ -485,45 +414,6 @@ int main(int argc, char** argv) //how many times will arap algorithm run? 
int iterations = 1; - /* - for (int i=0;i - <<>>(rx.get_context(), - ref_vertex_pos, - *changed_vertex_pos, - rot_mat, - weight_matrix); - - //changed_vertex_pos->move(DEVICE, HOST); - calculate_b - <<>>(rx.get_context(), - ref_vertex_pos, - *changed_vertex_pos, - rot_mat, - weight_matrix, - bMatrix, - *constraints); - - bMatrix.move(DEVICE, HOST); - - X_mat = changed_vertex_pos->to_matrix(); - //systemMatrix.solve(bMatrix, *X_mat, Solver::LU, PermuteMethod::NSTDIS); - systemMatrix.solve(bMatrix, *X_mat, Solver::QR, PermuteMethod::NSTDIS); - X_mat->move(DEVICE, HOST); - changed_vertex_pos->from_matrix(X_mat.get()); - - - } - // visualize new position - rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); - - */ rx.get_polyscope_mesh()->addVertexScalarQuantity("fixedVertices", *constraints); @@ -544,10 +434,7 @@ int main(int argc, char** argv) if (t > 1.0) flag = 1; } - displacement = (1 - t) * start + - (t) * end; - - //displacement = Eigen::Vector3f(0, 0, 0); + displacement = (1 - t) * start + (t) * end; test_input <<move(rxmesh::DEVICE, rxmesh::HOST); - -// copy the results to attributes -// changed_vertex_pos->from_matrix(X_mat.get()); \ No newline at end of file +} \ No newline at end of file From 7e16c87849a3503edc72890a61b516d427a468bb Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Fri, 2 Aug 2024 23:25:21 +0530 Subject: [PATCH 78/96] error causing code --- apps/SCP/scp.cu | 172 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 171 insertions(+), 1 deletion(-) diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index 3f642cf5..56069355 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -6,6 +6,103 @@ using namespace rxmesh; +template +__global__ static void compute_area_matrix( + const rxmesh::Context context, + rxmesh::VertexAttribute boundaryVertices, + rxmesh::SparseMatrix AreaMatrix) +{ + + auto vn_lambda = [&](EdgeHandle edge_id, VertexIterator& vv) { + if (boundaryVertices(vv[0], 0) == 1 && boundaryVertices(vv[1], 0) == 
1) { + AreaMatrix(vv[0], vv[1]) = make_cuComplex(0, -0.25); // modify later + AreaMatrix(vv[1], vv[0]) = make_cuComplex(0, 0.25); + } + }; + + auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, vn_lambda); +} + +template +__device__ __forceinline__ T +edge_cotan_weight(const rxmesh::VertexHandle& p_id, + const rxmesh::VertexHandle& r_id, + const rxmesh::VertexHandle& q_id, + const rxmesh::VertexHandle& s_id, + const rxmesh::VertexAttribute& X) +{ + // Get the edge weight between the two vertices p-r where + // q and s composes the diamond around p-r + + const vec3 p(X(p_id, 0), X(p_id, 1), X(p_id, 2)); + const vec3 r(X(r_id, 0), X(r_id, 1), X(r_id, 2)); + const vec3 q(X(q_id, 0), X(q_id, 1), X(q_id, 2)); + const vec3 s(X(s_id, 0), X(s_id, 1), X(s_id, 2)); + + // cotans[(v1, v2)] =np.dot(e1, e2) / np.linalg.norm(np.cross(e1, e2)) + + float weight = 0; + if (q_id.is_valid()) + weight += dot((p - q), (r - q)) / length(cross(p - q, r - q)); + if (s_id.is_valid()) + weight += dot((p - s), (r - s)) / length(cross(p - s, r - s)); + weight /= 2; + return std::max(0.f, weight); +} + + +template +__global__ static void compute_edge_weights_evd( + const rxmesh::Context context, + rxmesh::VertexAttribute coords, + rxmesh::SparseMatrix A_mat) +{ + + auto vn_lambda = [&](EdgeHandle edge_id, VertexIterator& vv) { + T e_weight = 0; + e_weight = edge_cotan_weight(vv[0], vv[2], vv[1], vv[3], coords); + A_mat(vv[0], vv[2]) = e_weight; + A_mat(vv[2], vv[0]) = e_weight; + }; + + auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, vn_lambda); +} + +template +__global__ static void calculate_Ld_matrix( + const rxmesh::Context context, + rxmesh::SparseMatrix weight_mat, // [num_coord, num_coord] + rxmesh::SparseMatrix Ld // [num_coord, num_coord] +) + +{ + auto init_lambda = [&](VertexHandle v_id, 
VertexIterator& vv) { + Ld(v_id, v_id) = make_cuComplex(0, 0); + + for (int nei_index = 0; nei_index < vv.size(); nei_index++) + Ld(v_id, vv[nei_index]) = make_cuComplex(0, 0); + + for (int nei_index = 0; nei_index < vv.size(); nei_index++) { + //Ld(v_id, v_id) += make_cuComplex(weight_mat(v_id, vv[nei_index]), + // weight_mat(v_id, vv[nei_index])); + + Ld(v_id, vv[nei_index]) = + cuCsubf(Ld(v_id, vv[nei_index]),make_cuComplex(weight_mat(v_id, vv[nei_index]), 0)); + } + }; + + auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, init_lambda); +} + int main(int argc, char** argv) { Log::init(); @@ -13,7 +110,80 @@ int main(int argc, char** argv) const uint32_t device_id = 0; cuda_query(device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "sphere3.obj"); + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "bunnyhead.obj"); + + SparseMatrix Ld(rx); // complex V x V + + SparseMatrix A(rx); // 2V x 2V + + auto boundaryVertices = + *rx.add_vertex_attribute("boundaryVertices", 1); + + auto coords = *rx.get_input_vertex_coordinates(); + + rx.get_boundary_vertices( + boundaryVertices); // 0 or 1 value for boundary vertex + + // identify boundary edge (vv query) + // v1 is central; v2 is on boundary + + + + //for matrix calls + constexpr uint32_t CUDABlockSize = 256; + + rxmesh::LaunchBox launch_box_area; + rx.prepare_launch_box({rxmesh::Op:: EV}, + launch_box_area, + (void*)compute_area_matrix); + + compute_area_matrix + <<>>( + rx.get_context(), boundaryVertices, A); + + + + SparseMatrix weight_matrix(rx); + + // obtain cotangent weight matrix + rxmesh::LaunchBox launch_box; + rx.prepare_launch_box( + {rxmesh::Op::EVDiamond}, + launch_box, + (void*)compute_edge_weights_evd); + + compute_edge_weights_evd + <<>>( + rx.get_context(), coords, weight_matrix); + + + rxmesh::LaunchBox launch_box_ld; + rx.prepare_launch_box( + {rxmesh::Op::VV}, + launch_box_ld, + (void*)calculate_Ld_matrix); + + 
calculate_Ld_matrix + <<>>( + rx.get_context(), weight_matrix, Ld); + + + + + + + //vertex_normals->move(rxmesh::DEVICE, rxmesh::HOST); + + + + //Lc + #if USE_POLYSCOPE polyscope::show(); From 55171332f8444f86038ac053fd8421197e7e9e12 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Fri, 2 Aug 2024 23:48:17 +0530 Subject: [PATCH 79/96] working matrix set up --- apps/SCP/scp.cu | 59 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index 56069355..f07adc80 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -31,7 +31,7 @@ __device__ __forceinline__ T edge_cotan_weight(const rxmesh::VertexHandle& p_id, const rxmesh::VertexHandle& r_id, const rxmesh::VertexHandle& q_id, - const rxmesh::VertexHandle& s_id, + //const rxmesh::VertexHandle& s_id, const rxmesh::VertexAttribute& X) { // Get the edge weight between the two vertices p-r where @@ -40,20 +40,21 @@ edge_cotan_weight(const rxmesh::VertexHandle& p_id, const vec3 p(X(p_id, 0), X(p_id, 1), X(p_id, 2)); const vec3 r(X(r_id, 0), X(r_id, 1), X(r_id, 2)); const vec3 q(X(q_id, 0), X(q_id, 1), X(q_id, 2)); - const vec3 s(X(s_id, 0), X(s_id, 1), X(s_id, 2)); + //const vec3 s(X(s_id, 0), X(s_id, 1), X(s_id, 2)); // cotans[(v1, v2)] =np.dot(e1, e2) / np.linalg.norm(np.cross(e1, e2)) float weight = 0; if (q_id.is_valid()) weight += dot((p - q), (r - q)) / length(cross(p - q, r - q)); - if (s_id.is_valid()) - weight += dot((p - s), (r - s)) / length(cross(p - s, r - s)); - weight /= 2; + + //weight /= 2; return std::max(0.f, weight); } + + template __global__ static void compute_edge_weights_evd( const rxmesh::Context context, @@ -63,9 +64,16 @@ __global__ static void compute_edge_weights_evd( auto vn_lambda = [&](EdgeHandle edge_id, VertexIterator& vv) { T e_weight = 0; - e_weight = edge_cotan_weight(vv[0], vv[2], vv[1], vv[3], coords); + + if (vv[1].is_valid()) + e_weight += edge_cotan_weight(vv[0], vv[2], vv[1], coords); + if 
(vv[3].is_valid()) + e_weight += edge_cotan_weight(vv[0], vv[2], vv[3], coords); + //if (vv[1].is_valid() && vv[3].is_valid()) + e_weight /= 4; A_mat(vv[0], vv[2]) = e_weight; A_mat(vv[2], vv[0]) = e_weight; + }; auto block = cooperative_groups::this_thread_block(); @@ -80,7 +88,6 @@ __global__ static void calculate_Ld_matrix( rxmesh::SparseMatrix weight_mat, // [num_coord, num_coord] rxmesh::SparseMatrix Ld // [num_coord, num_coord] ) - { auto init_lambda = [&](VertexHandle v_id, VertexIterator& vv) { Ld(v_id, v_id) = make_cuComplex(0, 0); @@ -89,8 +96,10 @@ __global__ static void calculate_Ld_matrix( Ld(v_id, vv[nei_index]) = make_cuComplex(0, 0); for (int nei_index = 0; nei_index < vv.size(); nei_index++) { - //Ld(v_id, v_id) += make_cuComplex(weight_mat(v_id, vv[nei_index]), - // weight_mat(v_id, vv[nei_index])); + Ld(v_id, v_id) = + cuCaddf(Ld(v_id, v_id), + make_cuComplex(weight_mat(v_id, vv[nei_index]), + weight_mat(v_id, vv[nei_index]))); Ld(v_id, vv[nei_index]) = cuCsubf(Ld(v_id, vv[nei_index]),make_cuComplex(weight_mat(v_id, vv[nei_index]), 0)); @@ -103,6 +112,26 @@ __global__ static void calculate_Ld_matrix( query.dispatch(block, shrd_alloc, init_lambda); } +template +__global__ static void subtract_matrix(const rxmesh::Context context, + rxmesh::SparseMatrix A_mat, + rxmesh::SparseMatrix B_mat, + rxmesh::SparseMatrix C_mat) +{ + + auto subtract = [&](VertexHandle v_id, VertexIterator& vv) { + for (int i = 0; i < vv.size(); ++i) { + A_mat(v_id, vv[i]) = + cuCsubf(B_mat(v_id, vv[i]), C_mat(v_id, vv[i])); + } + }; + + auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, subtract); +} + int main(int argc, char** argv) { Log::init(); @@ -174,7 +203,17 @@ int main(int argc, char** argv) rx.get_context(), weight_matrix, Ld); - + SparseMatrix Lc(rx); + rxmesh::LaunchBox launch_box_lc; + rx.prepare_launch_box({rxmesh::Op::VV}, + launch_box_ld, + (void*)subtract_matrix); + + 
subtract_matrix + <<>> + (rx.get_context(), Lc, Ld, A); From 0df4460058b1da933660018a0d4a46f575b3936c Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Fri, 2 Aug 2024 23:49:10 +0530 Subject: [PATCH 80/96] reorganise --- apps/SCP/scp.cu | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index f07adc80..427acb6b 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -141,26 +141,27 @@ int main(int argc, char** argv) RXMeshStatic rx(STRINGIFY(INPUT_DIR) "bunnyhead.obj"); - SparseMatrix Ld(rx); // complex V x V - - SparseMatrix A(rx); // 2V x 2V - + auto boundaryVertices = *rx.add_vertex_attribute("boundaryVertices", 1); - auto coords = *rx.get_input_vertex_coordinates(); rx.get_boundary_vertices( - boundaryVertices); // 0 or 1 value for boundary vertex - - // identify boundary edge (vv query) - // v1 is central; v2 is on boundary + boundaryVertices); //for matrix calls constexpr uint32_t CUDABlockSize = 256; + + SparseMatrix Ld(rx); // complex V x V + + SparseMatrix A(rx); // 2V x 2V + + auto coords = *rx.get_input_vertex_coordinates(); + + rxmesh::LaunchBox launch_box_area; rx.prepare_launch_box({rxmesh::Op:: EV}, launch_box_area, From 0dee66cc87d5f2aea3d13b5a125be1eb25333af0 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Fri, 2 Aug 2024 23:55:25 +0530 Subject: [PATCH 81/96] successfully compiling combined code --- apps/SCP/scp.cu | 63 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index 427acb6b..6751c4dd 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -103,6 +103,8 @@ __global__ static void calculate_Ld_matrix( Ld(v_id, vv[nei_index]) = cuCsubf(Ld(v_id, vv[nei_index]),make_cuComplex(weight_mat(v_id, vv[nei_index]), 0)); + + } }; @@ -216,13 +218,70 @@ int main(int argc, char** argv) launch_box_ld.smem_bytes_dyn>>> (rx.get_context(), Lc, Ld, A); + int number_of_vertices = 
rx.get_num_vertices(); + + + DenseMatrix eb(rx, number_of_vertices, 1); + DenseMatrix u(rx, number_of_vertices, 1); + DenseMatrix T1(rx, number_of_vertices, 1); + + DenseMatrix y(rx, number_of_vertices, 1); + + + SparseMatrix B(rx); + + uint32_t num_bd_vertices = 0; + + rx.for_each_vertex( + HOST, + [&](const VertexHandle& vh) { + if (boundaryVertices(vh)) { + num_bd_vertices++; + } + }, + NULL, + false); + + + rx.for_each_vertex(rxmesh::DEVICE, + [B, eb, boundaryVertices, num_bd_vertices] __device__( + const rxmesh::VertexHandle vh) mutable { + eb(vh, 0) = make_cuComplex( + boundaryVertices(vh, 0) / num_bd_vertices, 0.0f); + B(vh, vh) = + make_cuComplex(boundaryVertices(vh, 0), 0.0f); + }); + + B.move(rxmesh::DEVICE, rxmesh::HOST); + eb.move(rxmesh::DEVICE, rxmesh::HOST); + + // + // S = [B- (1/Vb) * ebebT]; + + cuComplex T2 = eb.dot(u); + + B.multiply(u, T1); + + eb.multiply(T2); + + rx.for_each_vertex( + rxmesh::DEVICE, + [eb, B, T2, T1] __device__(const rxmesh::VertexHandle vh) mutable { + T1(vh, 0) = cuCsubf(T1(vh, 0), eb(vh, 0)); + }); + + Lc.pre_solve(PermuteMethod::NSTDIS); // can be outside the loop + Lc.solve(T1, y); // Ly=T1 + y.multiply(1 / y.norm2()); + u.copy_from(y); - //vertex_normals->move(rxmesh::DEVICE, rxmesh::HOST); + // conversion step + rx.get_polyscope_mesh()->addVertexScalarQuantity("vBoundary", + boundaryVertices); - //Lc #if USE_POLYSCOPE From 27a979fb000ed7ed9f42cb65fe109ef9cd8ebd02 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Sat, 3 Aug 2024 00:12:15 +0530 Subject: [PATCH 82/96] fill u with initial random guess --- apps/SCP/scp.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index 6751c4dd..16299094 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -91,7 +91,6 @@ __global__ static void calculate_Ld_matrix( { auto init_lambda = [&](VertexHandle v_id, VertexIterator& vv) { Ld(v_id, v_id) = make_cuComplex(0, 0); - for (int nei_index = 0; nei_index < vv.size(); nei_index++) 
Ld(v_id, vv[nei_index]) = make_cuComplex(0, 0); @@ -232,6 +231,9 @@ int main(int argc, char** argv) uint32_t num_bd_vertices = 0; + u.fill_random(); + + rx.for_each_vertex( HOST, [&](const VertexHandle& vh) { From 76b3b5506018fdcb867012b790d76ff1872abe1c Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Sat, 3 Aug 2024 00:26:32 +0530 Subject: [PATCH 83/96] conversion step --- apps/SCP/scp.cu | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index 16299094..9e7e196c 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -231,8 +231,6 @@ int main(int argc, char** argv) uint32_t num_bd_vertices = 0; - u.fill_random(); - rx.for_each_vertex( HOST, @@ -259,27 +257,48 @@ int main(int argc, char** argv) // // S = [B- (1/Vb) * ebebT]; + u.fill_random(); - cuComplex T2 = eb.dot(u); + int iterations=1; - B.multiply(u, T1); + for (int i = 0; i < iterations; i++) { + cuComplex T2 = eb.dot(u); - eb.multiply(T2); + B.multiply(u, T1); - rx.for_each_vertex( - rxmesh::DEVICE, - [eb, B, T2, T1] __device__(const rxmesh::VertexHandle vh) mutable { - T1(vh, 0) = cuCsubf(T1(vh, 0), eb(vh, 0)); - }); + eb.multiply(T2); + + rx.for_each_vertex( + rxmesh::DEVICE, + [eb, B, T2, T1] __device__(const rxmesh::VertexHandle vh) mutable { + T1(vh, 0) = cuCsubf(T1(vh, 0), eb(vh, 0)); + }); + + // Lc.pre_solve(PermuteMethod::NSTDIS); // can be outside the loop + // Lc.solve(T1, y); // Ly=T1 - Lc.pre_solve(PermuteMethod::NSTDIS); // can be outside the loop - Lc.solve(T1, y); // Ly=T1 - y.multiply(1 / y.norm2()); - u.copy_from(y); + Lc.solve(T1, y, Solver::QR, PermuteMethod::NSTDIS); + y.multiply(1 / y.norm2()); + u.copy_from(y); + } // conversion step + + auto parametric_coordinates = *rx.add_vertex_attribute("pCoords", 2); + + rx.for_each_vertex(rxmesh::DEVICE, + [u,parametric_coordinates] __device__( + const rxmesh::VertexHandle vh) mutable { + parametric_coordinates(vh, 0) = u(vh, 1).x; + 
parametric_coordinates(vh, 1) = u(vh, 1).y; + }); + + + rx.get_polyscope_mesh()->addVertexParameterizationQuantity( + "pCoords", parametric_coordinates); + rx.get_polyscope_mesh()->addVertexScalarQuantity("vBoundary", boundaryVertices); From f14db080d7e8d3079ceaa7d8cabc9d4f3170096c Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Sat, 3 Aug 2024 01:46:12 +0530 Subject: [PATCH 84/96] small fixes --- apps/SCP/scp.cu | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index 9e7e196c..8638e491 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -15,7 +15,7 @@ __global__ static void compute_area_matrix( auto vn_lambda = [&](EdgeHandle edge_id, VertexIterator& vv) { if (boundaryVertices(vv[0], 0) == 1 && boundaryVertices(vv[1], 0) == 1) { - AreaMatrix(vv[0], vv[1]) = make_cuComplex(0, -0.25); // modify later + AreaMatrix(vv[0], vv[1]) = make_cuComplex(0, -0.25); AreaMatrix(vv[1], vv[0]) = make_cuComplex(0, 0.25); } }; @@ -97,8 +97,8 @@ __global__ static void calculate_Ld_matrix( for (int nei_index = 0; nei_index < vv.size(); nei_index++) { Ld(v_id, v_id) = cuCaddf(Ld(v_id, v_id), - make_cuComplex(weight_mat(v_id, vv[nei_index]), - weight_mat(v_id, vv[nei_index]))); + make_cuComplex(weight_mat(v_id, vv[nei_index]),0)); +// weight_mat(v_id, vv[nei_index]))); Ld(v_id, vv[nei_index]) = cuCsubf(Ld(v_id, vv[nei_index]),make_cuComplex(weight_mat(v_id, vv[nei_index]), 0)); @@ -208,13 +208,13 @@ int main(int argc, char** argv) SparseMatrix Lc(rx); rxmesh::LaunchBox launch_box_lc; rx.prepare_launch_box({rxmesh::Op::VV}, - launch_box_ld, + launch_box_lc, (void*)subtract_matrix); subtract_matrix - <<>> + <<>> (rx.get_context(), Lc, Ld, A); int number_of_vertices = rx.get_num_vertices(); @@ -258,31 +258,32 @@ int main(int argc, char** argv) // // S = [B- (1/Vb) * ebebT]; u.fill_random(); + Lc.pre_solve(PermuteMethod::NSTDIS); // can be outside the loop - int iterations=1; + int 
iterations=32; for (int i = 0; i < iterations; i++) { cuComplex T2 = eb.dot(u); B.multiply(u, T1); - eb.multiply(T2); + //eb.multiply(T2); rx.for_each_vertex( rxmesh::DEVICE, [eb, B, T2, T1] __device__(const rxmesh::VertexHandle vh) mutable { - T1(vh, 0) = cuCsubf(T1(vh, 0), eb(vh, 0)); + T1(vh, 0) = cuCsubf(T1(vh, 0), cuCmulf(eb(vh, 0),T2)); }); - // Lc.pre_solve(PermuteMethod::NSTDIS); // can be outside the loop - // Lc.solve(T1, y); // Ly=T1 + // + Lc.solve(T1, y); // Ly=T1 - Lc.solve(T1, y, Solver::QR, PermuteMethod::NSTDIS); + //Lc.solve(T1, y, Solver::QR, PermuteMethod::NSTDIS); y.multiply(1 / y.norm2()); - u.copy_from(y); } + u.copy_from(y); // conversion step @@ -291,10 +292,19 @@ int main(int argc, char** argv) rx.for_each_vertex(rxmesh::DEVICE, [u,parametric_coordinates] __device__( const rxmesh::VertexHandle vh) mutable { - parametric_coordinates(vh, 0) = u(vh, 1).x; - parametric_coordinates(vh, 1) = u(vh, 1).y; + parametric_coordinates(vh, 0) = u(vh, 0).x; + parametric_coordinates(vh, 1) = u(vh, 0).y; }); + parametric_coordinates.move(DEVICE, HOST); + + //calculate cntre, shift mesh by centre (translate back) + //divide maximum value irrespective of axis (abs max value verte)- divide by all coordinates + //u,v is always [(0,0),(1,1)] + + + + rx.get_polyscope_mesh()->addVertexParameterizationQuantity( "pCoords", parametric_coordinates); From b74d29ac1c04f8d12db8ce5be944b593a4c2c5b4 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Sun, 4 Aug 2024 19:05:11 +0530 Subject: [PATCH 85/96] T2 always comes out as 0 --- apps/SCP/scp.cu | 59 ++++++++++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index 8638e491..44887056 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -13,17 +13,29 @@ __global__ static void compute_area_matrix( rxmesh::SparseMatrix AreaMatrix) { - auto vn_lambda = [&](EdgeHandle edge_id, VertexIterator& vv) { - if (boundaryVertices(vv[0], 0) == 1 && 
boundaryVertices(vv[1], 0) == 1) { + auto vn_lambda = [&](FaceHandle face_id, VertexIterator& vv) { + if (boundaryVertices(vv[0], 0) == 1 && boundaryVertices(vv[1], 0) == 1) + { AreaMatrix(vv[0], vv[1]) = make_cuComplex(0, -0.25); AreaMatrix(vv[1], vv[0]) = make_cuComplex(0, 0.25); } + if (boundaryVertices(vv[1], 0) == 1 && + boundaryVertices(vv[2], 0) == 1) { + AreaMatrix(vv[1], vv[2]) = make_cuComplex(0, -0.25); + AreaMatrix(vv[2], vv[1]) = make_cuComplex(0, 0.25); + } + if (boundaryVertices(vv[2], 0) == 1 && + boundaryVertices(vv[0], 0) == 1) { + AreaMatrix(vv[2], vv[0]) = make_cuComplex(0, -0.25); + AreaMatrix(vv[0], vv[2]) = make_cuComplex(0, 0.25); + } + }; auto block = cooperative_groups::this_thread_block(); Query query(context); ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, vn_lambda); + query.dispatch(block, shrd_alloc, vn_lambda); } template @@ -71,8 +83,8 @@ __global__ static void compute_edge_weights_evd( e_weight += edge_cotan_weight(vv[0], vv[2], vv[3], coords); //if (vv[1].is_valid() && vv[3].is_valid()) e_weight /= 4; - A_mat(vv[0], vv[2]) = e_weight; - A_mat(vv[2], vv[0]) = e_weight; + A_mat(vv[0], vv[2]) = 1; + A_mat(vv[2], vv[0]) = 1; }; @@ -95,15 +107,16 @@ __global__ static void calculate_Ld_matrix( Ld(v_id, vv[nei_index]) = make_cuComplex(0, 0); for (int nei_index = 0; nei_index < vv.size(); nei_index++) { + Ld(v_id, v_id) = cuCaddf(Ld(v_id, v_id), make_cuComplex(weight_mat(v_id, vv[nei_index]),0)); // weight_mat(v_id, vv[nei_index]))); + if (v_id != vv[nei_index]) Ld(v_id, vv[nei_index]) = - cuCsubf(Ld(v_id, vv[nei_index]),make_cuComplex(weight_mat(v_id, vv[nei_index]), 0)); - - + cuCsubf(Ld(v_id, vv[nei_index]), + make_cuComplex(weight_mat(v_id, vv[nei_index]), 0)); } }; @@ -142,29 +155,23 @@ int main(int argc, char** argv) RXMeshStatic rx(STRINGIFY(INPUT_DIR) "bunnyhead.obj"); - auto boundaryVertices = *rx.add_vertex_attribute("boundaryVertices", 1); - rx.get_boundary_vertices( boundaryVertices); - - //for matrix calls 
constexpr uint32_t CUDABlockSize = 256; - SparseMatrix Ld(rx); // complex V x V SparseMatrix A(rx); // 2V x 2V auto coords = *rx.get_input_vertex_coordinates(); - rxmesh::LaunchBox launch_box_area; - rx.prepare_launch_box({rxmesh::Op:: EV}, + rx.prepare_launch_box({rxmesh::Op:: FV}, launch_box_area, (void*)compute_area_matrix); @@ -174,8 +181,6 @@ int main(int argc, char** argv) launch_box_area.smem_bytes_dyn>>>( rx.get_context(), boundaryVertices, A); - - SparseMatrix weight_matrix(rx); // obtain cotangent weight matrix @@ -190,8 +195,7 @@ int main(int argc, char** argv) launch_box.num_threads, launch_box.smem_bytes_dyn>>>( rx.get_context(), coords, weight_matrix); - - + rxmesh::LaunchBox launch_box_ld; rx.prepare_launch_box( {rxmesh::Op::VV}, @@ -205,7 +209,7 @@ int main(int argc, char** argv) rx.get_context(), weight_matrix, Ld); - SparseMatrix Lc(rx); + SparseMatrix Lc(rx); rxmesh::LaunchBox launch_box_lc; rx.prepare_launch_box({rxmesh::Op::VV}, launch_box_lc, @@ -262,8 +266,17 @@ int main(int argc, char** argv) int iterations=32; + //std::cout << std::endl << u(0, 0).x; + //std::cout << std::endl << u(0, 0).y; + //std::cout << eb(0, 0).x; + + cuComplex T2 = eb.dot(u); + std::cout << std::endl<< T2.x; + std::cout << std::endl<< T2.y; + + /* for (int i = 0; i < iterations; i++) { - cuComplex T2 = eb.dot(u); + B.multiply(u, T1); @@ -297,7 +310,7 @@ int main(int argc, char** argv) }); parametric_coordinates.move(DEVICE, HOST); - + //calculate cntre, shift mesh by centre (translate back) //divide maximum value irrespective of axis (abs max value verte)- divide by all coordinates //u,v is always [(0,0),(1,1)] @@ -308,7 +321,7 @@ int main(int argc, char** argv) rx.get_polyscope_mesh()->addVertexParameterizationQuantity( "pCoords", parametric_coordinates); - +*/ rx.get_polyscope_mesh()->addVertexScalarQuantity("vBoundary", boundaryVertices); From 687d29558a1b4e2d779e35f7242bc712886d15d0 Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Sun, 4 Aug 2024 19:11:26 +0530 
Subject: [PATCH 86/96] cast boundaryVertices to float for eb --- apps/SCP/scp.cu | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index 44887056..50363454 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -251,7 +251,7 @@ int main(int argc, char** argv) [B, eb, boundaryVertices, num_bd_vertices] __device__( const rxmesh::VertexHandle vh) mutable { eb(vh, 0) = make_cuComplex( - boundaryVertices(vh, 0) / num_bd_vertices, 0.0f); + (float)boundaryVertices(vh, 0) / num_bd_vertices, 0.0f); B(vh, vh) = make_cuComplex(boundaryVertices(vh, 0), 0.0f); }); @@ -270,13 +270,14 @@ int main(int argc, char** argv) //std::cout << std::endl << u(0, 0).y; //std::cout << eb(0, 0).x; - cuComplex T2 = eb.dot(u); - std::cout << std::endl<< T2.x; - std::cout << std::endl<< T2.y; + - /* + for (int i = 0; i < iterations; i++) { - + + cuComplex T2 = eb.dot(u); + std::cout << std::endl << T2.x; + std::cout << std::endl << T2.y; B.multiply(u, T1); @@ -321,7 +322,7 @@ int main(int argc, char** argv) rx.get_polyscope_mesh()->addVertexParameterizationQuantity( "pCoords", parametric_coordinates); -*/ + rx.get_polyscope_mesh()->addVertexScalarQuantity("vBoundary", boundaryVertices); From 82896b440ffdabb5958c64cd65d38b5d9730f29b Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Sun, 4 Aug 2024 20:28:48 +0530 Subject: [PATCH 87/96] Update scp.cu --- apps/SCP/scp.cu | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index 50363454..eabd5a77 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -19,12 +19,12 @@ __global__ static void compute_area_matrix( AreaMatrix(vv[0], vv[1]) = make_cuComplex(0, -0.25); AreaMatrix(vv[1], vv[0]) = make_cuComplex(0, 0.25); } - if (boundaryVertices(vv[1], 0) == 1 && + else if (boundaryVertices(vv[1], 0) == 1 && boundaryVertices(vv[2], 0) == 1) { AreaMatrix(vv[1], vv[2]) = make_cuComplex(0, -0.25); AreaMatrix(vv[2], 
vv[1]) = make_cuComplex(0, 0.25); } - if (boundaryVertices(vv[2], 0) == 1 && + else if (boundaryVertices(vv[2], 0) == 1 && boundaryVertices(vv[0], 0) == 1) { AreaMatrix(vv[2], vv[0]) = make_cuComplex(0, -0.25); AreaMatrix(vv[0], vv[2]) = make_cuComplex(0, 0.25); @@ -135,7 +135,9 @@ __global__ static void subtract_matrix(const rxmesh::Context context, auto subtract = [&](VertexHandle v_id, VertexIterator& vv) { for (int i = 0; i < vv.size(); ++i) { - A_mat(v_id, vv[i]) = + + + A_mat(v_id, vv[i]) = //B_mat(v_id, vv[i]); cuCsubf(B_mat(v_id, vv[i]), C_mat(v_id, vv[i])); } }; @@ -253,10 +255,10 @@ int main(int argc, char** argv) eb(vh, 0) = make_cuComplex( (float)boundaryVertices(vh, 0) / num_bd_vertices, 0.0f); B(vh, vh) = - make_cuComplex(boundaryVertices(vh, 0), 0.0f); + make_cuComplex((float)boundaryVertices(vh, 0), 0.0f); }); - B.move(rxmesh::DEVICE, rxmesh::HOST); + //B.move(rxmesh::DEVICE, rxmesh::HOST); eb.move(rxmesh::DEVICE, rxmesh::HOST); // @@ -281,23 +283,29 @@ int main(int argc, char** argv) B.multiply(u, T1); + + //eb.multiply(T2); rx.for_each_vertex( rxmesh::DEVICE, [eb, B, T2, T1] __device__(const rxmesh::VertexHandle vh) mutable { - T1(vh, 0) = cuCsubf(T1(vh, 0), cuCmulf(eb(vh, 0),T2)); + T1(vh, 0) = cuCsubf( + T1(vh, 0), + cuCmulf(eb(vh, 0),T2) + ); + }); - // Lc.solve(T1, y); // Ly=T1 + y.move(DEVICE, HOST); //Lc.solve(T1, y, Solver::QR, PermuteMethod::NSTDIS); - y.multiply(1 / y.norm2()); + //y.multiply(1 / y.norm2()); + u.copy_from(y); } - u.copy_from(y); // conversion step @@ -323,6 +331,9 @@ int main(int argc, char** argv) rx.get_polyscope_mesh()->addVertexParameterizationQuantity( "pCoords", parametric_coordinates); + rx.get_polyscope_mesh()->addVertexVectorQuantity2D("vq", + parametric_coordinates); + rx.get_polyscope_mesh()->addVertexScalarQuantity("vBoundary", boundaryVertices); From 129b967664faaf432ec49a35be0341eceded726c Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Sun, 4 Aug 2024 22:38:40 +0530 Subject: [PATCH 88/96] fix singular 
rows --- apps/SCP/scp.cu | 49 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index eabd5a77..4eb95a33 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -18,18 +18,23 @@ __global__ static void compute_area_matrix( { AreaMatrix(vv[0], vv[1]) = make_cuComplex(0, -0.25); AreaMatrix(vv[1], vv[0]) = make_cuComplex(0, 0.25); + //printf("\nfirst edge: %f", AreaMatrix(vv[0], vv[1]).y); + } else if (boundaryVertices(vv[1], 0) == 1 && boundaryVertices(vv[2], 0) == 1) { AreaMatrix(vv[1], vv[2]) = make_cuComplex(0, -0.25); AreaMatrix(vv[2], vv[1]) = make_cuComplex(0, 0.25); - } + //printf("\nsecond edge: %f", AreaMatrix(vv[1], vv[2]).y); + + } else if (boundaryVertices(vv[2], 0) == 1 && boundaryVertices(vv[0], 0) == 1) { AreaMatrix(vv[2], vv[0]) = make_cuComplex(0, -0.25); AreaMatrix(vv[0], vv[2]) = make_cuComplex(0, 0.25); - } + //printf("\nthird edge: %f", AreaMatrix(vv[2], vv[0]).y); + } }; auto block = cooperative_groups::this_thread_block(); @@ -108,16 +113,26 @@ __global__ static void calculate_Ld_matrix( for (int nei_index = 0; nei_index < vv.size(); nei_index++) { - Ld(v_id, v_id) = + Ld(v_id, v_id) = //make_cuComplex(5, 0); cuCaddf(Ld(v_id, v_id), - make_cuComplex(weight_mat(v_id, vv[nei_index]),0)); -// weight_mat(v_id, vv[nei_index]))); + make_cuComplex(1,0));//weight_mat(v_id, vv[nei_index]),0)); + // weight_mat(v_id, vv[nei_index]))); - if (v_id != vv[nei_index]) Ld(v_id, vv[nei_index]) = cuCsubf(Ld(v_id, vv[nei_index]), make_cuComplex(weight_mat(v_id, vv[nei_index]), 0)); + + + } + //printf("\nOwner vertex: %f", Ld(v_id, v_id).x); + + for (int nei_index = 0; nei_index < vv.size(); nei_index++) { + //printf("\n%d: %f", nei_index, Ld(v_id, vv[nei_index]).x); + + } + + }; auto block = cooperative_groups::this_thread_block(); @@ -136,10 +151,20 @@ __global__ static void subtract_matrix(const rxmesh::Context context, auto subtract = [&](VertexHandle v_id, 
VertexIterator& vv) { for (int i = 0; i < vv.size(); ++i) { + //printf("\nBr:%f", B_mat(v_id, vv[i]).x); + //printf("\nBc:%f", B_mat(v_id, vv[i]).y); + A_mat(v_id, vv[i]) = //B_mat(v_id, vv[i]); cuCsubf(B_mat(v_id, vv[i]), C_mat(v_id, vv[i])); + + //printf("\nAr:%f", A_mat(v_id, vv[i]).x); + //printf("\nAc:%f", A_mat(v_id, vv[i]).y); + } + A_mat(v_id, v_id) = cuCsubf(B_mat(v_id, v_id), C_mat(v_id, v_id)); + //printf("\nAdiagr:%f", A_mat(v_id, v_id).x); + //printf("\nAdiagc:%f", A_mat(v_id, v_id).y); }; auto block = cooperative_groups::this_thread_block(); @@ -256,6 +281,8 @@ int main(int argc, char** argv) (float)boundaryVertices(vh, 0) / num_bd_vertices, 0.0f); B(vh, vh) = make_cuComplex((float)boundaryVertices(vh, 0), 0.0f); + + }); //B.move(rxmesh::DEVICE, rxmesh::HOST); @@ -290,10 +317,16 @@ int main(int argc, char** argv) rx.for_each_vertex( rxmesh::DEVICE, [eb, B, T2, T1] __device__(const rxmesh::VertexHandle vh) mutable { - T1(vh, 0) = cuCsubf( + + + T1(vh, 0) = cuCsubf( T1(vh, 0), cuCmulf(eb(vh, 0),T2) + ); + + + }); @@ -303,7 +336,7 @@ int main(int argc, char** argv) //Lc.solve(T1, y, Solver::QR, PermuteMethod::NSTDIS); - //y.multiply(1 / y.norm2()); + y.multiply(1 / y.norm2()); u.copy_from(y); } // conversion step From 3d5eee46ad1b6857b968f9fce088316823326c7c Mon Sep 17 00:00:00 2001 From: Sachin Kishan Date: Sun, 4 Aug 2024 23:17:04 +0530 Subject: [PATCH 89/96] with nans after around 6 iterations --- apps/SCP/scp.cu | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index 4eb95a33..5b72fe9e 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -180,7 +180,7 @@ int main(int argc, char** argv) const uint32_t device_id = 0; cuda_query(device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "bunnyhead.obj"); + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "plane.obj"); auto boundaryVertices = *rx.add_vertex_attribute("boundaryVertices", 1); @@ -293,7 +293,7 @@ int main(int argc, char** argv) 
u.fill_random(); Lc.pre_solve(PermuteMethod::NSTDIS); // can be outside the loop - int iterations=32; + int iterations=8; //std::cout << std::endl << u(0, 0).x; //std::cout << std::endl << u(0, 0).y; @@ -305,8 +305,8 @@ int main(int argc, char** argv) for (int i = 0; i < iterations; i++) { cuComplex T2 = eb.dot(u); - std::cout << std::endl << T2.x; - std::cout << std::endl << T2.y; + //std::cout << std::endl << T2.x; + //std::cout << std::endl << T2.y; B.multiply(u, T1); @@ -316,7 +316,7 @@ int main(int argc, char** argv) rx.for_each_vertex( rxmesh::DEVICE, - [eb, B, T2, T1] __device__(const rxmesh::VertexHandle vh) mutable { + [eb, T2, T1] __device__(const rxmesh::VertexHandle vh) mutable { T1(vh, 0) = cuCsubf( @@ -330,13 +330,25 @@ int main(int argc, char** argv) }); - Lc.solve(T1, y); // Ly=T1 + Lc.solve(T1, y); // Ly=T1 y.move(DEVICE, HOST); //Lc.solve(T1, y, Solver::QR, PermuteMethod::NSTDIS); - y.multiply(1 / y.norm2()); + float norm = y.norm2(); + rx.for_each_vertex( + rxmesh::DEVICE, + [y] __device__(const rxmesh::VertexHandle vh) mutable { + printf("\nx:%f", y(vh, 0).x); + printf("\ny:%f", y(vh, 0).y); + }); + + y.multiply(1.0f / norm); + + + + u.copy_from(y); } // conversion step From d84c9a1c45f46f3c9cdeaedbc7800c43a6138320 Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 8 Aug 2024 00:15:24 -0400 Subject: [PATCH 90/96] QR low-level API --- apps/MCF/mcf_cusolver_chol.cuh | 5 +- apps/SCP/scp.cu | 2 +- include/rxmesh/matrix/sparse_matrix.cuh | 642 ++++++++++++++++------- tests/RXMesh_test/test_sparse_matrix.cuh | 7 +- 4 files changed, 470 insertions(+), 186 deletions(-) diff --git a/apps/MCF/mcf_cusolver_chol.cuh b/apps/MCF/mcf_cusolver_chol.cuh index 9c640ecd..d49eaaca 100644 --- a/apps/MCF/mcf_cusolver_chol.cuh +++ b/apps/MCF/mcf_cusolver_chol.cuh @@ -193,7 +193,7 @@ void mcf_cusolver_chol(rxmesh::RXMeshStatic& rx) // A_mat.solve(B_mat, *X_mat, Solver::CHOL, PermuteMethod::NSTDIS); // Solving using CHOL - A_mat.pre_solve(PermuteMethod::NSTDIS); + 
A_mat.pre_solve(Solver::CHOL, PermuteMethod::NSTDIS); A_mat.solve(B_mat, *X_mat); @@ -206,7 +206,10 @@ void mcf_cusolver_chol(rxmesh::RXMeshStatic& rx) coords->from_matrix(X_mat.get()); rx.get_polyscope_mesh()->updateVertexPositions(*coords); + +#if USE_POLYSCOPE polyscope::show(); +#endif B_mat.release(); X_mat->release(); diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index 5b72fe9e..49c9ea6e 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -291,7 +291,7 @@ int main(int argc, char** argv) // // S = [B- (1/Vb) * ebebT]; u.fill_random(); - Lc.pre_solve(PermuteMethod::NSTDIS); // can be outside the loop + Lc.pre_solve(Solver::CHOL, PermuteMethod::NSTDIS); // can be outside the loop int iterations=8; diff --git a/include/rxmesh/matrix/sparse_matrix.cuh b/include/rxmesh/matrix/sparse_matrix.cuh index bbacf75d..3630af67 100644 --- a/include/rxmesh/matrix/sparse_matrix.cuh +++ b/include/rxmesh/matrix/sparse_matrix.cuh @@ -24,12 +24,15 @@ namespace rxmesh { /** * @brief The enum class for choosing different solver types + * Documentation of cuSolver low-level preview API + * https://docs.nvidia.com/cuda/archive/8.0/cusolver/index.html#cusolver-preview-reference */ enum class Solver { - CHOL = 0, - LU = 1, - QR = 2 + NONE = 0, + CHOL = 1, + LU = 2, + QR = 3 }; /** @@ -92,10 +95,11 @@ struct SparseMatrix m_reorder_allocated(false), m_d_cusparse_spmm_buffer(nullptr), m_d_cusparse_spmv_buffer(nullptr), - m_chol_buffer(nullptr), + m_solver_buffer(nullptr), m_d_solver_b(nullptr), m_d_solver_x(nullptr), - m_allocated(LOCATION_NONE) + m_allocated(LOCATION_NONE), + m_current_solver(Solver::NONE) { using namespace rxmesh; constexpr uint32_t blockThreads = 256; @@ -191,6 +195,8 @@ struct SparseMatrix CUSOLVER_ERROR(cusolverSpCreateCsrcholInfo(&m_chol_info)); + CUSOLVER_ERROR(cusolverSpCreateCsrqrInfo(&m_qr_info)); + // allocate the host m_h_val = static_cast(malloc(m_nnz * sizeof(T))); m_h_row_ptr = @@ -361,6 +367,8 @@ struct SparseMatrix 
CUSPARSE_ERROR(cusparseDestroyMatDescr(m_descr)); CUSOLVER_ERROR(cusolverSpDestroy(m_cusolver_sphandle)); CUSOLVER_ERROR(cusolverSpDestroyCsrcholInfo(m_chol_info)); + CUSOLVER_ERROR(cusolverSpDestroyCsrqrInfo(m_qr_info)); + if (m_reorder_allocated) { GPU_FREE(m_d_solver_val); @@ -374,7 +382,7 @@ struct SparseMatrix free(m_h_permute); free(m_h_permute_map); } - GPU_FREE(m_chol_buffer); + GPU_FREE(m_solver_buffer); GPU_FREE(m_d_cusparse_spmm_buffer); GPU_FREE(m_d_cusparse_spmv_buffer); } @@ -622,9 +630,9 @@ struct SparseMatrix * the columns for B and multiply them separately as sparse matrix dense * vector multiplication */ - void multiply_cw(const DenseMatrix& B_mat, - DenseMatrix& C_mat, - cudaStream_t stream = 0) + __host__ void multiply_cw(const DenseMatrix& B_mat, + DenseMatrix& C_mat, + cudaStream_t stream = 0) { assert(cols() == B_mat.cols()); assert(rows() == C_mat.rows()); @@ -644,18 +652,18 @@ struct SparseMatrix __host__ EigenSparseMatrix to_eigen() { return EigenSparseMatrix( - rows(), cols(), non_zeros(), m_h_row_ptr, m_h_col_idx, m_h_val); + rows(), cols(), non_zeros(), m_h_row_ptr, m_h_col_idx, m_h_val); } /** * @brief solve the AX=B for X where X and B are all dense matrix and we * would solve it in a column wise manner */ - void solve(const DenseMatrix& B_mat, - DenseMatrix& X_mat, - Solver solver, - PermuteMethod reorder, - cudaStream_t stream = 0) + __host__ void solve(const DenseMatrix& B_mat, + DenseMatrix& X_mat, + Solver solver, + PermuteMethod reorder, + cudaStream_t stream = 0) { for (int i = 0; i < B_mat.cols(); ++i) { cusparse_linear_solver_wrapper( @@ -671,11 +679,11 @@ struct SparseMatrix /** * @brief solve the Ax=b for x */ - void solve(const T* B_arr, - T* X_arr, - Solver solver, - PermuteMethod reorder, - cudaStream_t stream = 0) + __host__ void solve(const T* B_arr, + T* X_arr, + Solver solver, + PermuteMethod reorder, + cudaStream_t stream = 0) { cusparse_linear_solver_wrapper( solver, reorder, m_cusolver_sphandle, B_arr, X_arr, 
stream); @@ -687,7 +695,7 @@ struct SparseMatrix /** * @brief allocate all temp buffers needed for the solver low-level API */ - void permute_alloc(PermuteMethod reorder) + __host__ void permute_alloc(PermuteMethod reorder) { if (reorder == PermuteMethod::NONE) { return; @@ -731,7 +739,7 @@ struct SparseMatrix * the solving process. Any other function call order would be undefined. * @param reorder: the reorder method applied. */ - void permute(PermuteMethod reorder = PermuteMethod::NSTDIS) + __host__ void permute(PermuteMethod reorder) { permute_alloc(reorder); @@ -843,7 +851,7 @@ struct SparseMatrix * @brief The lower level api of matrix analysis. Generating a member value * of type csrcholInfo_t for cucolver. */ - void analyze_pattern() + __host__ void analyze_pattern(Solver solver) { if (!m_use_reorder) { m_d_solver_row_ptr = m_d_row_ptr; @@ -851,26 +859,225 @@ struct SparseMatrix m_d_solver_val = m_d_val; } - CUSOLVER_ERROR(cusolverSpXcsrcholAnalysis(m_cusolver_sphandle, - m_num_rows, - m_nnz, - m_descr, - m_d_solver_row_ptr, - m_d_solver_col_idx, - m_chol_info)); + if (solver == Solver::CHOL) { + CUSOLVER_ERROR(cusolverSpXcsrcholAnalysis(m_cusolver_sphandle, + m_num_rows, + m_nnz, + m_descr, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_chol_info)); + } else if (solver == Solver::QR) { + CUSOLVER_ERROR(cusolverSpXcsrqrAnalysis(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + m_nnz, + m_descr, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_qr_info)); + } else { + RXMESH_ERROR( + "SparseMatrix::analyze_pattern() incompatible solver with " + "analyze_pattern method"); + } } /** * @brief The lower level api of matrix factorization buffer calculation and * allocation. The buffer is a member variable. 
*/ - void post_analyze_alloc() + __host__ void post_analyze_alloc(Solver solver) { m_internalDataInBytes = 0; m_workspaceInBytes = 0; - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpScsrcholBufferInfo(m_cusolver_sphandle, + GPU_FREE(m_solver_buffer); + + if (solver == Solver::CHOL) { + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR( + cusolverSpScsrcholBufferInfo(m_cusolver_sphandle, + m_num_rows, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_chol_info, + &m_internalDataInBytes, + &m_workspaceInBytes)); + } + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR( + cusolverSpCcsrcholBufferInfo(m_cusolver_sphandle, + m_num_rows, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_chol_info, + &m_internalDataInBytes, + &m_workspaceInBytes)); + } + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR( + cusolverSpDcsrcholBufferInfo(m_cusolver_sphandle, + m_num_rows, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_chol_info, + &m_internalDataInBytes, + &m_workspaceInBytes)); + } + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR( + cusolverSpZcsrcholBufferInfo(m_cusolver_sphandle, + m_num_rows, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_chol_info, + &m_internalDataInBytes, + &m_workspaceInBytes)); + } + } else if (solver == Solver::QR) { + if constexpr (std::is_same_v) { + float mu = 0.f; + CUSOLVER_ERROR( + cusolverSpScsrqrBufferInfo(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_qr_info, + &m_internalDataInBytes, + &m_workspaceInBytes)); + + CUSOLVER_ERROR(cusolverSpScsrqrSetup(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + mu, + m_qr_info)); + } + + if constexpr (std::is_same_v) { + cuComplex mu = make_cuComplex(0.f, 0.f); + 
CUSOLVER_ERROR( + cusolverSpCcsrqrBufferInfo(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_qr_info, + &m_internalDataInBytes, + &m_workspaceInBytes)); + + CUSOLVER_ERROR(cusolverSpCcsrqrSetup(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + mu, + m_qr_info)); + } + + if constexpr (std::is_same_v) { + double mu = 0.f; + CUSOLVER_ERROR( + cusolverSpDcsrqrBufferInfo(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_qr_info, + &m_internalDataInBytes, + &m_workspaceInBytes)); + + CUSOLVER_ERROR(cusolverSpDcsrqrSetup(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + mu, + m_qr_info)); + } + + if constexpr (std::is_same_v) { + cuDoubleComplex mu = make_cuDoubleComplex(0.0, 0.0); + CUSOLVER_ERROR( + cusolverSpZcsrqrBufferInfo(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + m_qr_info, + &m_internalDataInBytes, + &m_workspaceInBytes)); + + CUSOLVER_ERROR(cusolverSpZcsrqrSetup(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + m_nnz, + m_descr, + m_d_solver_val, + m_d_solver_row_ptr, + m_d_solver_col_idx, + mu, + m_qr_info)); + } + } else { + RXMESH_ERROR( + "SparseMatrix::post_analyze_alloc() incompatible solver with " + "post_analyze_alloc method"); + return; + } + CUDA_ERROR(cudaMalloc((void**)&m_solver_buffer, m_workspaceInBytes)); + } + + + /** + * @brief The lower level api of matrix factorization and save the + * factorization result in to the buffer. 
+ */ + __host__ void factorize(Solver solver) + { + if (solver == Solver::CHOL) { + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpScsrcholFactor(m_cusolver_sphandle, m_num_rows, m_nnz, m_descr, @@ -878,12 +1085,11 @@ struct SparseMatrix m_d_solver_row_ptr, m_d_solver_col_idx, m_chol_info, - &m_internalDataInBytes, - &m_workspaceInBytes)); - } + m_solver_buffer)); + } - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpCcsrcholBufferInfo(m_cusolver_sphandle, + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpCcsrcholFactor(m_cusolver_sphandle, m_num_rows, m_nnz, m_descr, @@ -891,12 +1097,10 @@ struct SparseMatrix m_d_solver_row_ptr, m_d_solver_col_idx, m_chol_info, - &m_internalDataInBytes, - &m_workspaceInBytes)); - } - - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpDcsrcholBufferInfo(m_cusolver_sphandle, + m_solver_buffer)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpDcsrcholFactor(m_cusolver_sphandle, m_num_rows, m_nnz, m_descr, @@ -904,12 +1108,10 @@ struct SparseMatrix m_d_solver_row_ptr, m_d_solver_col_idx, m_chol_info, - &m_internalDataInBytes, - &m_workspaceInBytes)); - } - - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpZcsrcholBufferInfo(m_cusolver_sphandle, + m_solver_buffer)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpZcsrcholFactor(m_cusolver_sphandle, m_num_rows, m_nnz, m_descr, @@ -917,85 +1119,105 @@ struct SparseMatrix m_d_solver_row_ptr, m_d_solver_col_idx, m_chol_info, - &m_internalDataInBytes, - &m_workspaceInBytes)); - } - - CUDA_ERROR(cudaMalloc((void**)&m_chol_buffer, m_workspaceInBytes)); - } - + m_solver_buffer)); + } + } else if (solver == Solver::QR) { + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpScsrqrFactor(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + m_nnz, + nullptr, + nullptr, + m_qr_info, + m_solver_buffer)); + } - /** - * @brief The lower level api of matrix factorization and save the - * factorization result 
in to the buffer. - */ - void factorize() - { - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpScsrcholFactor(m_cusolver_sphandle, - m_num_rows, - m_nnz, - m_descr, - m_d_solver_val, - m_d_solver_row_ptr, - m_d_solver_col_idx, - m_chol_info, - m_chol_buffer)); - } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpCcsrqrFactor(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + m_nnz, + nullptr, + nullptr, + m_qr_info, + m_solver_buffer)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpDcsrqrFactor(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + m_nnz, + nullptr, + nullptr, + m_qr_info, + m_solver_buffer)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpZcsrqrFactor(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + m_nnz, + nullptr, + nullptr, + m_qr_info, + m_solver_buffer)); + } - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpCcsrcholFactor(m_cusolver_sphandle, - m_num_rows, - m_nnz, - m_descr, - m_d_solver_val, - m_d_solver_row_ptr, - m_d_solver_col_idx, - m_chol_info, - m_chol_buffer)); - } - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpDcsrcholFactor(m_cusolver_sphandle, - m_num_rows, - m_nnz, - m_descr, - m_d_solver_val, - m_d_solver_row_ptr, - m_d_solver_col_idx, - m_chol_info, - m_chol_buffer)); - } - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpZcsrcholFactor(m_cusolver_sphandle, - m_num_rows, - m_nnz, - m_descr, - m_d_solver_val, - m_d_solver_row_ptr, - m_d_solver_col_idx, - m_chol_info, - m_chol_buffer)); + } else { + RXMESH_ERROR( + "SparseMatrix::factorize() incompatible solver with factorize " + "method"); + return; } double tol = 1.0e-8; int singularity; - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpScsrcholZeroPivot( - m_cusolver_sphandle, m_chol_info, tol, &singularity)); - } - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpCcsrcholZeroPivot( - m_cusolver_sphandle, m_chol_info, tol, &singularity)); - } - if constexpr 
(std::is_same_v) { - CUSOLVER_ERROR(cusolverSpDcsrcholZeroPivot( - m_cusolver_sphandle, m_chol_info, tol, &singularity)); - } - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpZcsrcholZeroPivot( - m_cusolver_sphandle, m_chol_info, tol, &singularity)); + if (solver == Solver::CHOL) { + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpScsrcholZeroPivot( + m_cusolver_sphandle, m_chol_info, tol, &singularity)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpCcsrcholZeroPivot( + m_cusolver_sphandle, m_chol_info, tol, &singularity)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpDcsrcholZeroPivot( + m_cusolver_sphandle, m_chol_info, tol, &singularity)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpZcsrcholZeroPivot( + m_cusolver_sphandle, m_chol_info, tol, &singularity)); + } + } else if (solver == Solver::QR) { + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpScsrqrZeroPivot( + m_cusolver_sphandle, m_qr_info, tol, &singularity)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpCcsrqrZeroPivot( + m_cusolver_sphandle, m_qr_info, tol, &singularity)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpDcsrqrZeroPivot( + m_cusolver_sphandle, m_qr_info, tol, &singularity)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpZcsrqrZeroPivot( + m_cusolver_sphandle, m_qr_info, tol, &singularity)); + } + + + } else { + RXMESH_ERROR( + "SparseMatrix::factorize() incompatible solver with factorize " + "method"); + return; } + if (0 <= singularity) { RXMESH_WARN( "SparseMatrix::factorize() The matrix is singular at row {} " @@ -1010,13 +1232,22 @@ struct SparseMatrix * sparse matrix before calling the solve() method below. 
After calling this * pre_solve(), solver() can be called with multiple right hand sides */ - void pre_solve(PermuteMethod reorder = PermuteMethod::NSTDIS) + __host__ void pre_solve(Solver solver, + PermuteMethod reorder = PermuteMethod::NSTDIS) { - permute_alloc(PermuteMethod::NSTDIS); - permute(PermuteMethod::NSTDIS); - analyze_pattern(); - post_analyze_alloc(); - factorize(); + if (solver != Solver::CHOL && solver != Solver::QR) { + RXMESH_WARN( + "SparseMatrix::pre_solve() the low-level API only works for " + "Cholesky and QR solvers"); + return; + } + m_current_solver = solver; + + permute_alloc(reorder); + permute(reorder); + analyze_pattern(solver); + post_analyze_alloc(solver); + factorize(solver); } /** @@ -1027,9 +1258,9 @@ struct SparseMatrix * @param B_mat: right hand side * @param X_mat: output solution */ - void solve(DenseMatrix& B_mat, - DenseMatrix& X_mat, - cudaStream_t stream = NULL) + __host__ void solve(DenseMatrix& B_mat, + DenseMatrix& X_mat, + cudaStream_t stream = NULL) { CUSOLVER_ERROR(cusolverSpSetStream(m_cusolver_sphandle, stream)); for (int i = 0; i < B_mat.cols(); ++i) { @@ -1045,7 +1276,7 @@ struct SparseMatrix * @param d_b: right hand side * @param d_x: output solution */ - void solve(T* d_b, T* d_x) + __host__ void solve(T* d_b, T* d_x) { T* d_solver_b; T* d_solver_x; @@ -1061,39 +1292,89 @@ struct SparseMatrix d_solver_x = d_x; } - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpScsrcholSolve(m_cusolver_sphandle, - m_num_rows, - d_solver_b, - d_solver_x, - m_chol_info, - m_chol_buffer)); - } + if (m_current_solver == Solver::CHOL) { - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpCcsrcholSolve(m_cusolver_sphandle, - m_num_rows, - d_solver_b, - d_solver_x, - m_chol_info, - m_chol_buffer)); - } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpScsrcholSolve(m_cusolver_sphandle, + m_num_rows, + d_solver_b, + d_solver_x, + m_chol_info, + m_solver_buffer)); + } - if constexpr (std::is_same_v) { - 
CUSOLVER_ERROR(cusolverSpDcsrcholSolve(m_cusolver_sphandle, - m_num_rows, - d_solver_b, - d_solver_x, - m_chol_info, - m_chol_buffer)); - } - if constexpr (std::is_same_v) { - CUSOLVER_ERROR(cusolverSpZcsrcholSolve(m_cusolver_sphandle, - m_num_rows, - d_solver_b, - d_solver_x, - m_chol_info, - m_chol_buffer)); + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpCcsrcholSolve(m_cusolver_sphandle, + m_num_rows, + d_solver_b, + d_solver_x, + m_chol_info, + m_solver_buffer)); + } + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpDcsrcholSolve(m_cusolver_sphandle, + m_num_rows, + d_solver_b, + d_solver_x, + m_chol_info, + m_solver_buffer)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpZcsrcholSolve(m_cusolver_sphandle, + m_num_rows, + d_solver_b, + d_solver_x, + m_chol_info, + m_solver_buffer)); + } + } else if (m_current_solver == Solver::QR) { + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpScsrqrSolve(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + d_solver_b, + d_solver_x, + m_qr_info, + m_solver_buffer)); + } + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpCcsrqrSolve(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + d_solver_b, + d_solver_x, + m_qr_info, + m_solver_buffer)); + } + + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpDcsrqrSolve(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + d_solver_b, + d_solver_x, + m_qr_info, + m_solver_buffer)); + } + if constexpr (std::is_same_v) { + CUSOLVER_ERROR(cusolverSpZcsrqrSolve(m_cusolver_sphandle, + m_num_rows, + m_num_cols, + d_solver_b, + d_solver_x, + m_qr_info, + m_solver_buffer)); + } + + + } else { + RXMESH_ERROR( + "SparseMatrix::solve() the low-level API only works for " + "Cholesky and QR solvers"); + return; } if (m_use_reorder) { @@ -1103,7 +1384,7 @@ struct SparseMatrix private: - void release(locationT location) + __host__ void release(locationT location) { if (((location & HOST) == HOST) && ((m_allocated & HOST) == 
HOST)) { free(m_h_val); @@ -1124,7 +1405,7 @@ struct SparseMatrix } } - void allocate(locationT location) + __host__ void allocate(locationT location) { if ((location & HOST) == HOST) { release(HOST); @@ -1154,12 +1435,12 @@ struct SparseMatrix * @brief wrapper for cuSolver API for solving linear systems using cuSolver * High-level API */ - void cusparse_linear_solver_wrapper(const Solver solver, - const PermuteMethod reorder, - cusolverSpHandle_t handle, - const T* d_b, - T* d_x, - cudaStream_t stream) + __host__ void cusparse_linear_solver_wrapper(const Solver solver, + const PermuteMethod reorder, + cusolverSpHandle_t handle, + const T* d_b, + T* d_x, + cudaStream_t stream) { CUSOLVER_ERROR(cusolverSpSetStream(handle, stream)); @@ -1290,9 +1571,10 @@ struct SparseMatrix &singularity)); } } else if (solver == Solver::LU) { - RXMESH_ERROR( - "SparseMatrix: LU Solver is run on the host. Make sure your " - "data resides on the host before calling the solver"); + RXMESH_WARN( + "SparseMatrix::cusparse_linear_solver_wrapper() LU Solver is " + "run on the host. 
Make sure your data resides on the host " + "before calling the solver"); if constexpr (std::is_same_v) { CUSOLVER_ERROR(cusolverSpScsrlsvluHost(handle, @@ -1386,7 +1668,7 @@ struct SparseMatrix } } - void permute_scatter(IndexT* d_p, T* d_in, T* d_out, IndexT size) + __host__ void permute_scatter(IndexT* d_p, T* d_in, T* d_out, IndexT size) { // d_out[d_p[i]] = d_in[i] thrust::device_ptr t_p(d_p); @@ -1396,7 +1678,7 @@ struct SparseMatrix thrust::scatter(thrust::device, t_i, t_i + size, t_p, t_o); } - void permute_gather(IndexT* d_p, T* d_in, T* d_out, IndexT size) + __host__ void permute_gather(IndexT* d_p, T* d_in, T* d_out, IndexT size) { // d_out[i] = d_in[d_p[i]] thrust::device_ptr t_p(d_p); @@ -1434,20 +1716,24 @@ struct SparseMatrix csrcholInfo_t m_chol_info; size_t m_internalDataInBytes; size_t m_workspaceInBytes; - void* m_chol_buffer; + void* m_solver_buffer; + csrqrInfo_t m_qr_info; // purmutation array IndexT* m_h_permute; IndexT* m_d_permute; // CSR matrix for solving only - // equal to the original matrix if not permutated - // only allocated as a new CSR matrix if permutated + // equal to the original matrix if not permuted + // only allocated as a new CSR matrix if permuted bool m_reorder_allocated; IndexT* m_d_solver_row_ptr; IndexT* m_d_solver_col_idx; T* m_d_solver_val; + // caching user's solver that is used in pre_solve + Solver m_current_solver; + IndexT* m_h_solver_row_ptr; IndexT* m_h_solver_col_idx; diff --git a/tests/RXMesh_test/test_sparse_matrix.cuh b/tests/RXMesh_test/test_sparse_matrix.cuh index eb79652e..d87d69e6 100644 --- a/tests/RXMesh_test/test_sparse_matrix.cuh +++ b/tests/RXMesh_test/test_sparse_matrix.cuh @@ -356,12 +356,7 @@ TEST(RXMeshStatic, SparseMatrixLowerLevelAPISolve) rx.get_context(), *coords, A_mat, X_mat, B_mat, time_step); // A_mat.solve(B_mat, X_mat, Solver::CHOL, PermuteMethod::NSTDIS); - - A_mat.permute_alloc(PermuteMethod::NSTDIS); - A_mat.permute(PermuteMethod::NSTDIS); - A_mat.analyze_pattern(); - 
A_mat.post_analyze_alloc(); - A_mat.factorize(); + A_mat.pre_solve(Solver::CHOL, PermuteMethod::NSTDIS); A_mat.solve(B_mat, X_mat); A_mat.multiply(X_mat, ret_mat); From 6e423571497b00ab38deb2c79048ef248364b6d9 Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 8 Aug 2024 13:59:26 -0400 Subject: [PATCH 91/96] arap on the dragon with qr pre_solve --- apps/ARAP/arap.cu | 548 +++++++++++++++++++--------------------------- 1 file changed, 230 insertions(+), 318 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 0cc69f6d..5b65264b 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -11,103 +11,69 @@ using namespace rxmesh; -template -__device__ __forceinline__ T -edge_cotan_weight(const rxmesh::VertexHandle& p_id, - const rxmesh::VertexHandle& r_id, - const rxmesh::VertexHandle& q_id, - const rxmesh::VertexHandle& s_id, - const rxmesh::VertexAttribute& X) -{ - // Get the edge weight between the two vertices p-r where - // q and s composes the diamond around p-r - - const vec3 p(X(p_id, 0), X(p_id, 1), X(p_id, 2)); - const vec3 r(X(r_id, 0), X(r_id, 1), X(r_id, 2)); - const vec3 q(X(q_id, 0), X(q_id, 1), X(q_id, 2)); - const vec3 s(X(s_id, 0), X(s_id, 1), X(s_id, 2)); - - //cotans[(v1, v2)] =np.dot(e1, e2) / np.linalg.norm(np.cross(e1, e2)) - - float weight = 0; - if (q_id.is_valid()) - weight += dot((p - q), (r - q)) / length(cross(p - q, r - q)); - if (s_id.is_valid()) - weight += dot((p - s), (r - s)) / length(cross(p - s, r - s)); - weight /= 2; - return std::max(0.f, weight); -} - -template -__global__ static void compute_edge_weights_evd(const rxmesh::Context context, - rxmesh::VertexAttribute coords, - rxmesh::SparseMatrix A_mat) -{ - - auto vn_lambda = [&](EdgeHandle edge_id, VertexIterator& vv) { - T e_weight = 0; - e_weight = edge_cotan_weight(vv[0], vv[2], vv[1], vv[3], coords); - A_mat(vv[0], vv[2]) = e_weight; - A_mat(vv[2], vv[0]) = e_weight; - - }; - - auto block = cooperative_groups::this_thread_block(); - Query query(context); - 
ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, vn_lambda); -} - template -__global__ static void edge_weight_values( +__global__ static void calc_edge_weights_mat( const rxmesh::Context context, - rxmesh::EdgeAttribute edge_weights, - rxmesh::SparseMatrix A_mat) + rxmesh::VertexAttribute coords, + rxmesh::SparseMatrix edge_weights) { - auto vn_lambda = [&](EdgeHandle edge_id, VertexIterator& ev) { - edge_weights(edge_id, 0) = A_mat(ev[0], ev[1]); - }; + auto calc_weights = [&](EdgeHandle edge_id, VertexIterator& vv) { + // the edge goes from p-r while the q and s are the opposite vertices + const rxmesh::VertexHandle p_id = vv[0]; + const rxmesh::VertexHandle r_id = vv[2]; + const rxmesh::VertexHandle q_id = vv[1]; + const rxmesh::VertexHandle s_id = vv[3]; - auto block = cooperative_groups::this_thread_block(); - Query query(context); - ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, vn_lambda); -} + if (!p_id.is_valid() || !r_id.is_valid() || !q_id.is_valid() || + !s_id.is_valid()) { + return; + } -__host__ __device__ Eigen::Matrix3f calculateSVD(Eigen::Matrix3f S) -{ - Eigen::JacobiSVD svd(S); + const vec3 p(coords(p_id, 0), coords(p_id, 1), coords(p_id, 2)); + const vec3 r(coords(r_id, 0), coords(r_id, 1), coords(r_id, 2)); + const vec3 q(coords(q_id, 0), coords(q_id, 1), coords(q_id, 2)); + const vec3 s(coords(s_id, 0), coords(s_id, 1), coords(s_id, 2)); + // cotans[(v1, v2)] =np.dot(e1, e2) / np.linalg.norm(np.cross(e1, e2)) - Eigen::MatrixXf V = svd.matrixV(); - Eigen::MatrixXf U = svd.matrixU().eval(); + T weight = 0; + // if (q_id.is_valid()) + weight += dot((p - q), (r - q)) / length(cross(p - q, r - q)); - float smallest_singular_value = svd.singularValues().minCoeff(); + // if (s_id.is_valid()) + weight += dot((p - s), (r - s)) / length(cross(p - s, r - s)); - U.col(smallest_singular_value) = U.col(smallest_singular_value) * -1; + weight /= 2; + weight = std::max(0.f, weight); - Eigen::MatrixXf R = V * U; + 
edge_weights(p_id, r_id) = weight; + edge_weights(r_id, p_id) = weight; + }; - return R; + auto block = cooperative_groups::this_thread_block(); + Query query(context); + ShmemAllocator shrd_alloc; + query.dispatch(block, shrd_alloc, calc_weights); } + template -__global__ static void calculate_rotation_matrix(const rxmesh::Context context, - rxmesh::VertexAttribute ref_coords, - rxmesh::VertexAttribute current_coords, - rxmesh::VertexAttribute rotationVector, - rxmesh::SparseMatrix weight_mat) +__global__ static void calculate_rotation_matrix( + const rxmesh::Context context, + rxmesh::VertexAttribute ref_coords, + rxmesh::VertexAttribute current_coords, + rxmesh::VertexAttribute rotationVector, + rxmesh::SparseMatrix weight_mat) { - auto vn_lambda = [&](VertexHandle v_id, VertexIterator& vv) - { - Eigen::Matrix3f S=Eigen::Matrix3f::Zero(); + auto vn_lambda = [&](VertexHandle v_id, VertexIterator& vv) { + Eigen::Matrix3f S = Eigen::Matrix3f::Zero(); - for (int j=0;j pi_vector = { + Eigen::Vector pi_vector = { ref_coords(v_id, 0) - ref_coords(vv[j], 0), ref_coords(v_id, 1) - ref_coords(vv[j], 1), ref_coords(v_id, 2) - ref_coords(vv[j], 2)}; @@ -139,13 +105,13 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex } // Matrix R to vector attribute R - - for (int i = 0; i < 3; i++) + + for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) rotationVector(v_id, i * 3 + j) = R(i, j); - //Apply rotation - + // Apply rotation + Eigen::Vector3 new_coords = {current_coords(v_id, 0), current_coords(v_id, 1), current_coords(v_id, 2)}; @@ -157,164 +123,96 @@ __global__ static void calculate_rotation_matrix(const rxmesh::Context contex query.dispatch(block, shrd_alloc, vn_lambda); } -template -__global__ static void test_input(const rxmesh::Context context, - rxmesh::VertexAttribute ref_coords, - rxmesh::VertexAttribute current_coords, - rxmesh::VertexAttribute constrained, - Eigen::Vector3f displacement - ) -{ - - //above a specific z up value, 
shift x by 1 - auto vn_lambda = [&](VertexHandle v_id, VertexIterator& vv) { - - current_coords(v_id, 0) = ref_coords(v_id, 0); - current_coords(v_id, 1) = ref_coords(v_id, 1); - current_coords(v_id, 2) = ref_coords(v_id, 2); - - //Spot specific input - - if (current_coords(v_id, 0) < -0.15) - { - constrained(v_id, 0) = 2; - } - else if (current_coords(v_id, 1) < -0.65) { - current_coords(v_id, 0) = - current_coords(v_id, 0) + displacement.x(); - current_coords(v_id, 2) = - current_coords(v_id, 2) + displacement.z(); - constrained(v_id, 0) = 1; - } - else { - if (current_coords(v_id, 0) < -0.2) { - constrained(v_id, 0) = 0; - } else - constrained(v_id, 0) = 0; - } - - if (current_coords(v_id,1)>-0.15) { - constrained(v_id, 0) = 2; - } - - }; - - - auto block = cooperative_groups::this_thread_block(); - Query query(context); - ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, vn_lambda); - -} - -/* compute all entries of bMatrix parallely */ template __global__ static void calculate_b( const rxmesh::Context context, rxmesh::VertexAttribute original_coords, // [num_coord, 3] - rxmesh::VertexAttribute changed_coords, // [num_coord, 3] + rxmesh::VertexAttribute changed_coords, // [num_coord, 3] rxmesh::VertexAttribute rot_mat, // [num_coord, 9] rxmesh::SparseMatrix weight_mat, // [num_coord, num_coord] - rxmesh::DenseMatrix bMatrix, // [num_coord, 3] - rxmesh::VertexAttribute constrained) + rxmesh::DenseMatrix bMatrix, // [num_coord, 3] + rxmesh::VertexAttribute constraints) { auto init_lambda = [&](VertexHandle v_id, VertexIterator& vv) { - // variable to store ith entry of bMatrix Eigen::Vector3f bi(0.0f, 0.0f, 0.0f); // get rotation matrix for ith vertex Eigen::Matrix3f Ri = Eigen::Matrix3f::Zero(3, 3); - for (int i = 0; i < 3; i++) - { + for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) Ri(i, j) = rot_mat(v_id, i * 3 + j); } - for (int nei_index = 0; nei_index < vv.size(); nei_index++) - { + for (int nei_index = 0; nei_index < vv.size(); 
nei_index++) { // get rotation matrix for neightbor j Eigen::Matrix3f Rj = Eigen::Matrix3f::Zero(3, 3); - for (int i = 0; i < 3; i++) + for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) Rj(i, j) = rot_mat(vv[nei_index], i * 3 + j); - + // find rotation addition Eigen::Matrix3f rot_add = Ri + Rj; // find coord difference - Eigen::Vector3f vert_diff = - { + Eigen::Vector3f vert_diff = { original_coords(v_id, 0) - original_coords(vv[nei_index], 0), original_coords(v_id, 1) - original_coords(vv[nei_index], 1), - original_coords(v_id, 2) - original_coords(vv[nei_index], 2) - }; + original_coords(v_id, 2) - original_coords(vv[nei_index], 2)}; // update bi - bi = bi + 0.5 * weight_mat(v_id, vv[nei_index]) * rot_add * vert_diff; + bi = bi + + 0.5 * weight_mat(v_id, vv[nei_index]) * rot_add * vert_diff; } - if (constrained(v_id, 0) == 0) { + if (constraints(v_id, 0) == 0) { bMatrix(v_id, 0) = bi[0]; bMatrix(v_id, 1) = bi[1]; bMatrix(v_id, 2) = bi[2]; - } - else - { - bMatrix(v_id, 0) = changed_coords(v_id,0); + } else { + bMatrix(v_id, 0) = changed_coords(v_id, 0); bMatrix(v_id, 1) = changed_coords(v_id, 1); bMatrix(v_id, 2) = changed_coords(v_id, 2); } }; - auto block = cooperative_groups::this_thread_block(); + auto block = cooperative_groups::this_thread_block(); Query query(context); ShmemAllocator shrd_alloc; query.dispatch(block, shrd_alloc, init_lambda); } -/* compute system matrix rows parallely (L from eq9) */ + template __global__ static void calculate_system_matrix( - const rxmesh::Context context, - rxmesh::SparseMatrix weight_mat, - rxmesh::SparseMatrix L, - rxmesh::VertexAttribute constrained) + const rxmesh::Context context, + rxmesh::SparseMatrix weight_mat, + rxmesh::SparseMatrix sys_mat, + rxmesh::VertexAttribute constraints) { - auto init_lambda = [&](VertexHandle v_id, VertexIterator& vv) { - - L(v_id, v_id) = 0; - for (int nei_index = 0; nei_index < vv.size(); nei_index++) - L(v_id, vv[nei_index]) = 0; - - if (constrained(v_id, 0)==0) - { - for 
(int nei_index = 0; nei_index < vv.size(); nei_index++) - { - L(v_id, v_id) += weight_mat(v_id, vv[nei_index]); - L(v_id, vv[nei_index]) -= weight_mat(v_id, vv[nei_index]); + auto calc_mat = [&](VertexHandle v_id, VertexIterator& vv) { + if (constraints(v_id, 0) == 0) { + for (int i = 0; i < vv.size(); i++) { + sys_mat(v_id, v_id) += weight_mat(v_id, vv[i]); + sys_mat(v_id, vv[i]) -= weight_mat(v_id, vv[i]); } - } - else - { - for (int nei_index = 0; nei_index < vv.size(); nei_index++) - { - L(v_id, vv[nei_index]) = 0; + } else { + for (int i = 0; i < vv.size(); i++) { + sys_mat(v_id, vv[i]) = 0; } - L(v_id, v_id) = 1; + sys_mat(v_id, v_id) = 1; } }; auto block = cooperative_groups::this_thread_block(); Query query(context); ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, init_lambda); + query.dispatch(block, shrd_alloc, calc_mat); } - int main(int argc, char** argv) { Log::init(); @@ -322,167 +220,181 @@ int main(int argc, char** argv) const uint32_t device_id = 0; cuda_query(device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "spot_low_resolution.obj"); + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "dragon.obj"); - Eigen::Vector3f displacement(1, 1, 1); - - - auto ref_vertex_pos = - *rx.get_input_vertex_coordinates(); // stays same across computation - auto changed_vertex_pos = - rx.add_vertex_attribute("P", 3); // changes per iteration + if (!rx.is_closed()) { + RXMESH_ERROR("Input mesh should be closed without boundaries"); + return EXIT_FAILURE; + } + polyscope::view::upDir = polyscope::UpDir::ZUp; + constexpr uint32_t CUDABlockSize = 256; + // stays same across computation + auto ref_vertex_pos = *rx.get_input_vertex_coordinates(); - //input - auto constraints = rx.add_vertex_attribute("FixedVertices", 1); - constexpr uint32_t CUDABlockSize = 256; - rxmesh::LaunchBox input_launch_box; - rx.prepare_launch_box( - {rxmesh::Op::VV}, - input_launch_box, - (void*)test_input); + // deformed vertex position that change every iteration + auto 
deformed_vertex_pos = *rx.add_vertex_attribute("deformedV", 3); + deformed_vertex_pos.copy_from(ref_vertex_pos, DEVICE, DEVICE); - test_input<<>>( - rx.get_context(), ref_vertex_pos, *changed_vertex_pos, *constraints, displacement); + // deformed vertex position as a matrix (used in the solver) + std::shared_ptr> deformed_vertex_pos_mat = + deformed_vertex_pos.to_matrix(); - changed_vertex_pos->move(DEVICE, HOST); - constraints->move(DEVICE, HOST); - rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); - //process + // vertex constraints where + // 0 means free + // 1 means user-displaced + // 2 means fixed + auto constraints = *rx.add_vertex_attribute("FixedVertices", 1); - - //compute weights + // compute weights auto weights = rx.add_edge_attribute("edgeWeights", 1); SparseMatrix weight_matrix(rx); + weight_matrix.set_value(0.f); + + // system matrix + SparseMatrix sys_mat(rx); + sys_mat.set_value(0.f); - //obtain cotangent weight matrix - rxmesh::LaunchBox launch_box; + // rotation matrix as a very attribute where every vertex has 3x3 matrix + auto rot_mat = *rx.add_vertex_attribute("RotationMatrix", 9); + + // b-matrix + DenseMatrix b_mat(rx, rx.get_num_vertices(), 3); + b_mat.set_value(0.f); + + // obtain cotangent weight matrix + rxmesh::LaunchBox lb; rx.prepare_launch_box({rxmesh::Op::EVDiamond}, - launch_box, - (void*)compute_edge_weights_evd); + lb, + (void*)calc_edge_weights_mat); + + calc_edge_weights_mat + <<>>( + rx.get_context(), ref_vertex_pos, weight_matrix); + + // set constraints + const vec3 sphere_center(0.1818329, -0.99023, 0.325066); + rx.for_each_vertex(DEVICE, [=] __device__(const VertexHandle& vh) { + const vec3 p(deformed_vertex_pos(vh, 0), + deformed_vertex_pos(vh, 1), + deformed_vertex_pos(vh, 2)); + + // fix the bottom + if (p[2] < -0.63) { + constraints(vh) = 2; + } - compute_edge_weights_evd - <<>>( - rx.get_context(), ref_vertex_pos, weight_matrix); + // move the jaw + if (glm::distance(p, sphere_center) < 0.1) { + 
constraints(vh) = 1; + } + }); - // Calculate System - SparseMatrix systemMatrix(rx); - // call function to calculate L Matrix entries parallely - rxmesh::LaunchBox launch_box_L; - rx.prepare_launch_box( - {rxmesh::Op::VV}, - launch_box_L, - (void*)calculate_system_matrix); + // move constraints to the host and add it to Polyscope + constraints.move(DEVICE, HOST); - calculate_system_matrix - <<>>( - rx.get_context(), weight_matrix, systemMatrix, *constraints); +#if USE_POLYSCOPE + rx.get_polyscope_mesh()->addVertexScalarQuantity("constraintsV", + constraints); +#endif - systemMatrix.move(DEVICE, HOST); + // Calculate system matrix + rx.prepare_launch_box({rxmesh::Op::VV}, + lb, + (void*)calculate_system_matrix); + calculate_system_matrix + <<>>( + rx.get_context(), weight_matrix, sys_mat, constraints); - auto rot_mat = *rx.add_vertex_attribute("RotationMatrix", 9); - rxmesh::LaunchBox rotation_launch_box; + // pre_solve sys_mat + sys_mat.pre_solve(Solver::QR, PermuteMethod::NSTDIS); + + // launch box for rotation matrix calculation + rxmesh::LaunchBox lb_rot; rx.prepare_launch_box( - {rxmesh::Op::VV}, - rotation_launch_box, - (void*)calculate_rotation_matrix); + {rxmesh::Op::VV}, + lb_rot, + (void*)calculate_rotation_matrix); - DenseMatrix bMatrix(rx, rx.get_num_vertices(), 3); - // call function to calculate bMatrix entries parallely - rxmesh::LaunchBox launch_box_bMatrix; - rx.prepare_launch_box({rxmesh::Op::VV}, - launch_box_bMatrix, - (void*)calculate_b); - - - - //DenseMatrix X_mat(rx, rx.get_num_vertices(), 3); - std::shared_ptr> X_mat = changed_vertex_pos->to_matrix(); - - - //how many times will arap algorithm run? 
- int iterations = 1; - - rx.get_polyscope_mesh()->addVertexScalarQuantity("fixedVertices", - *constraints); - - float t = 0; - int flag = 0; - Eigen::Vector3f start = Eigen::Vector3f(-0.5, 0.0, 0.0); - Eigen::Vector3f end = Eigen::Vector3f(0.5, 0.0, 0.0); - - auto polyscope_callback = [&]() mutable { - //input step - - if (flag == 1) { - t -= 0.03; - if (t < 0) - flag = 0; - } else if (flag == 0) { - t += 0.03; - if (t > 1.0) - flag = 1; - } - displacement = (1 - t) * start + (t) * end; - - test_input - <<>>(rx.get_context(), - ref_vertex_pos, - *changed_vertex_pos, - *constraints, - displacement); - - //process step - for (int i = 0; i < iterations; i++) { - // rotation part - // calculate rotation matrix - calculate_rotation_matrix - <<>>(rx.get_context(), - ref_vertex_pos, - *changed_vertex_pos, - rot_mat, - weight_matrix); - calculate_b - <<>>(rx.get_context(), - ref_vertex_pos, - *changed_vertex_pos, - rot_mat, - weight_matrix, - bMatrix, - *constraints); - - X_mat = changed_vertex_pos->to_matrix(); - systemMatrix.solve(bMatrix, *X_mat, Solver::QR, PermuteMethod::NSTDIS); - - } - X_mat->move(DEVICE, HOST); - changed_vertex_pos->from_matrix(X_mat.get()); - - //update step - #if USE_POLYSCOPE - //x->move(DEVICE, HOST); - rx.get_polyscope_mesh()->updateVertexPositions(*changed_vertex_pos); - #endif - - }; - + // launch box for b matrix calculation + rxmesh::LaunchBox lb_b_mat; + rx.prepare_launch_box( + {rxmesh::Op::VV}, lb_b_mat, (void*)calculate_b); + + + // how many times will arap algorithm run? + int iterations = 1; + + float t = 0; + bool flag = false; + vec3 start(0.0f, 0.2f, 0.0f); + vec3 end(0.0f, -0.2f, 0.0f); + vec3 displacement(0.0f, 0.0f, 0.0f); + + auto polyscope_callback = [&]() mutable { + t += flag ? -0.5f : 0.5f; + + flag = (t < 0 || t > 1.0f) ? 
!flag : flag; + + displacement = (1 - t) * start + (t)*end; + + // apply user deformation + rx.for_each_vertex(DEVICE, [=] __device__(const VertexHandle& vh) { + if (vh.local_id() == 0 && vh.patch_id() == 0) { + printf("\n Disp = %f, %f, %f", + displacement[0], + displacement[1], + displacement[2]); + } + if (constraints(vh) == 1) { + deformed_vertex_pos(vh, 0) += displacement[0]; + deformed_vertex_pos(vh, 1) += displacement[1]; + deformed_vertex_pos(vh, 2) += displacement[2]; + } + }); + + + // process step + for (int i = 0; i < iterations; i++) { + // solver for rotation + calculate_rotation_matrix + <<>>( + rx.get_context(), + ref_vertex_pos, + deformed_vertex_pos, + rot_mat, + weight_matrix); + + // solve for position + calculate_b + <<>>(rx.get_context(), + ref_vertex_pos, + deformed_vertex_pos, + rot_mat, + weight_matrix, + b_mat, + constraints); + + sys_mat.solve(b_mat, *deformed_vertex_pos_mat); + } + + // move mat to the host + deformed_vertex_pos_mat->move(DEVICE, HOST); + deformed_vertex_pos.from_matrix(deformed_vertex_pos_mat.get()); + + +#if USE_POLYSCOPE + rx.get_polyscope_mesh()->updateVertexPositions(deformed_vertex_pos); +#endif + }; + #if USE_POLYSCOPE polyscope::state::userCallback = polyscope_callback; From d93a4c343c582ad1e2ec948653a8c0534cc9be26 Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 8 Aug 2024 14:00:27 -0400 Subject: [PATCH 92/96] remove printf --- apps/ARAP/arap.cu | 6 ------ 1 file changed, 6 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 5b65264b..13f803b2 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -345,12 +345,6 @@ int main(int argc, char** argv) // apply user deformation rx.for_each_vertex(DEVICE, [=] __device__(const VertexHandle& vh) { - if (vh.local_id() == 0 && vh.patch_id() == 0) { - printf("\n Disp = %f, %f, %f", - displacement[0], - displacement[1], - displacement[2]); - } if (constraints(vh) == 1) { deformed_vertex_pos(vh, 0) += displacement[0]; deformed_vertex_pos(vh, 1) += 
displacement[1]; From 95d6a98a48e1b0d0c4ee392ed2c097215fda34fb Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 8 Aug 2024 14:15:58 -0400 Subject: [PATCH 93/96] renaming variables --- apps/ARAP/arap.cu | 66 +++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 13f803b2..cf4cef7e 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -61,28 +61,28 @@ __global__ static void calc_edge_weights_mat( template __global__ static void calculate_rotation_matrix( const rxmesh::Context context, - rxmesh::VertexAttribute ref_coords, - rxmesh::VertexAttribute current_coords, - rxmesh::VertexAttribute rotationVector, - rxmesh::SparseMatrix weight_mat) + rxmesh::VertexAttribute ref_vertex_pos, + rxmesh::VertexAttribute deformed_vertex_pos, + rxmesh::VertexAttribute rot_mat, + rxmesh::SparseMatrix weight_matrix) { auto vn_lambda = [&](VertexHandle v_id, VertexIterator& vv) { Eigen::Matrix3f S = Eigen::Matrix3f::Zero(); for (int j = 0; j < vv.size(); j++) { - float w = weight_mat(v_id, vv[j]); + float w = weight_matrix(v_id, vv[j]); Eigen::Vector pi_vector = { - ref_coords(v_id, 0) - ref_coords(vv[j], 0), - ref_coords(v_id, 1) - ref_coords(vv[j], 1), - ref_coords(v_id, 2) - ref_coords(vv[j], 2)}; + ref_vertex_pos(v_id, 0) - ref_vertex_pos(vv[j], 0), + ref_vertex_pos(v_id, 1) - ref_vertex_pos(vv[j], 1), + ref_vertex_pos(v_id, 2) - ref_vertex_pos(vv[j], 2)}; Eigen::Vector pi_dash_vector = { - current_coords(v_id, 0) - current_coords(vv[j], 0), - current_coords(v_id, 1) - current_coords(vv[j], 1), - current_coords(v_id, 2) - current_coords(vv[j], 2)}; + deformed_vertex_pos(v_id, 0) - deformed_vertex_pos(vv[j], 0), + deformed_vertex_pos(v_id, 1) - deformed_vertex_pos(vv[j], 1), + deformed_vertex_pos(v_id, 2) - deformed_vertex_pos(vv[j], 2)}; S = S + w * pi_vector * pi_dash_vector.transpose(); } @@ -108,13 +108,7 @@ __global__ static void calculate_rotation_matrix( for (int i = 0; i < 
3; i++) for (int j = 0; j < 3; j++) - rotationVector(v_id, i * 3 + j) = R(i, j); - - // Apply rotation - - Eigen::Vector3 new_coords = {current_coords(v_id, 0), - current_coords(v_id, 1), - current_coords(v_id, 2)}; + rot_mat(v_id, i * 3 + j) = R(i, j); }; auto block = cooperative_groups::this_thread_block(); @@ -127,15 +121,15 @@ __global__ static void calculate_rotation_matrix( template __global__ static void calculate_b( const rxmesh::Context context, - rxmesh::VertexAttribute original_coords, // [num_coord, 3] - rxmesh::VertexAttribute changed_coords, // [num_coord, 3] - rxmesh::VertexAttribute rot_mat, // [num_coord, 9] - rxmesh::SparseMatrix weight_mat, // [num_coord, num_coord] - rxmesh::DenseMatrix bMatrix, // [num_coord, 3] + rxmesh::VertexAttribute ref_vertex_pos, + rxmesh::VertexAttribute deformed_vertex_pos, + rxmesh::VertexAttribute rot_mat, + rxmesh::SparseMatrix weight_mat, + rxmesh::DenseMatrix b_mat, rxmesh::VertexAttribute constraints) { auto init_lambda = [&](VertexHandle v_id, VertexIterator& vv) { - // variable to store ith entry of bMatrix + // variable to store ith entry of b_mat Eigen::Vector3f bi(0.0f, 0.0f, 0.0f); // get rotation matrix for ith vertex @@ -157,9 +151,9 @@ __global__ static void calculate_b( Eigen::Matrix3f rot_add = Ri + Rj; // find coord difference Eigen::Vector3f vert_diff = { - original_coords(v_id, 0) - original_coords(vv[nei_index], 0), - original_coords(v_id, 1) - original_coords(vv[nei_index], 1), - original_coords(v_id, 2) - original_coords(vv[nei_index], 2)}; + ref_vertex_pos(v_id, 0) - ref_vertex_pos(vv[nei_index], 0), + ref_vertex_pos(v_id, 1) - ref_vertex_pos(vv[nei_index], 1), + ref_vertex_pos(v_id, 2) - ref_vertex_pos(vv[nei_index], 2)}; // update bi bi = bi + @@ -167,13 +161,13 @@ __global__ static void calculate_b( } if (constraints(v_id, 0) == 0) { - bMatrix(v_id, 0) = bi[0]; - bMatrix(v_id, 1) = bi[1]; - bMatrix(v_id, 2) = bi[2]; + b_mat(v_id, 0) = bi[0]; + b_mat(v_id, 1) = bi[1]; + b_mat(v_id, 2) = bi[2]; 
} else { - bMatrix(v_id, 0) = changed_coords(v_id, 0); - bMatrix(v_id, 1) = changed_coords(v_id, 1); - bMatrix(v_id, 2) = changed_coords(v_id, 2); + b_mat(v_id, 0) = deformed_vertex_pos(v_id, 0); + b_mat(v_id, 1) = deformed_vertex_pos(v_id, 1); + b_mat(v_id, 2) = deformed_vertex_pos(v_id, 2); } }; @@ -187,7 +181,7 @@ __global__ static void calculate_b( template __global__ static void calculate_system_matrix( const rxmesh::Context context, - rxmesh::SparseMatrix weight_mat, + rxmesh::SparseMatrix weight_matrix, rxmesh::SparseMatrix sys_mat, rxmesh::VertexAttribute constraints) @@ -195,8 +189,8 @@ __global__ static void calculate_system_matrix( auto calc_mat = [&](VertexHandle v_id, VertexIterator& vv) { if (constraints(v_id, 0) == 0) { for (int i = 0; i < vv.size(); i++) { - sys_mat(v_id, v_id) += weight_mat(v_id, vv[i]); - sys_mat(v_id, vv[i]) -= weight_mat(v_id, vv[i]); + sys_mat(v_id, v_id) += weight_matrix(v_id, vv[i]); + sys_mat(v_id, vv[i]) -= weight_matrix(v_id, vv[i]); } } else { for (int i = 0; i < vv.size(); i++) { From 00eddb5a16978271b25226a869cc663620a1c2c0 Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 8 Aug 2024 14:19:33 -0400 Subject: [PATCH 94/96] renaming --- apps/ARAP/arap.cu | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index cf4cef7e..27e5213f 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -63,7 +63,7 @@ __global__ static void calculate_rotation_matrix( const rxmesh::Context context, rxmesh::VertexAttribute ref_vertex_pos, rxmesh::VertexAttribute deformed_vertex_pos, - rxmesh::VertexAttribute rot_mat, + rxmesh::VertexAttribute rotations, rxmesh::SparseMatrix weight_matrix) { auto vn_lambda = [&](VertexHandle v_id, VertexIterator& vv) { @@ -108,7 +108,7 @@ __global__ static void calculate_rotation_matrix( for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) - rot_mat(v_id, i * 3 + j) = R(i, j); + rotations(v_id, i * 3 + j) = R(i, j); 
}; auto block = cooperative_groups::this_thread_block(); @@ -123,7 +123,7 @@ __global__ static void calculate_b( const rxmesh::Context context, rxmesh::VertexAttribute ref_vertex_pos, rxmesh::VertexAttribute deformed_vertex_pos, - rxmesh::VertexAttribute rot_mat, + rxmesh::VertexAttribute rotations, rxmesh::SparseMatrix weight_mat, rxmesh::DenseMatrix b_mat, rxmesh::VertexAttribute constraints) @@ -137,7 +137,7 @@ __global__ static void calculate_b( for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) - Ri(i, j) = rot_mat(v_id, i * 3 + j); + Ri(i, j) = rotations(v_id, i * 3 + j); } for (int nei_index = 0; nei_index < vv.size(); nei_index++) { @@ -145,7 +145,7 @@ __global__ static void calculate_b( Eigen::Matrix3f Rj = Eigen::Matrix3f::Zero(3, 3); for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) - Rj(i, j) = rot_mat(vv[nei_index], i * 3 + j); + Rj(i, j) = rotations(vv[nei_index], i * 3 + j); // find rotation addition Eigen::Matrix3f rot_add = Ri + Rj; @@ -182,21 +182,21 @@ template __global__ static void calculate_system_matrix( const rxmesh::Context context, rxmesh::SparseMatrix weight_matrix, - rxmesh::SparseMatrix sys_mat, + rxmesh::SparseMatrix laplace_mat, rxmesh::VertexAttribute constraints) { auto calc_mat = [&](VertexHandle v_id, VertexIterator& vv) { if (constraints(v_id, 0) == 0) { for (int i = 0; i < vv.size(); i++) { - sys_mat(v_id, v_id) += weight_matrix(v_id, vv[i]); - sys_mat(v_id, vv[i]) -= weight_matrix(v_id, vv[i]); + laplace_mat(v_id, v_id) += weight_matrix(v_id, vv[i]); + laplace_mat(v_id, vv[i]) -= weight_matrix(v_id, vv[i]); } } else { for (int i = 0; i < vv.size(); i++) { - sys_mat(v_id, vv[i]) = 0; + laplace_mat(v_id, vv[i]) = 0; } - sys_mat(v_id, v_id) = 1; + laplace_mat(v_id, v_id) = 1; } }; @@ -249,11 +249,11 @@ int main(int argc, char** argv) weight_matrix.set_value(0.f); // system matrix - SparseMatrix sys_mat(rx); - sys_mat.set_value(0.f); + SparseMatrix laplace_mat(rx); + laplace_mat.set_value(0.f); // rotation matrix as a 
very attribute where every vertex has 3x3 matrix - auto rot_mat = *rx.add_vertex_attribute("RotationMatrix", 9); + auto rotations = *rx.add_vertex_attribute("RotationMatrix", 9); // b-matrix DenseMatrix b_mat(rx, rx.get_num_vertices(), 3); @@ -302,10 +302,10 @@ int main(int argc, char** argv) calculate_system_matrix <<>>( - rx.get_context(), weight_matrix, sys_mat, constraints); + rx.get_context(), weight_matrix, laplace_mat, constraints); - // pre_solve sys_mat - sys_mat.pre_solve(Solver::QR, PermuteMethod::NSTDIS); + // pre_solve laplace_mat + laplace_mat.pre_solve(Solver::QR, PermuteMethod::NSTDIS); // launch box for rotation matrix calculation rxmesh::LaunchBox lb_rot; @@ -355,7 +355,7 @@ int main(int argc, char** argv) rx.get_context(), ref_vertex_pos, deformed_vertex_pos, - rot_mat, + rotations, weight_matrix); // solve for position @@ -365,12 +365,12 @@ int main(int argc, char** argv) lb_b_mat.smem_bytes_dyn>>>(rx.get_context(), ref_vertex_pos, deformed_vertex_pos, - rot_mat, + rotations, weight_matrix, b_mat, constraints); - sys_mat.solve(b_mat, *deformed_vertex_pos_mat); + laplace_mat.solve(b_mat, *deformed_vertex_pos_mat); } // move mat to the host From 564e73f4f80b600c1efd7a343b461041aa90dd4a Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 8 Aug 2024 21:24:23 -0400 Subject: [PATCH 95/96] cleanup arap --- apps/ARAP/arap.cu | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/ARAP/arap.cu b/apps/ARAP/arap.cu index 27e5213f..5b1bb595 100644 --- a/apps/ARAP/arap.cu +++ b/apps/ARAP/arap.cu @@ -388,4 +388,12 @@ int main(int argc, char** argv) polyscope::state::userCallback = polyscope_callback; polyscope::show(); #endif + + deformed_vertex_pos_mat->release(); + weight_matrix.release(); + laplace_mat.release(); + b_mat.release(); + + + return 0; } \ No newline at end of file From 89011c812f7a74c41e422fb1fa4e0cfdfc105707 Mon Sep 17 00:00:00 2001 From: ahmed Date: Thu, 8 Aug 2024 21:24:34 -0400 Subject: [PATCH 96/96] cleanup scp --- apps/SCP/scp.cu | 
452 ++++++++++++++++-------------------------------- 1 file changed, 152 insertions(+), 300 deletions(-) diff --git a/apps/SCP/scp.cu b/apps/SCP/scp.cu index 49c9ea6e..e8c67301 100644 --- a/apps/SCP/scp.cu +++ b/apps/SCP/scp.cu @@ -1,176 +1,98 @@ +#include "rxmesh/matrix/sparse_matrix.cuh" #include "rxmesh/query.cuh" +#include "rxmesh/reduce_handle.h" #include "rxmesh/rxmesh_static.h" -#include "rxmesh/matrix/sparse_matrix.cuh" - using namespace rxmesh; -template -__global__ static void compute_area_matrix( - const rxmesh::Context context, - rxmesh::VertexAttribute boundaryVertices, - rxmesh::SparseMatrix AreaMatrix) +template +__global__ static void area_term(const Context context, + const VertexAttribute v_bd, + SparseMatrix E) { + using namespace rxmesh; - auto vn_lambda = [&](FaceHandle face_id, VertexIterator& vv) { - if (boundaryVertices(vv[0], 0) == 1 && boundaryVertices(vv[1], 0) == 1) - { - AreaMatrix(vv[0], vv[1]) = make_cuComplex(0, -0.25); - AreaMatrix(vv[1], vv[0]) = make_cuComplex(0, 0.25); - //printf("\nfirst edge: %f", AreaMatrix(vv[0], vv[1]).y); - - } - else if (boundaryVertices(vv[1], 0) == 1 && - boundaryVertices(vv[2], 0) == 1) { - AreaMatrix(vv[1], vv[2]) = make_cuComplex(0, -0.25); - AreaMatrix(vv[2], vv[1]) = make_cuComplex(0, 0.25); - //printf("\nsecond edge: %f", AreaMatrix(vv[1], vv[2]).y); + auto compute = [&](FaceHandle& face_id, const VertexIterator& iter) { + assert(iter.size() == 3); + for (int i = 0; i < 3; ++i) { + int j = (i + 1) % 3; + if (v_bd(iter[i]) == 1 && v_bd(iter[j]) == 1) { + ::atomicAdd(&E(iter[i], iter[j]).y, 0.25f); + ::atomicAdd(&E(iter[j], iter[i]).y, -0.25f); } - else if (boundaryVertices(vv[2], 0) == 1 && - boundaryVertices(vv[0], 0) == 1) { - AreaMatrix(vv[2], vv[0]) = make_cuComplex(0, -0.25); - AreaMatrix(vv[0], vv[2]) = make_cuComplex(0, 0.25); - //printf("\nthird edge: %f", AreaMatrix(vv[2], vv[0]).y); - } }; - auto block = cooperative_groups::this_thread_block(); - Query query(context); - ShmemAllocator 
shrd_alloc; - query.dispatch(block, shrd_alloc, vn_lambda); -} + auto block = cooperative_groups::this_thread_block(); -template -__device__ __forceinline__ T -edge_cotan_weight(const rxmesh::VertexHandle& p_id, - const rxmesh::VertexHandle& r_id, - const rxmesh::VertexHandle& q_id, - //const rxmesh::VertexHandle& s_id, - const rxmesh::VertexAttribute& X) -{ - // Get the edge weight between the two vertices p-r where - // q and s composes the diamond around p-r - - const vec3 p(X(p_id, 0), X(p_id, 1), X(p_id, 2)); - const vec3 r(X(r_id, 0), X(r_id, 1), X(r_id, 2)); - const vec3 q(X(q_id, 0), X(q_id, 1), X(q_id, 2)); - //const vec3 s(X(s_id, 0), X(s_id, 1), X(s_id, 2)); - - // cotans[(v1, v2)] =np.dot(e1, e2) / np.linalg.norm(np.cross(e1, e2)) - - float weight = 0; - if (q_id.is_valid()) - weight += dot((p - q), (r - q)) / length(cross(p - q, r - q)); - - //weight /= 2; - return std::max(0.f, weight); -} - - - - -template -__global__ static void compute_edge_weights_evd( - const rxmesh::Context context, - rxmesh::VertexAttribute coords, - rxmesh::SparseMatrix A_mat) -{ - - auto vn_lambda = [&](EdgeHandle edge_id, VertexIterator& vv) { - T e_weight = 0; - - if (vv[1].is_valid()) - e_weight += edge_cotan_weight(vv[0], vv[2], vv[1], coords); - if (vv[3].is_valid()) - e_weight += edge_cotan_weight(vv[0], vv[2], vv[3], coords); - //if (vv[1].is_valid() && vv[3].is_valid()) - e_weight /= 4; - A_mat(vv[0], vv[2]) = 1; - A_mat(vv[2], vv[0]) = 1; + Query query(context); - }; + ShmemAllocator shrd_alloc; - auto block = cooperative_groups::this_thread_block(); - Query query(context); - ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, vn_lambda); + query.dispatch(block, shrd_alloc, compute); } -template -__global__ static void calculate_Ld_matrix( - const rxmesh::Context context, - rxmesh::SparseMatrix weight_mat, // [num_coord, num_coord] - rxmesh::SparseMatrix Ld // [num_coord, num_coord] -) +template +__global__ static void conformal_energy(const Context 
context, + const VertexAttribute coord, + SparseMatrix E) { - auto init_lambda = [&](VertexHandle v_id, VertexIterator& vv) { - Ld(v_id, v_id) = make_cuComplex(0, 0); - for (int nei_index = 0; nei_index < vv.size(); nei_index++) - Ld(v_id, vv[nei_index]) = make_cuComplex(0, 0); + using namespace rxmesh; - for (int nei_index = 0; nei_index < vv.size(); nei_index++) { + auto compute = [&](EdgeHandle& p0, const VertexIterator& iter) { + auto weight = [&](const vec3& P, + const vec3& Q, + const vec3& O) { + const vec3 l1 = O - Q; + const vec3 l2 = O - P; - Ld(v_id, v_id) = //make_cuComplex(5, 0); - cuCaddf(Ld(v_id, v_id), - make_cuComplex(1,0));//weight_mat(v_id, vv[nei_index]),0)); - // weight_mat(v_id, vv[nei_index]))); + float w = glm::dot(l1, l2) / glm::length(glm::cross(l1, l2)); + return std::max(0.f, w); + }; - Ld(v_id, vv[nei_index]) = - cuCsubf(Ld(v_id, vv[nei_index]), - make_cuComplex(weight_mat(v_id, vv[nei_index]), 0)); + VertexHandle p = iter[0]; + VertexHandle q = iter[2]; + VertexHandle o0 = iter[1]; + VertexHandle o1 = iter[3]; + assert(p.is_valid() && q.is_valid()); + assert(o0.is_valid() || o1.is_valid()); - } - //printf("\nOwner vertex: %f", Ld(v_id, v_id).x); + const vec3 P(coord(p, 0), coord(p, 1), coord(p, 2)); + const vec3 Q(coord(q, 0), coord(q, 1), coord(q, 2)); - for (int nei_index = 0; nei_index < vv.size(); nei_index++) { - //printf("\n%d: %f", nei_index, Ld(v_id, vv[nei_index]).x); + float coef = 0; + if (o0.is_valid()) { + const vec3 O0(coord(o0, 0), coord(o0, 1), coord(o0, 2)); + coef += weight(P, Q, O0); + } + if (o1.is_valid()) { + const vec3 O1(coord(o1, 0), coord(o1, 1), coord(o1, 2)); + coef += weight(P, Q, O1); } + coef *= 0.25f; - }; - - auto block = cooperative_groups::this_thread_block(); - Query query(context); - ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, init_lambda); -} - -template -__global__ static void subtract_matrix(const rxmesh::Context context, - rxmesh::SparseMatrix A_mat, - rxmesh::SparseMatrix 
B_mat, - rxmesh::SparseMatrix C_mat) -{ - - auto subtract = [&](VertexHandle v_id, VertexIterator& vv) { - for (int i = 0; i < vv.size(); ++i) { + // off-diagonal + E(p, q).x = -coef; + E(q, p).x = -coef; - //printf("\nBr:%f", B_mat(v_id, vv[i]).x); - //printf("\nBc:%f", B_mat(v_id, vv[i]).y); + // diagonal-diagonal + ::atomicAdd(&E(p, p).x, coef); + ::atomicAdd(&E(q, q).x, coef); + }; + auto block = cooperative_groups::this_thread_block(); - A_mat(v_id, vv[i]) = //B_mat(v_id, vv[i]); - cuCsubf(B_mat(v_id, vv[i]), C_mat(v_id, vv[i])); - - //printf("\nAr:%f", A_mat(v_id, vv[i]).x); - //printf("\nAc:%f", A_mat(v_id, vv[i]).y); + Query query(context); - } - A_mat(v_id, v_id) = cuCsubf(B_mat(v_id, v_id), C_mat(v_id, v_id)); - //printf("\nAdiagr:%f", A_mat(v_id, v_id).x); - //printf("\nAdiagc:%f", A_mat(v_id, v_id).y); - }; + ShmemAllocator shrd_alloc; - auto block = cooperative_groups::this_thread_block(); - Query query(context); - ShmemAllocator shrd_alloc; - query.dispatch(block, shrd_alloc, subtract); + query.dispatch(block, shrd_alloc, compute); } int main(int argc, char** argv) @@ -180,212 +102,142 @@ int main(int argc, char** argv) const uint32_t device_id = 0; cuda_query(device_id); - RXMeshStatic rx(STRINGIFY(INPUT_DIR) "plane.obj"); - - auto boundaryVertices = - *rx.add_vertex_attribute("boundaryVertices", 1); + RXMeshStatic rx(STRINGIFY(INPUT_DIR) "bunnyhead.obj"); - rx.get_boundary_vertices( - boundaryVertices); - - //for matrix calls constexpr uint32_t CUDABlockSize = 256; - SparseMatrix Ld(rx); // complex V x V - - SparseMatrix A(rx); // 2V x 2V + auto v_bd = *rx.add_vertex_attribute("vBoundary", 1); + rx.get_boundary_vertices(v_bd); auto coords = *rx.get_input_vertex_coordinates(); - rxmesh::LaunchBox launch_box_area; - rx.prepare_launch_box({rxmesh::Op:: FV}, - launch_box_area, - (void*)compute_area_matrix); + auto uv = *rx.add_vertex_attribute("uv", 3); - compute_area_matrix - <<>>( - rx.get_context(), boundaryVertices, A); + DenseMatrix uv_mat(rx, 
rx.get_num_vertices(), 1); - SparseMatrix weight_matrix(rx); - - // obtain cotangent weight matrix - rxmesh::LaunchBox launch_box; - rx.prepare_launch_box( - {rxmesh::Op::EVDiamond}, - launch_box, - (void*)compute_edge_weights_evd); - - compute_edge_weights_evd - <<>>( - rx.get_context(), coords, weight_matrix); - - rxmesh::LaunchBox launch_box_ld; - rx.prepare_launch_box( - {rxmesh::Op::VV}, - launch_box_ld, - (void*)calculate_Ld_matrix); - - calculate_Ld_matrix - <<>>( - rx.get_context(), weight_matrix, Ld); + // calc number of boundary vertices + ReduceHandle rh(v_bd); + int num_bd_vertices = rh.reduce(v_bd, cub::Sum(), 0); + // compute conformal energy matrix Lc SparseMatrix Lc(rx); - rxmesh::LaunchBox launch_box_lc; - rx.prepare_launch_box({rxmesh::Op::VV}, - launch_box_lc, - (void*)subtract_matrix); - - subtract_matrix - <<>> - (rx.get_context(), Lc, Ld, A); - - int number_of_vertices = rx.get_num_vertices(); + Lc.set_value(make_cuComplex(0.f, 0.f)); + rxmesh::LaunchBox lb; + rx.prepare_launch_box( + {Op::EVDiamond}, lb, (void*)conformal_energy); + conformal_energy + <<>>( + rx.get_context(), coords, Lc); - DenseMatrix eb(rx, number_of_vertices, 1); - DenseMatrix u(rx, number_of_vertices, 1); - DenseMatrix T1(rx, number_of_vertices, 1); + // area term + rx.prepare_launch_box({Op::FV}, lb, (void*)area_term); - DenseMatrix y(rx, number_of_vertices, 1); + area_term<<>>( + rx.get_context(), v_bd, Lc); + // Compute B and eb matrix + DenseMatrix eb(rx, rx.get_num_vertices(), 1); + eb.set_value(make_cuComplex(0.f, 0.f)); SparseMatrix B(rx); + B.set_value(make_cuComplex(0.f, 0.f)); - uint32_t num_bd_vertices = 0; - - + float nb = 1.f / std::sqrt(float(num_bd_vertices)); rx.for_each_vertex( - HOST, - [&](const VertexHandle& vh) { - if (boundaryVertices(vh)) { - num_bd_vertices++; - } - }, - NULL, - false); - - - rx.for_each_vertex(rxmesh::DEVICE, - [B, eb, boundaryVertices, num_bd_vertices] __device__( - const rxmesh::VertexHandle vh) mutable { - eb(vh, 0) = 
make_cuComplex( - (float)boundaryVertices(vh, 0) / num_bd_vertices, 0.0f); - B(vh, vh) = - make_cuComplex((float)boundaryVertices(vh, 0), 0.0f); + rxmesh::DEVICE, + [B, eb, v_bd, nb] __device__(const rxmesh::VertexHandle vh) mutable { + eb(vh, 0) = make_cuComplex((float)v_bd(vh, 0) * (float)nb, 0.0f); + B(vh, vh) = make_cuComplex((float)v_bd(vh, 0), 0.0f); + }); - - }); + // temp mat needed for the power method + DenseMatrix T1(rx, rx.get_num_vertices(), 1); + T1.set_value(make_cuComplex(0.f, 0.f)); - //B.move(rxmesh::DEVICE, rxmesh::HOST); - eb.move(rxmesh::DEVICE, rxmesh::HOST); - // - // S = [B- (1/Vb) * ebebT]; - u.fill_random(); - Lc.pre_solve(Solver::CHOL, PermuteMethod::NSTDIS); // can be outside the loop + // fill-in the init solution of the eigen vector with random values + uv_mat.fill_random(); - int iterations=8; + // factorize the matrix + Lc.pre_solve(Solver::CHOL, PermuteMethod::NSTDIS); - //std::cout << std::endl << u(0, 0).x; - //std::cout << std::endl << u(0, 0).y; - //std::cout << eb(0, 0).x; + // the power method + int iterations = 32; - + float prv_norm = std::numeric_limits::max(); - for (int i = 0; i < iterations; i++) { + cuComplex T2 = eb.dot(uv_mat); + rx.for_each_vertex(rxmesh::DEVICE, + [eb, T2, T1, uv_mat, B] __device__( + const rxmesh::VertexHandle vh) mutable { + T1(vh, 0) = + cuCsubf(cuCmulf(B(vh, vh), uv_mat(vh, 0)), + cuCmulf(eb(vh, 0), T2)); + }); - cuComplex T2 = eb.dot(u); - //std::cout << std::endl << T2.x; - //std::cout << std::endl << T2.y; - - B.multiply(u, T1); - - - - //eb.multiply(T2); + Lc.solve(T1, uv_mat); - rx.for_each_vertex( - rxmesh::DEVICE, - [eb, T2, T1] __device__(const rxmesh::VertexHandle vh) mutable { + float norm = uv_mat.norm2(); - - T1(vh, 0) = cuCsubf( - T1(vh, 0), - cuCmulf(eb(vh, 0),T2) + uv_mat.multiply(1.0f / norm); - ); - - - - - }); - - Lc.solve(T1, y); // Ly=T1 - - y.move(DEVICE, HOST); - - //Lc.solve(T1, y, Solver::QR, PermuteMethod::NSTDIS); - - float norm = y.norm2(); - rx.for_each_vertex( - 
rxmesh::DEVICE, - [y] __device__(const rxmesh::VertexHandle vh) mutable { - printf("\nx:%f", y(vh, 0).x); - printf("\ny:%f", y(vh, 0).y); - }); - - y.multiply(1.0f / norm); - - - - - u.copy_from(y); + if (std::abs(prv_norm - norm) < 0.0001) { + break; + } + prv_norm = norm; } - // conversion step - - - auto parametric_coordinates = *rx.add_vertex_attribute("pCoords", 2); - - rx.for_each_vertex(rxmesh::DEVICE, - [u,parametric_coordinates] __device__( - const rxmesh::VertexHandle vh) mutable { - parametric_coordinates(vh, 0) = u(vh, 0).x; - parametric_coordinates(vh, 1) = u(vh, 0).y; - }); - - parametric_coordinates.move(DEVICE, HOST); - - //calculate cntre, shift mesh by centre (translate back) - //divide maximum value irrespective of axis (abs max value verte)- divide by all coordinates - //u,v is always [(0,0),(1,1)] + // convert from matrix format to attributes + rx.for_each_vertex( + rxmesh::DEVICE, + [uv_mat, uv] __device__(const rxmesh::VertexHandle vh) mutable { + uv(vh, 0) = uv_mat(vh, 0).x; + uv(vh, 1) = uv_mat(vh, 0).y; + }); + uv.move(DEVICE, HOST); + // normalize the uv coordinates + glm::vec3 lower; + glm::vec3 upper; + ReduceHandle rrh(uv); - rx.get_polyscope_mesh()->addVertexParameterizationQuantity( - "pCoords", parametric_coordinates); + lower[0] = rrh.reduce(uv, cub::Min(), std::numeric_limits::max(), 0); + lower[1] = rrh.reduce(uv, cub::Min(), std::numeric_limits::max(), 1); + lower[2] = rrh.reduce(uv, cub::Min(), std::numeric_limits::max(), 2); - rx.get_polyscope_mesh()->addVertexVectorQuantity2D("vq", - parametric_coordinates); + upper[0] = rrh.reduce(uv, cub::Max(), std::numeric_limits::min(), 0); + upper[1] = rrh.reduce(uv, cub::Max(), std::numeric_limits::min(), 1); + upper[2] = rrh.reduce(uv, cub::Max(), std::numeric_limits::min(), 2); - rx.get_polyscope_mesh()->addVertexScalarQuantity("vBoundary", - boundaryVertices); + upper -= lower; + float s = std::max(upper[0], upper[1]); + rx.for_each_vertex(HOST, [&](const VertexHandle vh) { + uv(vh, 
0) -= upper[0]; + uv(vh, 1) -= upper[1]; + uv(vh, 0) /= s; + uv(vh, 1) /= s; + }); + // add uv to Polyscope + rx.get_polyscope_mesh()->addVertexParameterizationQuantity("uv", uv); + rx.get_polyscope_mesh()->addVertexScalarQuantity("vBoundary", v_bd); #if USE_POLYSCOPE polyscope::show(); #endif + + uv_mat.release(); + Lc.release(); + eb.release(); + B.release(); + T1.release(); + + return 0; } \ No newline at end of file