first commit

Yaepiii · Oct 17, 2024 · bf5bedf · bf5bedf
1 parent 284638a
commit bf5bedf
Show file tree

Hide file tree

Showing 65 changed files with 33,591 additions and 0 deletions.
diff --git a/trlo/3rdparty/libspconv/include/spconv/check.hpp b/trlo/3rdparty/libspconv/include/spconv/check.hpp
@@ -0,0 +1,101 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __SPCONV_CHECK_HPP__
+#define __SPCONV_CHECK_HPP__
+
+#include <assert.h>
+#include <cuda_runtime.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#include <string>
+
+namespace spconv {
+
+#if DEBUG
+#define checkRuntime(call) spconv::check_runtime(call, #call, __LINE__, __FILE__)
+#define checkKernel(...)                                                                \
+  [&] {                                                                                 \
+    __VA_ARGS__;                                                                        \
+    checkRuntime(cudaStreamSynchronize(nullptr));                                       \
+    return spconv::check_runtime(cudaGetLastError(), #__VA_ARGS__, __LINE__, __FILE__); \
+  }()
+#define dprintf printf
+#else
+#define checkRuntime(call) spconv::check_runtime(call, #call, __LINE__, __FILE__)
+#define checkKernel(...)                                                            \
+  do {                                                                              \
+    __VA_ARGS__;                                                                    \
+    spconv::check_runtime(cudaPeekAtLastError(), #__VA_ARGS__, __LINE__, __FILE__); \
+  } while (0)
+#define dprintf(...)
+#endif
+
+#define Assertf(cond, fmt, ...)                                                                 \
+  do {                                                                                          \
+    if (!(cond)) {                                                                              \
+      fprintf(stderr, "Assert failed 💀. %s in file %s:%d, message: " fmt "\n", #cond, __FILE__, \
+              __LINE__, __VA_ARGS__);                                                           \
+      abort();                                                                                  \
+    }                                                                                           \
+  } while (false)
+#define Asserts(cond, s)                                                                      \
+  do {                                                                                        \
+    if (!(cond)) {                                                                            \
+      fprintf(stderr, "Assert failed 💀. %s in file %s:%d, message: " s "\n", #cond, __FILE__, \
+              __LINE__);                                                                      \
+      abort();                                                                                \
+    }                                                                                         \
+  } while (false)
+#define Assert(cond)                                                                     \
+  do {                                                                                   \
+    if (!(cond)) {                                                                       \
+      fprintf(stderr, "Assert failed 💀. %s in file %s:%d\n", #cond, __FILE__, __LINE__); \
+      abort();                                                                           \
+    }                                                                                    \
+  } while (false)
+
+static inline std::string format(const char *fmt, ...) {
+  char buffer[2048];
+  va_list vl;
+  va_start(vl, fmt);
+  vsnprintf(buffer, sizeof(buffer), fmt, vl);
+  return buffer;
+}
+
+static inline bool check_runtime(cudaError_t e, const char *call, int line, const char *file) {
+  if (e != cudaSuccess) {
+    fprintf(stderr,
+            "CUDA Runtime error %s # %s, code = %s [ %d ] in file "
+            "%s:%d\n",
+            call, cudaGetErrorString(e), cudaGetErrorName(e), e, file, line);
+    abort();
+    return false;
+  }
+  return true;
+}
+
+};  // namespace spconv
+
+#endif  // #ifndef __SPCONV_CHECK_HPP__
diff --git a/trlo/3rdparty/libspconv/include/spconv/engine.hpp b/trlo/3rdparty/libspconv/include/spconv/engine.hpp
@@ -0,0 +1,117 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __SPCONV_ENGINE_HPP__
+#define __SPCONV_ENGINE_HPP__
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace spconv {
+
+#define Exported __attribute__((visibility("default")))
+
+enum class DType : int { None = 0, Int32 = 1, Float16 = 2 };
+enum class Precision : int { None = 0, Float16 = 1, Int8 = 2 };
+
+/**
+  Storage of data tensor
+**/
+class DTensor {
+ public:
+  virtual std::vector<int64_t> features_shape() const = 0;
+  virtual DType features_dtype() const = 0;
+  virtual void* features_data() = 0;
+
+  virtual std::vector<int64_t> indices_shape() const = 0;
+  virtual DType indices_dtype() const = 0;
+  virtual void* indices_data() = 0;
+
+  virtual std::vector<int> grid_size() const = 0;
+  virtual int device() const = 0;
+};
+
+/**
+  Engine types for sparse convolution
+**/
+class Engine {
+ public:
+  /**
+    Inference function for sparse convolution
+
+    features_shape: The shape of the input feature matrix, it must be two elements.
+    features_dtype: The data type of the input feature matrix, it must be Float16 now.
+    features_data:  The data pointer of the input feature matrix
+    indices_shape:  The shape of the input indices matrix, it must be two elements[n, 4]
+    indices_dtype:  The data type of the input indices matrix, it must be Int32 now.
+    indices_data:   The data pointer of the input indices matrix
+    batch:          The batch size of the input, it must be 1 now.
+    grid_size:      The grid size of the input data, For example: 41,1440,1440 or 1440,1440,41
+    stream:         Which stream is expected to enqueue the inference.
+  **/
+  Exported virtual DTensor* forward(const std::vector<int64_t>& features_shape,
+                                    DType features_dtype, void* features_data,
+                                    const std::vector<int64_t>& indices_shape, DType indices_dtype,
+                                    void* indices_data, int batch, std::vector<int> grid_size,
+                                    void* stream = nullptr) = 0;
+
+  // If you change the precision of a node after loading the model, you should call this function to
+  // reconfigure it
+  Exported virtual void reconfigure() = 0;
+
+  // If you want to execute an implicit PTQ calibration, you can enable int8calibration by marking
+  // it and collecting the maximum value of the tensor in the next forward.
+  Exported virtual void set_int8_calibration(bool enable) = 0;
+
+  // You can modify the precision of a node with this function, but don't forget to call reconfigure
+  Exported virtual void set_node_precision_byname(const char* name, Precision compute_precision,
+                                                  Precision output_precision) = 0;
+  Exported virtual void set_node_precision_byoptype(const char* optype, Precision compute_precision,
+                                                    Precision output_precision) = 0;
+};
+
+/**
+  Create an engine and load the weights from onnx file
+
+  onnx_file: Store the onnx of model structure, please use tool/deploy/export-scn.py to export the
+corresponding onnx precision: What precision to use for model inference. For each layer's precision
+should be stored in the "precision" attribute of the layer
+            - Model inference will ignore the "precision" attribute of each layer what if set to
+Float16
+**/
+Exported std::shared_ptr<Engine> load_engine_from_onnx(const std::string& onnx_file,
+                                                       Precision precision = Precision::Float16);
+
+/**
+  Enable detailed information output
+
+  enable: You should set this to true if you want to debug the model inference process. default:
+  false
+*/
+Exported void set_verbose(bool enable);
+Exported const char* get_precision_string(Precision precision);
+
+};  // namespace spconv
+
+#endif  // #ifndef __SPCONV_ENGINE_HPP__
diff --git a/trlo/3rdparty/libspconv/include/spconv/memory.hpp b/trlo/3rdparty/libspconv/include/spconv/memory.hpp
@@ -0,0 +1,135 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __SPCONV_MEMORY_HPP__
+#define __SPCONV_MEMORY_HPP__
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "check.hpp"
+
+namespace spconv {
+
+class GPUData {
+ public:
+  inline void *ptr() const { return ptr_; }
+  inline size_t bytes() const { return bytes_; }
+  inline bool empty() const { return ptr_ == nullptr; }
+  virtual ~GPUData() { free_memory(); }
+  GPUData() = default;
+  GPUData(const std::string &name) { this->name_ = name; }
+
+  void alloc_or_resize_to(size_t nbytes) {
+    if (capacity_ < nbytes) {
+      dprintf("%s Free old %d, malloc new %d bytes.\n", name_.c_str(), capacity_, nbytes);
+      free_memory();
+      checkRuntime(cudaMalloc(&ptr_, nbytes));
+      capacity_ = nbytes;
+    }
+    bytes_ = nbytes;
+  }
+
+  void alloc(size_t nbytes) { alloc_or_resize_to(nbytes); }
+
+  void resize(size_t nbytes) {
+    if (capacity_ < nbytes) {
+      Assertf(false, "%s Failed to resize memory to %ld bytes. capacity = %ld", name_.c_str(),
+              nbytes, capacity_);
+    }
+    bytes_ = nbytes;
+  }
+
+  void free_memory() {
+    if (ptr_) {
+      checkRuntime(cudaFree(ptr_));
+      ptr_ = nullptr;
+      capacity_ = 0;
+      bytes_ = 0;
+    }
+  }
+
+ private:
+  void *ptr_ = nullptr;
+  size_t bytes_ = 0;
+  size_t capacity_ = 0;
+  std::string name_;
+};
+
+template <typename T>
+class GPUMemory {
+ public:
+  T *ptr() const { return data_ ? (T *)data_->ptr() : nullptr; }
+  size_t size() const { return size_; }
+  size_t bytes() const { return data_ ? data_->bytes() : 0; }
+  bool empty() const { return data_ == nullptr || data_->empty(); }
+  bool unset() const { return data_ == nullptr; }
+  // GPUMemory() { data_.reset(new GPUData()); }
+  virtual ~GPUMemory() { data_.reset(); }
+  void set_gpudata(std::shared_ptr<GPUData> data) { this->data_ = data; }
+
+  void alloc_or_resize_to(size_t size) {
+    if (data_) {
+      size_ = size;
+      data_->alloc_or_resize_to(size * sizeof(T));
+    } else {
+      Asserts(false, "Failed to alloc or resize memory that because data is nullptr.");
+    }
+  }
+
+  void alloc(size_t size) { alloc_or_resize_to(size); }
+
+  void resize(size_t size) {
+    if (data_) {
+      size_ = size;
+      data_->resize(size * sizeof(T));
+    } else {
+      Asserts(false, "Failed to resize memory that because data is nullptr.");
+    }
+  }
+
+ private:
+  std::shared_ptr<GPUData> data_;
+  size_t size_ = 0;
+};
+
+class GPUDataManager {
+ public:
+  std::shared_ptr<GPUData> query_or_alloc(const std::string &tensor_id,
+                                          const std::string &subname = "default") {
+    std::shared_ptr<GPUData> &output = data_dict_[tensor_id][subname];
+    if (output == nullptr) {
+      output.reset(new GPUData(tensor_id + "." + subname));
+    }
+    return output;
+  }
+
+ private:
+  std::unordered_map<std::string, std::unordered_map<std::string, std::shared_ptr<GPUData>>>
+      data_dict_;
+};
+
+};  // namespace spconv
+
+#endif  // #ifndef __SPCONV_MEMORY_HPP__