Commit bf5bedf

first commit

Yaepiii committed Oct 17, 2024
1 parent 284638a commit bf5bedf
Showing 65 changed files with 33,591 additions and 0 deletions.
101 changes: 101 additions & 0 deletions trlo/3rdparty/libspconv/include/spconv/check.hpp
@@ -0,0 +1,101 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#ifndef __SPCONV_CHECK_HPP__
#define __SPCONV_CHECK_HPP__

#include <assert.h>
#include <cuda_runtime.h>
#include <stdarg.h>
#include <stdio.h>

#include <string>

namespace spconv {

#if DEBUG
#define checkRuntime(call) spconv::check_runtime(call, #call, __LINE__, __FILE__)
#define checkKernel(...) \
[&] { \
__VA_ARGS__; \
checkRuntime(cudaStreamSynchronize(nullptr)); \
return spconv::check_runtime(cudaGetLastError(), #__VA_ARGS__, __LINE__, __FILE__); \
}()
#define dprintf printf
#else
#define checkRuntime(call) spconv::check_runtime(call, #call, __LINE__, __FILE__)
#define checkKernel(...) \
do { \
__VA_ARGS__; \
spconv::check_runtime(cudaPeekAtLastError(), #__VA_ARGS__, __LINE__, __FILE__); \
} while (0)
#define dprintf(...)
#endif

#define Assertf(cond, fmt, ...) \
do { \
if (!(cond)) { \
fprintf(stderr, "Assert failed 💀. %s in file %s:%d, message: " fmt "\n", #cond, __FILE__, \
__LINE__, __VA_ARGS__); \
abort(); \
} \
} while (false)
#define Asserts(cond, s) \
do { \
if (!(cond)) { \
fprintf(stderr, "Assert failed 💀. %s in file %s:%d, message: " s "\n", #cond, __FILE__, \
__LINE__); \
abort(); \
} \
} while (false)
#define Assert(cond) \
do { \
if (!(cond)) { \
fprintf(stderr, "Assert failed 💀. %s in file %s:%d\n", #cond, __FILE__, __LINE__); \
abort(); \
} \
} while (false)

static inline std::string format(const char *fmt, ...) {
  char buffer[2048];
  va_list vl;
  va_start(vl, fmt);
  vsnprintf(buffer, sizeof(buffer), fmt, vl);
  va_end(vl);
  return buffer;
}

static inline bool check_runtime(cudaError_t e, const char *call, int line, const char *file) {
if (e != cudaSuccess) {
fprintf(stderr,
"CUDA Runtime error %s # %s, code = %s [ %d ] in file "
"%s:%d\n",
call, cudaGetErrorString(e), cudaGetErrorName(e), e, file, line);
abort();
return false;
}
return true;
}

}  // namespace spconv

#endif // #ifndef __SPCONV_CHECK_HPP__
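
A minimal usage sketch of these macros (the kernel and buffer below are hypothetical, for illustration only; on failure each macro aborts with file/line context as defined above):

#include <cuda_runtime.h>

#include "spconv/check.hpp"

// Hypothetical kernel, used only to illustrate the macros above.
__global__ void scale_kernel(float *data, float factor, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) data[i] *= factor;
}

void example(int n) {
  Assertf(n > 0, "invalid element count: %d", n);

  // checkRuntime aborts with file/line context if the CUDA call fails.
  float *buffer = nullptr;
  checkRuntime(cudaMalloc(&buffer, n * sizeof(float)));

  // checkKernel wraps the launch and then checks the CUDA error state
  // (synchronously in DEBUG builds, via cudaPeekAtLastError otherwise).
  checkKernel(scale_kernel<<<(n + 255) / 256, 256>>>(buffer, 2.0f, n));

  checkRuntime(cudaFree(buffer));
}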
117 changes: 117 additions & 0 deletions trlo/3rdparty/libspconv/include/spconv/engine.hpp
@@ -0,0 +1,117 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#ifndef __SPCONV_ENGINE_HPP__
#define __SPCONV_ENGINE_HPP__

#include <memory>
#include <string>
#include <vector>

namespace spconv {

#define Exported __attribute__((visibility("default")))

enum class DType : int { None = 0, Int32 = 1, Float16 = 2 };
enum class Precision : int { None = 0, Float16 = 1, Int8 = 2 };

/**
  Storage of a data tensor
**/
class DTensor {
public:
virtual std::vector<int64_t> features_shape() const = 0;
virtual DType features_dtype() const = 0;
virtual void* features_data() = 0;

virtual std::vector<int64_t> indices_shape() const = 0;
virtual DType indices_dtype() const = 0;
virtual void* indices_data() = 0;

virtual std::vector<int> grid_size() const = 0;
virtual int device() const = 0;
};

/**
Engine types for sparse convolution
**/
class Engine {
public:
/**
  Inference function for sparse convolution
  features_shape: The shape of the input feature matrix; it must have two elements.
  features_dtype: The data type of the input feature matrix; currently it must be Float16.
  features_data:  The data pointer of the input feature matrix.
  indices_shape:  The shape of the input indices matrix; it must have two elements, [n, 4].
  indices_dtype:  The data type of the input indices matrix; currently it must be Int32.
  indices_data:   The data pointer of the input indices matrix.
  batch:          The batch size of the input; currently it must be 1.
  grid_size:      The grid size of the input data, e.g. 41,1440,1440 or 1440,1440,41.
  stream:         The stream on which to enqueue the inference.
**/
Exported virtual DTensor* forward(const std::vector<int64_t>& features_shape,
DType features_dtype, void* features_data,
const std::vector<int64_t>& indices_shape, DType indices_dtype,
void* indices_data, int batch, std::vector<int> grid_size,
void* stream = nullptr) = 0;

  // If you change the precision of a node after loading the model, call this function to
  // reconfigure the engine.
Exported virtual void reconfigure() = 0;

  // To run an implicit PTQ calibration, enable int8 calibration; the maximum value of each
  // tensor will then be collected during the next forward pass.
Exported virtual void set_int8_calibration(bool enable) = 0;

// You can modify the precision of a node with this function, but don't forget to call reconfigure
Exported virtual void set_node_precision_byname(const char* name, Precision compute_precision,
Precision output_precision) = 0;
Exported virtual void set_node_precision_byoptype(const char* optype, Precision compute_precision,
Precision output_precision) = 0;
};

/**
  Create an engine and load the weights from an ONNX file
  onnx_file:  The ONNX file storing the model structure; please use tool/deploy/export-scn.py
              to export the corresponding ONNX file.
  precision:  The precision to use for model inference. Each layer's precision should be
              stored in the "precision" attribute of the layer.
              - If set to Float16, model inference will ignore the "precision" attribute
                of each layer.
**/
Exported std::shared_ptr<Engine> load_engine_from_onnx(const std::string& onnx_file,
Precision precision = Precision::Float16);

/**
  Enable detailed information output
  enable: Set this to true if you want to debug the model inference process. Default: false.
*/
Exported void set_verbose(bool enable);
Exported const char* get_precision_string(Precision precision);

}  // namespace spconv

#endif // #ifndef __SPCONV_ENGINE_HPP__
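
A minimal usage sketch of this interface, assuming an exported "spconv.onnx" and feature/index buffers already resident on the GPU (the file name and variable names are illustrative, not part of the API):

#include <cuda_runtime.h>

#include "spconv/engine.hpp"

void run_inference(void *features_gpu, void *indices_gpu, int64_t num_points,
                   int64_t num_channels, cudaStream_t stream) {
  auto engine = spconv::load_engine_from_onnx("spconv.onnx", spconv::Precision::Float16);

  // Shapes follow the contract documented above: features are [n, c] Float16,
  // indices are [n, 4] Int32, and batch must currently be 1.
  spconv::DTensor *output = engine->forward(
      {num_points, num_channels}, spconv::DType::Float16, features_gpu,
      {num_points, 4}, spconv::DType::Int32, indices_gpu,
      /*batch=*/1, /*grid_size=*/{41, 1440, 1440}, stream);

  // The returned tensor exposes its own shape, dtype, and device pointers.
  std::vector<int64_t> out_shape = output->features_shape();
  (void)out_shape;

  // To change a node's precision after loading, set it and then reconfigure,
  // e.g. (the op-type string here is an assumption, not taken from this header):
  // engine->set_node_precision_byoptype("SparseConvolution", spconv::Precision::Int8,
  //                                     spconv::Precision::Float16);
  // engine->reconfigure();
}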
135 changes: 135 additions & 0 deletions trlo/3rdparty/libspconv/include/spconv/memory.hpp
@@ -0,0 +1,135 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#ifndef __SPCONV_MEMORY_HPP__
#define __SPCONV_MEMORY_HPP__

#include <memory>
#include <string>
#include <unordered_map>

#include "check.hpp"

namespace spconv {

class GPUData {
public:
inline void *ptr() const { return ptr_; }
inline size_t bytes() const { return bytes_; }
inline bool empty() const { return ptr_ == nullptr; }
virtual ~GPUData() { free_memory(); }
GPUData() = default;
GPUData(const std::string &name) { this->name_ = name; }

void alloc_or_resize_to(size_t nbytes) {
if (capacity_ < nbytes) {
dprintf("%s Free old %d, malloc new %d bytes.\n", name_.c_str(), capacity_, nbytes);
free_memory();
checkRuntime(cudaMalloc(&ptr_, nbytes));
capacity_ = nbytes;
}
bytes_ = nbytes;
}

void alloc(size_t nbytes) { alloc_or_resize_to(nbytes); }

void resize(size_t nbytes) {
if (capacity_ < nbytes) {
      Assertf(false, "%s Failed to resize memory to %zu bytes. capacity = %zu", name_.c_str(),
              nbytes, capacity_);
}
bytes_ = nbytes;
}

void free_memory() {
if (ptr_) {
checkRuntime(cudaFree(ptr_));
ptr_ = nullptr;
capacity_ = 0;
bytes_ = 0;
}
}

private:
void *ptr_ = nullptr;
size_t bytes_ = 0;
size_t capacity_ = 0;
std::string name_;
};

template <typename T>
class GPUMemory {
public:
T *ptr() const { return data_ ? (T *)data_->ptr() : nullptr; }
size_t size() const { return size_; }
size_t bytes() const { return data_ ? data_->bytes() : 0; }
bool empty() const { return data_ == nullptr || data_->empty(); }
bool unset() const { return data_ == nullptr; }
// GPUMemory() { data_.reset(new GPUData()); }
virtual ~GPUMemory() { data_.reset(); }
void set_gpudata(std::shared_ptr<GPUData> data) { this->data_ = data; }

void alloc_or_resize_to(size_t size) {
if (data_) {
size_ = size;
data_->alloc_or_resize_to(size * sizeof(T));
} else {
      Asserts(false, "Failed to alloc or resize memory because data is nullptr.");
}
}

void alloc(size_t size) { alloc_or_resize_to(size); }

void resize(size_t size) {
if (data_) {
size_ = size;
data_->resize(size * sizeof(T));
} else {
      Asserts(false, "Failed to resize memory because data is nullptr.");
}
}

private:
std::shared_ptr<GPUData> data_;
size_t size_ = 0;
};

class GPUDataManager {
public:
std::shared_ptr<GPUData> query_or_alloc(const std::string &tensor_id,
const std::string &subname = "default") {
std::shared_ptr<GPUData> &output = data_dict_[tensor_id][subname];
if (output == nullptr) {
output.reset(new GPUData(tensor_id + "." + subname));
}
return output;
}

private:
std::unordered_map<std::string, std::unordered_map<std::string, std::shared_ptr<GPUData>>>
data_dict_;
};

}  // namespace spconv

#endif // #ifndef __SPCONV_MEMORY_HPP__
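
A short sketch of how these pieces compose (tensor ids and sizes are illustrative; note that GPUMemory can only allocate after a backing GPUData has been attached via set_gpudata):

#include "spconv/memory.hpp"

void example() {
  spconv::GPUDataManager manager;

  // query_or_alloc returns the same backing store for the same
  // (tensor_id, subname) pair, so repeated queries reuse the allocation.
  spconv::GPUMemory<float> features;
  features.set_gpudata(manager.query_or_alloc("voxel_features"));

  // Grows the underlying cudaMalloc'd buffer only when capacity is exceeded.
  features.alloc_or_resize_to(40000 * 16);

  // Shrinking within capacity just updates the logical size; no reallocation,
  // and resize() asserts if the request exceeds the current capacity.
  features.resize(20000 * 16);
}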
(The remaining 62 changed files are not shown.)
