-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
65 changed files
with
33,591 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
/* | ||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
* SPDX-License-Identifier: MIT | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a | ||
* copy of this software and associated documentation files (the "Software"), | ||
* to deal in the Software without restriction, including without limitation | ||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
* and/or sell copies of the Software, and to permit persons to whom the | ||
* Software is furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in | ||
* all copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
* DEALINGS IN THE SOFTWARE. | ||
*/ | ||
|
||
#ifndef __SPCONV_CHECK_HPP__ | ||
#define __SPCONV_CHECK_HPP__ | ||
|
||
#include <assert.h> | ||
#include <cuda_runtime.h> | ||
#include <stdarg.h> | ||
#include <stdio.h> | ||
|
||
#include <string> | ||
|
||
namespace spconv { | ||
|
||
#if DEBUG | ||
#define checkRuntime(call) spconv::check_runtime(call, #call, __LINE__, __FILE__) | ||
#define checkKernel(...) \ | ||
[&] { \ | ||
__VA_ARGS__; \ | ||
checkRuntime(cudaStreamSynchronize(nullptr)); \ | ||
return spconv::check_runtime(cudaGetLastError(), #__VA_ARGS__, __LINE__, __FILE__); \ | ||
}() | ||
#define dprintf printf | ||
#else | ||
#define checkRuntime(call) spconv::check_runtime(call, #call, __LINE__, __FILE__) | ||
#define checkKernel(...) \ | ||
do { \ | ||
__VA_ARGS__; \ | ||
spconv::check_runtime(cudaPeekAtLastError(), #__VA_ARGS__, __LINE__, __FILE__); \ | ||
} while (0) | ||
#define dprintf(...) | ||
#endif | ||
|
||
#define Assertf(cond, fmt, ...) \ | ||
do { \ | ||
if (!(cond)) { \ | ||
fprintf(stderr, "Assert failed 💀. %s in file %s:%d, message: " fmt "\n", #cond, __FILE__, \ | ||
__LINE__, __VA_ARGS__); \ | ||
abort(); \ | ||
} \ | ||
} while (false) | ||
#define Asserts(cond, s) \ | ||
do { \ | ||
if (!(cond)) { \ | ||
fprintf(stderr, "Assert failed 💀. %s in file %s:%d, message: " s "\n", #cond, __FILE__, \ | ||
__LINE__); \ | ||
abort(); \ | ||
} \ | ||
} while (false) | ||
#define Assert(cond) \ | ||
do { \ | ||
if (!(cond)) { \ | ||
fprintf(stderr, "Assert failed 💀. %s in file %s:%d\n", #cond, __FILE__, __LINE__); \ | ||
abort(); \ | ||
} \ | ||
} while (false) | ||
|
||
static inline std::string format(const char *fmt, ...) { | ||
char buffer[2048]; | ||
va_list vl; | ||
va_start(vl, fmt); | ||
vsnprintf(buffer, sizeof(buffer), fmt, vl); | ||
return buffer; | ||
} | ||
|
||
static inline bool check_runtime(cudaError_t e, const char *call, int line, const char *file) { | ||
if (e != cudaSuccess) { | ||
fprintf(stderr, | ||
"CUDA Runtime error %s # %s, code = %s [ %d ] in file " | ||
"%s:%d\n", | ||
call, cudaGetErrorString(e), cudaGetErrorName(e), e, file, line); | ||
abort(); | ||
return false; | ||
} | ||
return true; | ||
} | ||
|
||
}; // namespace spconv | ||
|
||
#endif // #ifndef __SPCONV_CHECK_HPP__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
/* | ||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
* SPDX-License-Identifier: MIT | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a | ||
* copy of this software and associated documentation files (the "Software"), | ||
* to deal in the Software without restriction, including without limitation | ||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
* and/or sell copies of the Software, and to permit persons to whom the | ||
* Software is furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in | ||
* all copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
* DEALINGS IN THE SOFTWARE. | ||
*/ | ||
|
||
#ifndef __SPCONV_ENGINE_HPP__ | ||
#define __SPCONV_ENGINE_HPP__ | ||
|
||
#include <memory> | ||
#include <string> | ||
#include <vector> | ||
|
||
namespace spconv { | ||
|
||
#define Exported __attribute__((visibility("default"))) | ||
|
||
enum class DType : int { None = 0, Int32 = 1, Float16 = 2 }; | ||
enum class Precision : int { None = 0, Float16 = 1, Int8 = 2 }; | ||
|
||
/** | ||
Storage of data tensor | ||
**/ | ||
class DTensor { | ||
public: | ||
virtual std::vector<int64_t> features_shape() const = 0; | ||
virtual DType features_dtype() const = 0; | ||
virtual void* features_data() = 0; | ||
|
||
virtual std::vector<int64_t> indices_shape() const = 0; | ||
virtual DType indices_dtype() const = 0; | ||
virtual void* indices_data() = 0; | ||
|
||
virtual std::vector<int> grid_size() const = 0; | ||
virtual int device() const = 0; | ||
}; | ||
|
||
/** | ||
Engine types for sparse convolution | ||
**/ | ||
class Engine { | ||
public: | ||
/** | ||
Inference function for sparse convolution | ||
features_shape: The shape of the input feature matrix, it must be two elements. | ||
features_dtype: The data type of the input feature matrix, it must be Float16 now. | ||
features_data: The data pointer of the input feature matrix | ||
indices_shape: The shape of the input indices matrix, it must be two elements[n, 4] | ||
indices_dtype: The data type of the input indices matrix, it must be Int32 now. | ||
indices_data: The data pointer of the input indices matrix | ||
batch: The batch size of the input, it must be 1 now. | ||
grid_size: The grid size of the input data, For example: 41,1440,1440 or 1440,1440,41 | ||
stream: Which stream is expected to enqueue the inference. | ||
**/ | ||
Exported virtual DTensor* forward(const std::vector<int64_t>& features_shape, | ||
DType features_dtype, void* features_data, | ||
const std::vector<int64_t>& indices_shape, DType indices_dtype, | ||
void* indices_data, int batch, std::vector<int> grid_size, | ||
void* stream = nullptr) = 0; | ||
|
||
// If you change the precision of a node after loading the model, you should call this function to | ||
// reconfigure it | ||
Exported virtual void reconfigure() = 0; | ||
|
||
// If you want to execute an implicit PTQ calibration, you can enable int8calibration by marking | ||
// it and collecting the maximum value of the tensor in the next forward. | ||
Exported virtual void set_int8_calibration(bool enable) = 0; | ||
|
||
// You can modify the precision of a node with this function, but don't forget to call reconfigure | ||
Exported virtual void set_node_precision_byname(const char* name, Precision compute_precision, | ||
Precision output_precision) = 0; | ||
Exported virtual void set_node_precision_byoptype(const char* optype, Precision compute_precision, | ||
Precision output_precision) = 0; | ||
}; | ||
|
||
/** | ||
Create an engine and load the weights from onnx file | ||
onnx_file: Store the onnx of model structure, please use tool/deploy/export-scn.py to export the | ||
corresponding onnx precision: What precision to use for model inference. For each layer's precision | ||
should be stored in the "precision" attribute of the layer | ||
- Model inference will ignore the "precision" attribute of each layer what if set to | ||
Float16 | ||
**/ | ||
Exported std::shared_ptr<Engine> load_engine_from_onnx(const std::string& onnx_file, | ||
Precision precision = Precision::Float16); | ||
|
||
/** | ||
Enable detailed information output | ||
enable: You should set this to true if you want to debug the model inference process. default: | ||
false | ||
*/ | ||
Exported void set_verbose(bool enable); | ||
Exported const char* get_precision_string(Precision precision); | ||
|
||
}; // namespace spconv | ||
|
||
#endif // #ifndef __SPCONV_ENGINE_HPP__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
/* | ||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
* SPDX-License-Identifier: MIT | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a | ||
* copy of this software and associated documentation files (the "Software"), | ||
* to deal in the Software without restriction, including without limitation | ||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
* and/or sell copies of the Software, and to permit persons to whom the | ||
* Software is furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in | ||
* all copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
* DEALINGS IN THE SOFTWARE. | ||
*/ | ||
|
||
#ifndef __SPCONV_MEMORY_HPP__ | ||
#define __SPCONV_MEMORY_HPP__ | ||
|
||
#include <memory> | ||
#include <string> | ||
#include <unordered_map> | ||
|
||
#include "check.hpp" | ||
|
||
namespace spconv { | ||
|
||
class GPUData { | ||
public: | ||
inline void *ptr() const { return ptr_; } | ||
inline size_t bytes() const { return bytes_; } | ||
inline bool empty() const { return ptr_ == nullptr; } | ||
virtual ~GPUData() { free_memory(); } | ||
GPUData() = default; | ||
GPUData(const std::string &name) { this->name_ = name; } | ||
|
||
void alloc_or_resize_to(size_t nbytes) { | ||
if (capacity_ < nbytes) { | ||
dprintf("%s Free old %d, malloc new %d bytes.\n", name_.c_str(), capacity_, nbytes); | ||
free_memory(); | ||
checkRuntime(cudaMalloc(&ptr_, nbytes)); | ||
capacity_ = nbytes; | ||
} | ||
bytes_ = nbytes; | ||
} | ||
|
||
void alloc(size_t nbytes) { alloc_or_resize_to(nbytes); } | ||
|
||
void resize(size_t nbytes) { | ||
if (capacity_ < nbytes) { | ||
Assertf(false, "%s Failed to resize memory to %ld bytes. capacity = %ld", name_.c_str(), | ||
nbytes, capacity_); | ||
} | ||
bytes_ = nbytes; | ||
} | ||
|
||
void free_memory() { | ||
if (ptr_) { | ||
checkRuntime(cudaFree(ptr_)); | ||
ptr_ = nullptr; | ||
capacity_ = 0; | ||
bytes_ = 0; | ||
} | ||
} | ||
|
||
private: | ||
void *ptr_ = nullptr; | ||
size_t bytes_ = 0; | ||
size_t capacity_ = 0; | ||
std::string name_; | ||
}; | ||
|
||
template <typename T> | ||
class GPUMemory { | ||
public: | ||
T *ptr() const { return data_ ? (T *)data_->ptr() : nullptr; } | ||
size_t size() const { return size_; } | ||
size_t bytes() const { return data_ ? data_->bytes() : 0; } | ||
bool empty() const { return data_ == nullptr || data_->empty(); } | ||
bool unset() const { return data_ == nullptr; } | ||
// GPUMemory() { data_.reset(new GPUData()); } | ||
virtual ~GPUMemory() { data_.reset(); } | ||
void set_gpudata(std::shared_ptr<GPUData> data) { this->data_ = data; } | ||
|
||
void alloc_or_resize_to(size_t size) { | ||
if (data_) { | ||
size_ = size; | ||
data_->alloc_or_resize_to(size * sizeof(T)); | ||
} else { | ||
Asserts(false, "Failed to alloc or resize memory that because data is nullptr."); | ||
} | ||
} | ||
|
||
void alloc(size_t size) { alloc_or_resize_to(size); } | ||
|
||
void resize(size_t size) { | ||
if (data_) { | ||
size_ = size; | ||
data_->resize(size * sizeof(T)); | ||
} else { | ||
Asserts(false, "Failed to resize memory that because data is nullptr."); | ||
} | ||
} | ||
|
||
private: | ||
std::shared_ptr<GPUData> data_; | ||
size_t size_ = 0; | ||
}; | ||
|
||
class GPUDataManager { | ||
public: | ||
std::shared_ptr<GPUData> query_or_alloc(const std::string &tensor_id, | ||
const std::string &subname = "default") { | ||
std::shared_ptr<GPUData> &output = data_dict_[tensor_id][subname]; | ||
if (output == nullptr) { | ||
output.reset(new GPUData(tensor_id + "." + subname)); | ||
} | ||
return output; | ||
} | ||
|
||
private: | ||
std::unordered_map<std::string, std::unordered_map<std::string, std::shared_ptr<GPUData>>> | ||
data_dict_; | ||
}; | ||
|
||
}; // namespace spconv | ||
|
||
#endif // #ifndef __SPCONV_MEMORY_HPP__ |
Oops, something went wrong.