diff --git a/linux/CMakeLists.txt b/linux/CMakeLists.txt
new file mode 100644
index 0000000..ec1cc44
--- /dev/null
+++ b/linux/CMakeLists.txt
@@ -0,0 +1,48 @@
+cmake_minimum_required(VERSION 3.10)
+
+set(ProjectName "DragGan-NCNN")
+project(${ProjectName})
+message("ProjectName: ${ProjectName}")
+
+set(CMAKE_BUILD_TYPE Release)
+set(CMAKE_AUTOMOC ON)
+set(CMAKE_AUTORCC ON)
+set(CMAKE_AUTOUIC ON)
+set(ncnn_DIR ${PROJECT_SOURCE_DIR}/lib/cmake/ncnn)
+# add your own path to qt5
+set(CMAKE_PREFIX_PATH "[PATH TO]/qt5/5.14.2/gcc_64/lib/cmake/")
+
+add_definitions("-fPIC")
+
+
+find_package(ncnn REQUIRED)
+find_package(Qt5 COMPONENTS
+    Core
+    Gui
+    Widgets
+    REQUIRED)
+
+find_package(OpenCV REQUIRED)
+
+
+add_executable(${ProjectName}
+    main.cpp
+    mainwindow.cpp
+    mainwindow.h
+    mainwindow.ui
+    mainwindow.qrc
+)
+
+target_include_directories(${ProjectName} PUBLIC
+    ${OpenCV_INCLUDE_DIRS}
+    ${PROJECT_SOURCE_DIR}/include
+)
+
+target_link_libraries(${ProjectName}
+    ${OpenCV_LIBS}
+    ncnn
+    Qt5::Core
+    Qt5::Gui
+    Qt5::Widgets
+)
+
diff --git a/linux/DragGan-NCNN b/linux/DragGan-NCNN
new file mode 100755
index 0000000..cf340e3
Binary files /dev/null and b/linux/DragGan-NCNN differ
diff --git a/linux/assets/PUT *.bin & *.param FILES HERE b/linux/assets/PUT *.bin & *.param FILES HERE
new file mode 100644
index 0000000..e69de29
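The CMakeLists.txt above expects the vendored ncnn under linux/lib/cmake/ncnn with headers under linux/include, and the empty assets placeholder marks where the converted *.param/*.bin model pair goes. As a rough sketch of how an application built this way would pick those models up at runtime (file names here are hypothetical placeholders, not the real DragGAN model names; ncnn::Net comes from net.h in the same vendored include directory):

    // sketch only -- "model.param" / "model.bin" are placeholder names
    #include "net.h"

    int main()
    {
        ncnn::Net net;
        // nonzero return means the load failed
        if (net.load_param("assets/model.param") != 0)
            return -1;
        if (net.load_model("assets/model.bin") != 0)
            return -1;
        return 0;
    }
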
diff --git a/linux/include/ncnn/allocator.h b/linux/include/ncnn/allocator.h
new file mode 100644
index 0000000..3a5ebca
--- /dev/null
+++ b/linux/include/ncnn/allocator.h
@@ -0,0 +1,448 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_ALLOCATOR_H
+#define NCNN_ALLOCATOR_H
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
+#include "platform.h"
+
+#include <stdlib.h>
+
+#if NCNN_VULKAN
+#include <vulkan/vulkan.h>
+#endif // NCNN_VULKAN
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 26
+#include <android/hardware_buffer.h>
+#endif // __ANDROID_API__ >= 26
+#endif // NCNN_PLATFORM_API
+
+namespace ncnn {
+
+// the alignment of all the allocated buffers
+#if NCNN_AVX512
+#define NCNN_MALLOC_ALIGN 64
+#elif NCNN_AVX
+#define NCNN_MALLOC_ALIGN 32
+#else
+#define NCNN_MALLOC_ALIGN 16
+#endif
+
+// we have some optimized kernels that may overread buffer a bit in loop
+// it is common to interleave next-loop data load with arithmetic instructions
+// allocating more bytes keeps us safe from SEGV_ACCERR failure
+#define NCNN_MALLOC_OVERREAD 64
+
+// Aligns a pointer to the specified number of bytes
+// ptr Aligned pointer
+// n Alignment size that must be a power of two
+template<typename _Tp>
+static NCNN_FORCEINLINE _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp))
+{
+    return (_Tp*)(((size_t)ptr + n - 1) & -n);
+}
+
+// Aligns a buffer size to the specified number of bytes
+// The function returns the minimum number that is greater or equal to sz and is divisible by n
+// sz Buffer size to align
+// n Alignment size that must be a power of two
+static NCNN_FORCEINLINE size_t alignSize(size_t sz, int n)
+{
+    return (sz + n - 1) & -n;
+}
+
+static NCNN_FORCEINLINE void* fastMalloc(size_t size)
+{
+#if _MSC_VER
+    return _aligned_malloc(size, NCNN_MALLOC_ALIGN);
+#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
+    void* ptr = 0;
+    if (posix_memalign(&ptr, NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD))
+        ptr = 0;
+    return ptr;
+#elif __ANDROID__ && __ANDROID_API__ < 17
+    return memalign(NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD);
+#else
+    unsigned char* udata = (unsigned char*)malloc(size + sizeof(void*) + NCNN_MALLOC_ALIGN + NCNN_MALLOC_OVERREAD);
+    if (!udata)
+        return 0;
+    unsigned char** adata = alignPtr((unsigned char**)udata + 1, NCNN_MALLOC_ALIGN);
+    adata[-1] = udata;
+    return adata;
+#endif
+}
+
+static NCNN_FORCEINLINE void fastFree(void* ptr)
+{
+    if (ptr)
+    {
+#if _MSC_VER
+        _aligned_free(ptr);
+#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
+        free(ptr);
+#elif __ANDROID__ && __ANDROID_API__ < 17
+        free(ptr);
+#else
+        unsigned char* udata = ((unsigned char**)ptr)[-1];
+        free(udata);
+#endif
+    }
+}
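To make the rounding behaviour above concrete, a small self-contained illustration (alignSize is re-stated locally so this compiles without the ncnn headers):

    #include <cassert>
    #include <cstddef>

    // local re-statement of alignSize, for illustration only
    static size_t align_size(size_t sz, int n)
    {
        return (sz + n - 1) & -n;
    }

    int main()
    {
        assert(align_size(1, 16) == 16);  // rounds up to the next multiple of 16
        assert(align_size(16, 16) == 16); // already aligned, unchanged
        assert(align_size(17, 16) == 32);
        // the generic fastMalloc path works the same way: it over-allocates,
        // aligns the payload pointer, and stores the raw malloc pointer at
        // adata[-1] so fastFree can recover and free it.
        return 0;
    }
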
+
+#if NCNN_THREADS
+// exchange-add operation for atomic operations on reference counters
+#if defined __riscv && !defined __riscv_atomic
+// riscv target without A extension
+static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
+{
+    int tmp = *addr;
+    *addr += delta;
+    return tmp;
+}
+#elif defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32)
+// atomic increment on the linux version of the Intel(tm) compiler
+#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
+#elif defined __GNUC__
+#if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
+#ifdef __ATOMIC_ACQ_REL
+#define NCNN_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
+#else
+#define NCNN_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
+#endif
+#else
+#if defined __ATOMIC_ACQ_REL && !defined __clang__
+// version for gcc >= 4.7
+#define NCNN_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
+#else
+#define NCNN_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
+#endif
+#endif
+#elif defined _MSC_VER && !defined RC_INVOKED
+#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
+#else
+// thread-unsafe branch
+static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
+{
+    int tmp = *addr;
+    *addr += delta;
+    return tmp;
+}
+#endif
+#else // NCNN_THREADS
+static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
+{
+    int tmp = *addr;
+    *addr += delta;
+    return tmp;
+}
+#endif // NCNN_THREADS
+
+class NCNN_EXPORT Allocator
+{
+public:
+    virtual ~Allocator();
+    virtual void* fastMalloc(size_t size) = 0;
+    virtual void fastFree(void* ptr) = 0;
+};
+
+class PoolAllocatorPrivate;
+class NCNN_EXPORT PoolAllocator : public Allocator
+{
+public:
+    PoolAllocator();
+    ~PoolAllocator();
+
+    // ratio range 0 ~ 1
+    // default cr = 0
+    void set_size_compare_ratio(float scr);
+
+    // budget drop threshold
+    // default threshold = 10
+    void set_size_drop_threshold(size_t);
+
+    // release all budgets immediately
+    void clear();
+
+    virtual void* fastMalloc(size_t size);
+    virtual void fastFree(void* ptr);
+
+private:
+    PoolAllocator(const PoolAllocator&);
+    PoolAllocator& operator=(const PoolAllocator&);
+
+private:
+    PoolAllocatorPrivate* const d;
+};
+
+class UnlockedPoolAllocatorPrivate;
+class NCNN_EXPORT UnlockedPoolAllocator : public Allocator
+{
+public:
+    UnlockedPoolAllocator();
+    ~UnlockedPoolAllocator();
+
+    // ratio range 0 ~ 1
+    // default cr = 0
+    void set_size_compare_ratio(float scr);
+
+    // budget drop threshold
+    // default threshold = 10
+    void set_size_drop_threshold(size_t);
+
+    // release all budgets immediately
+    void clear();
+
+    virtual void* fastMalloc(size_t size);
+    virtual void fastFree(void* ptr);
+
+private:
+    UnlockedPoolAllocator(const UnlockedPoolAllocator&);
+    UnlockedPoolAllocator& operator=(const UnlockedPoolAllocator&);
+
+private:
+    UnlockedPoolAllocatorPrivate* const d;
+};
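PoolAllocator (thread-safe) and UnlockedPoolAllocator (single-threaded, no locking) are normally handed to a net through ncnn::Option, which is declared in option.h and is not part of this diff. A sketch of the usual wiring, assuming the standard ncnn Option fields blob_allocator and workspace_allocator:

    #include "allocator.h"
    #include "net.h" // assumed to be vendored alongside these headers

    // the allocators must outlive every inference that uses them
    static ncnn::UnlockedPoolAllocator g_blob_pool;
    static ncnn::PoolAllocator g_workspace_pool;

    void configure(ncnn::Net& net)
    {
        net.opt.blob_allocator = &g_blob_pool;
        net.opt.workspace_allocator = &g_workspace_pool;
    }
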
+
+#if NCNN_VULKAN
+
+class VulkanDevice;
+
+class NCNN_EXPORT VkBufferMemory
+{
+public:
+    VkBuffer buffer;
+
+    // the base offset assigned by allocator
+    size_t offset;
+    size_t capacity;
+
+    VkDeviceMemory memory;
+    void* mapped_ptr;
+
+    // buffer state, modified by command functions internally
+    mutable VkAccessFlags access_flags;
+    mutable VkPipelineStageFlags stage_flags;
+
+    // initialize and modified by mat
+    int refcount;
+};
+
+class NCNN_EXPORT VkImageMemory
+{
+public:
+    VkImage image;
+    VkImageView imageview;
+
+    // underlying info assigned by allocator
+    int width;
+    int height;
+    int depth;
+    VkFormat format;
+
+    VkDeviceMemory memory;
+    void* mapped_ptr;
+
+    // the base offset assigned by allocator
+    size_t bind_offset;
+    size_t bind_capacity;
+
+    // image state, modified by command functions internally
+    mutable VkAccessFlags access_flags;
+    mutable VkImageLayout image_layout;
+    mutable VkPipelineStageFlags stage_flags;
+
+    // in-execution state, modified by command functions internally
+    mutable int command_refcount;
+
+    // initialize and modified by mat
+    int refcount;
+};
+
+class NCNN_EXPORT VkAllocator
+{
+public:
+    explicit VkAllocator(const VulkanDevice* _vkdev);
+    virtual ~VkAllocator();
+
+    virtual void clear();
+
+    virtual VkBufferMemory* fastMalloc(size_t size) = 0;
+    virtual void fastFree(VkBufferMemory* ptr) = 0;
+    virtual int flush(VkBufferMemory* ptr);
+    virtual int invalidate(VkBufferMemory* ptr);
+
+    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0;
+    virtual void fastFree(VkImageMemory* ptr) = 0;
+
+public:
+    const VulkanDevice* vkdev;
+    uint32_t buffer_memory_type_index;
+    uint32_t image_memory_type_index;
+    uint32_t reserved_type_index;
+    bool mappable;
+    bool coherent;
+
+protected:
+    VkBuffer create_buffer(size_t size, VkBufferUsageFlags usage);
+    VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index);
+    VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer);
+
+    VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage);
+    VkImageView create_imageview(VkImage image, VkFormat format);
+};
+
+class VkBlobAllocatorPrivate;
+class NCNN_EXPORT VkBlobAllocator : public VkAllocator
+{
+public:
+    explicit VkBlobAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 16 * 1024 * 1024); // 16M
+    virtual ~VkBlobAllocator();
+
+public:
+    // release all budgets immediately
+    virtual void clear();
+
+    virtual VkBufferMemory* fastMalloc(size_t size);
+    virtual void fastFree(VkBufferMemory* ptr);
+    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
+    virtual void fastFree(VkImageMemory* ptr);
+
+private:
+    VkBlobAllocator(const VkBlobAllocator&);
+    VkBlobAllocator& operator=(const VkBlobAllocator&);
+
+private:
+    VkBlobAllocatorPrivate* const d;
+};
+
+class VkWeightAllocatorPrivate;
+class NCNN_EXPORT VkWeightAllocator : public VkAllocator
+{
+public:
+    explicit VkWeightAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 8 * 1024 * 1024); // 8M
+    virtual ~VkWeightAllocator();
+
+public:
+    // release all blocks immediately
+    virtual void clear();
+
+public:
+    virtual VkBufferMemory* fastMalloc(size_t size);
+    virtual void fastFree(VkBufferMemory* ptr);
+    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
+    virtual void fastFree(VkImageMemory* ptr);
+
+private:
+    VkWeightAllocator(const VkWeightAllocator&);
+    VkWeightAllocator& operator=(const VkWeightAllocator&);
+
+private:
+    VkWeightAllocatorPrivate* const d;
+};
+
+class VkStagingAllocatorPrivate;
+class NCNN_EXPORT VkStagingAllocator : public VkAllocator
+{
+public:
+    explicit VkStagingAllocator(const VulkanDevice* vkdev);
+    virtual ~VkStagingAllocator();
+
+public:
+    // ratio range 0 ~ 1
+    // default cr = 0.75
+    void set_size_compare_ratio(float scr);
+
+    // release all budgets immediately
+    virtual void clear();
+
+    virtual VkBufferMemory* fastMalloc(size_t size);
+    virtual void fastFree(VkBufferMemory* ptr);
+    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
+    virtual void fastFree(VkImageMemory* ptr);
+
+private:
+    VkStagingAllocator(const VkStagingAllocator&);
+    VkStagingAllocator& operator=(const VkStagingAllocator&);
+
+private:
+    VkStagingAllocatorPrivate* const d;
+};
+
+class VkWeightStagingAllocatorPrivate;
+class NCNN_EXPORT VkWeightStagingAllocator : public VkAllocator
+{
+public:
+    explicit VkWeightStagingAllocator(const VulkanDevice* vkdev);
+    virtual ~VkWeightStagingAllocator();
+
+public:
+    virtual VkBufferMemory* fastMalloc(size_t size);
+    virtual void fastFree(VkBufferMemory* ptr);
+    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
+    virtual void fastFree(VkImageMemory* ptr);
+
+private:
+    VkWeightStagingAllocator(const VkWeightStagingAllocator&);
+    VkWeightStagingAllocator& operator=(const VkWeightStagingAllocator&);
+
+private:
+    VkWeightStagingAllocatorPrivate* const d;
+};
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 26
+class NCNN_EXPORT VkAndroidHardwareBufferImageAllocator : public VkAllocator
+{
+public:
+    VkAndroidHardwareBufferImageAllocator(const VulkanDevice* _vkdev, AHardwareBuffer* _hb);
+    virtual ~VkAndroidHardwareBufferImageAllocator();
+
+public:
+    virtual VkBufferMemory* fastMalloc(size_t size);
+    virtual void fastFree(VkBufferMemory* ptr);
+    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
+    virtual void fastFree(VkImageMemory* ptr);
+
+private:
+    VkAndroidHardwareBufferImageAllocator(const VkAndroidHardwareBufferImageAllocator&);
+    VkAndroidHardwareBufferImageAllocator& operator=(const VkAndroidHardwareBufferImageAllocator&);
+
+public:
+    int init();
+
+    int width() const;
+    int height() const;
+    uint64_t external_format() const;
+
+public:
+    AHardwareBuffer* hb;
+    AHardwareBuffer_Desc bufferDesc;
+    VkAndroidHardwareBufferFormatPropertiesANDROID bufferFormatProperties;
+    VkAndroidHardwareBufferPropertiesANDROID bufferProperties;
+    VkSamplerYcbcrConversionKHR samplerYcbcrConversion;
+};
+#endif // __ANDROID_API__ >= 26
+#endif // NCNN_PLATFORM_API
+
+#endif // NCNN_VULKAN
+
+} // namespace ncnn
+
+#endif // NCNN_ALLOCATOR_H
diff --git a/linux/include/ncnn/benchmark.h b/linux/include/ncnn/benchmark.h
new file mode 100644
index 0000000..ed42c1a
--- /dev/null
+++ b/linux/include/ncnn/benchmark.h
@@ -0,0 +1,39 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_BENCHMARK_H
+#define NCNN_BENCHMARK_H
+
+#include "layer.h"
+#include "mat.h"
+#include "platform.h"
+
+namespace ncnn {
+
+// get now timestamp in ms
+NCNN_EXPORT double get_current_time();
+
+// sleep milliseconds
+NCNN_EXPORT void sleep(unsigned long long int milliseconds = 1000);
+
+#if NCNN_BENCHMARK
+
+NCNN_EXPORT void benchmark(const Layer* layer, double start, double end);
+NCNN_EXPORT void benchmark(const Layer* layer, const Mat& bottom_blob, Mat& top_blob, double start, double end);
+
+#endif // NCNN_BENCHMARK
+
+} // namespace ncnn
+
+#endif // NCNN_BENCHMARK_H
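Since get_current_time() returns milliseconds, timing any block is a subtraction; a sketch (run() stands in for the code under test):

    #include "benchmark.h"
    #include <cstdio>

    void run(); // stand-in for the work being measured

    void timed_run()
    {
        double t0 = ncnn::get_current_time();
        run();
        double t1 = ncnn::get_current_time();
        fprintf(stderr, "run took %.2f ms\n", t1 - t0);
    }
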
diff --git a/linux/include/ncnn/blob.h b/linux/include/ncnn/blob.h
new file mode 100644
index 0000000..c9f144f
--- /dev/null
+++ b/linux/include/ncnn/blob.h
@@ -0,0 +1,44 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_BLOB_H
+#define NCNN_BLOB_H
+
+#include "mat.h"
+#include "platform.h"
+
+namespace ncnn {
+
+class NCNN_EXPORT Blob
+{
+public:
+    // empty
+    Blob();
+
+public:
+#if NCNN_STRING
+    // blob name
+    std::string name;
+#endif // NCNN_STRING
+    // layer index which produce this blob as output
+    int producer;
+    // layer index which need this blob as input
+    int consumer;
+    // shape hint
+    Mat shape;
+};
+
+} // namespace ncnn
+
+#endif // NCNN_BLOB_H
diff --git a/linux/include/ncnn/c_api.h b/linux/include/ncnn/c_api.h
new file mode 100644
index 0000000..31d5b6d
--- /dev/null
+++ b/linux/include/ncnn/c_api.h
@@ -0,0 +1,347 @@
+/* Tencent is pleased to support the open source community by making ncnn available.
+ *
+ * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/BSD-3-Clause
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+
+#ifndef NCNN_C_API_H
+#define NCNN_C_API_H
+
+#include "platform.h"
+
+#if NCNN_C_API
+
+#include <stddef.h>
+
+#ifdef __cplusplus
extern "C" {
+#endif
+
+NCNN_EXPORT const char* ncnn_version();
+
+/* allocator api */
+typedef struct __ncnn_allocator_t* ncnn_allocator_t;
+struct NCNN_EXPORT __ncnn_allocator_t
+{
+    void* pthis;
+
+    void* (*fast_malloc)(ncnn_allocator_t allocator, size_t size);
+    void (*fast_free)(ncnn_allocator_t allocator, void* ptr);
+};
+
+NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_pool_allocator();
+NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_unlocked_pool_allocator();
+NCNN_EXPORT void ncnn_allocator_destroy(ncnn_allocator_t allocator);
+
+/* option api */
+typedef struct __ncnn_option_t* ncnn_option_t;
+
+NCNN_EXPORT ncnn_option_t ncnn_option_create();
+NCNN_EXPORT void ncnn_option_destroy(ncnn_option_t opt);
+
+NCNN_EXPORT int ncnn_option_get_num_threads(const ncnn_option_t opt);
+NCNN_EXPORT void ncnn_option_set_num_threads(ncnn_option_t opt, int num_threads);
+
+NCNN_EXPORT int ncnn_option_get_use_local_pool_allocator(const ncnn_option_t opt);
+NCNN_EXPORT void ncnn_option_set_use_local_pool_allocator(ncnn_option_t opt, int use_local_pool_allocator);
+
+NCNN_EXPORT void ncnn_option_set_blob_allocator(ncnn_option_t opt, ncnn_allocator_t allocator);
+NCNN_EXPORT void ncnn_option_set_workspace_allocator(ncnn_option_t opt, ncnn_allocator_t allocator);
+
+NCNN_EXPORT int ncnn_option_get_use_vulkan_compute(const ncnn_option_t opt);
+NCNN_EXPORT void ncnn_option_set_use_vulkan_compute(ncnn_option_t opt, int use_vulkan_compute);
+
+/* mat api */
+typedef struct __ncnn_mat_t* ncnn_mat_t;
+
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create();
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d(int w, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d(int w, int h, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d(int w, int h, int c, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d(int w, int h, int d, int c, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d(int w, void* data, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d(int w, int h, void* data, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d(int w, int h, int c, void* data, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d(int w, int h, int d, int c, void* data, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d_elem(int w, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d_elem(int w, int h, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d_elem(int w, int h, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d_elem(int w, int h, int d, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d_elem(int w, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d_elem(int w, int h, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d_elem(int w, int h, int c, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d_elem(int w, int h, int d, int c, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
+NCNN_EXPORT void ncnn_mat_destroy(ncnn_mat_t mat);
+
+NCNN_EXPORT void ncnn_mat_fill_float(ncnn_mat_t mat, float v);
+
+NCNN_EXPORT ncnn_mat_t ncnn_mat_clone(const ncnn_mat_t mat, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_1d(const ncnn_mat_t mat, int w, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_2d(const ncnn_mat_t mat, int w, int h, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_3d(const ncnn_mat_t mat, int w, int h, int c, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_4d(const ncnn_mat_t mat, int w, int h, int d, int c, ncnn_allocator_t allocator);
+
+NCNN_EXPORT int ncnn_mat_get_dims(const ncnn_mat_t mat);
+NCNN_EXPORT int ncnn_mat_get_w(const ncnn_mat_t mat);
+NCNN_EXPORT int ncnn_mat_get_h(const ncnn_mat_t mat);
+NCNN_EXPORT int ncnn_mat_get_d(const ncnn_mat_t mat);
+NCNN_EXPORT int ncnn_mat_get_c(const ncnn_mat_t mat);
+NCNN_EXPORT size_t ncnn_mat_get_elemsize(const ncnn_mat_t mat);
+NCNN_EXPORT int ncnn_mat_get_elempack(const ncnn_mat_t mat);
+NCNN_EXPORT size_t ncnn_mat_get_cstep(const ncnn_mat_t mat);
+NCNN_EXPORT void* ncnn_mat_get_data(const ncnn_mat_t mat);
+
+NCNN_EXPORT void* ncnn_mat_get_channel_data(const ncnn_mat_t mat, int c);
+
+#if NCNN_PIXEL
+
+/* mat pixel api */
+#define NCNN_MAT_PIXEL_RGB 1
+#define NCNN_MAT_PIXEL_BGR 2
+#define NCNN_MAT_PIXEL_GRAY 3
+#define NCNN_MAT_PIXEL_RGBA 4
+#define NCNN_MAT_PIXEL_BGRA 5
+#define NCNN_MAT_PIXEL_X2Y(X, Y) (X | (Y << 16))
+NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, ncnn_allocator_t allocator);
+NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, ncnn_allocator_t allocator);
+NCNN_EXPORT void ncnn_mat_to_pixels(const ncnn_mat_t mat, unsigned char* pixels, int type, int stride);
+NCNN_EXPORT void ncnn_mat_to_pixels_resize(const ncnn_mat_t mat, unsigned char* pixels, int type, int target_width, int target_height, int target_stride);
+
+#endif /* NCNN_PIXEL */
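NCNN_MAT_PIXEL_X2Y packs the source format into the low 16 bits and the requested format into the high 16 bits, so a layout conversion can happen while loading. A sketch converting BGR bytes to an RGB float mat:

    #include "c_api.h"

    ncnn_mat_t load_rgb_from_bgr(const unsigned char* bgr, int w, int h)
    {
        /* low 16 bits: layout of the bytes, high 16 bits: layout wanted in the mat */
        int type = NCNN_MAT_PIXEL_X2Y(NCNN_MAT_PIXEL_BGR, NCNN_MAT_PIXEL_RGB);
        /* stride is bytes per row; 3 bytes per BGR pixel, NULL = default allocator */
        return ncnn_mat_from_pixels(bgr, type, w, h, w * 3, NULL);
    }
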
+
+NCNN_EXPORT void ncnn_mat_substract_mean_normalize(ncnn_mat_t mat, const float* mean_vals, const float* norm_vals);
+
+NCNN_EXPORT void ncnn_convert_packing(const ncnn_mat_t src, ncnn_mat_t* dst, int elempack, const ncnn_option_t opt);
+NCNN_EXPORT void ncnn_flatten(const ncnn_mat_t src, ncnn_mat_t* dst, const ncnn_option_t opt);
+
+/* blob api */
+typedef struct __ncnn_blob_t* ncnn_blob_t;
+
+#if NCNN_STRING
+NCNN_EXPORT const char* ncnn_blob_get_name(const ncnn_blob_t blob);
+#endif /* NCNN_STRING */
+
+NCNN_EXPORT int ncnn_blob_get_producer(const ncnn_blob_t blob);
+NCNN_EXPORT int ncnn_blob_get_consumer(const ncnn_blob_t blob);
+
+NCNN_EXPORT void ncnn_blob_get_shape(const ncnn_blob_t blob, int* dims, int* w, int* h, int* c);
+
+/* paramdict api */
+typedef struct __ncnn_paramdict_t* ncnn_paramdict_t;
+
+NCNN_EXPORT ncnn_paramdict_t ncnn_paramdict_create();
+NCNN_EXPORT void ncnn_paramdict_destroy(ncnn_paramdict_t pd);
+
+NCNN_EXPORT int ncnn_paramdict_get_type(const ncnn_paramdict_t pd, int id);
+
+NCNN_EXPORT int ncnn_paramdict_get_int(const ncnn_paramdict_t pd, int id, int def);
+NCNN_EXPORT float ncnn_paramdict_get_float(const ncnn_paramdict_t pd, int id, float def);
+NCNN_EXPORT ncnn_mat_t ncnn_paramdict_get_array(const ncnn_paramdict_t pd, int id, const ncnn_mat_t def);
+
+NCNN_EXPORT void ncnn_paramdict_set_int(ncnn_paramdict_t pd, int id, int i);
+NCNN_EXPORT void ncnn_paramdict_set_float(ncnn_paramdict_t pd, int id, float f);
+NCNN_EXPORT void ncnn_paramdict_set_array(ncnn_paramdict_t pd, int id, const ncnn_mat_t v);
+
+/* datareader api */
+typedef struct __ncnn_datareader_t* ncnn_datareader_t;
+struct NCNN_EXPORT __ncnn_datareader_t
+{
+    void* pthis;
+
+#if NCNN_STRING
+    int (*scan)(ncnn_datareader_t dr, const char* format, void* p);
+#endif /* NCNN_STRING */
+    size_t (*read)(ncnn_datareader_t dr, void* buf, size_t size);
+};
+
+NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create();
+#if NCNN_STDIO
+NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_stdio(FILE* fp);
+#endif /* NCNN_STDIO */
+NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_memory(const unsigned char** mem);
+NCNN_EXPORT void ncnn_datareader_destroy(ncnn_datareader_t dr);
+
+/* modelbin api */
+typedef struct __ncnn_modelbin_t* ncnn_modelbin_t;
+struct NCNN_EXPORT __ncnn_modelbin_t
+{
+    void* pthis;
+
+    ncnn_mat_t (*load_1d)(const ncnn_modelbin_t mb, int w, int type);
+    ncnn_mat_t (*load_2d)(const ncnn_modelbin_t mb, int w, int h, int type);
+    ncnn_mat_t (*load_3d)(const ncnn_modelbin_t mb, int w, int h, int c, int type);
+};
+
+NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_datareader(const ncnn_datareader_t dr);
+NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_mat_array(const ncnn_mat_t* weights, int n);
+NCNN_EXPORT void ncnn_modelbin_destroy(ncnn_modelbin_t mb);
+
+/* layer api */
+typedef struct __ncnn_layer_t* ncnn_layer_t;
+struct NCNN_EXPORT __ncnn_layer_t
+{
+    void* pthis;
+
+    int (*load_param)(ncnn_layer_t layer, const ncnn_paramdict_t pd);
+    int (*load_model)(ncnn_layer_t layer, const ncnn_modelbin_t mb);
+
+    int (*create_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt);
+    int (*destroy_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt);
+
+    int (*forward_1)(const ncnn_layer_t layer, const ncnn_mat_t bottom_blob, ncnn_mat_t* top_blob, const ncnn_option_t opt);
+    int (*forward_n)(const ncnn_layer_t layer, const ncnn_mat_t* bottom_blobs, int n, ncnn_mat_t* top_blobs, int n2, const ncnn_option_t opt);
+
+    int (*forward_inplace_1)(const ncnn_layer_t layer, ncnn_mat_t bottom_top_blob, const ncnn_option_t opt);
+    int (*forward_inplace_n)(const ncnn_layer_t layer, ncnn_mat_t* bottom_top_blobs, int n, const ncnn_option_t opt);
+};
+
+NCNN_EXPORT ncnn_layer_t ncnn_layer_create();
+NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_typeindex(int typeindex);
+#if NCNN_STRING
+NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_type(const char* type);
+NCNN_EXPORT int ncnn_layer_type_to_index(const char* type);
+#endif /* NCNN_STRING */
+NCNN_EXPORT void ncnn_layer_destroy(ncnn_layer_t layer);
+
+#if NCNN_STRING
+NCNN_EXPORT const char* ncnn_layer_get_name(const ncnn_layer_t layer);
+#endif /* NCNN_STRING */
+
+NCNN_EXPORT int ncnn_layer_get_typeindex(const ncnn_layer_t layer);
+#if NCNN_STRING
+NCNN_EXPORT const char* ncnn_layer_get_type(const ncnn_layer_t layer);
+#endif /* NCNN_STRING */
+
+NCNN_EXPORT int ncnn_layer_get_one_blob_only(const ncnn_layer_t layer);
+NCNN_EXPORT int ncnn_layer_get_support_inplace(const ncnn_layer_t layer);
+NCNN_EXPORT int ncnn_layer_get_support_vulkan(const ncnn_layer_t layer);
+NCNN_EXPORT int ncnn_layer_get_support_packing(const ncnn_layer_t layer);
+NCNN_EXPORT int ncnn_layer_get_support_bf16_storage(const ncnn_layer_t layer);
+NCNN_EXPORT int ncnn_layer_get_support_fp16_storage(const ncnn_layer_t layer);
+NCNN_EXPORT int ncnn_layer_get_support_image_storage(const ncnn_layer_t layer);
+
+NCNN_EXPORT void ncnn_layer_set_one_blob_only(ncnn_layer_t layer, int enable);
+NCNN_EXPORT void ncnn_layer_set_support_inplace(ncnn_layer_t layer, int enable);
+NCNN_EXPORT void ncnn_layer_set_support_vulkan(ncnn_layer_t layer, int enable);
+NCNN_EXPORT void ncnn_layer_set_support_packing(ncnn_layer_t layer, int enable);
+NCNN_EXPORT void ncnn_layer_set_support_bf16_storage(ncnn_layer_t layer, int enable);
+NCNN_EXPORT void ncnn_layer_set_support_fp16_storage(ncnn_layer_t layer, int enable);
+NCNN_EXPORT void ncnn_layer_set_support_image_storage(ncnn_layer_t layer, int enable);
+
+NCNN_EXPORT int ncnn_layer_get_bottom_count(const ncnn_layer_t layer);
+NCNN_EXPORT int ncnn_layer_get_bottom(const ncnn_layer_t layer, int i);
+NCNN_EXPORT int ncnn_layer_get_top_count(const ncnn_layer_t layer);
+NCNN_EXPORT int ncnn_layer_get_top(const ncnn_layer_t layer, int i);
+
+NCNN_EXPORT void ncnn_blob_get_bottom_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c);
+NCNN_EXPORT void ncnn_blob_get_top_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c);
+
+/* layer factory function */
+typedef ncnn_layer_t (*ncnn_layer_creator_t)(void* userdata);
+typedef void (*ncnn_layer_destroyer_t)(ncnn_layer_t layer, void* userdata);
+
+typedef struct __ncnn_net_custom_layer_factory_t* ncnn_net_custom_layer_factory_t;
+struct __ncnn_net_custom_layer_factory_t
+{
+    ncnn_layer_creator_t creator;
+    ncnn_layer_destroyer_t destroyer;
+    void* userdata;
+    ncnn_net_custom_layer_factory_t next;
+};
+
+/* net api */
+typedef struct __ncnn_net_t* ncnn_net_t;
+struct __ncnn_net_t
+{
+    void* pthis;
+
+    ncnn_net_custom_layer_factory_t custom_layer_factory;
+};
+
+NCNN_EXPORT ncnn_net_t ncnn_net_create();
+NCNN_EXPORT void ncnn_net_destroy(ncnn_net_t net);
+
+NCNN_EXPORT ncnn_option_t ncnn_net_get_option(ncnn_net_t net);
+NCNN_EXPORT void ncnn_net_set_option(ncnn_net_t net, ncnn_option_t opt);
+
+#if NCNN_STRING
+NCNN_EXPORT void ncnn_net_register_custom_layer_by_type(ncnn_net_t net, const char* type, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata);
+#endif /* NCNN_STRING */
+NCNN_EXPORT void ncnn_net_register_custom_layer_by_typeindex(ncnn_net_t net, int typeindex, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata);
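The creator/destroyer pair gives the net a factory for layers it does not know; the creator returns a __ncnn_layer_t whose function pointers have been repointed at user code. A sketch (the layer name and its behaviour are hypothetical):

    #include "c_api.h"

    /* hypothetical custom layer: negate the blob in place */
    static int my_forward_inplace(const ncnn_layer_t layer, ncnn_mat_t bottom_top_blob, const ncnn_option_t opt)
    {
        (void)layer;
        (void)opt;
        float* p = (float*)ncnn_mat_get_data(bottom_top_blob);
        /* cstep * c covers every element of a contiguous float mat */
        size_t n = ncnn_mat_get_cstep(bottom_top_blob) * (size_t)ncnn_mat_get_c(bottom_top_blob);
        for (size_t i = 0; i < n; i++)
            p[i] = -p[i];
        return 0;
    }

    static ncnn_layer_t my_layer_creator(void* userdata)
    {
        (void)userdata;
        ncnn_layer_t layer = ncnn_layer_create();
        ncnn_layer_set_one_blob_only(layer, 1);
        ncnn_layer_set_support_inplace(layer, 1);
        layer->forward_inplace_1 = my_forward_inplace;
        return layer;
    }

    static void my_layer_destroyer(ncnn_layer_t layer, void* userdata)
    {
        (void)userdata;
        ncnn_layer_destroy(layer);
    }

    /* registered before loading the param file that references "MyLayer": */
    /* ncnn_net_register_custom_layer_by_type(net, "MyLayer", my_layer_creator, my_layer_destroyer, NULL); */
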
+
+#if NCNN_STDIO
+#if NCNN_STRING
+NCNN_EXPORT int ncnn_net_load_param(ncnn_net_t net, const char* path);
+#endif /* NCNN_STRING */
+NCNN_EXPORT int ncnn_net_load_param_bin(ncnn_net_t net, const char* path);
+NCNN_EXPORT int ncnn_net_load_model(ncnn_net_t net, const char* path);
+#endif /* NCNN_STDIO */
+
+#if NCNN_STDIO
+#if NCNN_STRING
+NCNN_EXPORT int ncnn_net_load_param_memory(ncnn_net_t net, const char* mem);
+#endif /* NCNN_STRING */
+#endif /* NCNN_STDIO */
+NCNN_EXPORT int ncnn_net_load_param_bin_memory(ncnn_net_t net, const unsigned char* mem);
+NCNN_EXPORT int ncnn_net_load_model_memory(ncnn_net_t net, const unsigned char* mem);
+
+#if NCNN_STRING
+NCNN_EXPORT int ncnn_net_load_param_datareader(ncnn_net_t net, const ncnn_datareader_t dr);
+#endif /* NCNN_STRING */
+NCNN_EXPORT int ncnn_net_load_param_bin_datareader(ncnn_net_t net, const ncnn_datareader_t dr);
+NCNN_EXPORT int ncnn_net_load_model_datareader(ncnn_net_t net, const ncnn_datareader_t dr);
+
+NCNN_EXPORT void ncnn_net_clear(ncnn_net_t net);
+
+NCNN_EXPORT int ncnn_net_get_input_count(const ncnn_net_t net);
+NCNN_EXPORT int ncnn_net_get_output_count(const ncnn_net_t net);
+#if NCNN_STRING
+NCNN_EXPORT const char* ncnn_net_get_input_name(const ncnn_net_t net, int i);
+NCNN_EXPORT const char* ncnn_net_get_output_name(const ncnn_net_t net, int i);
+#endif /* NCNN_STRING */
+NCNN_EXPORT int ncnn_net_get_input_index(const ncnn_net_t net, int i);
+NCNN_EXPORT int ncnn_net_get_output_index(const ncnn_net_t net, int i);
+
+/* extractor api */
+typedef struct __ncnn_extractor_t* ncnn_extractor_t;
+
+NCNN_EXPORT ncnn_extractor_t ncnn_extractor_create(ncnn_net_t net);
+NCNN_EXPORT void ncnn_extractor_destroy(ncnn_extractor_t ex);
+
+NCNN_EXPORT void ncnn_extractor_set_option(ncnn_extractor_t ex, const ncnn_option_t opt);
+
+#if NCNN_STRING
+NCNN_EXPORT int ncnn_extractor_input(ncnn_extractor_t ex, const char* name, const ncnn_mat_t mat);
+NCNN_EXPORT int ncnn_extractor_extract(ncnn_extractor_t ex, const char* name, ncnn_mat_t* mat);
+#endif /* NCNN_STRING */
+NCNN_EXPORT int ncnn_extractor_input_index(ncnn_extractor_t ex, int index, const ncnn_mat_t mat);
+NCNN_EXPORT int ncnn_extractor_extract_index(ncnn_extractor_t ex, int index, ncnn_mat_t* mat);
+
+/* mat process api */
+#define NCNN_BORDER_CONSTANT 0
+#define NCNN_BORDER_REPLICATE 1
+#define NCNN_BORDER_REFLECT 2
+#define NCNN_BORDER_TRANSPARENT -233
+NCNN_EXPORT void ncnn_copy_make_border(const ncnn_mat_t src, ncnn_mat_t dst, int top, int bottom, int left, int right, int type, float v, const ncnn_option_t opt);
+NCNN_EXPORT void ncnn_copy_make_border_3d(const ncnn_mat_t src, ncnn_mat_t dst, int top, int bottom, int left, int right, int front, int behind, int type, float v, const ncnn_option_t opt);
+NCNN_EXPORT void ncnn_copy_cut_border(const ncnn_mat_t src, ncnn_mat_t dst, int top, int bottom, int left, int right, const ncnn_option_t opt);
+NCNN_EXPORT void ncnn_copy_cut_border_3d(const ncnn_mat_t src, ncnn_mat_t dst, int top, int bottom, int left, int right, int front, int behind, const ncnn_option_t opt);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* NCNN_C_API */
+
+#endif /* NCNN_C_API_H */
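End to end, the C API mirrors the C++ Net/Extractor flow. A minimal sketch (paths and blob names are hypothetical and model-specific):

    #include "c_api.h"
    #include <stdio.h>

    int infer(const ncnn_mat_t in)
    {
        ncnn_net_t net = ncnn_net_create();
        if (ncnn_net_load_param(net, "assets/model.param") != 0
                || ncnn_net_load_model(net, "assets/model.bin") != 0)
        {
            ncnn_net_destroy(net);
            return -1;
        }

        ncnn_extractor_t ex = ncnn_extractor_create(net);
        ncnn_extractor_input(ex, "input", in); /* blob name depends on the model */

        ncnn_mat_t out = NULL;
        ncnn_extractor_extract(ex, "output", &out);
        printf("out dims=%d w=%d h=%d c=%d\n", ncnn_mat_get_dims(out),
               ncnn_mat_get_w(out), ncnn_mat_get_h(out), ncnn_mat_get_c(out));

        ncnn_mat_destroy(out);
        ncnn_extractor_destroy(ex);
        ncnn_net_destroy(net);
        return 0;
    }
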
diff --git a/linux/include/ncnn/command.h b/linux/include/ncnn/command.h
new file mode 100644
index 0000000..337d085
--- /dev/null
+++ b/linux/include/ncnn/command.h
@@ -0,0 +1,136 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_COMMAND_H
+#define NCNN_COMMAND_H
+
+#include "platform.h"
+
+#if NCNN_VULKAN
+
+#include "mat.h"
+
+#include <vulkan/vulkan.h>
+
+namespace ncnn {
+
+class Pipeline;
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 26
+class ImportAndroidHardwareBufferPipeline;
+#endif // __ANDROID_API__ >= 26
+#endif // NCNN_PLATFORM_API
+class VkComputePrivate;
+class NCNN_EXPORT VkCompute
+{
+public:
+    explicit VkCompute(const VulkanDevice* vkdev);
+    virtual ~VkCompute();
+
+public:
+    void record_upload(const Mat& src, VkMat& dst, const Option& opt);
+
+    void record_upload(const Mat& src, VkImageMat& dst, const Option& opt);
+
+    void record_download(const VkMat& src, Mat& dst, const Option& opt);
+
+    void record_download(const VkImageMat& src, Mat& dst, const Option& opt);
+
+    void record_buffer_to_image(const VkMat& src, VkImageMat& dst, const Option& opt);
+
+    void record_image_to_buffer(const VkImageMat& src, VkMat& dst, const Option& opt);
+
+    void record_clone(const Mat& src, VkMat& dst, const Option& opt);
+
+    void record_clone(const Mat& src, VkImageMat& dst, const Option& opt);
+
+    void record_clone(const VkMat& src, Mat& dst, const Option& opt);
+
+    void record_clone(const VkImageMat& src, Mat& dst, const Option& opt);
+
+    void record_clone(const VkMat& src, VkMat& dst, const Option& opt);
+
+    void record_clone(const VkImageMat& src, VkImageMat& dst, const Option& opt);
+
+    void record_clone(const VkMat& src, VkImageMat& dst, const Option& opt);
+
+    void record_clone(const VkImageMat& src, VkMat& dst, const Option& opt);
+
+    void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher);
+
+    void record_pipeline(const Pipeline* pipeline, const std::vector<VkImageMat>& bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher);
+
+    void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher);
+    void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher);
+    void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const Mat& dispatcher);
+
+#if NCNN_BENCHMARK
+    void record_write_timestamp(uint32_t query);
+#endif // NCNN_BENCHMARK
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 26
+    void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkMat& dst);
+
+    void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkImageMat& dst);
+#endif // __ANDROID_API__ >= 26
+#endif // NCNN_PLATFORM_API
+
+    int submit_and_wait();
+
+    int reset();
+
+#if NCNN_BENCHMARK
+    int create_query_pool(uint32_t query_count);
+
+    int get_query_pool_results(uint32_t first_query, uint32_t query_count, std::vector<uint64_t>& results);
+#endif // NCNN_BENCHMARK
+
+protected:
+    const VulkanDevice* vkdev;
+
+    void barrier_readwrite(const VkMat& binding);
+    void barrier_readwrite(const VkImageMat& binding);
+    void barrier_readonly(const VkImageMat& binding);
+
+private:
+    VkComputePrivate* const d;
+};
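VkCompute is a command recorder: every record_* call only queues work, and nothing touches the GPU until submit_and_wait(). A sketch of the usual round trip, assuming a Pipeline that was already created elsewhere and an out_gpu mat already allocated to the right shape:

    #include "command.h"
    #include "gpu.h"

    int dispatch_once(const ncnn::Pipeline* pipeline, const ncnn::Mat& in,
                      ncnn::VkMat& out_gpu, ncnn::Mat& out, const ncnn::Option& opt)
    {
        ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
        ncnn::VkCompute cmd(vkdev);

        ncnn::VkMat in_gpu;
        cmd.record_upload(in, in_gpu, opt);

        std::vector<ncnn::VkMat> bindings(2);
        bindings[0] = in_gpu;
        bindings[1] = out_gpu;
        std::vector<ncnn::vk_constant_type> constants; // no push constants here
        cmd.record_pipeline(pipeline, bindings, constants, out_gpu /* dispatch shape */);

        cmd.record_download(out_gpu, out, opt);

        // everything above was only recorded; this submits and blocks
        return cmd.submit_and_wait();
    }
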
+
+class VkTransferPrivate;
+class NCNN_EXPORT VkTransfer
+{
+public:
+    explicit VkTransfer(const VulkanDevice* vkdev);
+    virtual ~VkTransfer();
+
+public:
+    void record_upload(const Mat& src, VkMat& dst, const Option& opt, bool flatten = true);
+
+    void record_upload(const Mat& src, VkImageMat& dst, const Option& opt);
+
+    int submit_and_wait();
+
+protected:
+    const VulkanDevice* vkdev;
+
+private:
+    VkTransferPrivate* const d;
+};
+
+} // namespace ncnn
+
+#endif // NCNN_VULKAN
+
+#endif // NCNN_COMMAND_H
diff --git a/linux/include/ncnn/cpu.h b/linux/include/ncnn/cpu.h
new file mode 100644
index 0000000..7d6bfce
--- /dev/null
+++ b/linux/include/ncnn/cpu.h
@@ -0,0 +1,178 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_CPU_H
+#define NCNN_CPU_H
+
+#include <stddef.h>
+
+#if (defined _WIN32 && !(defined __MINGW32__))
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+#if defined __ANDROID__ || defined __linux__
+#include <sched.h> // cpu_set_t
+#endif
+
+#include "platform.h"
+
+namespace ncnn {
+
+class NCNN_EXPORT CpuSet
+{
+public:
+    CpuSet();
+    void enable(int cpu);
+    void disable(int cpu);
+    void disable_all();
+    bool is_enabled(int cpu) const;
+    int num_enabled() const;
+
+public:
+#if (defined _WIN32 && !(defined __MINGW32__))
+    ULONG_PTR mask;
+#endif
+#if defined __ANDROID__ || defined __linux__
+    cpu_set_t cpu_set;
+#endif
+#if __APPLE__
+    unsigned int policy;
+#endif
+};
+
+// test optional cpu features
+// edsp = armv7 edsp
+NCNN_EXPORT int cpu_support_arm_edsp();
+// neon = armv7 neon or aarch64 asimd
+NCNN_EXPORT int cpu_support_arm_neon();
+// vfpv4 = armv7 fp16 + fma
+NCNN_EXPORT int cpu_support_arm_vfpv4();
+// asimdhp = aarch64 asimd half precision
+NCNN_EXPORT int cpu_support_arm_asimdhp();
+// cpuid = aarch64 cpuid info
+NCNN_EXPORT int cpu_support_arm_cpuid();
+// asimddp = aarch64 asimd dot product
+NCNN_EXPORT int cpu_support_arm_asimddp();
+// asimdfhm = aarch64 asimd fhm
+NCNN_EXPORT int cpu_support_arm_asimdfhm();
+// bf16 = aarch64 bf16
+NCNN_EXPORT int cpu_support_arm_bf16();
+// i8mm = aarch64 i8mm
+NCNN_EXPORT int cpu_support_arm_i8mm();
+// sve = aarch64 sve
+NCNN_EXPORT int cpu_support_arm_sve();
+// sve2 = aarch64 sve2
+NCNN_EXPORT int cpu_support_arm_sve2();
+// svebf16 = aarch64 svebf16
+NCNN_EXPORT int cpu_support_arm_svebf16();
+// svei8mm = aarch64 svei8mm
+NCNN_EXPORT int cpu_support_arm_svei8mm();
+// svef32mm = aarch64 svef32mm
+NCNN_EXPORT int cpu_support_arm_svef32mm();
+
+// avx = x86 avx
+NCNN_EXPORT int cpu_support_x86_avx();
+// fma = x86 fma
+NCNN_EXPORT int cpu_support_x86_fma();
+// xop = x86 xop
+NCNN_EXPORT int cpu_support_x86_xop();
+// f16c = x86 f16c
+NCNN_EXPORT int cpu_support_x86_f16c();
+// avx2 = x86 avx2 + fma + f16c
+NCNN_EXPORT int cpu_support_x86_avx2();
+// avx_vnni = x86 avx vnni
+NCNN_EXPORT int cpu_support_x86_avx_vnni();
+// avx512 = x86 avx512f + avx512cd + avx512bw + avx512dq + avx512vl
+NCNN_EXPORT int cpu_support_x86_avx512();
+// avx512_vnni = x86 avx512 vnni
+NCNN_EXPORT int cpu_support_x86_avx512_vnni();
+// avx512_bf16 = x86 avx512 bf16
+NCNN_EXPORT int cpu_support_x86_avx512_bf16();
+// avx512_fp16 = x86 avx512 fp16
+NCNN_EXPORT int cpu_support_x86_avx512_fp16();
+
+// lsx = loongarch lsx
+NCNN_EXPORT int cpu_support_loongarch_lsx();
+// lasx = loongarch lasx
+NCNN_EXPORT int cpu_support_loongarch_lasx();
+
+// msa = mips msa
+NCNN_EXPORT int cpu_support_mips_msa();
+// mmi = loongson mmi
+NCNN_EXPORT int cpu_support_loongson_mmi();
+
+// v = riscv vector
+NCNN_EXPORT int cpu_support_riscv_v();
+// zfh = riscv half-precision float
+NCNN_EXPORT int cpu_support_riscv_zfh();
+// vlenb = riscv vector length in bytes
+NCNN_EXPORT int cpu_riscv_vlenb();
+
+// cpu info
+NCNN_EXPORT int get_cpu_count();
+NCNN_EXPORT int get_little_cpu_count();
+NCNN_EXPORT int get_big_cpu_count();
+
+NCNN_EXPORT int get_physical_cpu_count();
+NCNN_EXPORT int get_physical_little_cpu_count();
+NCNN_EXPORT int get_physical_big_cpu_count();
+
+// cpu l2 varies from 64k to 1M, but l3 can be zero
+NCNN_EXPORT int get_cpu_level2_cache_size();
+NCNN_EXPORT int get_cpu_level3_cache_size();
+
+// bind all threads on little clusters if powersave enabled
+// affects HMP arch cpu like ARM big.LITTLE
+// only implemented on android at the moment
+// switching powersave is expensive and not thread-safe
+// 0 = all cores enabled(default)
+// 1 = only little clusters enabled
+// 2 = only big clusters enabled
+// return 0 if success for setter function
+NCNN_EXPORT int get_cpu_powersave();
+NCNN_EXPORT int set_cpu_powersave(int powersave);
+
+// convenient wrapper
+NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave);
+
+// set explicit thread affinity
+NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask);
+
+// runtime thread affinity info
+NCNN_EXPORT int is_current_thread_running_on_a53_a55();
+
+// misc function wrapper for openmp routines
+NCNN_EXPORT int get_omp_num_threads();
+NCNN_EXPORT void set_omp_num_threads(int num_threads);
+
+NCNN_EXPORT int get_omp_dynamic();
+NCNN_EXPORT void set_omp_dynamic(int dynamic);
+
+NCNN_EXPORT int get_omp_thread_num();
+
+NCNN_EXPORT int get_kmp_blocktime();
+NCNN_EXPORT void set_kmp_blocktime(int time_ms);
+
+// need to flush denormals on Intel Chipset.
+// Other architectures such as ARM can be added as needed.
+// 0 = DAZ OFF, FTZ OFF
+// 1 = DAZ ON , FTZ OFF
+// 2 = DAZ OFF, FTZ ON
+// 3 = DAZ ON, FTZ ON
+NCNN_EXPORT int get_flush_denormals();
+NCNN_EXPORT int set_flush_denormals(int flush_denormals);
+
+} // namespace ncnn
+
+#endif // NCNN_CPU_H
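These are plain runtime queries; a sketch that inspects the host and then pins work to the big cluster:

    #include "cpu.h"
    #include <cstdio>

    void tune_for_host()
    {
        printf("cpus: %d (big %d / little %d)\n", ncnn::get_cpu_count(),
               ncnn::get_big_cpu_count(), ncnn::get_little_cpu_count());
        printf("avx2: %d\n", ncnn::cpu_support_x86_avx2());

        // 2 = big clusters only; the setter returns 0 on success
        ncnn::set_cpu_powersave(2);

        // 3 = DAZ ON + FTZ ON, avoids slow subnormal float math on x86
        ncnn::set_flush_denormals(3);
    }
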
diff --git a/linux/include/ncnn/datareader.h b/linux/include/ncnn/datareader.h
new file mode 100644
index 0000000..ed2aba3
--- /dev/null
+++ b/linux/include/ncnn/datareader.h
@@ -0,0 +1,122 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_DATAREADER_H
+#define NCNN_DATAREADER_H
+
+#include "platform.h"
+#if NCNN_STDIO
+#include <stdio.h>
+#endif
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 9
+#include <android/asset_manager.h>
+#endif
+#endif // NCNN_PLATFORM_API
+
+namespace ncnn {
+
+// data read wrapper
+class NCNN_EXPORT DataReader
+{
+public:
+    DataReader();
+    virtual ~DataReader();
+
+#if NCNN_STRING
+    // parse plain param text
+    // return 1 if scan success
+    virtual int scan(const char* format, void* p) const;
+#endif // NCNN_STRING
+
+    // read binary param and model data
+    // return bytes read
+    virtual size_t read(void* buf, size_t size) const;
+
+    // get model data reference
+    // return bytes referenced
+    virtual size_t reference(size_t size, const void** buf) const;
+};
+
+#if NCNN_STDIO
+class DataReaderFromStdioPrivate;
+class NCNN_EXPORT DataReaderFromStdio : public DataReader
+{
+public:
+    explicit DataReaderFromStdio(FILE* fp);
+    virtual ~DataReaderFromStdio();
+
+#if NCNN_STRING
+    virtual int scan(const char* format, void* p) const;
+#endif // NCNN_STRING
+    virtual size_t read(void* buf, size_t size) const;
+
+private:
+    DataReaderFromStdio(const DataReaderFromStdio&);
+    DataReaderFromStdio& operator=(const DataReaderFromStdio&);
+
+private:
+    DataReaderFromStdioPrivate* const d;
+};
+#endif // NCNN_STDIO
+
+class DataReaderFromMemoryPrivate;
+class NCNN_EXPORT DataReaderFromMemory : public DataReader
+{
+public:
+    explicit DataReaderFromMemory(const unsigned char*& mem);
+    virtual ~DataReaderFromMemory();
+
+#if NCNN_STRING
+    virtual int scan(const char* format, void* p) const;
+#endif // NCNN_STRING
+    virtual size_t read(void* buf, size_t size) const;
+    virtual size_t reference(size_t size, const void** buf) const;
+
+private:
+    DataReaderFromMemory(const DataReaderFromMemory&);
+    DataReaderFromMemory& operator=(const DataReaderFromMemory&);
+
+private:
+    DataReaderFromMemoryPrivate* const d;
+};
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 9
+class DataReaderFromAndroidAssetPrivate;
+class NCNN_EXPORT DataReaderFromAndroidAsset : public DataReader
+{
+public:
+    explicit DataReaderFromAndroidAsset(AAsset* asset);
+    virtual ~DataReaderFromAndroidAsset();
+
+#if NCNN_STRING
+    virtual int scan(const char* format, void* p) const;
+#endif // NCNN_STRING
+    virtual size_t read(void* buf, size_t size) const;
+
+private:
+    DataReaderFromAndroidAsset(const DataReaderFromAndroidAsset&);
+    DataReaderFromAndroidAsset& operator=(const DataReaderFromAndroidAsset&);
+
+private:
+    DataReaderFromAndroidAssetPrivate* const d;
+};
+#endif // __ANDROID_API__ >= 9
+#endif // NCNN_PLATFORM_API
+
+} // namespace ncnn
+
+#endif // NCNN_DATAREADER_H
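Anything that can produce bytes can feed a model through this interface: subclass DataReader and override read() (and scan() if plain-text param files must be parsed too). A sketch reading from a std::istream:

    #include "datareader.h"
    #include <istream>

    class DataReaderFromIStream : public ncnn::DataReader
    {
    public:
        explicit DataReaderFromIStream(std::istream& is) : m_is(is) {}

        // binary model data; returns the number of bytes actually read
        virtual size_t read(void* buf, size_t size) const
        {
            m_is.read(static_cast<char*>(buf), static_cast<std::streamsize>(size));
            return static_cast<size_t>(m_is.gcount());
        }

    private:
        std::istream& m_is;
    };
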
diff --git a/linux/include/ncnn/gpu.h b/linux/include/ncnn/gpu.h
new file mode 100644
index 0000000..1eff228
--- /dev/null
+++ b/linux/include/ncnn/gpu.h
@@ -0,0 +1,392 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_GPU_H
+#define NCNN_GPU_H
+
+#include "platform.h"
+
+#if NCNN_VULKAN
+
+#include "mat.h"
+
+#include <vulkan/vulkan.h>
+
+#include "vulkan_header_fix.h"
+
+namespace ncnn {
+
+// instance
+
+// Create VkInstance and initialize some objects that need to be calculated by GPU
+// Creates a VkInstance object, Checks the extended attributes supported by the Vulkan instance concerned,
+// Initializes, and creates Vulkan validation layers (if ENABLE_VALIDATION_LAYER is enabled),
+// Iterates over all supported physical devices, etc.
+NCNN_EXPORT int create_gpu_instance();
+
+// Get global VkInstance variable
+// Must be called after create_gpu_instance() and before destroy_gpu_instance()
+NCNN_EXPORT VkInstance get_gpu_instance();
+
+// Destroy VkInstance object and free the memory of the associated object
+// Usually called in the destructor of the main program exit
+NCNN_EXPORT void destroy_gpu_instance();
+
+// instance extension capability
+extern int support_VK_KHR_external_memory_capabilities;
+extern int support_VK_KHR_get_physical_device_properties2;
+extern int support_VK_KHR_get_surface_capabilities2;
+extern int support_VK_KHR_surface;
+extern int support_VK_EXT_debug_utils;
+extern int support_VK_EXT_validation_features;
+extern int support_VK_EXT_validation_flags;
+#if __ANDROID_API__ >= 26
+extern int support_VK_KHR_android_surface;
+#endif // __ANDROID_API__ >= 26
+
+// VK_KHR_cooperative_matrix
+extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR;
+
+// VK_KHR_external_memory_capabilities
+extern PFN_vkGetPhysicalDeviceExternalBufferPropertiesKHR vkGetPhysicalDeviceExternalBufferPropertiesKHR;
+
+// VK_KHR_get_physical_device_properties2
+extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
+extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
+extern PFN_vkGetPhysicalDeviceFormatProperties2KHR vkGetPhysicalDeviceFormatProperties2KHR;
+extern PFN_vkGetPhysicalDeviceImageFormatProperties2KHR vkGetPhysicalDeviceImageFormatProperties2KHR;
+extern PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR vkGetPhysicalDeviceQueueFamilyProperties2KHR;
+extern PFN_vkGetPhysicalDeviceMemoryProperties2KHR vkGetPhysicalDeviceMemoryProperties2KHR;
+extern PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR vkGetPhysicalDeviceSparseImageFormatProperties2KHR;
+
+// VK_KHR_get_surface_capabilities2
+extern PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR vkGetPhysicalDeviceSurfaceCapabilities2KHR;
+extern PFN_vkGetPhysicalDeviceSurfaceFormats2KHR vkGetPhysicalDeviceSurfaceFormats2KHR;
+
+// VK_KHR_surface
+extern PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR;
+extern PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR;
+extern PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR;
+extern PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR;
+extern PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR;
+
+#if __ANDROID_API__ >= 26
+// VK_KHR_android_surface
+extern PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR;
+#endif // __ANDROID_API__ >= 26
+
+// VK_NV_cooperative_matrix
+extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV vkGetPhysicalDeviceCooperativeMatrixPropertiesNV;
+
+// get info
+NCNN_EXPORT int get_gpu_count();
+NCNN_EXPORT int get_default_gpu_index();
+
+class GpuInfoPrivate;
+class NCNN_EXPORT GpuInfo
+{
+public:
+    explicit GpuInfo();
+    virtual ~GpuInfo();
+
+    // vulkan physical device
+    VkPhysicalDevice physical_device() const;
+
+    // memory properties
+    const VkPhysicalDeviceMemoryProperties& physical_device_memory_properties() const;
+
+    // info
+    uint32_t api_version() const;
+    uint32_t driver_version() const;
+    uint32_t vendor_id() const;
+    uint32_t device_id() const;
+    const char* device_name() const;
+    uint8_t* pipeline_cache_uuid() const;
+
+    // 0 = discrete gpu
+    // 1 = integrated gpu
+    // 2 = virtual gpu
+    // 3 = cpu
+    int type() const;
+
+    // hardware limit
+    uint32_t max_shared_memory_size() const;
+    uint32_t max_workgroup_count_x() const;
+    uint32_t max_workgroup_count_y() const;
+    uint32_t max_workgroup_count_z() const;
+    uint32_t max_workgroup_invocations() const;
+    uint32_t max_workgroup_size_x() const;
+    uint32_t max_workgroup_size_y() const;
+    uint32_t max_workgroup_size_z() const;
+    size_t memory_map_alignment() const;
+    size_t buffer_offset_alignment() const;
+    size_t non_coherent_atom_size() const;
+    size_t buffer_image_granularity() const;
+    uint32_t max_image_dimension_1d() const;
+    uint32_t max_image_dimension_2d() const;
+    uint32_t max_image_dimension_3d() const;
+    float timestamp_period() const;
+
+    // runtime
+    uint32_t compute_queue_family_index() const;
+    uint32_t graphics_queue_family_index() const;
+    uint32_t transfer_queue_family_index() const;
+
+    uint32_t compute_queue_count() const;
+    uint32_t graphics_queue_count() const;
+    uint32_t transfer_queue_count() const;
+
+    // property
+    bool unified_compute_transfer_queue() const;
+
+    // subgroup
+    uint32_t subgroup_size() const;
+    bool support_subgroup_basic() const;
+    bool support_subgroup_vote() const;
+    bool support_subgroup_ballot() const;
+    bool support_subgroup_shuffle() const;
+
+    // bug is not feature
+    bool bug_storage_buffer_no_l1() const;
+    bool bug_corrupted_online_pipeline_cache() const;
+    bool bug_buffer_image_load_zero() const;
+
+    // but sometimes bug is a feature
+    bool bug_implicit_fp16_arithmetic() const;
+
+    // fp16 and int8 feature
+    bool support_fp16_packed() const;
+    bool support_fp16_storage() const;
+    bool support_fp16_arithmetic() const;
+    bool support_int8_packed() const;
+    bool support_int8_storage() const;
+    bool support_int8_arithmetic() const;
+
+    // ycbcr conversion feature
+    bool support_ycbcr_conversion() const;
+
+    // cooperative matrix feature
+    bool support_cooperative_matrix() const;
+    bool support_cooperative_matrix_16_8_8() const;
+    bool support_cooperative_matrix_16_8_16() const;
+    bool support_cooperative_matrix_16_16_16() const;
+
+    // extension capability
+    int support_VK_KHR_8bit_storage() const;
+    int support_VK_KHR_16bit_storage() const;
+    int support_VK_KHR_bind_memory2() const;
+    int support_VK_KHR_buffer_device_address() const;
+    int support_VK_KHR_create_renderpass2() const;
+    int support_VK_KHR_cooperative_matrix() const;
+    int support_VK_KHR_dedicated_allocation() const;
+    int support_VK_KHR_descriptor_update_template() const;
+    int support_VK_KHR_external_memory() const;
+    int support_VK_KHR_get_memory_requirements2() const;
+    int support_VK_KHR_maintenance1() const;
+    int support_VK_KHR_maintenance2() const;
+    int support_VK_KHR_maintenance3() const;
+    int support_VK_KHR_multiview() const;
+    int support_VK_KHR_portability_subset() const;
+    int support_VK_KHR_push_descriptor() const;
+    int support_VK_KHR_sampler_ycbcr_conversion() const;
+    int support_VK_KHR_shader_float16_int8() const;
+    int support_VK_KHR_shader_float_controls() const;
+    int support_VK_KHR_storage_buffer_storage_class() const;
+    int support_VK_KHR_swapchain() const;
+    int support_VK_EXT_buffer_device_address() const;
+    int support_VK_EXT_descriptor_indexing() const;
+    int support_VK_EXT_memory_budget() const;
+    int support_VK_EXT_memory_priority() const;
+    int support_VK_EXT_queue_family_foreign() const;
+    int support_VK_AMD_device_coherent_memory() const;
+#if __ANDROID_API__ >= 26
+    int support_VK_ANDROID_external_memory_android_hardware_buffer() const;
+#endif // __ANDROID_API__ >= 26
+    int support_VK_NV_cooperative_matrix() const;
+
+private:
+    GpuInfo(const GpuInfo&);
+    GpuInfo& operator=(const GpuInfo&);
+
+private:
+    friend int create_gpu_instance();
+    GpuInfoPrivate* const d;
+};
+
+NCNN_EXPORT const GpuInfo& get_gpu_info(int device_index = get_default_gpu_index());
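Once create_gpu_instance() has run, device discovery is a loop over get_gpu_count(); a sketch:

    #include "gpu.h"
    #include <cstdio>

    void list_gpus()
    {
        if (ncnn::create_gpu_instance() != 0)
            return;

        for (int i = 0; i < ncnn::get_gpu_count(); i++)
        {
            const ncnn::GpuInfo& info = ncnn::get_gpu_info(i);
            // type: 0 = discrete, 1 = integrated, 2 = virtual, 3 = cpu
            printf("gpu %d: %s (type %d)\n", i, info.device_name(), info.type());
        }

        ncnn::destroy_gpu_instance();
    }
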
VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const; + + // VK_KHR_bind_memory2 + PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR; + PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR; + + // VK_KHR_buffer_device_address + PFN_vkGetBufferDeviceAddressKHR vkGetBufferDeviceAddressKHR; + PFN_vkGetBufferOpaqueCaptureAddressKHR vkGetBufferOpaqueCaptureAddressKHR; + PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR vkGetDeviceMemoryOpaqueCaptureAddressKHR; + + // VK_KHR_create_renderpass2 + PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR; + PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR; + PFN_vkCmdNextSubpass2KHR vkCmdNextSubpass2KHR; + PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR; + + // VK_KHR_descriptor_update_template + PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; + PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; + PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; + + // VK_KHR_get_memory_requirements2 + PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR; + PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR; + PFN_vkGetImageSparseMemoryRequirements2KHR vkGetImageSparseMemoryRequirements2KHR; + + // VK_KHR_maintenance1 + PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR; + + // VK_KHR_maintenance3 + PFN_vkGetDescriptorSetLayoutSupportKHR vkGetDescriptorSetLayoutSupportKHR; + + // VK_KHR_push_descriptor + PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR; + PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR; + + // VK_KHR_sampler_ycbcr_conversion + PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR; + PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR; + + // VK_KHR_swapchain + PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR; + PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; + PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; + PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; + PFN_vkQueuePresentKHR vkQueuePresentKHR; + + // VK_EXT_buffer_device_address + PFN_vkGetBufferDeviceAddressEXT vkGetBufferDeviceAddressEXT; + +#if __ANDROID_API__ >= 26 + // VK_ANDROID_external_memory_android_hardware_buffer + PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID; + PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID; +#endif // __ANDROID_API__ >= 26 + +protected: + // device extension + int init_device_extension(); + +private: + VulkanDevice(const VulkanDevice&); + VulkanDevice& operator=(const VulkanDevice&); + +private: + VulkanDevicePrivate* const d; +}; + +NCNN_EXPORT VulkanDevice* get_gpu_device(int device_index = get_default_gpu_index()); + +// online spirv compilation +NCNN_EXPORT int compile_spirv_module(const char* comp_string, const Option& opt, std::vector& spirv); +NCNN_EXPORT int compile_spirv_module(const char* comp_data, int comp_data_size, const Option& opt, std::vector& spirv); +NCNN_EXPORT int compile_spirv_module(int shader_type_index, const Option& opt, std::vector& spirv); + +// info from spirv +class NCNN_EXPORT ShaderInfo +{ +public: + int specialization_count; + int binding_count; + int push_constant_count; + + // 0 = null + // 1 = storage buffer + // 2 = storage image + // 3 = combined image sampler + int binding_types[16]; // 16 is large enough I think ... 
+ + int reserved_0; + int reserved_1; + int reserved_2; + int reserved_3; +}; + +NCNN_EXPORT int resolve_shader_info(const uint32_t* spv_data, size_t spv_data_size, ShaderInfo& shader_info); + +} // namespace ncnn + +#endif // NCNN_VULKAN + +#endif // NCNN_GPU_H diff --git a/linux/include/ncnn/layer.h b/linux/include/ncnn/layer.h new file mode 100644 index 0000000..ae4a843 --- /dev/null +++ b/linux/include/ncnn/layer.h @@ -0,0 +1,224 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_LAYER_H +#define NCNN_LAYER_H + +#include "mat.h" +#include "modelbin.h" +#include "option.h" +#include "paramdict.h" +#include "platform.h" + +#include <vector> + +#if NCNN_VULKAN +#include "command.h" +#include "pipeline.h" + +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +namespace ncnn { + +class NCNN_EXPORT Layer +{ +public: + // empty + Layer(); + // virtual destructor + virtual ~Layer(); + + // load layer specific parameter from parsed dict + // return 0 if success + virtual int load_param(const ParamDict& pd); + + // load layer specific weight data from model binary + // return 0 if success + virtual int load_model(const ModelBin& mb); + + // layer implementation specific setup + // return 0 if success + virtual int create_pipeline(const Option& opt); + + // layer implementation specific clean + // return 0 if success + virtual int destroy_pipeline(const Option& opt); + +public: + // one input and one output blob + bool one_blob_only; + + // support inplace inference + bool support_inplace; + + // support vulkan compute + bool support_vulkan; + + // accept input blob with packed storage + bool support_packing; + + // accept bf16 + bool support_bf16_storage; + + // accept fp16 + bool support_fp16_storage; + + // accept int8 + bool support_int8_storage; + + // shader image storage + bool support_image_storage; + + // shader tensor storage + bool support_tensor_storage; + + bool support_reserved_00; + + bool support_reserved_0; + bool support_reserved_1; + bool support_reserved_2; + bool support_reserved_3; + bool support_reserved_4; + bool support_reserved_5; + bool support_reserved_6; + bool support_reserved_7; + bool support_reserved_8; + bool support_reserved_9; + + // feature disabled set + int featmask; + +public: + // implement inference + // return 0 if success + virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const; + virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; + + // implement inplace inference + // return 0 if success + virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt) const; + virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; + +#if NCNN_VULKAN +public: + // upload weight blob from host to device + virtual int upload_model(VkTransfer& cmd, const Option& opt); + +public: + // implement inference + // return 0 if success + virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const; + + // implement inference + // return 0 if success + virtual int forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const; + + // implement inplace inference + // return 0 if success + virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const; + + // implement inplace inference + // return 0 if success + virtual int forward_inplace(std::vector<VkImageMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const; + virtual int forward_inplace(VkImageMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const; + +public: + // assigned immediately after creating this layer + const VulkanDevice* vkdev; +#endif // NCNN_VULKAN + +public: + // custom user data + void* userdata; + // layer type index + int typeindex; +#if NCNN_STRING + // layer type name + std::string type; + // layer name + std::string name; +#endif // NCNN_STRING + // blob index which this layer needs as input + std::vector<int> bottoms; + // blob index which this layer produces as output + std::vector<int> tops; + // shape hint + std::vector<Mat> bottom_shapes; + std::vector<Mat> top_shapes; +}; + +// layer factory function +typedef Layer* (*layer_creator_func)(void*); +typedef void (*layer_destroyer_func)(Layer*, void*); + +struct layer_registry_entry +{ +#if NCNN_STRING + // layer type name + const char* name; +#endif // NCNN_STRING + // layer factory entry + layer_creator_func creator; +}; + +struct custom_layer_registry_entry +{ +#if NCNN_STRING + // layer type name + const char* name; +#endif // NCNN_STRING + // layer factory entry + layer_creator_func creator; + layer_destroyer_func destroyer; + void* userdata; +}; + +struct overwrite_builtin_layer_registry_entry +{ + // layer type index + int typeindex; + // layer factory entry + layer_creator_func creator; + layer_destroyer_func destroyer; + void* userdata; +}; + +#if NCNN_STRING +// get layer type from type name +NCNN_EXPORT int layer_to_index(const char* type); +// create layer from type name +NCNN_EXPORT Layer* create_layer(const char* type); +#endif // NCNN_STRING +// create layer from layer type +NCNN_EXPORT Layer* create_layer(int index); + +#define DEFINE_LAYER_CREATOR(name) \ + ::ncnn::Layer* name##_layer_creator(void* /*userdata*/) \ + { \ + return new name; \ + } + +#define DEFINE_LAYER_DESTROYER(name) \ + void name##_layer_destroyer(::ncnn::Layer* layer, void* /*userdata*/) \ + { \ + delete layer; \ + } + +} // namespace ncnn + +#endif // NCNN_LAYER_H diff --git a/linux/include/ncnn/layer_shader_type.h b/linux/include/ncnn/layer_shader_type.h new file mode 100644 index 0000000..c143e7d --- /dev/null +++ b/linux/include/ncnn/layer_shader_type.h @@ -0,0 +1,29 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_LAYER_SHADER_TYPE_H +#define NCNN_LAYER_SHADER_TYPE_H + +namespace ncnn { + +namespace LayerShaderType { +enum LayerShaderType +{ +#include "layer_shader_type_enum.h" +}; +} // namespace LayerShaderType + +} // namespace ncnn + +#endif // NCNN_LAYER_SHADER_TYPE_H diff --git a/linux/include/ncnn/layer_shader_type_enum.h b/linux/include/ncnn/layer_shader_type_enum.h new file mode 100644 index 0000000..75782d0 --- /dev/null +++ b/linux/include/ncnn/layer_shader_type_enum.h @@ -0,0 +1,398 @@ +// Layer Shader Enum header +// +// This file is auto-generated by cmake, don't edit it. + +absval = 0, +absval_pack4 = 1, +absval_pack8 = 2, +batchnorm = 3, +batchnorm_pack4 = 4, +batchnorm_pack8 = 5, +concat = 6, +concat_pack4 = 7, +concat_pack4to1 = 8, +concat_pack8 = 9, +concat_pack8to1 = 10, +concat_pack8to4 = 11, +convolution = 12, +convolution_1x1s1d1 = 13, +convolution_3x3s1d1_winograd23_transform_input = 14, +convolution_3x3s1d1_winograd23_transform_output = 15, +convolution_3x3s1d1_winograd43_transform_input = 16, +convolution_3x3s1d1_winograd43_transform_output = 17, +convolution_3x3s1d1_winograd_gemm = 18, +convolution_gemm = 19, +convolution_pack1to4 = 20, +convolution_pack1to4_1x1s1d1 = 21, +convolution_pack1to4_3x3s1d1_winograd_gemm = 22, +convolution_pack1to4_gemm = 23, +convolution_pack1to8 = 24, +convolution_pack1to8_1x1s1d1 = 25, +convolution_pack1to8_3x3s1d1_winograd_gemm = 26, +convolution_pack1to8_gemm = 27, +convolution_pack4 = 28, +convolution_pack4_1x1s1d1 = 29, +convolution_pack4_1x1s1d1_khr_cm_16_16_16 = 30, +convolution_pack4_1x1s1d1_khr_cm_16_8_8 = 31, +convolution_pack4_1x1s1d1_nv_cm_16_16_16 = 32, +convolution_pack4_1x1s1d1_nv_cm_16_8_8 = 33, +convolution_pack4_3x3s1d1_winograd23_transform_input = 34, +convolution_pack4_3x3s1d1_winograd23_transform_output = 35, +convolution_pack4_3x3s1d1_winograd43_transform_input = 36, +convolution_pack4_3x3s1d1_winograd43_transform_output = 37, +convolution_pack4_3x3s1d1_winograd_gemm = 38, +convolution_pack4_3x3s1d1_winograd_gemm_khr_cm_16_16_16 = 39, +convolution_pack4_3x3s1d1_winograd_gemm_khr_cm_16_8_8 = 40, +convolution_pack4_3x3s1d1_winograd_gemm_nv_cm_16_16_16 = 41, +convolution_pack4_3x3s1d1_winograd_gemm_nv_cm_16_8_8 = 42, +convolution_pack4_gemm = 43, +convolution_pack4_gemm_khr_cm_16_16_16 = 44, +convolution_pack4_gemm_khr_cm_16_8_8 = 45, +convolution_pack4_gemm_nv_cm_16_16_16 = 46, +convolution_pack4_gemm_nv_cm_16_8_8 = 47, +convolution_pack4to1 = 48, +convolution_pack4to1_1x1s1d1 = 49, +convolution_pack4to1_3x3s1d1_winograd_gemm = 50, +convolution_pack4to1_gemm = 51, +convolution_pack4to8 = 52, +convolution_pack4to8_1x1s1d1 = 53, +convolution_pack4to8_3x3s1d1_winograd_gemm = 54, +convolution_pack4to8_gemm = 55, +convolution_pack8 = 56, +convolution_pack8_1x1s1d1 = 57, +convolution_pack8_3x3s1d1_winograd23_transform_input = 58, +convolution_pack8_3x3s1d1_winograd23_transform_output = 59, +convolution_pack8_3x3s1d1_winograd43_transform_input = 60, +convolution_pack8_3x3s1d1_winograd43_transform_output = 61, +convolution_pack8_3x3s1d1_winograd_gemm = 62, +convolution_pack8_gemm = 63, 
+convolution_pack8to1 = 64, +convolution_pack8to1_1x1s1d1 = 65, +convolution_pack8to1_3x3s1d1_winograd_gemm = 66, +convolution_pack8to1_gemm = 67, +convolution_pack8to4 = 68, +convolution_pack8to4_1x1s1d1 = 69, +convolution_pack8to4_3x3s1d1_winograd_gemm = 70, +convolution_pack8to4_gemm = 71, +crop = 72, +crop_pack1to4 = 73, +crop_pack1to8 = 74, +crop_pack4 = 75, +crop_pack4to1 = 76, +crop_pack4to8 = 77, +crop_pack8 = 78, +crop_pack8to1 = 79, +crop_pack8to4 = 80, +deconvolution = 81, +deconvolution_col2im = 82, +deconvolution_gemm = 83, +deconvolution_pack1to4 = 84, +deconvolution_pack1to4_gemm = 85, +deconvolution_pack1to8 = 86, +deconvolution_pack1to8_gemm = 87, +deconvolution_pack4 = 88, +deconvolution_pack4_col2im = 89, +deconvolution_pack4_gemm = 90, +deconvolution_pack4_gemm_khr_cm_16_16_16 = 91, +deconvolution_pack4_gemm_khr_cm_16_8_8 = 92, +deconvolution_pack4_gemm_nv_cm_16_16_16 = 93, +deconvolution_pack4_gemm_nv_cm_16_8_8 = 94, +deconvolution_pack4to1 = 95, +deconvolution_pack4to1_gemm = 96, +deconvolution_pack4to8 = 97, +deconvolution_pack4to8_gemm = 98, +deconvolution_pack8 = 99, +deconvolution_pack8_col2im = 100, +deconvolution_pack8_gemm = 101, +deconvolution_pack8to1 = 102, +deconvolution_pack8to1_gemm = 103, +deconvolution_pack8to4 = 104, +deconvolution_pack8to4_gemm = 105, +dropout = 106, +dropout_pack4 = 107, +dropout_pack8 = 108, +eltwise = 109, +eltwise_pack4 = 110, +eltwise_pack8 = 111, +elu = 112, +elu_pack4 = 113, +elu_pack8 = 114, +flatten = 115, +flatten_pack1to4 = 116, +flatten_pack1to8 = 117, +flatten_pack4 = 118, +flatten_pack4to8 = 119, +flatten_pack8 = 120, +innerproduct = 121, +innerproduct_gemm = 122, +innerproduct_gemm_wp1to4 = 123, +innerproduct_gemm_wp1to8 = 124, +innerproduct_gemm_wp4 = 125, +innerproduct_gemm_wp4to1 = 126, +innerproduct_gemm_wp4to8 = 127, +innerproduct_gemm_wp8 = 128, +innerproduct_gemm_wp8to1 = 129, +innerproduct_gemm_wp8to4 = 130, +innerproduct_pack1to4 = 131, +innerproduct_pack1to8 = 132, +innerproduct_pack4 = 133, +innerproduct_pack4to1 = 134, +innerproduct_pack4to8 = 135, +innerproduct_pack8 = 136, +innerproduct_pack8to1 = 137, +innerproduct_pack8to4 = 138, +innerproduct_reduce_sum8 = 139, +innerproduct_reduce_sum8_pack4 = 140, +innerproduct_reduce_sum8_pack8 = 141, +innerproduct_sum8 = 142, +innerproduct_sum8_pack1to4 = 143, +innerproduct_sum8_pack1to8 = 144, +innerproduct_sum8_pack4 = 145, +innerproduct_sum8_pack4to1 = 146, +innerproduct_sum8_pack4to8 = 147, +innerproduct_sum8_pack8 = 148, +innerproduct_sum8_pack8to1 = 149, +innerproduct_sum8_pack8to4 = 150, +lrn_norm = 151, +lrn_norm_across_channel_pack4 = 152, +lrn_norm_across_channel_pack8 = 153, +lrn_norm_within_channel_pack4 = 154, +lrn_norm_within_channel_pack8 = 155, +lrn_square_pad = 156, +lrn_square_pad_across_channel_pack4 = 157, +lrn_square_pad_across_channel_pack8 = 158, +lrn_square_pad_within_channel_pack4 = 159, +lrn_square_pad_within_channel_pack8 = 160, +pooling = 161, +pooling_adaptive = 162, +pooling_adaptive_pack4 = 163, +pooling_adaptive_pack8 = 164, +pooling_global = 165, +pooling_global_pack4 = 166, +pooling_global_pack8 = 167, +pooling_pack4 = 168, +pooling_pack8 = 169, +prelu = 170, +prelu_pack4 = 171, +prelu_pack8 = 172, +relu = 173, +relu_pack4 = 174, +relu_pack8 = 175, +reshape = 176, +reshape_pack1to4 = 177, +reshape_pack1to8 = 178, +reshape_pack4 = 179, +reshape_pack4to1 = 180, +reshape_pack4to8 = 181, +reshape_pack8 = 182, +reshape_pack8to1 = 183, +reshape_pack8to4 = 184, +scale = 185, +scale_pack4 = 186, +scale_pack8 = 187, +sigmoid = 188, 
+sigmoid_pack4 = 189, +sigmoid_pack8 = 190, +slice = 191, +slice_pack1to4 = 192, +slice_pack1to8 = 193, +slice_pack4 = 194, +slice_pack4to8 = 195, +slice_pack8 = 196, +softmax_div_sum = 197, +softmax_div_sum_pack4 = 198, +softmax_div_sum_pack8 = 199, +softmax_exp_sub_max = 200, +softmax_exp_sub_max_pack4 = 201, +softmax_exp_sub_max_pack8 = 202, +softmax_reduce_max = 203, +softmax_reduce_max_pack4 = 204, +softmax_reduce_max_pack8 = 205, +softmax_reduce_sum = 206, +softmax_reduce_sum_pack4 = 207, +softmax_reduce_sum_pack8 = 208, +tanh = 209, +tanh_pack4 = 210, +tanh_pack8 = 211, +binaryop = 212, +binaryop_broadcast = 213, +binaryop_broadcast_pack1to4 = 214, +binaryop_broadcast_pack1to8 = 215, +binaryop_broadcast_pack4 = 216, +binaryop_broadcast_pack8 = 217, +binaryop_pack4 = 218, +binaryop_pack8 = 219, +unaryop = 220, +unaryop_pack4 = 221, +unaryop_pack8 = 222, +convolutiondepthwise = 223, +convolutiondepthwise_group = 224, +convolutiondepthwise_group_pack1to4 = 225, +convolutiondepthwise_group_pack1to8 = 226, +convolutiondepthwise_group_pack4 = 227, +convolutiondepthwise_group_pack4to1 = 228, +convolutiondepthwise_group_pack4to8 = 229, +convolutiondepthwise_group_pack8 = 230, +convolutiondepthwise_group_pack8to1 = 231, +convolutiondepthwise_group_pack8to4 = 232, +convolutiondepthwise_pack4 = 233, +convolutiondepthwise_pack8 = 234, +padding = 235, +padding_3d = 236, +padding_3d_pack4 = 237, +padding_3d_pack8 = 238, +padding_pack1to4 = 239, +padding_pack1to8 = 240, +padding_pack4 = 241, +padding_pack4to1 = 242, +padding_pack4to8 = 243, +padding_pack8 = 244, +padding_pack8to1 = 245, +padding_pack8to4 = 246, +normalize_coeffs = 247, +normalize_coeffs_pack4 = 248, +normalize_coeffs_pack8 = 249, +normalize_norm = 250, +normalize_norm_pack4 = 251, +normalize_norm_pack8 = 252, +normalize_reduce_sum4_fp16_to_fp32 = 253, +normalize_reduce_sum4_fp16_to_fp32_pack4 = 254, +normalize_reduce_sum4_fp16_to_fp32_pack8 = 255, +normalize_reduce_sum4_fp32 = 256, +normalize_reduce_sum4_fp32_pack4 = 257, +normalize_reduce_sum4_fp32_pack8 = 258, +permute = 259, +permute_pack1to4 = 260, +permute_pack1to8 = 261, +permute_pack4 = 262, +permute_pack4to1 = 263, +permute_pack4to8 = 264, +permute_pack8 = 265, +permute_pack8to1 = 266, +permute_pack8to4 = 267, +priorbox = 268, +priorbox_mxnet = 269, +interp = 270, +interp_bicubic = 271, +interp_bicubic_coeffs = 272, +interp_bicubic_pack4 = 273, +interp_bicubic_pack8 = 274, +interp_pack4 = 275, +interp_pack8 = 276, +deconvolutiondepthwise = 277, +deconvolutiondepthwise_group = 278, +deconvolutiondepthwise_group_pack1to4 = 279, +deconvolutiondepthwise_group_pack1to8 = 280, +deconvolutiondepthwise_group_pack4 = 281, +deconvolutiondepthwise_group_pack4to1 = 282, +deconvolutiondepthwise_group_pack4to8 = 283, +deconvolutiondepthwise_group_pack8 = 284, +deconvolutiondepthwise_group_pack8to1 = 285, +deconvolutiondepthwise_group_pack8to4 = 286, +deconvolutiondepthwise_pack4 = 287, +deconvolutiondepthwise_pack8 = 288, +shufflechannel = 289, +shufflechannel_pack4 = 290, +shufflechannel_pack8 = 291, +instancenorm_coeffs = 292, +instancenorm_coeffs_pack4 = 293, +instancenorm_coeffs_pack8 = 294, +instancenorm_norm = 295, +instancenorm_norm_pack4 = 296, +instancenorm_norm_pack8 = 297, +instancenorm_reduce_mean = 298, +instancenorm_reduce_mean_pack4 = 299, +instancenorm_reduce_mean_pack8 = 300, +instancenorm_reduce_sum4_fp16_to_fp32 = 301, +instancenorm_reduce_sum4_fp16_to_fp32_pack4 = 302, +instancenorm_reduce_sum4_fp16_to_fp32_pack8 = 303, +instancenorm_reduce_sum4_fp32 = 304, 
+instancenorm_reduce_sum4_fp32_pack4 = 305, +instancenorm_reduce_sum4_fp32_pack8 = 306, +instancenorm_sub_mean_square = 307, +instancenorm_sub_mean_square_pack4 = 308, +instancenorm_sub_mean_square_pack8 = 309, +clip = 310, +clip_pack4 = 311, +clip_pack8 = 312, +reorg = 313, +reorg_pack1to4 = 314, +reorg_pack1to8 = 315, +reorg_pack4 = 316, +reorg_pack4to8 = 317, +reorg_pack8 = 318, +packing = 319, +packing_fp16_to_fp32 = 320, +packing_fp32_to_fp16 = 321, +packing_pack1to4 = 322, +packing_pack1to4_fp16_to_fp32 = 323, +packing_pack1to4_fp32_to_fp16 = 324, +packing_pack1to8 = 325, +packing_pack1to8_fp16_to_fp32 = 326, +packing_pack1to8_fp32_to_fp16 = 327, +packing_pack4 = 328, +packing_pack4_fp16_to_fp32 = 329, +packing_pack4_fp32_to_fp16 = 330, +packing_pack4to1 = 331, +packing_pack4to1_fp16_to_fp32 = 332, +packing_pack4to1_fp32_to_fp16 = 333, +packing_pack4to8 = 334, +packing_pack4to8_fp16_to_fp32 = 335, +packing_pack4to8_fp32_to_fp16 = 336, +packing_pack8 = 337, +packing_pack8_fp16_to_fp32 = 338, +packing_pack8_fp32_to_fp16 = 339, +packing_pack8to1 = 340, +packing_pack8to1_fp16_to_fp32 = 341, +packing_pack8to1_fp32_to_fp16 = 342, +packing_pack8to4 = 343, +packing_pack8to4_fp16_to_fp32 = 344, +packing_pack8to4_fp32_to_fp16 = 345, +cast_fp16_to_fp32 = 346, +cast_fp16_to_fp32_pack4 = 347, +cast_fp16_to_fp32_pack8 = 348, +cast_fp32_to_fp16 = 349, +cast_fp32_to_fp16_pack4 = 350, +cast_fp32_to_fp16_pack8 = 351, +hardsigmoid = 352, +hardsigmoid_pack4 = 353, +hardsigmoid_pack8 = 354, +hardswish = 355, +hardswish_pack4 = 356, +hardswish_pack8 = 357, +pixelshuffle = 358, +pixelshuffle_pack4 = 359, +pixelshuffle_pack4to1 = 360, +pixelshuffle_pack8 = 361, +pixelshuffle_pack8to1 = 362, +pixelshuffle_pack8to4 = 363, +deepcopy = 364, +deepcopy_pack4 = 365, +deepcopy_pack8 = 366, +mish = 367, +mish_pack4 = 368, +mish_pack8 = 369, +swish = 370, +swish_pack4 = 371, +swish_pack8 = 372, +gemm = 373, +multiheadattention_qk_cross = 374, +multiheadattention_qk_cross_pack1to4 = 375, +multiheadattention_qk_cross_pack4 = 376, +multiheadattention_qk_cross_pack4to1 = 377, +multiheadattention_qkv_cross = 378, +multiheadattention_qkv_cross_pack1to4 = 379, +multiheadattention_qkv_cross_pack4 = 380, +multiheadattention_qkv_cross_pack4to1 = 381, +gelu = 382, +gelu_pack4 = 383, +gelu_pack8 = 384, +erf = 385, +erf_pack4 = 386, +erf_pack8 = 387, +celu = 388, +celu_pack4 = 389, +celu_pack8 = 390, +convert_ycbcr = 391, +vulkan_activation = 392, + diff --git a/linux/include/ncnn/layer_type.h b/linux/include/ncnn/layer_type.h new file mode 100644 index 0000000..511c714 --- /dev/null +++ b/linux/include/ncnn/layer_type.h @@ -0,0 +1,30 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
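The LayerShaderType values generated above are what the shader_type_index argument of compile_spirv_module() in gpu.h refers to, so any built-in shader can be compiled to SPIR-V at runtime. A minimal sketch, assuming a Vulkan-enabled ncnn build and eliding error handling:

#include "gpu.h"
#include "layer_shader_type.h"
#include "option.h"
#include <vector>

void compile_relu_shader()
{
    std::vector<uint32_t> spirv;
    ncnn::Option opt;
    // relu = 173 in the generated enum above
    int ret = ncnn::compile_spirv_module(ncnn::LayerShaderType::relu, opt, spirv);
    // ret == 0 on success; the SPIR-V words can then be handed to
    // VulkanDevice::compile_shader_module(spirv.data(), spirv.size() * 4),
    // where the size is presumably the byte count of the word buffer
    (void)ret;
}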
+ +#ifndef NCNN_LAYER_TYPE_H +#define NCNN_LAYER_TYPE_H + +namespace ncnn { + +namespace LayerType { +enum LayerType +{ +#include "layer_type_enum.h" + CustomBit = (1 << 8), +}; +} // namespace LayerType + +} // namespace ncnn + +#endif // NCNN_LAYER_TYPE_H diff --git a/linux/include/ncnn/layer_type_enum.h b/linux/include/ncnn/layer_type_enum.h new file mode 100644 index 0000000..fd539bc --- /dev/null +++ b/linux/include/ncnn/layer_type_enum.h @@ -0,0 +1,108 @@ +// Layer Type Enum header +// +// This file is auto-generated by cmake, don't edit it. + +AbsVal = 0, +ArgMax = 1, +BatchNorm = 2, +Bias = 3, +BNLL = 4, +Concat = 5, +Convolution = 6, +Crop = 7, +Deconvolution = 8, +Dropout = 9, +Eltwise = 10, +ELU = 11, +Embed = 12, +Exp = 13, +Flatten = 14, +InnerProduct = 15, +Input = 16, +Log = 17, +LRN = 18, +MemoryData = 19, +MVN = 20, +Pooling = 21, +Power = 22, +PReLU = 23, +Proposal = 24, +Reduction = 25, +ReLU = 26, +Reshape = 27, +ROIPooling = 28, +Scale = 29, +Sigmoid = 30, +Slice = 31, +Softmax = 32, +Split = 33, +SPP = 34, +TanH = 35, +Threshold = 36, +Tile = 37, +RNN = 38, +LSTM = 39, +BinaryOp = 40, +UnaryOp = 41, +ConvolutionDepthWise = 42, +Padding = 43, +Squeeze = 44, +ExpandDims = 45, +Normalize = 46, +Permute = 47, +PriorBox = 48, +DetectionOutput = 49, +Interp = 50, +DeconvolutionDepthWise = 51, +ShuffleChannel = 52, +InstanceNorm = 53, +Clip = 54, +Reorg = 55, +YoloDetectionOutput = 56, +Quantize = 57, +Dequantize = 58, +Yolov3DetectionOutput = 59, +PSROIPooling = 60, +ROIAlign = 61, +Packing = 62, +Requantize = 63, +Cast = 64, +HardSigmoid = 65, +SELU = 66, +HardSwish = 67, +Noop = 68, +PixelShuffle = 69, +DeepCopy = 70, +Mish = 71, +StatisticsPooling = 72, +Swish = 73, +Gemm = 74, +GroupNorm = 75, +LayerNorm = 76, +Softplus = 77, +GRU = 78, +MultiHeadAttention = 79, +GELU = 80, +Convolution1D = 81, +Pooling1D = 82, +ConvolutionDepthWise1D = 83, +Convolution3D = 84, +ConvolutionDepthWise3D = 85, +Pooling3D = 86, +MatMul = 87, +Deconvolution1D = 88, +DeconvolutionDepthWise1D = 89, +Deconvolution3D = 90, +DeconvolutionDepthWise3D = 91, +Einsum = 92, +DeformableConv2D = 93, +GLU = 94, +Fold = 95, +Unfold = 96, +GridSample = 97, +CumulativeSum = 98, +CopyTo = 99, +Erf = 100, +Diag = 101, +CELU = 102, + diff --git a/linux/include/ncnn/mat.h b/linux/include/ncnn/mat.h new file mode 100644 index 0000000..c6f59ef --- /dev/null +++ b/linux/include/ncnn/mat.h @@ -0,0 +1,1843 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
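Taken together, layer.h provides the Layer interface plus the DEFINE_LAYER_CREATOR factory macro, and the enum above assigns the built-in type indices that create_layer(int) resolves. A minimal custom-layer sketch follows; the register_custom_layer() call is ncnn::Net API from net.h, assumed here rather than shown in this section:

#include "layer.h"
#include "net.h"

// a do-nothing inplace layer, purely illustrative
class PassthroughLayer : public ncnn::Layer
{
public:
    PassthroughLayer()
    {
        one_blob_only = true;   // one input blob, one output blob
        support_inplace = true; // forward_inplace() will be called
    }
    virtual int forward_inplace(ncnn::Mat& bottom_top_blob, const ncnn::Option& opt) const
    {
        (void)bottom_top_blob;
        (void)opt;
        return 0; // leave the blob untouched; a real layer would transform it here
    }
};

DEFINE_LAYER_CREATOR(PassthroughLayer) // expands to PassthroughLayer_layer_creator(void*)

// typical registration on a net instance (Net is declared in net.h):
// ncnn::Net net;
// net.register_custom_layer("PassthroughLayer", PassthroughLayer_layer_creator);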
+ +#ifndef NCNN_MAT_H +#define NCNN_MAT_H + +#include <stdlib.h> +#include <string.h> +#if __ARM_NEON +#include <arm_neon.h> +#endif +#if __SSE2__ +#include <emmintrin.h> +#if __AVX__ +#include <immintrin.h> +#endif +#endif +#if __mips_msa +#include <msa.h> +#endif +#if __loongarch_sx +#include <lsxintrin.h> +#endif +#if __riscv_vector +#include <riscv_vector.h> +#include "cpu.h" // cpu_riscv_vlenb() +#endif + +#include "allocator.h" +#include "option.h" +#include "platform.h" + +#if NCNN_VULKAN +#include <vulkan/vulkan.h> +#endif // NCNN_VULKAN + +#if NCNN_PIXEL +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 +#include <android/bitmap.h> +#include <jni.h> +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API +#endif // NCNN_PIXEL + +namespace ncnn { + +#if NCNN_VULKAN +class VkMat; +class VkImageMat; +#endif // NCNN_VULKAN + +// the three dimension matrix +class NCNN_EXPORT Mat +{ +public: + // empty + Mat(); + // vec + Mat(int w, size_t elemsize = 4u, Allocator* allocator = 0); + // image + Mat(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0); + // dim + Mat(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // cube + Mat(int w, int h, int d, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // packed vec + Mat(int w, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed image + Mat(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed dim + Mat(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // packed cube + Mat(int w, int h, int d, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // copy + Mat(const Mat& m); + // external vec + Mat(int w, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external image + Mat(int w, int h, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external dim + Mat(int w, int h, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external cube + Mat(int w, int h, int d, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0); + // external packed vec + Mat(int w, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed image + Mat(int w, int h, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed dim + Mat(int w, int h, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // external packed cube + Mat(int w, int h, int d, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0); + // release + ~Mat(); + // assign + Mat& operator=(const Mat& m); + // set all + void fill(float v); + void fill(int v); +#if __ARM_NEON + void fill(float32x4_t _v); + void fill(uint16x4_t _v); + void fill(int32x4_t _v); + void fill(int32x4_t _v0, int32x4_t _v1); +#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + void fill(float16x4_t _v); + void fill(float16x8_t _v); +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#endif // __ARM_NEON +#if __SSE2__ +#if __AVX__ +#if __AVX512F__ + void fill(__m512 _v); +#endif // __AVX512F__ + void fill(__m256 _v, int i = 0); +#endif // __AVX__ + void fill(__m128 _v); + void fill(__m128i _v); +#endif // __SSE2__ +#if __mips_msa + void fill(v4f32 _v); +#endif // __mips_msa +#if __loongarch_sx + void fill(__m128 _v); +#endif //__loongarch_sx +#if __riscv_vector + void fill(vfloat32m1_t _v); + void fill(vuint16m1_t _v); + void fill(vint8m1_t _v); +#if __riscv_zfh + void fill(vfloat16m1_t _v); +#endif // __riscv_zfh +#endif // __riscv_vector + template<typename T> + void fill(T v); + // deep copy + Mat clone(Allocator* allocator = 0) const; + // deep copy from other mat, inplace + void clone_from(const ncnn::Mat& mat, Allocator* allocator = 0); + // reshape vec + Mat reshape(int w, Allocator* allocator = 0) const; + // reshape image + Mat reshape(int w, int h, Allocator* allocator = 0) const; + // reshape dim + Mat reshape(int w, int h, int c, Allocator* allocator = 0) const; + // reshape cube + Mat reshape(int w, int h, int d, int c, Allocator* allocator = 0) const; + // allocate vec + void create(int w, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate image + void create(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate dim + void create(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize = 4u, Allocator* allocator = 0); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, Allocator* allocator = 0); + // allocate like + void create_like(const Mat& m, Allocator* allocator = 0); +#if NCNN_VULKAN + // allocate like + void create_like(const VkMat& m, Allocator* allocator = 0); + // allocate like + void create_like(const VkImageMat& im, Allocator* allocator = 0); +#endif // NCNN_VULKAN + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // data reference + Mat channel(int c); + const Mat channel(int c) const; + Mat depth(int z); + const Mat depth(int z) const; + float* row(int y); + const float* row(int y) const; + template<typename T> + T* row(int y); + template<typename T> + const T* row(int y) const; + + // range reference + Mat channel_range(int c, int channels); + const Mat channel_range(int c, int channels) const; + Mat depth_range(int z, int depths); + const Mat depth_range(int z, int depths) const; + Mat row_range(int y, int rows); + const Mat row_range(int y, int rows) const; + Mat range(int x, int n); + const Mat range(int x, int n) const; + + // access raw data + template<typename T> + operator T*(); + template<typename T> + operator const T*() const; + + // convenient access float vec element + float& operator[](size_t i); + const float& operator[](size_t i) const; + +#if NCNN_PIXEL + enum PixelType + { + PIXEL_CONVERT_SHIFT = 16, + PIXEL_FORMAT_MASK = 0x0000ffff, + PIXEL_CONVERT_MASK = 0xffff0000, + + PIXEL_RGB = 1, + PIXEL_BGR = 2, + PIXEL_GRAY = 3, + PIXEL_RGBA = 4, + PIXEL_BGRA = 5, + + PIXEL_RGB2BGR = PIXEL_RGB | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2GRAY = PIXEL_RGB | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2RGBA = PIXEL_RGB | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_RGB2BGRA = PIXEL_RGB | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_BGR2RGB = PIXEL_BGR | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2GRAY = PIXEL_BGR | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2RGBA = PIXEL_BGR | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_BGR2BGRA = PIXEL_BGR | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_GRAY2RGB = PIXEL_GRAY | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2BGR = PIXEL_GRAY | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2RGBA = PIXEL_GRAY | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + PIXEL_GRAY2BGRA = 
PIXEL_GRAY | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_RGBA2RGB = PIXEL_RGBA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2BGR = PIXEL_RGBA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2GRAY = PIXEL_RGBA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_RGBA2BGRA = PIXEL_RGBA | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT), + + PIXEL_BGRA2RGB = PIXEL_BGRA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2BGR = PIXEL_BGRA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2GRAY = PIXEL_BGRA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT), + PIXEL_BGRA2RGBA = PIXEL_BGRA | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT), + }; + // convenient construct from pixel data + static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, Allocator* allocator = 0); + // convenient construct from pixel data with stride(bytes-per-row) parameter + static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, Allocator* allocator = 0); + // convenient construct from pixel data and resize to specific size + static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data and resize to specific size with stride(bytes-per-row) parameter + static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data roi + static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from pixel data roi with stride(bytes-per-row) parameter + static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from pixel data roi and resize to specific size + static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from pixel data roi and resize to specific size with stride(bytes-per-row) parameter + static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + + // convenient export to pixel data + void to_pixels(unsigned char* pixels, int type) const; + // convenient export to pixel data with stride(bytes-per-row) parameter + void to_pixels(unsigned char* pixels, int type, int stride) const; + // convenient export to pixel data and resize to specific size + void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height) const; + // convenient export to pixel data and resize to specific size with stride(bytes-per-row) parameter + void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height, int target_stride) const; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 9 + // convenient construct from android Bitmap + static Mat from_android_bitmap(JNIEnv* env, jobject bitmap, int type_to, Allocator* allocator = 0); + // convenient construct from android Bitmap and resize to specific size + static Mat from_android_bitmap_resize(JNIEnv* env, jobject bitmap, int type_to, int target_width, int target_height, Allocator* allocator = 0); + // convenient construct from android Bitmap 
roi + static Mat from_android_bitmap_roi(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0); + // convenient construct from android Bitmap roi and resize to specific size + static Mat from_android_bitmap_roi_resize(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0); + // convenient export to android Bitmap and resize to the android Bitmap size + void to_android_bitmap(JNIEnv* env, jobject bitmap, int type_from) const; +#endif // __ANDROID_API__ >= 9 +#endif // NCNN_PLATFORM_API +#endif // NCNN_PIXEL + + // substract channel-wise mean values, then multiply by normalize values, pass 0 to skip + void substract_mean_normalize(const float* mean_vals, const float* norm_vals); + + // convenient construct from half precision floating point data + static Mat from_float16(const unsigned short* data, int size); + + // pointer to the data + void* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + Allocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; + + size_t cstep; +}; + +#if NCNN_VULKAN + +// the three dimension matrix, vulkan version +class NCNN_EXPORT VkMat +{ +public: + // empty + VkMat(); + // vec + VkMat(int w, size_t elemsize, VkAllocator* allocator); + // image + VkMat(int w, int h, size_t elemsize, VkAllocator* allocator); + // dim + VkMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // cube + VkMat(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // packed vec + VkMat(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // packed image + VkMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // packed dim + VkMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // packed cube + VkMat(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // copy + VkMat(const VkMat& m); + // external vec + VkMat(int w, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external image + VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external dim + VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external cube + VkMat(int w, int h, int d, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator); + // external packed vec + VkMat(int w, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed image + VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed dim + VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed cube + VkMat(int w, int h, int d, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // release + ~VkMat(); + // assign + VkMat& operator=(const VkMat& m); + // allocate vec + void create(int w, size_t elemsize, VkAllocator* allocator); 
+ // allocate image + void create(int w, int h, size_t elemsize, VkAllocator* allocator); + // allocate dim + void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate like + void create_like(const Mat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkMat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkImageMat& im, VkAllocator* allocator); + + // mapped + Mat mapped() const; + void* mapped_ptr() const; + + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // low-level reference + VkBuffer buffer() const; + size_t buffer_offset() const; + size_t buffer_capacity() const; + + // device buffer + VkBufferMemory* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + VkAllocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; + + size_t cstep; +}; + +class NCNN_EXPORT VkImageMat +{ +public: + // empty + VkImageMat(); + // vec + VkImageMat(int w, size_t elemsize, VkAllocator* allocator); + // image + VkImageMat(int w, int h, size_t elemsize, VkAllocator* allocator); + // dim + VkImageMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // cube + VkImageMat(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // packed vec + VkImageMat(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // packed image + VkImageMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // packed dim + VkImageMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // packed cube + VkImageMat(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // copy + VkImageMat(const VkImageMat& m); + // external vec + VkImageMat(int w, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external image + VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external dim + VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external cube + VkImageMat(int w, int h, int d, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator); + // external packed vec + VkImageMat(int w, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed image + VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* 
allocator); + // external packed dim + VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // external packed cube + VkImageMat(int w, int h, int d, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator); + // release + ~VkImageMat(); + // assign + VkImageMat& operator=(const VkImageMat& m); + // allocate vec + void create(int w, size_t elemsize, VkAllocator* allocator); + // allocate image + void create(int w, int h, size_t elemsize, VkAllocator* allocator); + // allocate dim + void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator); + // allocate cube + void create(int w, int h, int d, int c, size_t elemsize, VkAllocator* allocator); + // allocate packed vec + void create(int w, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed image + void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed dim + void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate packed cube + void create(int w, int h, int d, int c, size_t elemsize, int elempack, VkAllocator* allocator); + // allocate like + void create_like(const Mat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkMat& m, VkAllocator* allocator); + // allocate like + void create_like(const VkImageMat& im, VkAllocator* allocator); + + // mapped + Mat mapped() const; + void* mapped_ptr() const; + + // refcount++ + void addref(); + // refcount-- + void release(); + + bool empty() const; + size_t total() const; + + // bits per element + int elembits() const; + + // shape only + Mat shape() const; + + // low-level reference + VkImage image() const; + VkImageView imageview() const; + +#if NCNN_PLATFORM_API +#if __ANDROID_API__ >= 26 + // convenient construct from android hardware buffer + static VkImageMat from_android_hardware_buffer(VkAndroidHardwareBufferImageAllocator* allocator); +#endif // __ANDROID_API__ >= 26 +#endif // NCNN_PLATFORM_API + + // device image + VkImageMemory* data; + + // pointer to the reference counter + // when points to user-allocated data, the pointer is NULL + int* refcount; + + // element size in bytes + // 4 = float32/int32 + // 2 = float16 + // 1 = int8/uint8 + // 0 = empty + size_t elemsize; + + // packed count inside element + // c/1-d-h-w-1 c/1-h-w-1 h/1-w-1 w/1-1 scalar + // c/4-d-h-w-4 c/4-h-w-4 h/4-w-4 w/4-4 sse/neon + // c/8-d-h-w-8 c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16 + int elempack; + + // the allocator + VkAllocator* allocator; + + // the dimension rank + int dims; + + int w; + int h; + int d; + int c; +}; + +// type for vulkan specialization constant and push constant +union vk_specialization_type +{ + int i; + float f; + uint32_t u32; +}; +union vk_constant_type +{ + int i; + float f; +}; +#endif // NCNN_VULKAN + +// misc function +#if NCNN_PIXEL +// convert yuv420sp(nv21) to rgb, the fast approximate version +NCNN_EXPORT void yuv420sp2rgb(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// convert yuv420sp(nv12) to rgb, the fast approximate version +NCNN_EXPORT void yuv420sp2rgb_nv12(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// convert yuv420sp(nv21) to rgb with half resize, the faster approximate version +NCNN_EXPORT void yuv420sp2rgb_half(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb); +// image pixel bilinear resize +NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, 
unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +// image pixel bilinear resize with stride(bytes-per-row) parameter +NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride); +// image pixel bilinear resize, convenient wrapper for yuv420sp(nv21/nv12) +NCNN_EXPORT void resize_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h); +#endif // NCNN_PIXEL +#if NCNN_PIXEL_ROTATE +// type is the from type, 6 means rotating from 6 to 1 +// +// 1 2 3 4 5 6 7 8 +// +// 888888 888888 88 88 8888888888 88 88 8888888888 +// 88 88 88 88 88 88 88 88 88 88 88 88 +// 8888 8888 8888 8888 88 8888888888 8888888888 88 +// 88 88 88 88 +// 88 88 888888 888888 +// +// ref http://sylvana.net/jpegcrop/exif_orientation.html +// image pixel kanna rotate +NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +// image pixel kanna rotate with stride(bytes-per-row) parameter +NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type); +// image pixel kanna rotate, convenient wrapper for yuv420sp(nv21/nv12) +NCNN_EXPORT void kanna_rotate_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type); +#endif // NCNN_PIXEL_ROTATE +#if NCNN_PIXEL_AFFINE +// resolve affine transform matrix from rotation angle, scale factor and x y offset +NCNN_EXPORT void get_rotation_matrix(float angle, float scale, float dx, float dy, float* tm); +// resolve affine transform matrix from two set of points, num_point must be >= 2 +NCNN_EXPORT void get_affine_transform(const float* points_from, const float* points_to, int num_point, float* tm); +// resolve the inversion affine transform matrix +NCNN_EXPORT void invert_affine_transform(const 
float* tm, float* tm_inv);
+// image pixel bilinear warpaffine inverse transform, set -233 for transparent border color, the color RGBA is little-endian encoded
+NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
+NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
+NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
+NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
+// image pixel bilinear warpaffine inverse transform with stride(bytes-per-row) parameter, set -233 for transparent border color, the color RGBA is little-endian encoded
+NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
+NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
+NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
+NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
+// image pixel bilinear warpaffine, convenient wrapper for yuv420sp(nv21/nv12), set -233 for transparent border color, the color YUV_ is little-endian encoded
+NCNN_EXPORT void warpaffine_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
+#endif // NCNN_PIXEL_AFFINE
+#if NCNN_PIXEL_DRAWING
+// draw rectangle, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded
+NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
+NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
+NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
+NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
+// draw rectangle with stride(bytes-per-row) parameter, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded
+NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
+NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
+NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
+NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
+// draw rectangle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled rectangle, the color YUV_ is little-endian encoded
+NCNN_EXPORT void draw_rectangle_yuv420sp(unsigned char* yuv420sp, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
+// draw circle, set thickness -1 for filled circle, the color RGBA is little-endian encoded
+NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
+NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
+NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
+NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
+// draw circle with stride(bytes-per-row) parameter, set thickness -1 for filled circle, the color RGBA is little-endian encoded
+NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
+NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
+NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
+NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
+// draw circle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled circle, the color YUV_ is little-endian encoded
+NCNN_EXPORT void draw_circle_yuv420sp(unsigned char* yuv420sp, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
+// draw line, the color RGBA is little-endian encoded
+NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
+NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
+NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
+NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
+// draw line with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded
+NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
+NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
+NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
+NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
+// draw line, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded
+NCNN_EXPORT void draw_line_yuv420sp(unsigned char* yuv420sp, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
+// resolve text bounding box size
+NCNN_EXPORT void get_text_drawing_size(const char* text, int fontpixelsize, int* w, int* h);
+// draw ascii printables and newline, the color RGBA is little-endian encoded
+NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
+NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
+NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
+NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
+// draw ascii printables and newline with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded
+NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
+NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
+NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
+NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
+// draw ascii printables and newline, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded
+NCNN_EXPORT void draw_text_yuv420sp(unsigned char* yuv420sp, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
+#endif // NCNN_PIXEL_DRAWING
+
+// type conversion
+// convert float to half precision floating point
+NCNN_EXPORT unsigned short float32_to_float16(float value);
+// convert half precision floating point to float
+NCNN_EXPORT float float16_to_float32(unsigned short value);
+// convert float to brain half
+NCNN_EXPORT NCNN_FORCEINLINE unsigned short float32_to_bfloat16(float value)
+{
+    // 16 : 16
+    union
+    {
+        unsigned int u;
+        float f;
+    } tmp;
+    tmp.f = value;
+    return tmp.u >> 16;
+}
+// convert brain half to float
+NCNN_EXPORT NCNN_FORCEINLINE float bfloat16_to_float32(unsigned short value)
+{
+    // 16 : 16
+    union
+    {
+        unsigned int u;
+        float f;
+    } tmp;
+    tmp.u = value << 16;
+    return tmp.f;
+}
+
+// mat process
+enum BorderType
+{
+    BORDER_CONSTANT = 0,
+    BORDER_REPLICATE = 1,
+    BORDER_REFLECT = 2,
+    BORDER_TRANSPARENT = -233,
+};
+NCNN_EXPORT void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt = Option());
+NCNN_EXPORT void copy_make_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, int type, float v, const Option& opt = Option());
+NCNN_EXPORT void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt = Option());
+NCNN_EXPORT void copy_cut_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, const Option& opt = Option());
+NCNN_EXPORT void resize_nearest(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option());
+NCNN_EXPORT void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option());
+NCNN_EXPORT void resize_bicubic(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option());
+NCNN_EXPORT void convert_packing(const Mat& src, Mat& dst, int elempack, const Option& opt = Option());
+NCNN_EXPORT void flatten(const Mat& src, Mat& dst, const Option& opt = Option());
+NCNN_EXPORT void cast_float32_to_float16(const Mat& src, Mat& dst, const Option& opt = Option());
+NCNN_EXPORT void cast_float16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option());
+NCNN_EXPORT void cast_int8_to_float32(const Mat& src, Mat& dst, const Option& opt = Option());
+NCNN_EXPORT void cast_float32_to_bfloat16(const Mat& src, Mat& dst, const Option& opt = Option());
+NCNN_EXPORT void cast_bfloat16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option());
+NCNN_EXPORT void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_data, const Option& opt = Option());
+NCNN_EXPORT void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt = Option());
+NCNN_EXPORT void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt = Option());
+
+NCNN_FORCEINLINE Mat::Mat()
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, Allocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, Allocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _c, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, Allocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _d, _c, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, size_t _elemsize, int _elempack, Allocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _c, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _d, _c, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE Mat::Mat(const Mat& m)
+    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), d(m.d), c(m.c), cstep(m.cstep)
+{
+    addref();
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, Allocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
+{
+    cstep = w;
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, Allocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
+{
+    cstep = (size_t)w * h;
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, Allocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
+{
+    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, Allocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
+{
+    cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
+{
+    cstep = w;
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
+{
+    cstep = (size_t)w * h;
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
+{
+    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
+}
+
+NCNN_FORCEINLINE Mat::Mat(int _w, int _h, int _d, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
+{
+    cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
+}
+
+NCNN_FORCEINLINE Mat::~Mat()
+{
+    release();
+}
+
+NCNN_FORCEINLINE void Mat::fill(float _v)
+{
+    int size = (int)total();
+    float* ptr = (float*)data;
+
+    int i = 0;
+#if __ARM_NEON
+    float32x4_t _c = vdupq_n_f32(_v);
+    for (; i + 3 < size; i += 4)
+    {
+        vst1q_f32(ptr, _c);
+        ptr += 4;
+    }
+#endif // __ARM_NEON
+    for (; i < size; i++)
+    {
+        *ptr++ = _v;
+    }
+}
+
+NCNN_FORCEINLINE void Mat::fill(int _v)
+{
+    int size = (int)total();
+    int* ptr = (int*)data;
+
+    int i = 0;
+#if __ARM_NEON
+    int32x4_t _c = vdupq_n_s32(_v);
+    for (; i + 3 < size; i += 4)
+    {
+        vst1q_s32(ptr, _c);
+        ptr += 4;
+    }
+#endif // __ARM_NEON
+    for (; i < size; i++)
+    {
+        *ptr++ = _v;
+    }
+}
+
+#if __ARM_NEON
+NCNN_FORCEINLINE void Mat::fill(float32x4_t _v)
+{
+    int size = (int)total();
+    float* ptr = (float*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vst1q_f32(ptr, _v);
+        ptr += 4;
+    }
+}
+
+NCNN_FORCEINLINE void Mat::fill(uint16x4_t _v)
+{
+    int size = (int)total();
+    unsigned short* ptr = (unsigned short*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vst1_u16(ptr, _v);
+        ptr += 4;
+    }
+}
+
+NCNN_FORCEINLINE void Mat::fill(int32x4_t _v)
+{
+    int size = (int)total();
+    int* ptr = (int*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vst1q_s32(ptr, _v);
+        ptr += 4;
+    }
+}
+
+NCNN_FORCEINLINE void Mat::fill(int32x4_t _v0, int32x4_t _v1)
+{
+    int size = (int)total();
+    int* ptr = (int*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vst1q_s32(ptr, _v0);
+        vst1q_s32(ptr + 4, _v1);
+        ptr += 8;
+    }
+}
+#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+NCNN_FORCEINLINE void Mat::fill(float16x4_t _v)
+{
+    int size = (int)total();
+    __fp16* ptr = (__fp16*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vst1_f16(ptr, _v);
+        ptr += 4;
+    }
+}
+
+NCNN_FORCEINLINE void Mat::fill(float16x8_t _v)
+{
+    int size = (int)total();
+    __fp16* ptr = (__fp16*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vst1q_f16(ptr, _v);
+        ptr += 8;
+    }
+}
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#endif // __ARM_NEON
+
+#if __SSE2__
+#if __AVX__
+#if __AVX512F__
+NCNN_FORCEINLINE void Mat::fill(__m512 _v)
+{
+    int size = (int)total();
+    float* ptr = (float*)data;
+    for (int i = 0; i < size; i++)
+    {
+        _mm512_storeu_ps(ptr, _v);
+        ptr += 16;
+    }
+}
+#endif // __AVX512F__
+NCNN_FORCEINLINE void Mat::fill(__m256 _v, int _i)
+{
+    // old gcc cannot overload __m128 and __m256 type
+    // add a dummy int parameter for different mangled function symbol
+    (void)_i;
+    int size = (int)total();
+    float* ptr = (float*)data;
+    for (int i = 0; i < size; i++)
+    {
+        _mm256_storeu_ps(ptr, _v);
+        ptr += 8;
+    }
+}
+#endif // __AVX__
+NCNN_FORCEINLINE void Mat::fill(__m128 _v)
+{
+    int size = (int)total();
+    float* ptr = (float*)data;
+    for (int i = 0; i < size; i++)
+    {
+        _mm_storeu_ps(ptr, _v);
+        ptr += 4;
+    }
+}
+NCNN_FORCEINLINE void Mat::fill(__m128i _v)
+{
+    int size = (int)total();
+    unsigned short* ptr = (unsigned short*)data;
+    for (int i = 0; i < size; i++)
+    {
+        _mm_store_si128((__m128i*)ptr, _v);
+        ptr += 8;
+    }
+}
+#endif // __SSE2__
+
+#if __mips_msa
+NCNN_FORCEINLINE void Mat::fill(v4f32 _v)
+{
+    int size = (int)total();
+    float* ptr = (float*)data;
+    for (int i = 0; i < size; i++)
+    {
+        __msa_st_w((v4i32)_v, ptr, 0);
+        ptr += 4;
+    }
+}
+#endif // __mips_msa
+
+#if __loongarch_sx
+NCNN_FORCEINLINE void Mat::fill(__m128 _v)
+{
+    int size = (int)total();
+    float* ptr = (float*)data;
+    for (int i = 0; i < size; i++)
+    {
+        __lsx_vst(_v, ptr, 0);
+        ptr += 4;
+    }
+}
+#endif // __loongarch_sx
+#if __riscv_vector
+NCNN_FORCEINLINE void Mat::fill(vfloat32m1_t _v)
+{
+    const int packn = cpu_riscv_vlenb() / 4;
+    const size_t vl = vsetvl_e32m1(packn);
+
+    int size = (int)total();
+    float* ptr = (float*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vse32_v_f32m1(ptr, _v, vl);
+        ptr += packn;
+    }
+}
+
+NCNN_FORCEINLINE void Mat::fill(vuint16m1_t _v)
+{
+    const int packn = cpu_riscv_vlenb() / 2;
+    const size_t vl = vsetvl_e16m1(packn);
+
+    int size = (int)total();
+    unsigned short* ptr = (unsigned short*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vse16_v_u16m1(ptr, _v, vl);
+        ptr += packn;
+    }
+}
+
+NCNN_FORCEINLINE void Mat::fill(vint8m1_t _v)
+{
+    const int packn = cpu_riscv_vlenb() / 1;
+    const size_t vl = vsetvl_e8m1(packn);
+
+    int size = (int)total();
+    signed char* ptr = (signed char*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vse8_v_i8m1(ptr, _v, vl);
+        ptr += packn;
+    }
+}
+#if __riscv_zfh
+NCNN_FORCEINLINE void Mat::fill(vfloat16m1_t _v)
+{
+    const int packn = cpu_riscv_vlenb() / 2;
+    const size_t vl = vsetvl_e16m1(packn);
+
+    int size = (int)total();
+    __fp16* ptr = (__fp16*)data;
+    for (int i = 0; i < size; i++)
+    {
+        vse16_v_f16m1(ptr, _v, vl);
+        ptr += packn;
+    }
+}
+#endif // __riscv_zfh
+#endif // __riscv_vector
+
+template<typename T>
+NCNN_FORCEINLINE void Mat::fill(T _v)
+{
+    int size = (int)total();
+    T* ptr = (T*)data;
+    for (int i = 0; i < size; i++)
+    {
+        ptr[i] = _v;
+    }
+}
+
+NCNN_FORCEINLINE Mat& Mat::operator=(const Mat& m)
+{
+    if (this == &m)
+        return *this;
+
+    if (m.refcount)
+        NCNN_XADD(m.refcount, 1);
+
+    release();
+
+    data = m.data;
+    refcount = m.refcount;
+    elemsize = m.elemsize;
+    elempack = m.elempack;
+    allocator = m.allocator;
+
+    dims = m.dims;
+    w = m.w;
+    h = m.h;
+    d = m.d;
+    c = m.c;
+
+    cstep = m.cstep;
+
+    return *this;
+}
+
+NCNN_FORCEINLINE void Mat::addref()
+{
+    if (refcount)
+        NCNN_XADD(refcount, 1);
+}
+
+NCNN_FORCEINLINE void Mat::release()
+{
+    if (refcount && NCNN_XADD(refcount, -1) == 1)
+    {
+        if (allocator)
+            allocator->fastFree(data);
+        else
+            fastFree(data);
+    }
+
+    data = 0;
+
+    elemsize = 0;
+    elempack = 0;
+
+    dims = 0;
+    w = 0;
+    h = 0;
+    d = 0;
+    c = 0;
+
+    cstep = 0;
+
+    refcount = 0;
+}
+
+NCNN_FORCEINLINE bool Mat::empty() const
+{
+    return data == 0 || total() == 0;
+}
+
+NCNN_FORCEINLINE size_t Mat::total() const
+{
+    return cstep * c;
+}
+
+NCNN_FORCEINLINE int Mat::elembits() const
+{
+    return elempack ? static_cast<int>(elemsize * 8) / elempack : 0;
+}
+
+NCNN_FORCEINLINE Mat Mat::shape() const
+{
+    if (dims == 1)
+        return Mat(w * elempack, (void*)0);
+    if (dims == 2)
+        return Mat(w, h * elempack, (void*)0);
+    if (dims == 3)
+        return Mat(w, h, c * elempack, (void*)0);
+    if (dims == 4)
+        return Mat(w, h, d, c * elempack, (void*)0);
+
+    return Mat();
+}
+
+NCNN_FORCEINLINE Mat Mat::channel(int _c)
+{
+    Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
+    m.dims = dims - 1;
+    if (dims == 4)
+        m.cstep = (size_t)w * h;
+    return m;
+}
+
+NCNN_FORCEINLINE const Mat Mat::channel(int _c) const
+{
+    Mat m(w, h, d, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
+    m.dims = dims - 1;
+    if (dims == 4)
+        m.cstep = (size_t)w * h;
+    return m;
+}
+
+NCNN_FORCEINLINE Mat Mat::depth(int z)
+{
+    return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
+}
+
+NCNN_FORCEINLINE const Mat Mat::depth(int z) const
+{
+    return Mat(w, h, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
+}
+
+NCNN_FORCEINLINE float* Mat::row(int y)
+{
+    return (float*)((unsigned char*)data + (size_t)w * y * elemsize);
+}
+
+NCNN_FORCEINLINE const float* Mat::row(int y) const
+{
+    return (const float*)((unsigned char*)data + (size_t)w * y * elemsize);
+}
+
+template<typename T>
+NCNN_FORCEINLINE T* Mat::row(int y)
+{
+    return (T*)((unsigned char*)data + (size_t)w * y * elemsize);
+}
+
+template<typename T>
+NCNN_FORCEINLINE const T* Mat::row(int y) const
+{
+    return (const T*)((unsigned char*)data + (size_t)w * y * elemsize);
+}
+
+NCNN_FORCEINLINE Mat Mat::channel_range(int _c, int channels)
+{
+    Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
+    m.dims = dims;
+    return m;
+}
+
+NCNN_FORCEINLINE const Mat Mat::channel_range(int _c, int channels) const
+{
+    Mat m(w, h, d, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
+    m.dims = dims;
+    return m;
+}
+
+NCNN_FORCEINLINE Mat Mat::depth_range(int z, int depths)
+{
+    Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
+    m.cstep = (size_t)w * h;
+    return m;
+}
+
+NCNN_FORCEINLINE const Mat Mat::depth_range(int z, int depths) const
+{
+    Mat m(w, h, depths, (unsigned char*)data + (size_t)w * h * z * elemsize, elemsize, elempack, allocator);
+    m.cstep = (size_t)w * h;
+    return m;
+}
+
+NCNN_FORCEINLINE Mat Mat::row_range(int y, int rows)
+{
+    return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator);
+}
+
+NCNN_FORCEINLINE const Mat Mat::row_range(int y, int rows) const
+{
+    return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator);
+}
+
+NCNN_FORCEINLINE Mat Mat::range(int x, int n)
+{
+    return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator);
+}
+
+NCNN_FORCEINLINE const Mat Mat::range(int x, int n) const
+{
+    return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator);
+}
+
+template<typename T>
+NCNN_FORCEINLINE Mat::operator T*()
+{
+    return (T*)data;
+}
+
+template<typename T>
+NCNN_FORCEINLINE Mat::operator const T*() const
+{
+    return (const T*)data;
+}
+
+NCNN_FORCEINLINE float& Mat::operator[](size_t i)
+{
+    return ((float*)data)[i];
+}
+
+NCNN_FORCEINLINE const float& Mat::operator[](size_t i) const
+{
+    return ((const float*)data)[i];
+}
+
+#if NCNN_VULKAN
+
+NCNN_FORCEINLINE VkMat::VkMat()
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _c, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _d, _c, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _c, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0), cstep(0)
+{
+    create(_w, _h, _d, _c, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(const VkMat& m)
+    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), d(m.d), c(m.c)
+{
+    addref();
+
+    cstep = m.cstep;
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
+{
+    cstep = w;
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
+{
+    cstep = w * h;
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
+{
+    cstep = alignSize(w * h * elemsize, 16) / elemsize;
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
+{
+    cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
+{
+    cstep = w;
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
+{
+    cstep = w * h;
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
+{
+    cstep = alignSize(w * h * elemsize, 16) / elemsize;
+}
+
+NCNN_FORCEINLINE VkMat::VkMat(int _w, int _h, int _d, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
+{
+    cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
+}
+
+NCNN_FORCEINLINE VkMat::~VkMat()
+{
+    release();
+}
+
+NCNN_FORCEINLINE VkMat& VkMat::operator=(const VkMat& m)
+{
+    if (this == &m)
+        return *this;
+
+    if (m.refcount)
+        NCNN_XADD(m.refcount, 1);
+
+    release();
+
+    data = m.data;
+    refcount = m.refcount;
+    elemsize = m.elemsize;
+    elempack = m.elempack;
+    allocator = m.allocator;
+
+    dims = m.dims;
+    w = m.w;
+    h = m.h;
+    d = m.d;
+    c = m.c;
+
+    cstep = m.cstep;
+
+    return *this;
+}
+
+NCNN_FORCEINLINE Mat VkMat::mapped() const
+{
+    if (!allocator->mappable)
+        return Mat();
+
+    if (dims == 1)
+        return Mat(w, mapped_ptr(), elemsize, elempack, 0);
+
+    if (dims == 2)
+        return Mat(w, h, mapped_ptr(), elemsize, elempack, 0);
+
+    if (dims == 3)
+        return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0);
+
+    if (dims == 4)
+        return Mat(w, h, d, c, mapped_ptr(), elemsize, elempack, 0);
+
+    return Mat();
+}
+
+NCNN_FORCEINLINE void* VkMat::mapped_ptr() const
+{
+    if (!allocator->mappable)
+        return 0;
+
+    return (unsigned char*)data->mapped_ptr + data->offset;
+}
+
+NCNN_FORCEINLINE void VkMat::addref()
+{
+    if (refcount)
+        NCNN_XADD(refcount, 1);
+}
+
+NCNN_FORCEINLINE void VkMat::release()
+{
+    if (refcount && NCNN_XADD(refcount, -1) == 1)
+    {
+        if (allocator && data)
+        {
+            allocator->fastFree(data);
+        }
+    }
+
+    data = 0;
+
+    elemsize = 0;
+    elempack = 0;
+
+    dims = 0;
+    w = 0;
+    h = 0;
+    d = 0;
+    c = 0;
+
+    cstep = 0;
+
+    refcount = 0;
+}
+
+NCNN_FORCEINLINE bool VkMat::empty() const
+{
+    return data == 0 || total() == 0;
+}
+
+NCNN_FORCEINLINE size_t VkMat::total() const
+{
+    return cstep * c;
+}
+
+NCNN_FORCEINLINE int VkMat::elembits() const
+{
+    return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0;
+}
+
+NCNN_FORCEINLINE Mat VkMat::shape() const
+{
+    if (dims == 1)
+        return Mat(w * elempack, (void*)0);
+    if (dims == 2)
+        return Mat(w, h * elempack, (void*)0);
+    if (dims == 3)
+        return Mat(w, h, c * elempack, (void*)0);
+    if (dims == 4)
+        return Mat(w, h, d, c * elempack, (void*)0);
+
+    return Mat();
+}
+
+NCNN_FORCEINLINE VkBuffer VkMat::buffer() const
+{
+    return data->buffer;
+}
+
+NCNN_FORCEINLINE size_t VkMat::buffer_offset() const
+{
+    return data->offset;
+}
+
+NCNN_FORCEINLINE size_t VkMat::buffer_capacity() const
+{
+    return data->capacity;
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat()
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _h, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _h, _c, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _h, _d, _c, _elemsize, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _h, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _h, _c, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), d(0), c(0)
+{
+    create(_w, _h, _d, _c, _elemsize, _elempack, _allocator);
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(const VkImageMat& m)
+    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), d(m.d), c(m.c)
+{
+    addref();
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
+{
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
+{
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
+{
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
+{
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), d(1), c(1)
+{
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), d(1), c(1)
+{
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), d(1), c(_c)
+{
+}
+
+NCNN_FORCEINLINE VkImageMat::VkImageMat(int _w, int _h, int _d, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
+    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(4), w(_w), h(_h), d(_d), c(_c)
+{
+}
+
+NCNN_FORCEINLINE VkImageMat::~VkImageMat()
+{
+    release();
+}
+
+NCNN_FORCEINLINE VkImageMat& VkImageMat::operator=(const VkImageMat& m)
+{
+    if (this == &m)
+        return *this;
+
+    if (m.refcount)
+        NCNN_XADD(m.refcount, 1);
+
+    release();
+
+    data = m.data;
+    refcount = m.refcount;
+    elemsize = m.elemsize;
+    elempack = m.elempack;
+    allocator = m.allocator;
+
+    dims = m.dims;
+    w = m.w;
+    h = m.h;
+    d = m.d;
+    c = m.c;
+
+    return *this;
+}
+
+NCNN_FORCEINLINE Mat VkImageMat::mapped() const
+{
+    if (!allocator->mappable || !data->mapped_ptr)
+        return Mat();
+
+    if (dims == 1)
+        return Mat(w, mapped_ptr(), elemsize, elempack, 0);
+
+    if (dims == 2)
+        return Mat(w, h, mapped_ptr(), elemsize, elempack, 0);
+
+    if (dims == 3)
+        return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0);
+
+    if (dims == 4)
+        return Mat(w, h, d, c, mapped_ptr(), elemsize, elempack, 0);
+
+    return Mat();
+}
+
+NCNN_FORCEINLINE void* VkImageMat::mapped_ptr() const
+{
+    if (!allocator->mappable || !data->mapped_ptr)
+        return 0;
+
+    return (unsigned char*)data->mapped_ptr + data->bind_offset;
+}
+
+NCNN_FORCEINLINE void VkImageMat::addref()
+{
+    if (refcount)
+        NCNN_XADD(refcount, 1);
+}
+
+NCNN_FORCEINLINE void VkImageMat::release()
+{
+    if (refcount && NCNN_XADD(refcount, -1) == 1)
+    {
+        if (allocator && data)
+        {
+            allocator->fastFree(data);
+        }
+    }
+
+    data = 0;
+
+    elemsize = 0;
+    elempack = 0;
+
+    dims = 0;
+    w = 0;
+    h = 0;
+    d = 0;
+    c = 0;
+
+    refcount = 0;
+}
+
+NCNN_FORCEINLINE bool VkImageMat::empty() const
+{
+    return data == 0 || total() == 0;
+}
+
+NCNN_FORCEINLINE size_t VkImageMat::total() const
+{
+    return w * h * d * c;
+}
+
+NCNN_FORCEINLINE int VkImageMat::elembits() const
+{
+    return elempack ? static_cast<int>(elemsize) * 8 / elempack : 0;
+}
+
+NCNN_FORCEINLINE Mat VkImageMat::shape() const
+{
+    if (dims == 1)
+        return Mat(w * elempack, (void*)0);
+    if (dims == 2)
+        return Mat(w, h * elempack, (void*)0);
+    if (dims == 3)
+        return Mat(w, h, c * elempack, (void*)0);
+    if (dims == 4)
+        return Mat(w, h, d, c * elempack, (void*)0);
+
+    return Mat();
+}
+
+NCNN_FORCEINLINE VkImage VkImageMat::image() const
+{
+    return data->image;
+}
+
+NCNN_FORCEINLINE VkImageView VkImageMat::imageview() const
+{
+    return data->imageview;
+}
+
+#endif // NCNN_VULKAN
+
+} // namespace ncnn
+
+#endif // NCNN_MAT_H
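As a quick orientation before the next header, here is a minimal sketch of how the pixel-drawing helpers and Mat declared in mat.h above are typically combined. The canvas size, coordinates, and text are illustrative, and the include path assumes this repo's linux/include layout; colors follow the "RGBA is little-endian encoded" convention from the comments above.

#include <vector>
#include "ncnn/mat.h" // Mat, draw_* helpers from the header above

int main()
{
    const int w = 256;
    const int h = 256;
    std::vector<unsigned char> pixels(w * h * 3, 0); // black RGB canvas

    // little-endian RGBA packing: lowest byte is R, so this is pure green
    const unsigned int green = 0x0000ff00;

    ncnn::draw_rectangle_c3(pixels.data(), w, h, 32, 32, 128, 64, green, 2); // 2px outline
    ncnn::draw_text_c3(pixels.data(), w, h, "hello ncnn", 40, 110, 16, green);

    // wrap the same bytes as a float Mat when feeding a network
    // (from_pixels is declared in the earlier part of mat.h)
    ncnn::Mat in = ncnn::Mat::from_pixels(pixels.data(), ncnn::Mat::PIXEL_RGB, w, h);
    return in.empty() ? 1 : 0;
}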
diff --git a/linux/include/ncnn/modelbin.h b/linux/include/ncnn/modelbin.h
new file mode 100644
index 0000000..aada5f6
--- /dev/null
+++ b/linux/include/ncnn/modelbin.h
@@ -0,0 +1,80 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_MODELBIN_H
+#define NCNN_MODELBIN_H
+
+#include "mat.h"
+
+namespace ncnn {
+
+class DataReader;
+class NCNN_EXPORT ModelBin
+{
+public:
+    ModelBin();
+    virtual ~ModelBin();
+    // element type
+    // 0 = auto
+    // 1 = float32
+    // 2 = float16
+    // 3 = int8
+    // load vec
+    virtual Mat load(int w, int type) const;
+    // load image
+    virtual Mat load(int w, int h, int type) const;
+    // load dim
+    virtual Mat load(int w, int h, int c, int type) const;
+    // load cube
+    virtual Mat load(int w, int h, int d, int c, int type) const;
+};
+
+class ModelBinFromDataReaderPrivate;
+class NCNN_EXPORT ModelBinFromDataReader : public ModelBin
+{
+public:
+    explicit ModelBinFromDataReader(const DataReader& dr);
+    virtual ~ModelBinFromDataReader();
+
+    virtual Mat load(int w, int type) const;
+
+private:
+    ModelBinFromDataReader(const ModelBinFromDataReader&);
+    ModelBinFromDataReader& operator=(const ModelBinFromDataReader&);
+
+private:
+    ModelBinFromDataReaderPrivate* const d;
+};
+
+class ModelBinFromMatArrayPrivate;
+class NCNN_EXPORT ModelBinFromMatArray : public ModelBin
+{
+public:
+    // construct from weight blob array
+    explicit ModelBinFromMatArray(const Mat* weights);
+    virtual ~ModelBinFromMatArray();
+
+    virtual Mat load(int w, int type) const;
+
+private:
+    ModelBinFromMatArray(const ModelBinFromMatArray&);
+    ModelBinFromMatArray& operator=(const ModelBinFromMatArray&);
+
+private:
+    ModelBinFromMatArrayPrivate* const d;
+};
+
+} // namespace ncnn
+
+#endif // NCNN_MODELBIN_H
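ModelBin is normally driven by Net internally while parsing model files, but ModelBinFromMatArray is handy when exercising a single layer with in-memory weights. A hedged sketch only; the weight shape and fill value are illustrative, and the consuming layer is omitted:

#include "ncnn/mat.h"
#include "ncnn/modelbin.h"

void make_weights()
{
    // one 3x3 float kernel, flattened to 9 elements
    ncnn::Mat weights[1];
    weights[0].create(9);
    weights[0].fill(0.1f);

    // load() hands the mats out in array order; type 0 means auto
    ncnn::ModelBinFromMatArray mb(weights);
    ncnn::Mat w0 = mb.load(9, 0);
}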
diff --git a/linux/include/ncnn/ncnn_export.h b/linux/include/ncnn/ncnn_export.h
new file mode 100644
index 0000000..e2f5fde
--- /dev/null
+++ b/linux/include/ncnn/ncnn_export.h
@@ -0,0 +1,42 @@
+
+#ifndef NCNN_EXPORT_H
+#define NCNN_EXPORT_H
+
+#ifdef NCNN_STATIC_DEFINE
+#  define NCNN_EXPORT
+#  define NCNN_NO_EXPORT
+#else
+#  ifndef NCNN_EXPORT
+#    ifdef ncnn_EXPORTS
+        /* We are building this library */
+#      define NCNN_EXPORT __attribute__((visibility("default")))
+#    else
+        /* We are using this library */
+#      define NCNN_EXPORT __attribute__((visibility("default")))
+#    endif
+#  endif
+
+#  ifndef NCNN_NO_EXPORT
+#    define NCNN_NO_EXPORT __attribute__((visibility("hidden")))
+#  endif
+#endif
+
+#ifndef NCNN_DEPRECATED
+#  define NCNN_DEPRECATED __attribute__ ((__deprecated__))
+#endif
+
+#ifndef NCNN_DEPRECATED_EXPORT
+#  define NCNN_DEPRECATED_EXPORT NCNN_EXPORT NCNN_DEPRECATED
+#endif
+
+#ifndef NCNN_DEPRECATED_NO_EXPORT
+#  define NCNN_DEPRECATED_NO_EXPORT NCNN_NO_EXPORT NCNN_DEPRECATED
+#endif
+
+#if 0 /* DEFINE_NO_DEPRECATED */
+#  ifndef NCNN_NO_DEPRECATED
+#    define NCNN_NO_DEPRECATED
+#  endif
+#endif
+
+#endif /* NCNN_EXPORT_H */
diff --git a/linux/include/ncnn/net.h b/linux/include/ncnn/net.h
new file mode 100644
index 0000000..98e3ec3
--- /dev/null
+++ b/linux/include/ncnn/net.h
@@ -0,0 +1,274 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_NET_H
+#define NCNN_NET_H
+
+#include "blob.h"
+#include "layer.h"
+#include "mat.h"
+#include "option.h"
+#include "platform.h"
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 9
+#include <android/asset_manager.h>
+#endif // __ANDROID_API__ >= 9
+#endif // NCNN_PLATFORM_API
+
+namespace ncnn {
+
+#if NCNN_VULKAN
+class VkCompute;
+#endif // NCNN_VULKAN
+class DataReader;
+class Extractor;
+class NetPrivate;
+class NCNN_EXPORT Net
+{
+public:
+    // empty init
+    Net();
+    // clear and destroy
+    virtual ~Net();
+
+public:
+    // option can be changed before loading
+    Option opt;
+
+#if NCNN_VULKAN
+    // set gpu device by index
+    void set_vulkan_device(int device_index);
+
+    // set gpu device by device handle, no owner transfer
+    void set_vulkan_device(const VulkanDevice* vkdev);
+
+    const VulkanDevice* vulkan_device() const;
+#endif // NCNN_VULKAN
+
+#if NCNN_STRING
+    // register custom layer or overwrite built-in layer by layer type name
+    // return 0 if success
+    int register_custom_layer(const char* type, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0);
+    virtual int custom_layer_to_index(const char* type);
+#endif // NCNN_STRING
+    // register custom layer or overwrite built-in layer by layer type
+    // return 0 if success
+    int register_custom_layer(int index, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0);
+
+#if NCNN_STRING
+    int load_param(const DataReader& dr);
+#endif // NCNN_STRING
+
+    int load_param_bin(const DataReader& dr);
+
+    int load_model(const DataReader& dr);
+
+#if NCNN_STDIO
+#if NCNN_STRING
+    // load network structure from plain param file
+    // return 0 if success
+    int load_param(FILE* fp);
+    int load_param(const char* protopath);
+    int load_param_mem(const char* mem);
+#endif // NCNN_STRING
+    // load network structure from binary param file
+    // return 0 if success
+    int load_param_bin(FILE* fp);
+    int load_param_bin(const char* protopath);
+
+    // load network weight data from model file
+    // return 0 if success
+    int load_model(FILE* fp);
+    int load_model(const char* modelpath);
+#endif // NCNN_STDIO
+
+    // load network structure from external memory
+    // memory pointer must be 32-bit aligned
+    // return bytes consumed
+    int load_param(const unsigned char* mem);
+
+    // reference network weight data from external memory
+    // weight data is not copied but referenced
+    // so external memory should be retained when used
+    // memory pointer must be 32-bit aligned
+    // return bytes consumed
+    int load_model(const unsigned char* mem);
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 9
+#if NCNN_STRING
+    // convenient load network structure from android asset plain param file
+    int load_param(AAsset* asset);
+    int load_param(AAssetManager* mgr, const char* assetpath);
+#endif // NCNN_STRING
+    // convenient load network structure from android asset binary param file
+    int load_param_bin(AAsset* asset);
+    int load_param_bin(AAssetManager* mgr, const char* assetpath);
+
+    // convenient load network weight data from android asset model file
+    int load_model(AAsset* asset);
+    int load_model(AAssetManager* mgr, const char* assetpath);
+#endif // __ANDROID_API__ >= 9
+#endif // NCNN_PLATFORM_API
+
+    // unload network structure and weight data
+    void clear();
+
+    // construct an Extractor from network
+    Extractor create_extractor() const;
+
+    // get input/output indexes/names
+    const std::vector<int>& input_indexes() const;
+    const std::vector<int>& output_indexes() const;
+#if NCNN_STRING
+    const std::vector<const char*>& input_names() const;
+    const std::vector<const char*>& output_names() const;
+#endif
+
+    const std::vector<Blob>& blobs() const;
+    const std::vector<Layer*>& layers() const;
+
+    std::vector<Blob>& mutable_blobs();
+    std::vector<Layer*>& mutable_layers();
+
+protected:
+    friend class Extractor;
+#if NCNN_STRING
+    int find_blob_index_by_name(const char* name) const;
+    int find_layer_index_by_name(const char* name) const;
+    virtual Layer* create_custom_layer(const char* type);
+    virtual Layer* create_overwrite_builtin_layer(const char* type);
+#endif // NCNN_STRING
+    virtual Layer* create_custom_layer(int index);
+    virtual Layer* create_overwrite_builtin_layer(int typeindex);
+
+private:
+    Net(const Net&);
+    Net& operator=(const Net&);
+
+private:
+    NetPrivate* const d;
+};
+
+class ExtractorPrivate;
+class NCNN_EXPORT Extractor
+{
+public:
+    virtual ~Extractor();
+
+    // copy
+    Extractor(const Extractor&);
+
+    // assign
+    Extractor& operator=(const Extractor&);
+
+    // clear blob mats and allocators
+    void clear();
+
+    // enable light mode
+    // intermediate blob will be recycled when enabled
+    // enabled by default
+    void set_light_mode(bool enable);
+
+    // set thread count for this extractor
+    // this will overwrite the global setting
+    // default count is system dependent
+    void set_num_threads(int num_threads);
+
+    // set blob memory allocator
+    void set_blob_allocator(Allocator* allocator);
+
+    // set workspace memory allocator
+    void set_workspace_allocator(Allocator* allocator);
+
+#if NCNN_VULKAN
+    void set_vulkan_compute(bool enable);
+
+    void set_blob_vkallocator(VkAllocator* allocator);
+
+    void set_workspace_vkallocator(VkAllocator* allocator);
+
+    void set_staging_vkallocator(VkAllocator* allocator);
+#endif // NCNN_VULKAN
+
+#if NCNN_STRING
+    // set input by blob name
+    // return 0 if success
+    int input(const char* blob_name, const Mat& in);
+
+    // get result by blob name
+    // return 0 if success
+    // type = 0, default
+    // type = 1, do not convert fp16/bf16 and/or packing
+    int extract(const char* blob_name, Mat& feat, int type = 0);
+#endif // NCNN_STRING
+
+    // set input by blob index
+    // return 0 if success
+    int input(int blob_index, const Mat& in);
+
+    // get result by blob index
+    // return 0 if success
+    // type = 0, default
+    // type = 1, do not convert fp16/bf16 and/or packing
+    int extract(int blob_index, Mat& feat, int type = 0);
+
+#if NCNN_VULKAN
+#if NCNN_STRING
+    // set input by blob name
+    // return 0 if success
+    int input(const char* blob_name, const VkMat& in);
+
+    // get result by blob name
+    // return 0 if success
+    int extract(const char* blob_name, VkMat& feat, VkCompute& cmd);
+
+    // set input by blob name
+    // return 0 if success
+    int input(const char* blob_name, const VkImageMat& in);
+
+    // get result by blob name
+    // return 0 if success
+    int extract(const char* blob_name, VkImageMat& feat, VkCompute& cmd);
+#endif // NCNN_STRING
+
+    // set input by blob index
+    // return 0 if success
+    int input(int blob_index, const VkMat& in);
+
+    // get result by blob index
+    // return 0 if success
+    int extract(int blob_index, VkMat& feat, VkCompute& cmd);
+
+    // set input by blob index
+    // return 0 if success
+    int input(int blob_index, const VkImageMat& in);
+
+    // get result by blob index
+    // return 0 if success
+    int extract(int blob_index, VkImageMat& feat, VkCompute& cmd);
+#endif // NCNN_VULKAN
+
+protected:
+    friend Extractor Net::create_extractor() const;
+    Extractor(const Net* net, size_t blob_count);
+
+private:
+    ExtractorPrivate* const d;
+};
+
+} // namespace ncnn
+
+#endif // NCNN_NET_H
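net.h above is the primary entry point of the library, so a short sketch of the canonical load-and-run flow may help. The file and blob names ("model.param", "data", "output") are placeholders that depend on the actual model files used with this project:

#include "ncnn/net.h"

int run_inference(const ncnn::Mat& in, ncnn::Mat& out)
{
    ncnn::Net net;
    net.opt.num_threads = 4; // Option must be configured before loading

    // both loaders return 0 on success
    if (net.load_param("model.param"))
        return -1;
    if (net.load_model("model.bin"))
        return -1;

    ncnn::Extractor ex = net.create_extractor();
    ex.input("data", in);             // blob name from the .param file
    return ex.extract("output", out); // runs the graph up to that blob
}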
diff --git a/linux/include/ncnn/option.h b/linux/include/ncnn/option.h
new file mode 100644
index 0000000..7d0cc60
--- /dev/null
+++ b/linux/include/ncnn/option.h
@@ -0,0 +1,156 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_OPTION_H
+#define NCNN_OPTION_H
+
+#include "platform.h"
+
+namespace ncnn {
+
+#if NCNN_VULKAN
+class VkAllocator;
+class PipelineCache;
+#endif // NCNN_VULKAN
+
+class Allocator;
+class NCNN_EXPORT Option
+{
+public:
+    // default option
+    Option();
+
+public:
+    // light mode
+    // intermediate blob will be recycled when enabled
+    // enabled by default
+    bool lightmode;
+
+    // thread count
+    // default value is the one returned by get_cpu_count()
+    int num_threads;
+
+    // blob memory allocator
+    Allocator* blob_allocator;
+
+    // workspace memory allocator
+    Allocator* workspace_allocator;
+
+#if NCNN_VULKAN
+    // blob memory allocator
+    VkAllocator* blob_vkallocator;
+
+    // workspace memory allocator
+    VkAllocator* workspace_vkallocator;
+
+    // staging memory allocator
+    VkAllocator* staging_vkallocator;
+
+    // pipeline cache
+    PipelineCache* pipeline_cache;
+#endif // NCNN_VULKAN
+
+    // the time openmp threads busy-wait for more work before going to sleep
+    // default value is 20ms to keep the cores enabled
+    // without too much extra power consumption afterwards
+    int openmp_blocktime;
+
+    // enable winograd convolution optimization
+    // improve convolution 3x3 stride1 performance, may consume more memory
+    // changes should be applied before loading network structure and weight
+    // enabled by default
+    bool use_winograd_convolution;
+
+    // enable sgemm convolution optimization
+    // improve convolution 1x1 stride1 performance, may consume more memory
+    // changes should be applied before loading network structure and weight
+    // enabled by default
+    bool use_sgemm_convolution;
+
+    // enable quantized int8 inference
+    // use low-precision int8 path for quantized model
+    // changes should be applied before loading network structure and weight
+    // enabled by default
+    bool use_int8_inference;
+
+    // enable vulkan compute
+    bool use_vulkan_compute;
+
+    // enable bf16 data type for storage
+    // improve most operator performance on all arm devices, may consume more memory
+    bool use_bf16_storage;
+
+    // enable options for gpu inference
+    bool use_fp16_packed;
+    bool use_fp16_storage;
+    bool use_fp16_arithmetic;
+    bool use_int8_packed;
+    bool use_int8_storage;
+    bool use_int8_arithmetic;
+
+    // enable simd-friendly packed memory layout
+    // improve all operator performance on all arm devices, will consume more memory
+    // changes should be applied before loading network structure and weight
+    // enabled by default
+    bool use_packing_layout;
+
+    bool use_shader_pack8;
+
+    // subgroup option
+    bool use_subgroup_basic;
+    bool use_subgroup_vote;
+    bool use_subgroup_ballot;
+    bool use_subgroup_shuffle;
+
+    // turn on for adreno
+    bool use_image_storage;
+    bool use_tensor_storage;
+
+    bool use_reserved_0;
+
+    // enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero)
+    // default value is 3
+    // 0 = DAZ OFF, FTZ OFF
+    // 1 = DAZ ON , FTZ OFF
+    // 2 = DAZ OFF, FTZ ON
+    // 3 = DAZ ON, FTZ ON
+    int flush_denormals;
+
+    bool use_local_pool_allocator;
+
+    // enable local memory optimization for gpu inference
+    bool use_shader_local_memory;
+
+    // enable cooperative matrix optimization for gpu inference
+    bool use_cooperative_matrix;
+
+    // more fine-grained control of winograd convolution
+    bool use_winograd23_convolution;
+    bool use_winograd43_convolution;
+    bool use_winograd63_convolution;
+
+    // this option is turned on for A53/A55 automatically
+    // but you can force this on/off if you wish
+    bool use_a53_a55_optimized_kernel;
+
+    bool use_reserved_7;
+    bool use_reserved_8;
+    bool use_reserved_9;
+    bool use_reserved_10;
+    bool use_reserved_11;
+};
+
+} // namespace ncnn
+
+#endif // NCNN_OPTION_H
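The Option fields above are plain flags consulted at load and inference time. A brief sketch of typical adjustments, all applied before load_param/load_model as the field comments require; which knobs help depends on the target hardware:

#include "ncnn/net.h"

void configure(ncnn::Net& net)
{
    net.opt.num_threads = 2;
    net.opt.use_vulkan_compute = true;        // needs a Vulkan-capable GPU
    net.opt.use_fp16_storage = true;          // trade precision for bandwidth
    net.opt.use_winograd_convolution = false; // less memory, slower 3x3 conv
}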
diff --git a/linux/include/ncnn/paramdict.h b/linux/include/ncnn/paramdict.h
new file mode 100644
index 0000000..c2ef160
--- /dev/null
+++ b/linux/include/ncnn/paramdict.h
@@ -0,0 +1,73 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_PARAMDICT_H
+#define NCNN_PARAMDICT_H
+
+#include "mat.h"
+
+// at most 32 parameters
+#define NCNN_MAX_PARAM_COUNT 32
+
+namespace ncnn {
+
+class DataReader;
+class Net;
+class ParamDictPrivate;
+class NCNN_EXPORT ParamDict
+{
+public:
+    // empty
+    ParamDict();
+
+    virtual ~ParamDict();
+
+    // copy
+    ParamDict(const ParamDict&);
+
+    // assign
+    ParamDict& operator=(const ParamDict&);
+
+    // get type
+    int type(int id) const;
+
+    // get int
+    int get(int id, int def) const;
+    // get float
+    float get(int id, float def) const;
+    // get array
+    Mat get(int id, const Mat& def) const;
+
+    // set int
+    void set(int id, int i);
+    // set float
+    void set(int id, float f);
+    // set array
+    void set(int id, const Mat& v);
+
+protected:
+    friend class Net;
+
+    void clear();
+
+    int load_param(const DataReader& dr);
+    int load_param_bin(const DataReader& dr);
+
+private:
+    ParamDictPrivate* const d;
+};
+
+} // namespace ncnn
+
+#endif // NCNN_PARAMDICT_H
diff --git a/linux/include/ncnn/pipeline.h b/linux/include/ncnn/pipeline.h
new file mode 100644
index 0000000..c284a14
--- /dev/null
+++ b/linux/include/ncnn/pipeline.h
@@ -0,0 +1,113 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_PIPELINE_H
+#define NCNN_PIPELINE_H
+
+#include "mat.h"
+#include "platform.h"
+#if NCNN_VULKAN
+#include "gpu.h"
+
+#include <vulkan/vulkan.h>
+#endif // NCNN_VULKAN
+
+namespace ncnn {
+
+#if NCNN_VULKAN
+class Option;
+class PipelinePrivate;
+class NCNN_EXPORT Pipeline
+{
+public:
+    explicit Pipeline(const VulkanDevice* vkdev);
+    virtual ~Pipeline();
+
+public:
+    void set_optimal_local_size_xyz(int w = 4, int h = 4, int c = 4);
+    void set_optimal_local_size_xyz(const Mat& local_size_xyz);
+    void set_local_size_xyz(int w, int h, int c);
+
+    int create(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations);
+
+    int create(int shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations);
+
+public:
+    VkShaderModule shader_module() const;
+    VkDescriptorSetLayout descriptorset_layout() const;
+    VkPipelineLayout pipeline_layout() const;
+    VkPipeline pipeline() const;
+    VkDescriptorUpdateTemplateKHR descriptor_update_template() const;
+
+    const ShaderInfo& shader_info() const;
+
+    uint32_t local_size_x() const;
+    uint32_t local_size_y() const;
+    uint32_t local_size_z() const;
+
+protected:
+    void set_shader_module(VkShaderModule shader_module);
+    void set_descriptorset_layout(VkDescriptorSetLayout descriptorset_layout);
+    void set_pipeline_layout(VkPipelineLayout pipeline_layout);
+    void set_pipeline(VkPipeline pipeline);
+    void set_descriptor_update_template(VkDescriptorUpdateTemplateKHR descriptor_update_template);
+
+    void set_shader_info(const ShaderInfo& shader_info);
+
+public:
+    const VulkanDevice* vkdev;
+
+private:
+    Pipeline(const Pipeline&);
+    Pipeline& operator=(const Pipeline&);
+
+private:
+    PipelinePrivate* const d;
+};
+
+#if NCNN_PLATFORM_API
+#if __ANDROID_API__ >= 26
+class VkCompute;
+class NCNN_EXPORT ImportAndroidHardwareBufferPipeline : private Pipeline
+{
+public:
+    explicit ImportAndroidHardwareBufferPipeline(const VulkanDevice* vkdev);
+    virtual ~ImportAndroidHardwareBufferPipeline();
+
+    int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, const Option& opt);
+    int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, int target_width, int target_height, const Option& opt);
+    void destroy();
+
+    friend class VkCompute;
+
+protected:
+    int create_shader_module(const Option& opt);
+    int create_sampler(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator);
+    int create_descriptorset_layout();
+
+public:
+    int type_to;
+    int rotate_from;
+    bool need_resize;
+
+    VkSampler sampler;
+};
+#endif // __ANDROID_API__ >= 26
+#endif // NCNN_PLATFORM_API
+
+#endif // NCNN_VULKAN
+
+} // namespace ncnn
+
+#endif // NCNN_PIPELINE_H
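Pipeline objects are normally created inside the GPU layer implementations, so the following is only a rough sketch of the call order in a Vulkan-enabled build; get_gpu_device() comes from gpu.h, and shader_type_index plus the specialization values are placeholders:

#include <vector>
#include "ncnn/gpu.h"
#include "ncnn/pipeline.h"

void build(int shader_type_index, const ncnn::Option& opt)
{
    const ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device(0);

    ncnn::Pipeline pipeline(vkdev);
    pipeline.set_optimal_local_size_xyz(8, 8, 1);

    // one integer specialization constant, value 0 (illustrative)
    std::vector<ncnn::vk_specialization_type> specializations(1);
    specializations[0].i = 0;

    pipeline.create(shader_type_index, opt, specializations);
}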
diff --git a/linux/include/ncnn/pipelinecache.h b/linux/include/ncnn/pipelinecache.h
new file mode 100644
index 0000000..bb6b8fb
--- /dev/null
+++ b/linux/include/ncnn/pipelinecache.h
@@ -0,0 +1,85 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef NCNN_PIPELINECACHE_H
+#define NCNN_PIPELINECACHE_H
+
+#include "platform.h"
+
+#if NCNN_VULKAN
+#include <vulkan/vulkan.h>
+#endif // NCNN_VULKAN
+
+#include "mat.h"
+#include "gpu.h"
+
+namespace ncnn {
+
+#if NCNN_VULKAN
+
+class VulkanDevice;
+class PipelineCachePrivate;
+class NCNN_EXPORT PipelineCache
+{
+public:
+    explicit PipelineCache(const VulkanDevice* _vkdev);
+
+    virtual ~PipelineCache();
+
+    void clear();
+
+    int get_pipeline(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations,
+                     uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z,
+                     VkShaderModule* shader_module,
+                     VkDescriptorSetLayout* descriptorset_layout,
+                     VkPipelineLayout* pipeline_layout,
+                     VkPipeline* pipeline,
+                     VkDescriptorUpdateTemplateKHR* descriptor_update_template,
+                     ShaderInfo& shader_info) const;
+
+    int get_pipeline(int shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations,
+                     uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z,
+                     VkShaderModule* shader_module,
+                     VkDescriptorSetLayout* descriptorset_layout,
+                     VkPipelineLayout* pipeline_layout,
+                     VkPipeline* pipeline,
+                     VkDescriptorUpdateTemplateKHR* descriptor_update_template,
+                     ShaderInfo& shader_info) const;
+
+protected:
+    int create_shader_module(int shader_type_index, const Option& opt, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z,
+                             VkShaderModule* _shader_module, ShaderInfo& si) const;
+
+    int new_pipeline(VkShaderModule shader_module, const ShaderInfo& shader_info, const std::vector<vk_specialization_type>& specializations,
+                     VkDescriptorSetLayout* descriptorset_layout,
+                     VkPipelineLayout* pipeline_layout,
+                     VkPipeline* pipeline,
+                     VkDescriptorUpdateTemplateKHR* descriptor_update_template) const;
+
+protected:
+    const VulkanDevice* vkdev;
+
+private:
+    PipelineCache(const PipelineCache&);
+    PipelineCache& operator=(const PipelineCache&);
+
+private:
+    PipelineCachePrivate* const d;
+};
+
+#endif // NCNN_VULKAN
+
+} // namespace ncnn
+
+#endif // NCNN_PIPELINECACHE_H
diff --git a/linux/include/ncnn/platform.h b/linux/include/ncnn/platform.h
new file mode 100644
index 0000000..4a3ce3d
--- /dev/null
+++ b/linux/include/ncnn/platform.h
@@ -0,0 +1,284 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
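platform.h below also ships small portable wrappers around Win32/pthread primitives and the NCNN_LOGE logging macro; a brief sketch of the Mutex/MutexLockGuard idiom (the counter is illustrative):

#include "ncnn/platform.h"

static ncnn::Mutex g_lock;
static int g_counter = 0;

void bump()
{
    ncnn::MutexLockGuard guard(g_lock); // RAII: unlocks when leaving scope
    g_counter++;
    NCNN_LOGE("counter is now %d", g_counter); // printf-style, appends a newline
}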
+ +#ifndef NCNN_PLATFORM_H +#define NCNN_PLATFORM_H + +#define NCNN_STDIO 1 +#define NCNN_STRING 1 +#define NCNN_SIMPLEOCV 0 +#define NCNN_SIMPLEOMP 0 +#define NCNN_SIMPLESTL 0 +#define NCNN_THREADS 1 +#define NCNN_BENCHMARK 0 +#define NCNN_C_API 1 +#define NCNN_PLATFORM_API 1 +#define NCNN_PIXEL 1 +#define NCNN_PIXEL_ROTATE 1 +#define NCNN_PIXEL_AFFINE 1 +#define NCNN_PIXEL_DRAWING 1 +#define NCNN_VULKAN 1 +#define NCNN_SYSTEM_GLSLANG 0 +#define NCNN_RUNTIME_CPU 1 +#define NCNN_GNU_INLINE_ASM 1 +#define NCNN_AVX 1 +#define NCNN_XOP 1 +#define NCNN_FMA 1 +#define NCNN_F16C 1 +#define NCNN_AVX2 1 +#define NCNN_AVXVNNI 0 +#define NCNN_AVX512 1 +#define NCNN_AVX512VNNI 1 +#define NCNN_AVX512BF16 0 +#define NCNN_AVX512FP16 0 +#define NCNN_VFPV4 0 +#define NCNN_ARM82 0 +#define NCNN_ARM82DOT 0 +#define NCNN_ARM82FP16FML 0 +#define NCNN_ARM84BF16 0 +#define NCNN_ARM84I8MM 0 +#define NCNN_ARM86SVE 0 +#define NCNN_ARM86SVE2 0 +#define NCNN_ARM86SVEBF16 0 +#define NCNN_ARM86SVEI8MM 0 +#define NCNN_ARM86SVEF32MM 0 +#define NCNN_MSA 0 +#define NCNN_LSX 0 +#define NCNN_MMI 0 +#define NCNN_RVV 0 +#define NCNN_INT8 1 +#define NCNN_BF16 1 +#define NCNN_FORCE_INLINE 1 + +#define NCNN_VERSION_STRING "1.0.20230920" + +#include "ncnn_export.h" + +#ifdef __cplusplus + +#if NCNN_THREADS +#if (defined _WIN32 && !(defined __MINGW32__)) +#define WIN32_LEAN_AND_MEAN +#include +#include +#else +#include +#endif +#endif // NCNN_THREADS + +#if __ANDROID_API__ >= 26 +#define VK_USE_PLATFORM_ANDROID_KHR +#endif // __ANDROID_API__ >= 26 + +namespace ncnn { + +#if NCNN_THREADS +#if (defined _WIN32 && !(defined __MINGW32__)) +class NCNN_EXPORT Mutex +{ +public: + Mutex() { InitializeSRWLock(&srwlock); } + ~Mutex() {} + void lock() { AcquireSRWLockExclusive(&srwlock); } + void unlock() { ReleaseSRWLockExclusive(&srwlock); } +private: + friend class ConditionVariable; + // NOTE SRWLock is available from windows vista + SRWLOCK srwlock; +}; + +class NCNN_EXPORT ConditionVariable +{ +public: + ConditionVariable() { InitializeConditionVariable(&condvar); } + ~ConditionVariable() {} + void wait(Mutex& mutex) { SleepConditionVariableSRW(&condvar, &mutex.srwlock, INFINITE, 0); } + void broadcast() { WakeAllConditionVariable(&condvar); } + void signal() { WakeConditionVariable(&condvar); } +private: + CONDITION_VARIABLE condvar; +}; + +static unsigned __stdcall start_wrapper(void* args); +class NCNN_EXPORT Thread +{ +public: + Thread(void* (*start)(void*), void* args = 0) { _start = start; _args = args; handle = (HANDLE)_beginthreadex(0, 0, start_wrapper, this, 0, 0); } + ~Thread() {} + void join() { WaitForSingleObject(handle, INFINITE); CloseHandle(handle); } +private: + friend unsigned __stdcall start_wrapper(void* args) + { + Thread* t = (Thread*)args; + t->_start(t->_args); + return 0; + } + HANDLE handle; + void* (*_start)(void*); + void* _args; +}; + +class NCNN_EXPORT ThreadLocalStorage +{ +public: + ThreadLocalStorage() { key = TlsAlloc(); } + ~ThreadLocalStorage() { TlsFree(key); } + void set(void* value) { TlsSetValue(key, (LPVOID)value); } + void* get() { return (void*)TlsGetValue(key); } +private: + DWORD key; +}; +#else // (defined _WIN32 && !(defined __MINGW32__)) +class NCNN_EXPORT Mutex +{ +public: + Mutex() { pthread_mutex_init(&mutex, 0); } + ~Mutex() { pthread_mutex_destroy(&mutex); } + void lock() { pthread_mutex_lock(&mutex); } + void unlock() { pthread_mutex_unlock(&mutex); } +private: + friend class ConditionVariable; + pthread_mutex_t mutex; +}; + +class NCNN_EXPORT ConditionVariable +{ +public: + 
ConditionVariable() { pthread_cond_init(&cond, 0); } + ~ConditionVariable() { pthread_cond_destroy(&cond); } + void wait(Mutex& mutex) { pthread_cond_wait(&cond, &mutex.mutex); } + void broadcast() { pthread_cond_broadcast(&cond); } + void signal() { pthread_cond_signal(&cond); } +private: + pthread_cond_t cond; +}; + +class NCNN_EXPORT Thread +{ +public: + Thread(void* (*start)(void*), void* args = 0) { pthread_create(&t, 0, start, args); } + ~Thread() {} + void join() { pthread_join(t, 0); } +private: + pthread_t t; +}; + +class NCNN_EXPORT ThreadLocalStorage +{ +public: + ThreadLocalStorage() { pthread_key_create(&key, 0); } + ~ThreadLocalStorage() { pthread_key_delete(key); } + void set(void* value) { pthread_setspecific(key, value); } + void* get() { return pthread_getspecific(key); } +private: + pthread_key_t key; +}; +#endif // (defined _WIN32 && !(defined __MINGW32__)) +#else // NCNN_THREADS +class NCNN_EXPORT Mutex +{ +public: + Mutex() {} + ~Mutex() {} + void lock() {} + void unlock() {} +}; + +class NCNN_EXPORT ConditionVariable +{ +public: + ConditionVariable() {} + ~ConditionVariable() {} + void wait(Mutex& /*mutex*/) {} + void broadcast() {} + void signal() {} +}; + +class NCNN_EXPORT Thread +{ +public: + Thread(void* (*/*start*/)(void*), void* /*args*/ = 0) {} + ~Thread() {} + void join() {} +}; + +class NCNN_EXPORT ThreadLocalStorage +{ +public: + ThreadLocalStorage() { data = 0; } + ~ThreadLocalStorage() {} + void set(void* value) { data = value; } + void* get() { return data; } +private: + void* data; +}; +#endif // NCNN_THREADS + +class NCNN_EXPORT MutexLockGuard +{ +public: + MutexLockGuard(Mutex& _mutex) : mutex(_mutex) { mutex.lock(); } + ~MutexLockGuard() { mutex.unlock(); } +private: + Mutex& mutex; +}; + +} // namespace ncnn + +#if NCNN_SIMPLESTL +#include "simplestl.h" +#else +#include +#include +#include +#include +#endif + +#endif // __cplusplus + +#if NCNN_STDIO +#if NCNN_PLATFORM_API && __ANDROID_API__ >= 8 +#include +#define NCNN_LOGE(...) do { \ + fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); \ + __android_log_print(ANDROID_LOG_WARN, "ncnn", ##__VA_ARGS__); } while(0) +#else // NCNN_PLATFORM_API && __ANDROID_API__ >= 8 +#include +#define NCNN_LOGE(...) do { \ + fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); } while(0) +#endif // NCNN_PLATFORM_API && __ANDROID_API__ >= 8 +#else +#define NCNN_LOGE(...) +#endif + + +#if NCNN_FORCE_INLINE +#ifdef _MSC_VER + #define NCNN_FORCEINLINE __forceinline +#elif defined(__GNUC__) + #define NCNN_FORCEINLINE inline __attribute__((__always_inline__)) +#elif defined(__CLANG__) + #if __has_attribute(__always_inline__) + #define NCNN_FORCEINLINE inline __attribute__((__always_inline__)) + #else + #define NCNN_FORCEINLINE inline + #endif +#else + #define NCNN_FORCEINLINE inline +#endif +#else + #define NCNN_FORCEINLINE inline +#endif + +#endif // NCNN_PLATFORM_H diff --git a/linux/include/ncnn/simpleocv.h b/linux/include/ncnn/simpleocv.h new file mode 100644 index 0000000..54b22d9 --- /dev/null +++ b/linux/include/ncnn/simpleocv.h @@ -0,0 +1,503 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_SIMPLEOCV_H +#define NCNN_SIMPLEOCV_H + +#include "platform.h" + +#if NCNN_SIMPLEOCV + +#include +#include +#include "allocator.h" +#include "mat.h" + +#if defined(_MSC_VER) || defined(__GNUC__) +#pragma push_macro("min") +#pragma push_macro("max") +#undef min +#undef max +#endif + +#ifndef NCNN_XADD +using ncnn::NCNN_XADD; +#endif + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; + +enum +{ + CV_LOAD_IMAGE_UNCHANGED = -1, + CV_LOAD_IMAGE_GRAYSCALE = 0, + CV_LOAD_IMAGE_COLOR = 1, +}; + +enum +{ + CV_IMWRITE_JPEG_QUALITY = 1 +}; + +// minimal opencv style data structure implementation +namespace cv { + +template +static inline _Tp saturate_cast(int v) +{ + return _Tp(v); +} +template<> +inline uchar saturate_cast(int v) +{ + return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); +} + +template +struct Scalar_ +{ + Scalar_() + { + v[0] = 0; + v[1] = 0; + v[2] = 0; + v[3] = 0; + } + Scalar_(_Tp _v0) + { + v[0] = _v0; + v[1] = 0; + v[2] = 0; + v[3] = 0; + } + Scalar_(_Tp _v0, _Tp _v1, _Tp _v2) + { + v[0] = _v0; + v[1] = _v1; + v[2] = _v2; + v[3] = 0; + } + Scalar_(_Tp _v0, _Tp _v1, _Tp _v2, _Tp _v3) + { + v[0] = _v0; + v[1] = _v1; + v[2] = _v2; + v[3] = _v3; + } + + const _Tp operator[](const int i) const + { + return v[i]; + } + + _Tp operator[](const int i) + { + return v[i]; + } + + _Tp v[4]; +}; + +typedef Scalar_ Scalar; + +template +struct Point_ +{ + Point_() + : x(0), y(0) + { + } + Point_(_Tp _x, _Tp _y) + : x(_x), y(_y) + { + } + + template + operator Point_<_Tp2>() const + { + return Point_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y)); + } + + _Tp x; + _Tp y; +}; + +typedef Point_ Point; +typedef Point_ Point2f; + +template +struct Size_ +{ + Size_() + : width(0), height(0) + { + } + Size_(_Tp _w, _Tp _h) + : width(_w), height(_h) + { + } + + template + operator Size_<_Tp2>() const + { + return Size_<_Tp2>(saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); + } + + _Tp width; + _Tp height; +}; + +typedef Size_ Size; +typedef Size_ Size2f; + +template +struct Rect_ +{ + Rect_() + : x(0), y(0), width(0), height(0) + { + } + Rect_(_Tp _x, _Tp _y, _Tp _w, _Tp _h) + : x(_x), y(_y), width(_w), height(_h) + { + } + Rect_(Point_<_Tp> _p, Size_<_Tp> _size) + : x(_p.x), y(_p.y), width(_size.width), height(_size.height) + { + } + + template + operator Rect_<_Tp2>() const + { + return Rect_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height)); + } + + _Tp x; + _Tp y; + _Tp width; + _Tp height; + + // area + _Tp area() const + { + return width * height; + } +}; + +template +static inline Rect_<_Tp>& operator&=(Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + _Tp x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y); + a.width = std::min(a.x + a.width, b.x + b.width) - x1; + a.height = std::min(a.y + a.height, b.y + b.height) - y1; + a.x = x1; + a.y = y1; + if (a.width <= 0 || a.height <= 0) + a = Rect_<_Tp>(); + return a; +} + +template +static inline Rect_<_Tp>& operator|=(Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + _Tp x1 = std::min(a.x, b.x), y1 = 
std::min(a.y, b.y); + a.width = std::max(a.x + a.width, b.x + b.width) - x1; + a.height = std::max(a.y + a.height, b.y + b.height) - y1; + a.x = x1; + a.y = y1; + return a; +} + +template +static inline Rect_<_Tp> operator&(const Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + Rect_<_Tp> c = a; + return c &= b; +} + +template +static inline Rect_<_Tp> operator|(const Rect_<_Tp>& a, const Rect_<_Tp>& b) +{ + Rect_<_Tp> c = a; + return c |= b; +} + +typedef Rect_ Rect; +typedef Rect_ Rect2f; + +#define CV_8UC1 1 +#define CV_8UC3 3 +#define CV_8UC4 4 +#define CV_32FC1 4 + +struct NCNN_EXPORT Mat +{ + Mat() + : data(0), refcount(0), rows(0), cols(0), c(0) + { + } + + Mat(int _rows, int _cols, int flags) + : data(0), refcount(0) + { + create(_rows, _cols, flags); + } + + // copy + Mat(const Mat& m) + : data(m.data), refcount(m.refcount) + { + if (refcount) + NCNN_XADD(refcount, 1); + + rows = m.rows; + cols = m.cols; + c = m.c; + } + + Mat(int _rows, int _cols, int flags, void* _data) + : data((unsigned char*)_data), refcount(0) + { + rows = _rows; + cols = _cols; + c = flags; + } + + ~Mat() + { + release(); + } + + // assign + Mat& operator=(const Mat& m) + { + if (this == &m) + return *this; + + if (m.refcount) + NCNN_XADD(m.refcount, 1); + + release(); + + data = m.data; + refcount = m.refcount; + + rows = m.rows; + cols = m.cols; + c = m.c; + + return *this; + } + + Mat& operator=(const Scalar& s) + { + if (total() > 0) + { + uchar* p = data; + for (int i = 0; i < cols * rows; i++) + { + for (int j = 0; j < c; j++) + { + *p++ = s[j]; + } + } + } + + return *this; + } + + void create(int _rows, int _cols, int flags) + { + release(); + + rows = _rows; + cols = _cols; + c = flags; + + if (total() > 0) + { + // refcount address must be aligned, so we expand totalsize here + size_t totalsize = (total() + 3) >> 2 << 2; + data = (uchar*)ncnn::fastMalloc(totalsize + (int)sizeof(*refcount)); + refcount = (int*)(((uchar*)data) + totalsize); + *refcount = 1; + } + } + + void release() + { + if (refcount && NCNN_XADD(refcount, -1) == 1) + ncnn::fastFree(data); + + data = 0; + + rows = 0; + cols = 0; + c = 0; + + refcount = 0; + } + + Mat clone() const + { + if (empty()) + return Mat(); + + Mat m(rows, cols, c); + + if (total() > 0) + { + memcpy(m.data, data, total()); + } + + return m; + } + + bool empty() const + { + return data == 0 || total() == 0; + } + + int channels() const + { + return c; + } + + int type() const + { + return c; + } + + size_t total() const + { + return cols * rows * c; + } + + const uchar* ptr(int y) const + { + return data + y * cols * c; + } + + uchar* ptr(int y) + { + return data + y * cols * c; + } + + template + const _Tp* ptr(int y) const + { + return (const _Tp*)data + y * cols * c; + } + + template + _Tp* ptr(int y) + { + return (_Tp*)data + y * cols * c; + } + + // roi + Mat operator()(const Rect& roi) const + { + if (empty()) + return Mat(); + + Mat m(roi.height, roi.width, c); + + int sy = roi.y; + for (int y = 0; y < roi.height; y++) + { + const uchar* sptr = ptr(sy) + roi.x * c; + uchar* dptr = m.ptr(y); + memcpy(dptr, sptr, roi.width * c); + sy++; + } + + return m; + } + + uchar* data; + + // pointer to the reference counter; + // when points to user-allocated data, the pointer is NULL + int* refcount; + + int rows; + int cols; + + int c; +}; + +enum ImreadModes +{ + IMREAD_UNCHANGED = -1, + IMREAD_GRAYSCALE = 0, + IMREAD_COLOR = 1 +}; + +NCNN_EXPORT Mat imread(const std::string& path, int flags = IMREAD_COLOR); + +NCNN_EXPORT Mat imdecode(const std::vector& buf, int 
flags = IMREAD_COLOR); + +enum ImwriteFlags +{ + IMWRITE_JPEG_QUALITY = 1 +}; + +NCNN_EXPORT bool imwrite(const std::string& path, const Mat& m, const std::vector& params = std::vector()); + +NCNN_EXPORT void imshow(const std::string& name, const Mat& m); + +NCNN_EXPORT int waitKey(int delay = 0); + +#if NCNN_PIXEL +NCNN_EXPORT void resize(const Mat& src, Mat& dst, const Size& size, float sw = 0.f, float sh = 0.f, int flags = 0); +#endif // NCNN_PIXEL + +#if NCNN_PIXEL_DRAWING + +enum +{ + FILLED = -1 +}; + +NCNN_EXPORT void rectangle(Mat& img, Point pt1, Point pt2, const Scalar& color, int thickness = 1); + +NCNN_EXPORT void rectangle(Mat& img, Rect rec, const Scalar& color, int thickness = 1); + +NCNN_EXPORT void circle(Mat& img, Point center, int radius, const Scalar& color, int thickness = 1); + +NCNN_EXPORT void line(Mat& img, Point p0, Point p1, const Scalar& color, int thickness = 1); + +enum +{ + FONT_HERSHEY_SIMPLEX = 0 +}; + +NCNN_EXPORT void putText(Mat& img, const std::string& text, Point org, int fontFace, double fontScale, Scalar color, int thickness = 1); + +NCNN_EXPORT Size getTextSize(const std::string& text, int fontFace, double fontScale, int thickness, int* baseLine); + +#endif // NCNN_PIXEL_DRAWING + +} // namespace cv + +#if defined(_MSC_VER) || defined(__GNUC__) +#pragma pop_macro("min") +#pragma pop_macro("max") +#endif + +#endif // NCNN_SIMPLEOCV + +#endif // NCNN_SIMPLEOCV_H diff --git a/linux/include/ncnn/simpleomp.h b/linux/include/ncnn/simpleomp.h new file mode 100644 index 0000000..13e2452 --- /dev/null +++ b/linux/include/ncnn/simpleomp.h @@ -0,0 +1,53 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_SIMPLEOMP_H +#define NCNN_SIMPLEOMP_H + +#include "platform.h" + +#if NCNN_SIMPLEOMP + +#include + +// This minimal openmp runtime implementation only supports the llvm openmp abi +// and only supports #pragma omp parallel for num_threads(X) + +#ifdef __cplusplus +extern "C" { +#endif + +NCNN_EXPORT int omp_get_max_threads(); + +NCNN_EXPORT void omp_set_num_threads(int num_threads); + +NCNN_EXPORT int omp_get_dynamic(); + +NCNN_EXPORT void omp_set_dynamic(int dynamic); + +NCNN_EXPORT int omp_get_num_threads(); + +NCNN_EXPORT int omp_get_thread_num(); + +NCNN_EXPORT int kmp_get_blocktime(); + +NCNN_EXPORT void kmp_set_blocktime(int blocktime); + +#ifdef __cplusplus +} +#endif + +#endif // NCNN_SIMPLEOMP + +#endif // NCNN_SIMPLEOMP_H diff --git a/linux/include/ncnn/simplestl.h b/linux/include/ncnn/simplestl.h new file mode 100644 index 0000000..00ff468 --- /dev/null +++ b/linux/include/ncnn/simplestl.h @@ -0,0 +1,565 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 
+// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_SIMPLESTL_H +#define NCNN_SIMPLESTL_H + +#include +#include +#include + +#if !NCNN_SIMPLESTL + +#include + +#else + +// allocation functions +NCNN_EXPORT void* operator new(size_t size); +NCNN_EXPORT void* operator new[](size_t size); +// placement allocation functions +NCNN_EXPORT void* operator new(size_t size, void* ptr); +NCNN_EXPORT void* operator new[](size_t size, void* ptr); +// deallocation functions +NCNN_EXPORT void operator delete(void* ptr); +NCNN_EXPORT void operator delete[](void* ptr); +// deallocation functions since c++14 +#if __cplusplus >= 201402L +NCNN_EXPORT void operator delete(void* ptr, size_t sz); +NCNN_EXPORT void operator delete[](void* ptr, size_t sz); +#endif +// placement deallocation functions +NCNN_EXPORT void operator delete(void* ptr, void* voidptr2); +NCNN_EXPORT void operator delete[](void* ptr, void* voidptr2); + +#endif + +// minimal stl data structure implementation +namespace std { + +template +const T& max(const T& a, const T& b) +{ + return (a < b) ? b : a; +} + +template +const T& min(const T& a, const T& b) +{ + return (a > b) ? b : a; +} + +template +void swap(T& a, T& b) +{ + T temp(a); + a = b; + b = temp; +} + +template +struct pair +{ + pair() + : first(), second() + { + } + pair(const T1& t1, const T2& t2) + : first(t1), second(t2) + { + } + + T1 first; + T2 second; +}; + +template +bool operator==(const pair& x, const pair& y) +{ + return (x.first == y.first && x.second == y.second); +} +template +bool operator<(const pair& x, const pair& y) +{ + return x.first < y.first || (!(y.first < x.first) && x.second < y.second); +} +template +bool operator!=(const pair& x, const pair& y) +{ + return !(x == y); +} +template +bool operator>(const pair& x, const pair& y) +{ + return y < x; +} +template +bool operator<=(const pair& x, const pair& y) +{ + return !(y < x); +} +template +bool operator>=(const pair& x, const pair& y) +{ + return !(x < y); +} + +template +pair make_pair(const T1& t1, const T2& t2) +{ + return pair(t1, t2); +} + +template +struct node +{ + node* prev_; + node* next_; + T data_; + + node() + : prev_(0), next_(0), data_() + { + } + node(const T& t) + : prev_(0), next_(0), data_(t) + { + } +}; + +template +struct iter_list +{ + iter_list() + : curr_(0) + { + } + iter_list(node* n) + : curr_(n) + { + } + iter_list(const iter_list& i) + : curr_(i.curr_) + { + } + ~iter_list() + { + } + + iter_list& operator=(const iter_list& i) + { + curr_ = i.curr_; + return *this; + } + + T& operator*() + { + return curr_->data_; + } + T* operator->() + { + return &(curr_->data_); + } + + bool operator==(const iter_list& i) + { + return curr_ == i.curr_; + } + bool operator!=(const iter_list& i) + { + return curr_ != i.curr_; + } + + iter_list& operator++() + { + curr_ = curr_->next_; + return *this; + } + iter_list& operator--() + { + curr_ = curr_->prev_; + return *this; + } + + node* curr_; +}; + +template +struct list +{ + typedef iter_list iterator; + + list() + { + head_ = new 
node(); + tail_ = head_; + count_ = 0; + } + ~list() + { + clear(); + delete head_; + } + list(const list& l) + { + head_ = new node(); + tail_ = head_; + count_ = 0; + + for (iter_list i = l.begin(); i != l.end(); ++i) + { + push_back(*i); + } + } + + list& operator=(const list& l) + { + if (this == &l) + { + return *this; + } + clear(); + + for (iter_list i = l.begin(); i != l.end(); ++i) + { + push_back(*i); + } + return *this; + } + + void clear() + { + while (count_ > 0) + { + pop_front(); + } + } + + void pop_front() + { + if (count_ > 0) + { + head_ = head_->next_; + delete head_->prev_; + head_->prev_ = 0; + --count_; + } + } + + size_t size() const + { + return count_; + } + iter_list begin() const + { + return iter_list(head_); + } + iter_list end() const + { + return iter_list(tail_); + } + bool empty() const + { + return count_ == 0; + } + + void push_back(const T& t) + { + if (count_ == 0) + { + head_ = new node(t); + head_->prev_ = 0; + head_->next_ = tail_; + tail_->prev_ = head_; + count_ = 1; + } + else + { + node* temp = new node(t); + temp->prev_ = tail_->prev_; + temp->next_ = tail_; + tail_->prev_->next_ = temp; + tail_->prev_ = temp; + ++count_; + } + } + + iter_list erase(iter_list pos) + { + if (pos != end()) + { + node* temp = pos.curr_; + if (temp == head_) + { + ++pos; + temp->next_->prev_ = 0; + head_ = temp->next_; + } + else + { + --pos; + temp->next_->prev_ = temp->prev_; + temp->prev_->next_ = temp->next_; + ++pos; + } + delete temp; + --count_; + } + return pos; + } + +protected: + node* head_; + node* tail_; + size_t count_; +}; + +template +struct greater +{ + bool operator()(const T& x, const T& y) const + { + return (x > y); + } +}; + +template +struct less +{ + bool operator()(const T& x, const T& y) const + { + return (x < y); + } +}; + +template +void partial_sort(RandomAccessIter first, RandomAccessIter middle, RandomAccessIter last, Compare comp) +{ + // [TODO] heap sort should be used here, but we simply use bubble sort now + for (RandomAccessIter i = first; i < middle; ++i) + { + // bubble sort + for (RandomAccessIter j = last - 1; j > first; --j) + { + if (comp(*j, *(j - 1))) + { + swap(*j, *(j - 1)); + } + } + } +} + +template +struct vector +{ + vector() + : data_(0), size_(0), capacity_(0) + { + } + vector(const size_t new_size, const T& value = T()) + : data_(0), size_(0), capacity_(0) + { + resize(new_size, value); + } + ~vector() + { + clear(); + } + vector(const vector& v) + : data_(0), size_(0), capacity_(0) + { + resize(v.size()); + for (size_t i = 0; i < size_; i++) + { + data_[i] = v.data_[i]; + } + } + + vector& operator=(const vector& v) + { + if (this == &v) + { + return *this; + } + resize(0); + resize(v.size()); + for (size_t i = 0; i < size_; i++) + { + data_[i] = v.data_[i]; + } + return *this; + } + + void resize(const size_t new_size, const T& value = T()) + { + try_alloc(new_size); + if (new_size > size_) + { + for (size_t i = size_; i < new_size; i++) + { + new (&data_[i]) T(value); + } + } + else if (new_size < size_) + { + for (size_t i = new_size; i < size_; i++) + { + data_[i].~T(); + } + } + size_ = new_size; + } + + void clear() + { + for (size_t i = 0; i < size_; i++) + { + data_[i].~T(); + } + delete[](char*) data_; + data_ = 0; + size_ = 0; + capacity_ = 0; + } + + T* data() const + { + return data_; + } + size_t size() const + { + return size_; + } + T& operator[](size_t i) const + { + return data_[i]; + } + T* begin() const + { + return &data_[0]; + } + T* end() const + { + return &data_[size_]; + } + bool 
empty() const + { + return size_ == 0; + } + + void push_back(const T& t) + { + try_alloc(size_ + 1); + new (&data_[size_]) T(t); + size_++; + } + + void insert(T* pos, T* b, T* e) + { + vector* v = 0; + if (b >= begin() && b < end()) + { + //the same vector + v = new vector(*this); + b = v->begin() + (b - begin()); + e = v->begin() + (e - begin()); + } + size_t diff = pos - begin(); + try_alloc(size_ + (e - b)); + pos = begin() + diff; + memmove(pos + (e - b), pos, (end() - pos) * sizeof(T)); + size_t len = e - b; + size_ += len; + for (size_t i = 0; i < len; i++) + { + *pos = *b; + pos++; + b++; + } + delete v; + } + + T* erase(T* pos) + { + pos->~T(); + memmove(pos, pos + 1, (end() - pos - 1) * sizeof(T)); + size_--; + return pos; + } + +protected: + T* data_; + size_t size_; + size_t capacity_; + void try_alloc(size_t new_size) + { + if (new_size * 3 / 2 > capacity_ / 2) + { + capacity_ = new_size * 2; + T* new_data = (T*)new char[capacity_ * sizeof(T)]; + memset(static_cast(new_data), 0, capacity_ * sizeof(T)); + if (data_) + { + memmove(new_data, data_, sizeof(T) * size_); + delete[](char*) data_; + } + data_ = new_data; + } + } +}; + +struct NCNN_EXPORT string : public vector +{ + string() + { + } + string(const char* str) + { + size_t len = strlen(str); + resize(len); + memcpy(data_, str, len); + } + const char* c_str() const + { + return (const char*)data_; + } + bool operator==(const string& str2) const + { + return strcmp(data_, str2.data_) == 0; + } + bool operator==(const char* str2) const + { + return strcmp(data_, str2) == 0; + } + bool operator!=(const char* str2) const + { + return strcmp(data_, str2) != 0; + } + string& operator+=(const string& str1) + { + insert(end(), str1.begin(), str1.end()); + return *this; + } +}; + +inline string operator+(const string& str1, const string& str2) +{ + string str(str1); + str.insert(str.end(), str2.begin(), str2.end()); + return str; +} + +} // namespace std + +#endif // NCNN_SIMPLESTL_H diff --git a/linux/include/ncnn/vulkan_header_fix.h b/linux/include/ncnn/vulkan_header_fix.h new file mode 100644 index 0000000..0a5ea9b --- /dev/null +++ b/linux/include/ncnn/vulkan_header_fix.h @@ -0,0 +1,449 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
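+// Pattern used throughout this header: each block is gated on VK_HEADER_VERSION
+// and re-declares only the enums, structs and function-pointer typedefs that an
+// older Vulkan SDK lacks, so the rest of ncnn compiles unchanged. A minimal
+// sketch of the same idea, with a hypothetical VkExampleFeatures struct:
+//
+//   #if VK_HEADER_VERSION < 70
+//   typedef struct VkExampleFeatures { VkStructureType sType; void* pNext; } VkExampleFeatures;
+//   #endif // VK_HEADER_VERSION < 70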
+ +#ifndef NCNN_VULKAN_HEADER_FIX_H +#define NCNN_VULKAN_HEADER_FIX_H + +#include + +// This header contains new structure and function declearation to fix build with old vulkan sdk + +#if VK_HEADER_VERSION < 70 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES (VkStructureType)1000094000 +typedef enum VkSubgroupFeatureFlagBits +{ + VK_SUBGROUP_FEATURE_BASIC_BIT = 0x00000001, + VK_SUBGROUP_FEATURE_VOTE_BIT = 0x00000002, + VK_SUBGROUP_FEATURE_ARITHMETIC_BIT = 0x00000004, + VK_SUBGROUP_FEATURE_BALLOT_BIT = 0x00000008, + VK_SUBGROUP_FEATURE_SHUFFLE_BIT = 0x00000010, + VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT = 0x00000020, + VK_SUBGROUP_FEATURE_CLUSTERED_BIT = 0x00000040, + VK_SUBGROUP_FEATURE_QUAD_BIT = 0x00000080, + VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV = 0x00000100, + VK_SUBGROUP_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkSubgroupFeatureFlagBits; +typedef VkFlags VkSubgroupFeatureFlags; +typedef struct VkPhysicalDeviceSubgroupProperties +{ + VkStructureType sType; + void* pNext; + uint32_t subgroupSize; + VkShaderStageFlags supportedStages; + VkSubgroupFeatureFlags supportedOperations; + VkBool32 quadOperationsInAllStages; +} VkPhysicalDeviceSubgroupProperties; +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES (VkStructureType)1000168000 +#define VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_SUPPORT (VkStructureType)1000168001 +typedef struct VkPhysicalDeviceMaintenance3Properties +{ + VkStructureType sType; + void* pNext; + uint32_t maxPerSetDescriptors; + VkDeviceSize maxMemoryAllocationSize; +} VkPhysicalDeviceMaintenance3Properties; +typedef struct VkDescriptorSetLayoutSupport +{ + VkStructureType sType; + void* pNext; + VkBool32 supported; +} VkDescriptorSetLayoutSupport; +typedef VkPhysicalDeviceMaintenance3Properties VkPhysicalDeviceMaintenance3PropertiesKHR; +typedef VkDescriptorSetLayoutSupport VkDescriptorSetLayoutSupportKHR; +typedef void(VKAPI_PTR* PFN_vkGetDescriptorSetLayoutSupportKHR)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayoutSupport* pSupport); +#endif // VK_HEADER_VERSION < 70 + +#if VK_HEADER_VERSION < 80 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR (VkStructureType)1000177000 +typedef struct VkPhysicalDevice8BitStorageFeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 storageBuffer8BitAccess; + VkBool32 uniformAndStorageBuffer8BitAccess; + VkBool32 storagePushConstant8; +} VkPhysicalDevice8BitStorageFeaturesKHR; +#define VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR (VkStructureType)1000109000 +#define VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR (VkStructureType)1000109001 +#define VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR (VkStructureType)1000109002 +#define VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR (VkStructureType)1000109003 +#define VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR (VkStructureType)1000109004 +#define VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO_KHR (VkStructureType)1000109005 +#define VK_STRUCTURE_TYPE_SUBPASS_END_INFO_KHR (VkStructureType)1000109006 +typedef struct VkAttachmentDescription2KHR +{ + VkStructureType sType; + const void* pNext; + VkAttachmentDescriptionFlags flags; + VkFormat format; + VkSampleCountFlagBits samples; + VkAttachmentLoadOp loadOp; + VkAttachmentStoreOp storeOp; + VkAttachmentLoadOp stencilLoadOp; + VkAttachmentStoreOp stencilStoreOp; + VkImageLayout initialLayout; + VkImageLayout finalLayout; +} VkAttachmentDescription2KHR; +typedef struct VkAttachmentReference2KHR +{ + VkStructureType sType; + const void* pNext; + 
uint32_t attachment; + VkImageLayout layout; + VkImageAspectFlags aspectMask; +} VkAttachmentReference2KHR; +typedef struct VkSubpassDescription2KHR +{ + VkStructureType sType; + const void* pNext; + VkSubpassDescriptionFlags flags; + VkPipelineBindPoint pipelineBindPoint; + uint32_t viewMask; + uint32_t inputAttachmentCount; + const VkAttachmentReference2KHR* pInputAttachments; + uint32_t colorAttachmentCount; + const VkAttachmentReference2KHR* pColorAttachments; + const VkAttachmentReference2KHR* pResolveAttachments; + const VkAttachmentReference2KHR* pDepthStencilAttachment; + uint32_t preserveAttachmentCount; + const uint32_t* pPreserveAttachments; +} VkSubpassDescription2KHR; +typedef struct VkSubpassDependency2KHR +{ + VkStructureType sType; + const void* pNext; + uint32_t srcSubpass; + uint32_t dstSubpass; + VkPipelineStageFlags srcStageMask; + VkPipelineStageFlags dstStageMask; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; + VkDependencyFlags dependencyFlags; + int32_t viewOffset; +} VkSubpassDependency2KHR; +typedef struct VkRenderPassCreateInfo2KHR +{ + VkStructureType sType; + const void* pNext; + VkRenderPassCreateFlags flags; + uint32_t attachmentCount; + const VkAttachmentDescription2KHR* pAttachments; + uint32_t subpassCount; + const VkSubpassDescription2KHR* pSubpasses; + uint32_t dependencyCount; + const VkSubpassDependency2KHR* pDependencies; + uint32_t correlatedViewMaskCount; + const uint32_t* pCorrelatedViewMasks; +} VkRenderPassCreateInfo2KHR; +typedef struct VkSubpassBeginInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkSubpassContents contents; +} VkSubpassBeginInfoKHR; + +typedef struct VkSubpassEndInfoKHR +{ + VkStructureType sType; + const void* pNext; +} VkSubpassEndInfoKHR; +typedef VkResult(VKAPI_PTR* PFN_vkCreateRenderPass2KHR)(VkDevice device, const VkRenderPassCreateInfo2KHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass); +typedef void(VKAPI_PTR* PFN_vkCmdBeginRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, const VkSubpassBeginInfoKHR* pSubpassBeginInfo); +typedef void(VKAPI_PTR* PFN_vkCmdNextSubpass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassBeginInfoKHR* pSubpassBeginInfo, const VkSubpassEndInfoKHR* pSubpassEndInfo); +typedef void(VKAPI_PTR* PFN_vkCmdEndRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassEndInfoKHR* pSubpassEndInfo); +#endif // VK_HEADER_VERSION < 80 + +#if VK_HEADER_VERSION < 95 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR (VkStructureType)1000082000 +typedef struct VkPhysicalDeviceFloat16Int8FeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 shaderFloat16; + VkBool32 shaderInt8; +} VkPhysicalDeviceFloat16Int8FeaturesKHR; +#endif // VK_HEADER_VERSION < 95 + +#if VK_HEADER_VERSION < 97 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT (VkStructureType)1000237000 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT (VkStructureType)1000238000 +#define VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT (VkStructureType)1000238001 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT (VkStructureType)1000244000 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT (VkStructureType)1000244001 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT (VkStructureType)1000244002 +#define VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT (VkStructureType)1000247000 +#define 
VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_EXT (VkBufferCreateFlagBits)0x00020000 +#define VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT (VkBufferUsageFlagBits)0x00020000 +typedef uint64_t VkDeviceAddress; +typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT +{ + VkStructureType sType; + void* pNext; + VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; + VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; +} VkPhysicalDeviceMemoryBudgetPropertiesEXT; +typedef struct VkPhysicalDeviceMemoryPriorityFeaturesEXT +{ + VkStructureType sType; + void* pNext; + VkBool32 memoryPriority; +} VkPhysicalDeviceMemoryPriorityFeaturesEXT; +typedef struct VkMemoryPriorityAllocateInfoEXT +{ + VkStructureType sType; + const void* pNext; + float priority; +} VkMemoryPriorityAllocateInfoEXT; +typedef struct VkPhysicalDeviceBufferAddressFeaturesEXT +{ + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} VkPhysicalDeviceBufferAddressFeaturesEXT; +typedef struct VkBufferDeviceAddressInfoEXT +{ + VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferDeviceAddressInfoEXT; +typedef struct VkBufferDeviceAddressCreateInfoEXT +{ + VkStructureType sType; + const void* pNext; + VkDeviceSize deviceAddress; +} VkBufferDeviceAddressCreateInfoEXT; +typedef VkDeviceAddress(VKAPI_PTR* PFN_vkGetBufferDeviceAddressEXT)(VkDevice device, const VkBufferDeviceAddressInfoEXT* pInfo); +typedef enum VkValidationFeatureEnableEXT +{ + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT = 0, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT = 1, + VK_VALIDATION_FEATURE_ENABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, + VK_VALIDATION_FEATURE_ENABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, + VK_VALIDATION_FEATURE_ENABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT - VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT + 1), + VK_VALIDATION_FEATURE_ENABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureEnableEXT; +typedef enum VkValidationFeatureDisableEXT +{ + VK_VALIDATION_FEATURE_DISABLE_ALL_EXT = 0, + VK_VALIDATION_FEATURE_DISABLE_SHADERS_EXT = 1, + VK_VALIDATION_FEATURE_DISABLE_THREAD_SAFETY_EXT = 2, + VK_VALIDATION_FEATURE_DISABLE_API_PARAMETERS_EXT = 3, + VK_VALIDATION_FEATURE_DISABLE_OBJECT_LIFETIMES_EXT = 4, + VK_VALIDATION_FEATURE_DISABLE_CORE_CHECKS_EXT = 5, + VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT = 6, + VK_VALIDATION_FEATURE_DISABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_ALL_EXT, + VK_VALIDATION_FEATURE_DISABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT, + VK_VALIDATION_FEATURE_DISABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT - VK_VALIDATION_FEATURE_DISABLE_ALL_EXT + 1), + VK_VALIDATION_FEATURE_DISABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureDisableEXT; +typedef struct VkValidationFeaturesEXT +{ + VkStructureType sType; + const void* pNext; + uint32_t enabledValidationFeatureCount; + const VkValidationFeatureEnableEXT* pEnabledValidationFeatures; + uint32_t disabledValidationFeatureCount; + const VkValidationFeatureDisableEXT* pDisabledValidationFeatures; +} VkValidationFeaturesEXT; +#endif // VK_HEADER_VERSION < 97 + +#if VK_HEADER_VERSION < 101 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_NV (VkStructureType)1000249000 +#define VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_NV 
(VkStructureType)1000249001 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_NV (VkStructureType)1000249002 +typedef enum VkComponentTypeNV +{ + VK_COMPONENT_TYPE_FLOAT16_NV = 0, + VK_COMPONENT_TYPE_FLOAT32_NV = 1, + VK_COMPONENT_TYPE_FLOAT64_NV = 2, + VK_COMPONENT_TYPE_SINT8_NV = 3, + VK_COMPONENT_TYPE_SINT16_NV = 4, + VK_COMPONENT_TYPE_SINT32_NV = 5, + VK_COMPONENT_TYPE_SINT64_NV = 6, + VK_COMPONENT_TYPE_UINT8_NV = 7, + VK_COMPONENT_TYPE_UINT16_NV = 8, + VK_COMPONENT_TYPE_UINT32_NV = 9, + VK_COMPONENT_TYPE_UINT64_NV = 10, + VK_COMPONENT_TYPE_BEGIN_RANGE_NV = VK_COMPONENT_TYPE_FLOAT16_NV, + VK_COMPONENT_TYPE_END_RANGE_NV = VK_COMPONENT_TYPE_UINT64_NV, + VK_COMPONENT_TYPE_RANGE_SIZE_NV = (VK_COMPONENT_TYPE_UINT64_NV - VK_COMPONENT_TYPE_FLOAT16_NV + 1), + VK_COMPONENT_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkComponentTypeNV; +typedef enum VkScopeNV +{ + VK_SCOPE_DEVICE_NV = 1, + VK_SCOPE_WORKGROUP_NV = 2, + VK_SCOPE_SUBGROUP_NV = 3, + VK_SCOPE_QUEUE_FAMILY_NV = 5, + VK_SCOPE_BEGIN_RANGE_NV = VK_SCOPE_DEVICE_NV, + VK_SCOPE_END_RANGE_NV = VK_SCOPE_QUEUE_FAMILY_NV, + VK_SCOPE_RANGE_SIZE_NV = (VK_SCOPE_QUEUE_FAMILY_NV - VK_SCOPE_DEVICE_NV + 1), + VK_SCOPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkScopeNV; +typedef struct VkCooperativeMatrixPropertiesNV +{ + VkStructureType sType; + void* pNext; + uint32_t MSize; + uint32_t NSize; + uint32_t KSize; + VkComponentTypeNV AType; + VkComponentTypeNV BType; + VkComponentTypeNV CType; + VkComponentTypeNV DType; + VkScopeNV scope; +} VkCooperativeMatrixPropertiesNV; +typedef struct VkPhysicalDeviceCooperativeMatrixFeaturesNV +{ + VkStructureType sType; + void* pNext; + VkBool32 cooperativeMatrix; + VkBool32 cooperativeMatrixRobustBufferAccess; +} VkPhysicalDeviceCooperativeMatrixFeaturesNV; +typedef struct VkPhysicalDeviceCooperativeMatrixPropertiesNV +{ + VkStructureType sType; + void* pNext; + VkShaderStageFlags cooperativeMatrixSupportedStages; +} VkPhysicalDeviceCooperativeMatrixPropertiesNV; +typedef VkResult(VKAPI_PTR* PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixPropertiesNV* pProperties); +#endif // VK_HEADER_VERSION < 101 + +#if VK_HEADER_VERSION < 121 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD (VkStructureType)1000229000 +#define VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD (VkMemoryPropertyFlagBits)0x00000040 +#define VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD (VkMemoryPropertyFlagBits)0x00000040 +typedef struct VkPhysicalDeviceCoherentMemoryFeaturesAMD +{ + VkStructureType sType; + void* pNext; + VkBool32 deviceCoherentMemory; +} VkPhysicalDeviceCoherentMemoryFeaturesAMD; +#endif // VK_HEADER_VERSION < 121 + +#if VK_HEADER_VERSION < 129 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR (VkStructureType)1000257000 +#define VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR (VkStructureType)1000244001 +#define VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO_KHR (VkStructureType)1000257002 +#define VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO_KHR (VkStructureType)1000257003 +#define VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO_KHR (VkStructureType)1000257004 +#define VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR (VkBufferCreateFlagBits)0x00020000 +#define VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR (VkBufferUsageFlagBits)0x00020000 +#define VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR (VkMemoryAllocateFlagBits)0x00000002 +#define 
VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR (VkMemoryAllocateFlagBits)0x00000004 +typedef struct VkPhysicalDeviceBufferDeviceAddressFeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} VkPhysicalDeviceBufferDeviceAddressFeaturesKHR; +typedef struct VkBufferDeviceAddressInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferDeviceAddressInfoKHR; +typedef struct VkBufferOpaqueCaptureAddressCreateInfoKHR +{ + VkStructureType sType; + const void* pNext; + uint64_t opaqueCaptureAddress; +} VkBufferOpaqueCaptureAddressCreateInfoKHR; +typedef struct VkMemoryOpaqueCaptureAddressAllocateInfoKHR +{ + VkStructureType sType; + const void* pNext; + uint64_t opaqueCaptureAddress; +} VkMemoryOpaqueCaptureAddressAllocateInfoKHR; +typedef struct VkDeviceMemoryOpaqueCaptureAddressInfoKHR +{ + VkStructureType sType; + const void* pNext; + VkDeviceMemory memory; +} VkDeviceMemoryOpaqueCaptureAddressInfoKHR; +typedef VkDeviceAddress(VKAPI_PTR* PFN_vkGetBufferDeviceAddressKHR)(VkDevice device, const VkBufferDeviceAddressInfoKHR* pInfo); +typedef uint64_t(VKAPI_PTR* PFN_vkGetBufferOpaqueCaptureAddressKHR)(VkDevice device, const VkBufferDeviceAddressInfoKHR* pInfo); +typedef uint64_t(VKAPI_PTR* PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR)(VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfoKHR* pInfo); +#endif // VK_HEADER_VERSION < 129 + +#if VK_HEADER_VERSION < 208 +typedef enum VkInstanceCreateFlagBits +{ + VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR = 0x00000001, + VK_INSTANCE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkInstanceCreateFlagBits; +#endif // VK_HEADER_VERSION < 208 + +#if VK_HEADER_VERSION < 255 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR (VkStructureType)1000506000 +#define VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR (VkStructureType)1000506001 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR (VkStructureType)1000506002 +typedef enum VkComponentTypeKHR +{ + VK_COMPONENT_TYPE_FLOAT16_KHR = 0, + VK_COMPONENT_TYPE_FLOAT32_KHR = 1, + VK_COMPONENT_TYPE_FLOAT64_KHR = 2, + VK_COMPONENT_TYPE_SINT8_KHR = 3, + VK_COMPONENT_TYPE_SINT16_KHR = 4, + VK_COMPONENT_TYPE_SINT32_KHR = 5, + VK_COMPONENT_TYPE_SINT64_KHR = 6, + VK_COMPONENT_TYPE_UINT8_KHR = 7, + VK_COMPONENT_TYPE_UINT16_KHR = 8, + VK_COMPONENT_TYPE_UINT32_KHR = 9, + VK_COMPONENT_TYPE_UINT64_KHR = 10, + VK_COMPONENT_TYPE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkComponentTypeKHR; +typedef enum VkScopeKHR +{ + VK_SCOPE_DEVICE_KHR = 1, + VK_SCOPE_WORKGROUP_KHR = 2, + VK_SCOPE_SUBGROUP_KHR = 3, + VK_SCOPE_QUEUE_FAMILY_KHR = 5, + VK_SCOPE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkScopeKHR; +typedef struct VkCooperativeMatrixPropertiesKHR +{ + VkStructureType sType; + void* pNext; + uint32_t MSize; + uint32_t NSize; + uint32_t KSize; + VkComponentTypeKHR AType; + VkComponentTypeKHR BType; + VkComponentTypeKHR CType; + VkComponentTypeKHR ResultType; + VkBool32 saturatingAccumulation; + VkScopeKHR scope; +} VkCooperativeMatrixPropertiesKHR; +typedef struct VkPhysicalDeviceCooperativeMatrixFeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 cooperativeMatrix; + VkBool32 cooperativeMatrixRobustBufferAccess; +} VkPhysicalDeviceCooperativeMatrixFeaturesKHR; +typedef struct VkPhysicalDeviceCooperativeMatrixPropertiesKHR +{ + VkStructureType sType; + void* pNext; + VkShaderStageFlags cooperativeMatrixSupportedStages; +} 
VkPhysicalDeviceCooperativeMatrixPropertiesKHR; +typedef VkResult(VKAPI_PTR* PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixPropertiesKHR* pProperties); +#endif // VK_HEADER_VERSION < 255 + +#endif // NCNN_VULKAN_HEADER_FIX_H diff --git a/linux/lib/cmake/ncnn/ncnn-release.cmake b/linux/lib/cmake/ncnn/ncnn-release.cmake new file mode 100644 index 0000000..fe14fff --- /dev/null +++ b/linux/lib/cmake/ncnn/ncnn-release.cmake @@ -0,0 +1,19 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "ncnn" for configuration "Release" +set_property(TARGET ncnn APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(ncnn PROPERTIES + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libncnn.so.1.0.20230920" + IMPORTED_SONAME_RELEASE "libncnn.so.1" + ) + +list(APPEND _cmake_import_check_targets ncnn ) +list(APPEND _cmake_import_check_files_for_ncnn "${_IMPORT_PREFIX}/lib/libncnn.so.1.0.20230920" ) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) diff --git a/linux/lib/cmake/ncnn/ncnn.cmake b/linux/lib/cmake/ncnn/ncnn.cmake new file mode 100644 index 0000000..5737c77 --- /dev/null +++ b/linux/lib/cmake/ncnn/ncnn.cmake @@ -0,0 +1,108 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8) + message(FATAL_ERROR "CMake >= 2.8.0 required") +endif() +if(CMAKE_VERSION VERSION_LESS "2.8.3") + message(FATAL_ERROR "CMake >= 2.8.3 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.8.3...3.25) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. +set(_cmake_targets_defined "") +set(_cmake_targets_not_defined "") +set(_cmake_expected_targets "") +foreach(_cmake_expected_target IN ITEMS ncnn) + list(APPEND _cmake_expected_targets "${_cmake_expected_target}") + if(TARGET "${_cmake_expected_target}") + list(APPEND _cmake_targets_defined "${_cmake_expected_target}") + else() + list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}") + endif() +endforeach() +unset(_cmake_expected_target) +if(_cmake_targets_defined STREQUAL _cmake_expected_targets) + unset(_cmake_targets_defined) + unset(_cmake_targets_not_defined) + unset(_cmake_expected_targets) + unset(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() +if(NOT _cmake_targets_defined STREQUAL "") + string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}") + string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}") + message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n") +endif() +unset(_cmake_targets_defined) +unset(_cmake_targets_not_defined) +unset(_cmake_expected_targets) + + +# Compute the installation prefix relative to this file. 
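+# (ncnn.cmake lives in <prefix>/lib/cmake/ncnn; the first call below takes this
+# file's directory and the next three strip ncnn/, cmake/ and lib/ to recover
+# <prefix>.)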
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +if(_IMPORT_PREFIX STREQUAL "/") + set(_IMPORT_PREFIX "") +endif() + +# Create imported target ncnn +add_library(ncnn SHARED IMPORTED) + +set_target_properties(ncnn PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include/ncnn" + INTERFACE_LINK_LIBRARIES "OpenMP::OpenMP_CXX;Threads::Threads;Vulkan::Vulkan" + INTERFACE_POSITION_INDEPENDENT_CODE "ON" +) + +if(CMAKE_VERSION VERSION_LESS 2.8.12) + message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.") +endif() + +# Load information for each installed configuration. +file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/ncnn-*.cmake") +foreach(_cmake_config_file IN LISTS _cmake_config_files) + include("${_cmake_config_file}") +endforeach() +unset(_cmake_config_file) +unset(_cmake_config_files) + +# Cleanup temporary variables. +set(_IMPORT_PREFIX) + +# Loop over all imported files and verify that they actually exist +foreach(_cmake_target IN LISTS _cmake_import_check_targets) + foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}") + if(NOT EXISTS "${_cmake_file}") + message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file + \"${_cmake_file}\" +but this file does not exist. Possible reasons include: +* The file was deleted, renamed, or moved to another location. +* An install or uninstall procedure did not complete successfully. +* The installation package was faulty and contained + \"${CMAKE_CURRENT_LIST_FILE}\" +but not all the files it references. +") + endif() + endforeach() + unset(_cmake_file) + unset("_cmake_import_check_files_for_${_cmake_target}") +endforeach() +unset(_cmake_target) +unset(_cmake_import_check_targets) + +# This file does not depend on other imported targets which have +# been exported from the same project but in a separate export set. + +# Commands beyond this point should not need to know the version. 
+set(CMAKE_IMPORT_FILE_VERSION) +cmake_policy(POP) diff --git a/linux/lib/cmake/ncnn/ncnnConfig.cmake b/linux/lib/cmake/ncnn/ncnnConfig.cmake new file mode 100644 index 0000000..abb2dd6 --- /dev/null +++ b/linux/lib/cmake/ncnn/ncnnConfig.cmake @@ -0,0 +1,42 @@ +set(NCNN_OPENMP ON) +set(NCNN_THREADS ON) +set(NCNN_VULKAN ON) +set(NCNN_SHARED_LIB ON) +set(NCNN_SYSTEM_GLSLANG OFF) + +if(NCNN_OPENMP) + find_package(OpenMP) +endif() + +if(NCNN_THREADS) + set(CMAKE_THREAD_PREFER_PTHREAD TRUE) + set(THREADS_PREFER_PTHREAD_FLAG TRUE) + find_package(Threads REQUIRED) +endif() + +if(NCNN_VULKAN) + find_package(Vulkan REQUIRED) + + if(NOT NCNN_SHARED_LIB) + if(NCNN_SYSTEM_GLSLANG) + find_package(glslang QUIET) + if(NOT glslang_FOUND) + set(GLSLANG_TARGET_DIR "") + include(${GLSLANG_TARGET_DIR}/OSDependentTargets.cmake) + include(${GLSLANG_TARGET_DIR}/OGLCompilerTargets.cmake) + if(EXISTS "${GLSLANG_TARGET_DIR}/HLSLTargets.cmake") + # hlsl support can be optional + include("${GLSLANG_TARGET_DIR}/HLSLTargets.cmake") + endif() + include(${GLSLANG_TARGET_DIR}/glslangTargets.cmake) + include(${GLSLANG_TARGET_DIR}/SPIRVTargets.cmake) + endif() + else() + set(glslang_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../lib/cmake/glslang") + find_package(glslang QUIET) + endif() + + endif() +endif() + +include(${CMAKE_CURRENT_LIST_DIR}/ncnn.cmake) diff --git a/linux/lib/libncnn.so b/linux/lib/libncnn.so new file mode 120000 index 0000000..f9f93ce --- /dev/null +++ b/linux/lib/libncnn.so @@ -0,0 +1 @@ +libncnn.so.1 \ No newline at end of file diff --git a/linux/lib/libncnn.so.1 b/linux/lib/libncnn.so.1 new file mode 120000 index 0000000..428e460 --- /dev/null +++ b/linux/lib/libncnn.so.1 @@ -0,0 +1 @@ +libncnn.so.1.0.20230920 \ No newline at end of file diff --git a/linux/lib/libncnn.so.1.0.20230920 b/linux/lib/libncnn.so.1.0.20230920 new file mode 100644 index 0000000..88a5d12 Binary files /dev/null and b/linux/lib/libncnn.so.1.0.20230920 differ diff --git a/linux/lib/pkgconfig/ncnn.pc b/linux/lib/pkgconfig/ncnn.pc new file mode 100644 index 0000000..f1190cf --- /dev/null +++ b/linux/lib/pkgconfig/ncnn.pc @@ -0,0 +1,11 @@ +prefix=${pcfiledir}/../.. 
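+# ${pcfiledir} is pkg-config's built-in variable for the directory containing
+# this .pc file, so the prefix resolves correctly wherever the SDK tree lives.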
librarydir=${prefix}/lib +includedir=${prefix}/include + +Name: ncnn +Description: high-performance neural network inference framework optimized for the mobile platform +Version: 1.0.20230920 +URL: https://github.com/Tencent/ncnn +Libs: -L"${librarydir}" -lncnn +Cflags: -I"${includedir}" + diff --git a/linux/libncnn.so b/linux/libncnn.so new file mode 100644 index 0000000..88a5d12 Binary files /dev/null and b/linux/libncnn.so differ diff --git a/linux/main.cpp b/linux/main.cpp new file mode 100644 index 0000000..54a3cdb --- /dev/null +++ b/linux/main.cpp @@ -0,0 +1,10 @@ +#include "mainwindow.h" +#include <QApplication> + +int main(int argc, char *argv[]) +{ + QApplication a(argc, argv); + MainWindow w; + w.show(); + return a.exec(); +} diff --git a/linux/mainwindow.cpp b/linux/mainwindow.cpp new file mode 100644 index 0000000..f786e28 --- /dev/null +++ b/linux/mainwindow.cpp @@ -0,0 +1,829 @@ +#include "mainwindow.h" + +const int H = 512; +const int W = 512; + +double X[H], Y[W]; +double xx[H][W], yy[H][W]; +double distance[H][W]; + +class Where : public ncnn::Layer +{ +public: + Where() + { + one_blob_only = true; + } + + virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const + { + int w = bottom_blob.w; + int h = bottom_blob.h; + int channels = bottom_blob.c; + + //printf("[Where] (%d,%d,%d)\n",channels,h,w); + + top_blob.create(w, h, channels, 4u, 1, opt.blob_allocator); + if (top_blob.empty()) + return -100; + +#pragma omp parallel for num_threads(opt.num_threads) + for (int p = 0; p < channels; p++) + { + const float* src = bottom_blob.channel(p); + float* dst = top_blob.channel(p); + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + if (src[0] >= 0) + dst[0] = 1.0f; + else + dst[0] = 0.2f; + src++; + dst++; + } + } + } + + return 0; + } +}; + +DEFINE_LAYER_CREATOR(Where) + +class ConvTranspose2d1 : public ncnn::Layer +{ +public: + ConvTranspose2d1() + { + one_blob_only = false; + } + + virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs, std::vector<ncnn::Mat>& top_blobs, const ncnn::Option& opt) const + { + const ncnn::Mat& bottom_blob = bottom_blobs[0]; + const ncnn::Mat& _weight_data = bottom_blobs[1]; + ncnn::Mat& top_blob = top_blobs[0]; + + //printf("[ConvTranspose2d1] (%d,%d,%d)*(%d,%d,%d,%d)\n", bottom_blob.c, bottom_blob.h, bottom_blob.w, _weight_data.c, _weight_data.d, _weight_data.h, _weight_data.w); + + const int _kernel_w = _weight_data.w; + const int _kernel_h = _weight_data.h; + const int _num_output = _weight_data.c * _weight_data.elempack; + + ncnn::Mat weight_data_flattened; + ncnn::flatten(_weight_data, weight_data_flattened, opt); + if (weight_data_flattened.empty()) + return -100; + + // weight_data_flattened as pack1 + weight_data_flattened.w *= weight_data_flattened.elempack; + weight_data_flattened.elemsize /= weight_data_flattened.elempack; + weight_data_flattened.elempack = 1; + + ncnn::Mat bias_data_flattened; + + ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution); + + const int dilation_w = 1; + const int dilation_h = 1; + const int stride_w = 2; + const int stride_h = 2; + const int pad_left = 0; + const int pad_right = 0; + const int pad_top = 0; + const int pad_bottom = 0; + const int bias_term = 0; + const int output_pad_right = 0; + const int output_pad_bottom = 0; + + ncnn::ParamDict pd; + pd.set(0, _num_output); + pd.set(1, _kernel_w); + pd.set(11, _kernel_h); + pd.set(2, dilation_w); + pd.set(12, dilation_h); + pd.set(3, stride_w); + pd.set(13, stride_h); + pd.set(4, pad_left); + pd.set(15,
pad_right); + pd.set(14, pad_top); + pd.set(16, pad_bottom); + pd.set(18, output_pad_right); + pd.set(19, output_pad_bottom); + pd.set(5, bias_term); + pd.set(6, weight_data_flattened.w); + pd.set(9, 0); + pd.set(10, ncnn::Mat()); + + + op->load_param(pd); + + ncnn::Mat weights[2]; + weights[0] = weight_data_flattened; + weights[1] = bias_data_flattened; + + op->load_model(ncnn::ModelBinFromMatArray(weights)); + + op->create_pipeline(opt); + + op->forward(bottom_blob, top_blob, opt); + + op->destroy_pipeline(opt); + + delete op; + + return 0; + } +}; + +DEFINE_LAYER_CREATOR(ConvTranspose2d1) + +class ConvTranspose2d2 : public ncnn::Layer +{ +public: + ConvTranspose2d2() + { + one_blob_only = false; + } + + virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs, std::vector<ncnn::Mat>& top_blobs, const ncnn::Option& opt) const + { + const ncnn::Mat& bottom_blob = bottom_blobs[0]; + const ncnn::Mat& _weight_data = bottom_blobs[1]; + ncnn::Mat& top_blob = top_blobs[0]; + + // transpose weight from cdhw to dchw + ncnn::Layer* transpose_op = ncnn::create_layer(ncnn::LayerType::Permute); + ncnn::ParamDict transpose_pd; + transpose_pd.set(0, 6); // WHDC->WHCD + transpose_op->load_param(transpose_pd); + transpose_op->create_pipeline(opt); + ncnn::Mat _weight_data_T; + transpose_op->forward(_weight_data, _weight_data_T, opt); + + //printf("[ConvTranspose2d2] (%d,%d,%d)*(%d,%d,%d,%d)\n", bottom_blob.c, bottom_blob.h, bottom_blob.w, _weight_data_T.c, _weight_data_T.d, _weight_data_T.h, _weight_data_T.w); + + const int _kernel_w = _weight_data_T.w; + const int _kernel_h = _weight_data_T.h; + const int _num_output = _weight_data_T.c * _weight_data_T.elempack; + + ncnn::Mat weight_data_flattened; + ncnn::flatten(_weight_data_T, weight_data_flattened, opt); + if (weight_data_flattened.empty()) + return -100; + + // weight_data_flattened as pack1 + weight_data_flattened.w *= weight_data_flattened.elempack; + weight_data_flattened.elemsize /= weight_data_flattened.elempack; + weight_data_flattened.elempack = 1; + + ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Deconvolution); + const int dilation_w = 1; + const int dilation_h = 1; + const int stride_w = 1; + const int stride_h = 1; + const int pad_left = 1; + const int pad_right = 1; + const int pad_top = 1; + const int pad_bottom = 1; + const int bias_term = 0; + const int output_pad_right = 0; + const int output_pad_bottom = 0; + ncnn::ParamDict pd; + pd.set(0, _num_output); + pd.set(1, _kernel_w); + pd.set(11, _kernel_h); + pd.set(2, dilation_w); + pd.set(12, dilation_h); + pd.set(3, stride_w); + pd.set(13, stride_h); + pd.set(4, pad_left); + pd.set(15, pad_right); + pd.set(14, pad_top); + pd.set(16, pad_bottom); + pd.set(18, output_pad_right); + pd.set(19, output_pad_bottom); + pd.set(5, bias_term); + pd.set(6, weight_data_flattened.w); + pd.set(9, 0); + pd.set(10, ncnn::Mat()); + op->load_param(pd); + + ncnn::Mat weights[2]; + ncnn::Mat bias_data_flattened; + weights[0] = weight_data_flattened; + weights[1] = bias_data_flattened; + + op->load_model(ncnn::ModelBinFromMatArray(weights)); + op->create_pipeline(opt); + op->forward(bottom_blob, top_blob, opt); + + + op->destroy_pipeline(opt); + delete op; + + transpose_op->destroy_pipeline(opt); + delete transpose_op; + + return 0; + } +}; + +DEFINE_LAYER_CREATOR(ConvTranspose2d2) + +class BConv2d1 : public ncnn::Layer +{ +public: + BConv2d1() + { + one_blob_only = false; + } + + virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs, std::vector<ncnn::Mat>& top_blobs, const ncnn::Option& opt) const + { + const ncnn::Mat&
+class BConv2d1 : public ncnn::Layer
+{
+public:
+    BConv2d1()
+    {
+        one_blob_only = false;
+    }
+
+    virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs, std::vector<ncnn::Mat>& top_blobs, const ncnn::Option& opt) const
+    {
+        const ncnn::Mat& bottom_blob = bottom_blobs[0];
+        const ncnn::Mat& _weight_data = bottom_blobs[1];
+
+        //printf("[BConv2d1] (%d,%d,%d)%d*(%d,%d,%d,%d)%d\n", bottom_blob.c, bottom_blob.h, bottom_blob.w, bottom_blob.elempack, _weight_data.c, _weight_data.d, _weight_data.h, _weight_data.w, _weight_data.elempack);
+
+        // set up the conv2d parameters
+        const int _kernel_w = _weight_data.w;
+        const int _kernel_h = _weight_data.h;
+        const int _num_output = _weight_data.c * _weight_data.elempack;
+        const int _num_input = bottom_blob.c;
+
+        ncnn::Mat weight_data_flattened;
+        ncnn::flatten(_weight_data, weight_data_flattened, opt);
+        if (weight_data_flattened.empty())
+            return -100;
+
+        weight_data_flattened.w *= weight_data_flattened.elempack;
+        weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+        weight_data_flattened.elempack = 1;
+
+        ncnn::Mat bias_data_flattened;
+        ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Convolution);
+        ncnn::ParamDict pd;
+        pd.set(0, _num_output);
+        pd.set(1, _kernel_w);
+        pd.set(11, _kernel_h);
+        pd.set(2, 2);
+        pd.set(21, 2);
+        pd.set(3, 1);
+        pd.set(31, 1);
+        pd.set(4, 0);
+        pd.set(15, 0);
+        pd.set(14, 0);
+        pd.set(16, 0);
+        pd.set(18, 0);
+        pd.set(5, 0);
+        pd.set(6, weight_data_flattened.w);
+        pd.set(8, 0);
+        op->load_param(pd);
+
+        ncnn::Mat weights[2];
+        weights[0] = weight_data_flattened;
+        weights[1] = bias_data_flattened;
+        op->load_model(ncnn::ModelBinFromMatArray(weights));
+        op->create_pipeline(opt);
+
+        // run one convolution per batch sample; note that bottom_blob and
+        // top_blob_set may differ in elempack here
+        std::vector<ncnn::Mat> top_blob_set(_num_input);
+        for (int i = 0; i < _num_input; i++) {
+            op->forward(bottom_blob.channel(i), top_blob_set[i], opt);
+        }
+
+        // concatenate the per-sample results
+        std::vector<ncnn::Mat> cat_out(1);
+        ncnn::Layer* cat = ncnn::create_layer(ncnn::LayerType::Concat);
+        ncnn::ParamDict cat_pd;
+        cat_pd.set(0, 0);
+        cat->load_param(cat_pd);
+        cat->create_pipeline(opt);
+        cat->forward(top_blob_set, cat_out, opt);
+
+        // reshape
+        ncnn::Layer* reshape = ncnn::create_layer(ncnn::LayerType::Reshape);
+        ncnn::ParamDict reshape_pd;
+        reshape_pd.set(0, cat_out[0].w);
+        reshape_pd.set(1, cat_out[0].h);
+        reshape_pd.set(11, _num_output);
+        reshape_pd.set(2, _num_input);
+        reshape->load_param(reshape_pd);
+        reshape->create_pipeline(opt);
+        reshape->forward(cat_out[0], top_blobs[0], opt);
+
+        // clean up
+        reshape->destroy_pipeline(opt);
+        delete reshape;
+
+        cat->destroy_pipeline(opt);
+        delete cat;
+
+        op->destroy_pipeline(opt);
+        delete op;
+
+        return 0;
+    }
+};
+
+DEFINE_LAYER_CREATOR(BConv2d1)
+
+class BConv2d2 : public ncnn::Layer
+{
+public:
+    BConv2d2()
+    {
+        one_blob_only = false;
+    }
+
+    virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs, std::vector<ncnn::Mat>& top_blobs, const ncnn::Option& opt) const
+    {
+        const ncnn::Mat& bottom_blob = bottom_blobs[0];
+        const ncnn::Mat& _weight_data = bottom_blobs[1];
+        ncnn::Mat& top_blob = top_blobs[0];
+
+        //printf("[BConv2d2] (%d,%d,%d)%d*(%d,%d,%d,%d)%d\n", bottom_blob.c, bottom_blob.h, bottom_blob.w, bottom_blob.elempack, _weight_data.c, _weight_data.d, _weight_data.h, _weight_data.w, _weight_data.elempack);
+
+        // set up the conv2d parameters
+        const int _kernel_w = _weight_data.w;
+        const int _kernel_h = _weight_data.h;
+        const int _num_output = _weight_data.c * _weight_data.elempack;
+        const int _num_input = bottom_blob.c;
+
+        ncnn::Mat weight_data_flattened;
+        ncnn::flatten(_weight_data, weight_data_flattened, opt);
+        if (weight_data_flattened.empty())
+            return -100;
+
+        weight_data_flattened.w *= weight_data_flattened.elempack;
+        weight_data_flattened.elemsize /= weight_data_flattened.elempack;
+        weight_data_flattened.elempack = 1;
+
+        ncnn::Mat bias_data_flattened;
+        ncnn::Layer* op = ncnn::create_layer(ncnn::LayerType::Convolution);
+        ncnn::ParamDict pd;
+        pd.set(0, _num_output);
+        pd.set(1, _kernel_w);
+        pd.set(11, _kernel_h);
+        pd.set(2, 1);
+        pd.set(21, 1);
+        pd.set(3, 1);
+        pd.set(31, 1);
+        pd.set(4, 1);
+        pd.set(15, 1);
+        pd.set(14, 1);
+        pd.set(16, 1);
+        pd.set(18, 0);
+        pd.set(5, 0);
+        pd.set(6, weight_data_flattened.w);
+        op->load_param(pd);
+
+        ncnn::Mat weights[2];
+        weights[0] = weight_data_flattened;
+        weights[1] = bias_data_flattened;
+        op->load_model(ncnn::ModelBinFromMatArray(weights));
+        op->create_pipeline(opt);
+
+        // run one convolution per batch sample; note that bottom_blob and
+        // top_blob_set may differ in elempack here
+        std::vector<ncnn::Mat> top_blob_set(_num_input);
+        for (int i = 0; i < _num_input; i++) {
+            op->forward(bottom_blob.channel(i), top_blob_set[i], opt);
+        }
+
+        // concatenate the per-sample results
+        std::vector<ncnn::Mat> cat_out(1);
+        ncnn::Layer* cat = ncnn::create_layer(ncnn::LayerType::Concat);
+        ncnn::ParamDict cat_pd;
+        cat_pd.set(0, 0);
+        cat->load_param(cat_pd);
+        cat->create_pipeline(opt);
+        cat->forward(top_blob_set, cat_out, opt);
+
+        // reshape
+        ncnn::Layer* reshape = ncnn::create_layer(ncnn::LayerType::Reshape);
+        ncnn::ParamDict reshape_pd;
+        reshape_pd.set(0, cat_out[0].w);
+        reshape_pd.set(1, cat_out[0].h);
+        reshape_pd.set(11, _num_output);
+        reshape_pd.set(2, _num_input);
+        reshape->load_param(reshape_pd);
+        reshape->create_pipeline(opt);
+        reshape->forward(cat_out[0], top_blob, opt);
+
+        // clean up
+        reshape->destroy_pipeline(opt);
+        delete reshape;
+
+        cat->destroy_pipeline(opt);
+        delete cat;
+
+        op->destroy_pipeline(opt);
+        delete op;
+
+        return 0;
+    }
+};
+
+DEFINE_LAYER_CREATOR(BConv2d2)
+
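+// ---------------------------------------------------------------------------
+// Helpers: rand() draws the 512-dim latent z from N(0,1) with an OpenCV RNG
+// seeded by the user, so the same seed reproduces the same image.
+// linspace()/meshgrid() precompute per-pixel coordinate grids (xx, yy) that
+// the drag loop uses to select the circular neighbourhood around the handle
+// point.
+// ---------------------------------------------------------------------------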
+// draw z ~ N(0,1); the ncnn::Mat wraps the first 512 floats of the buffer
+ncnn::Mat rand(int seed)
+{
+    cv::Mat cv_x(cv::Size(512, 1), CV_32FC4);
+    cv::RNG rng(seed);
+    rng.fill(cv_x, cv::RNG::NORMAL, 0, 1);
+    ncnn::Mat x_mat(512, 1, (void*)cv_x.data);
+    return x_mat.clone();
+}
+
+void linspace(double* arr, double start, double end, int size)
+{
+    double step = (end - start) / (size - 1);
+    for (int i = 0; i < size; i++)
+    {
+        arr[i] = start + i * step;
+    }
+}
+
+void meshgrid(double* X, double* Y, double(*xx)[W], double(*yy)[W])
+{
+    for (int i = 0; i < H; i++)
+    {
+        for (int j = 0; j < W; j++)
+        {
+            xx[i][j] = X[i];
+            yy[i][j] = Y[j];
+        }
+    }
+}
+
+template<typename T>
+int sign(T val) {
+    return (T(0) < val) - (val < T(0));
+}
+
+MainWindow::MainWindow(QWidget *parent)
+    : QMainWindow(parent)
+{
+    ui.setupUi(this);
+
+    // load the mapping network
+    mapping.load_param("assets/mapping.param");
+    mapping.load_model("assets/mapping.bin");
+
+    // load the generator and register its custom layers
+    generator.register_custom_layer("Where", Where_layer_creator);
+    generator.register_custom_layer("ConvTranspose2d1", ConvTranspose2d1_layer_creator);
+    generator.register_custom_layer("ConvTranspose2d2", ConvTranspose2d2_layer_creator);
+    generator.register_custom_layer("BConv2d1", BConv2d1_layer_creator);
+    generator.register_custom_layer("BConv2d2", BConv2d2_layer_creator);
+    generator.load_param("assets/generator.param");
+    generator.load_model("assets/generator.bin");
+
+    // create the gridsample operator
+    gridsample_opt = generator.opt;
+    gridsample = ncnn::create_layer(ncnn::LayerType::GridSample);
+    ncnn::ParamDict gridsample_pd;
+    gridsample->load_param(gridsample_pd);
+    gridsample->create_pipeline(gridsample_opt);
+
+    // create the interp operator (bilinear, fixed 128x128 output)
+    interp_opt = generator.opt;
+    interp = ncnn::create_layer(ncnn::LayerType::Interp);
+    ncnn::ParamDict interp_pd;
+    interp_pd.set(0, 2);
+    interp_pd.set(3, 128);
+    interp_pd.set(4, 128);
+    interp->load_param(interp_pd);
+    interp->create_pipeline(interp_opt);
+
+    // precompute the coordinate grids
+    linspace(X, 0, H, H);
+    linspace(Y, 0, W, W);
+    meshgrid(X, Y, xx, yy);
+}
+
+MainWindow::~MainWindow()
+{
+    gridsample->destroy_pipeline(gridsample_opt);
+    delete gridsample;
+
+    interp->destroy_pipeline(interp_opt);
+    delete interp;
+}
+
+void MainWindow::showImage(cv::Mat in)
+{
+    cv::Mat show;
+    cv::resize(in, show, cv::Size(1024, 1024));
+    QImage qImage(show.data, show.cols, show.rows, static_cast<int>(show.step), QImage::Format_RGB888);
+    ui.show->setPixmap(QPixmap::fromImage(qImage));
+    ui.show->show();
+}
+
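+// "Get Image": run the mapping network on z to obtain the 16x512 latent w,
+// keep a trainable copy of its first 6 rows (the coarse layers that the drag
+// step will optimise; rows 6..15 stay fixed at w0), then render the initial
+// image.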
+void MainWindow::on_getBtn_clicked()
+{
+    int seed = QVariant(ui.seedEdit->text()).toInt();
+
+    // run mapping first to get the initial w
+    ncnn::Mat z = rand(seed);
+    ncnn::Mat w0(512, 16);
+    w0.fill(1.0f);
+    {
+        ncnn::Mat output;
+        ncnn::Extractor ex = mapping.create_extractor();
+        ex.input("/mapping/Cast_output_0", z);
+        ex.extract("/mapping/Sub_2_output_0", output);
+        // broadcast the single mapping output row to all 16 layers
+        float* src = output.row(0);
+        for (int i = 0; i < 16; i++) {
+            float* dst = w0.row(i);
+            for (int j = 0; j < 512; j++) {
+                dst[j] = src[j];
+            }
+        }
+    }
+    ncnn::Mat w = w0.row_range(0, 6).clone();
+
+    // keep a copy of w and w0 for the drag step
+    w_tmp = w.clone();
+    w0_tmp = w0.clone();
+
+    // generate the initial image
+    {
+        ncnn::Mat ws(512, 16);
+        {
+            for (int i = 0; i < 6; i++) {
+                float* src = w.row(i);
+                float* dst = ws.row(i);
+                for (int j = 0; j < 512; j++) {
+                    dst[j] = src[j];
+                }
+            }
+            for (int i = 6; i < 16; i++) {
+                float* src = w0.row(i);
+                float* dst = ws.row(i);
+                for (int j = 0; j < 512; j++) {
+                    dst[j] = src[j];
+                }
+            }
+        }
+
+        ncnn::Mat img;
+        {
+            ncnn::Extractor ex = generator.create_extractor();
+            ex.set_light_mode(true);
+            ex.input("in0", ws);
+            ex.extract("out0", img);
+        }
+        const float _mean_[3] = { -128.0f / 127.5f, -128.0f / 127.5f, -128.0f / 127.5f };
+        const float _norm_[3] = { 127.5f, 127.5f, 127.5f };
+        img.substract_mean_normalize(_mean_, _norm_);
+        cv::Mat image(512, 512, CV_8UC3);
+        img.to_pixels(image.data, ncnn::Mat::PIXEL_RGB);
+        showwing = image.clone();
+
+        showImage(showwing);
+
+        qDebug() << "[Init] seed:" << seed;
+    }
+}
+
+void MainWindow::mousePressEvent(QMouseEvent* event)
+{
+    if (event->button() == Qt::LeftButton) {
+
+        QPoint globalPos = event->globalPos();
+
+        QPoint labelPos = ui.show->mapToGlobal(QPoint(0, 0));
+        int labelWidth = ui.show->width();
+        int labelHeight = ui.show->height();
+
+        int relativeX = globalPos.x() - labelPos.x();
+        int relativeY = globalPos.y() - labelPos.y();
+
+        // the label shows the 512x512 image upscaled to 1024x1024,
+        // so map the click back to image coordinates
+        relativeX = relativeX / 2;
+        relativeY = relativeY / 2;
+
+        if (points[0] == -1) {
+            cv::Mat show_point = showwing.clone();
+            cv::circle(show_point, cv::Point(relativeX, relativeY), 3, cv::Scalar(255, 0, 0), -1);
+            showImage(show_point);
+
+            points[0] = relativeY;
+            points[1] = relativeX;
+
+            qDebug() << "[Choose] start point: (" << points[0] << "," << points[1] << ")";
+        }
+        else if (targets[0] == -1) {
+            cv::Mat show_point = showwing.clone();
+            cv::circle(show_point, cv::Point(int(points[1]), int(points[0])), 3, cv::Scalar(255, 0, 0), -1);
+            cv::circle(show_point, cv::Point(relativeX, relativeY), 3, cv::Scalar(0, 255, 0), -1);
+            showImage(show_point);
+
+            targets[0] = relativeY;
+            targets[1] = relativeX;
+
+            qDebug() << "[Choose] target point: (" << targets[0] << "," << targets[1] << ")";
+        }
+
+    }
+}
+
+void MainWindow::on_cleanBtn_clicked()
+{
+    points[0] = -1;
+    points[1] = -1;
+    targets[0] = -1;
+    targets[1] = -1;
+    showImage(showwing);
+    qDebug() << "[Clean] point";
+}
+
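+// "Drag" implements the DragGAN iteration: up to 200 rounds of point tracking
+// followed by one motion-supervision step. Roughly, per iteration (F is the
+// 256-channel generator feature map resized to 512x512):
+//
+//   p    <- argmin over the r2-patch of ||F(q) - F_ref||_2       (tracking)
+//   d    <- (target - p) / ||target - p||                        (unit step)
+//   g(q) <- sign(F(q) - F(q - d)) / 256  for q within r1 of p    (L1 gradient)
+//   w    <- w - lr * (dL/dw) * w         on the 6 trainable rows
+//
+// The loop finishes early once the handle point is within one pixel of the
+// target.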
+void MainWindow::on_dragBtn_clicked()
+{
+    float lr = 0.1f;
+    ncnn::Mat feat_refs;
+    int r1 = 3, r2 = 12;
+
+    ncnn::Mat w = w_tmp.clone();
+    ncnn::Mat w0 = w0_tmp.clone();
+
+    for (int it = 0; it < 200; it++)
+    {
+        ncnn::Mat ws(512, 16);
+        {
+            for (int i = 0; i < 6; i++) {
+                float* src = w.row(i);
+                float* dst = ws.row(i);
+                for (int j = 0; j < 512; j++) {
+                    dst[j] = src[j];
+                }
+            }
+            for (int i = 6; i < 16; i++) {
+                float* src = w0.row(i);
+                float* dst = ws.row(i);
+                for (int j = 0; j < 512; j++) {
+                    dst[j] = src[j];
+                }
+            }
+        }
+
+        {
+            ncnn::Mat feat5;
+            ncnn::Extractor ex = generator.create_extractor();
+            ex.set_light_mode(false);
+            ex.input("in0", ws);
+            ex.extract("out1", feat5);
+
+            ncnn::Mat feat_resize;
+            ncnn::resize_bilinear(feat5, feat_resize, W, H);
+
+            // on the first iteration, record the reference features at the handle point
+            if (feat_refs.empty()) {
+                feat_refs.create(256);
+                for (int i = 0; i < 256; i++) {
+                    feat_refs[i] = feat_resize.channel(i).row(int(std::round(points[0])))[int(std::round(points[1]))];
+                }
+            }
+
+            // Point tracking with feature matching
+            int r = std::round(r2 / 512.0 * H);
+            int up = std::max(points[0] - r, (double)0.0);
+            int down = std::min(points[0] + r + 1, (double)H);
+            int left = std::max(points[1] - r, (double)0.0);
+            int right = std::min(points[1] + r + 1, (double)W);
+            int height_patch = down - up;
+            int width_patch = right - left;
+            float min_value = 1e8f;
+            int min_y = -1, min_x = -1;
+            for (int h = 0; h < height_patch; h++) {
+                for (int w = 0; w < width_patch; w++) {
+                    float tmp = 0.0f;
+                    for (int c = 0; c < 256; c++) {
+                        tmp += std::pow(feat_resize.channel(c).row(up + h)[left + w] - feat_refs[c], 2);
+                    }
+                    tmp = std::sqrt(tmp);
+                    if ((min_y == -1 && min_x == -1) || tmp < min_value) {
+                        min_value = tmp;
+                        min_y = up + h;
+                        min_x = left + w;
+                    }
+                }
+            }
+            points[0] = min_y;
+            points[1] = min_x;
+
+            qDebug() << "[Drag " << it << "] current:(" << int(points[0]) << "," << int(points[1]) << "), target:(" << targets[0] << "," << targets[1] << ")";
+
+            //// save intermediate
+            //{
+            //    ncnn::Mat img;
+            //    ex.extract("out0", img);
+            //    const float _mean_[3] = { -128.0f / 127.5f, -128.0f / 127.5f, -128.0f / 127.5f };
+            //    const float _norm_[3] = { 127.5f, 127.5f, 127.5f };
+            //    img.substract_mean_normalize(_mean_, _norm_);
+            //    cv::Mat image(512, 512, CV_8UC3);
+            //    img.to_pixels(image.data, ncnn::Mat::PIXEL_RGB2BGR);
+            //    cv::circle(image, cv::Point(int(points[1]), int(points[0])), 3, cv::Scalar(0, 0, 255), -1);
+            //    cv::circle(image, cv::Point(int(targets[1]), int(targets[0])), 3, cv::Scalar(0, 255, 0), -1);
+            //    cv::imwrite("images/" + std::to_string(it) + ".png", image);
+            //}
+
+            // Motion supervision
+            double direction[2] = { targets[1] - points[1], targets[0] - points[0] };
+            if (std::sqrt(std::pow(direction[0], 2) + std::pow(direction[1], 2)) > 1) {
+
+                // collect the pixels within radius r1 of the handle point
+                std::vector<int> relis, reljs;
+                for (int h = 0; h < H; h++) {
+                    for (int w = 0; w < W; w++) {
+                        if (std::sqrt(std::pow(xx[h][w] - points[0], 2) + std::pow(yy[h][w] - points[1], 2)) < std::round(r1 / 512.0 * H)) {
+                            relis.push_back(h);
+                            reljs.push_back(w);
+                        }
+                    }
+                }
+
+                double direction_norm = std::sqrt(std::pow(direction[0], 2) + std::pow(direction[1], 2));
+                direction[0] /= direction_norm;
+                direction[1] /= direction_norm;
+
+                // sampling grid shifted one unit step against the drag
+                // direction, in normalized [-1, 1] coordinates
+                ncnn::Mat grid;
+                grid.create(2, int(relis.size()), 1, (size_t)4u);
+                for (int w = 0; w < int(relis.size()); w++) {
+                    grid.channel(0).row(w)[0] = (reljs[w] - direction[0]) / (W - 1) * 2 - 1;
+                    grid.channel(0).row(w)[1] = (relis[w] - direction[1]) / (H - 1) * 2 - 1;
+                }
+
+                std::vector<ncnn::Mat> inputs(2);
+                inputs[0] = feat_resize;
+                inputs[1] = grid;
+                std::vector<ncnn::Mat> outputs(1);
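+                // GridSample fetches F at the shifted positions; the sign-based
+                // L1 gradient is scattered into a zeroed 512x512 map, resized by
+                // the interp op back to the raw 128x128 feature resolution, and
+                // fed to "in1", which appears to be a hand-exported backward
+                // path producing the per-layer w gradients out2..out7.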
+                gridsample->forward(inputs, outputs, gridsample_opt);
+                ncnn::Mat& target = outputs[0];
+
+                ncnn::Mat feat5_grad(512, 512, 256, (size_t)4u);
+                feat5_grad.fill(0.0f); // gradient is zero outside the r1 neighbourhood
+                for (int i = 0; i < int(relis.size()); i++) {
+                    for (int c = 0; c < 256; c++) {
+                        feat5_grad.channel(c).row(relis[i])[reljs[i]] = sign(feat_resize.channel(c).row(relis[i])[reljs[i]] - target.channel(c).row(0)[i]) / 256.0;
+                    }
+                }
+                ncnn::Mat feat5_grad_fit;
+                interp->forward(feat5_grad, feat5_grad_fit, interp_opt);
+
+                ex.input("in1", feat5_grad_fit);
+                std::vector<ncnn::Mat> vg(6);
+                ex.extract("out2", vg[0]);
+                ex.extract("out3", vg[1]);
+                ex.extract("out4", vg[2]);
+                ex.extract("out5", vg[3]);
+                ex.extract("out6", vg[4]);
+                ex.extract("out7", vg[5]);
+
+                // update w: only the first 6 rows are optimised
+                for (int i = 0; i < 6; i++) {
+                    for (int j = 0; j < 512; j++) {
+                        w.row(i)[j] = w.row(i)[j] - lr * vg[i].row(0)[j] * w.row(i)[j];
+                    }
+                }
+            }
+            else {
+                qDebug() << "[Finish]";
+
+                ncnn::Mat img;
+                ex.extract("out0", img);
+                const float _mean_[3] = { -128.0f / 127.5f, -128.0f / 127.5f, -128.0f / 127.5f };
+                const float _norm_[3] = { 127.5f, 127.5f, 127.5f };
+                img.substract_mean_normalize(_mean_, _norm_);
+                cv::Mat image(512, 512, CV_8UC3);
+                img.to_pixels(image.data, ncnn::Mat::PIXEL_RGB);
+                showwing = image.clone();
+
+                showImage(showwing);
+
+                break;
+            }
+
+            if (it == 200 - 1) {
+                ncnn::Mat img;
+                ex.extract("out0", img);
+                const float _mean_[3] = { -128.0f / 127.5f, -128.0f / 127.5f, -128.0f / 127.5f };
+                const float _norm_[3] = { 127.5f, 127.5f, 127.5f };
+                img.substract_mean_normalize(_mean_, _norm_);
+                cv::Mat image(512, 512, CV_8UC3);
+                img.to_pixels(image.data, ncnn::Mat::PIXEL_RGB);
+                showwing = image.clone();
+
+                showImage(showwing);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/linux/mainwindow.h b/linux/mainwindow.h
new file mode 100644
index 0000000..2eb90b5
--- /dev/null
+++ b/linux/mainwindow.h
@@ -0,0 +1,64 @@
+#pragma once
+#include <QtWidgets/QMainWindow>
+#include "ui_mainwindow.h"
+#include <QMouseEvent>
+#include <QImage>
+#include <QPixmap>
+#include <QDebug>
+#include <QVariant>
+
+// AVOID moc parse error
+#ifndef Q_MOC_RUN
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <opencv2/core/core.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/opencv.hpp>
+
+#include "ncnn/net.h"
+#include "ncnn/layer.h"
+#include "ncnn/layer_type.h"
+#include "ncnn/benchmark.h"
+#endif
+
+class MainWindow : public QMainWindow
+{
+    Q_OBJECT
+
+public:
+    MainWindow(QWidget *parent = nullptr);
+    ~MainWindow();
+    void showImage(cv::Mat in);
+
+private slots:
+    void on_getBtn_clicked();
+    void on_cleanBtn_clicked();
+    void on_dragBtn_clicked();
+
+protected:
+    void mousePressEvent(QMouseEvent* event);
+
+private:
+    Ui::MainWindowClass ui;
+
+    cv::Mat showwing;
+    double points[2] = { -1,-1 };
+    double targets[2] = { -1,-1 };
+
+    ncnn::Mat w_tmp;
+    ncnn::Mat w0_tmp;
+
+    ncnn::Net mapping;
+    ncnn::Net generator;
+
+    ncnn::Layer* gridsample;
+    ncnn::Option gridsample_opt;
+
+    ncnn::Layer* interp;
+    ncnn::Option interp_opt;
+};
diff --git a/linux/mainwindow.qrc b/linux/mainwindow.qrc
new file mode 100644
index 0000000..68043e7
--- /dev/null
+++ b/linux/mainwindow.qrc
@@ -0,0 +1,4 @@
+<RCC>
+    <qresource prefix="MainWindow">
+    </qresource>
+</RCC>
diff --git a/linux/mainwindow.ui b/linux/mainwindow.ui
new file mode 100644
index 0000000..66fee83
--- /dev/null
+++ b/linux/mainwindow.ui
@@ -0,0 +1,126 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>MainWindowClass</class>
+ <widget class="QMainWindow" name="MainWindowClass">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>1024</width>
+    <height>1077</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>MainWindow</string>
+  </property>
+  <widget class="QWidget" name="centralWidget">
+   <layout class="QVBoxLayout" name="verticalLayout">
+    <property name="spacing">
+     <number>0</number>
+    </property>
+    <property name="leftMargin">
+     <number>0</number>
+    </property>
+    <property name="topMargin">
+     <number>0</number>
+    </property>
+    <property name="rightMargin">
+     <number>0</number>
+    </property>
+    <property name="bottomMargin">
+     <number>0</number>
+    </property>
+    <item>
+     <layout class="QHBoxLayout" name="horizontalLayout">
+      <item>
+       <widget class="QLineEdit" name="seedEdit"/>
+      </item>
+      <item>
+       <widget class="QPushButton" name="getBtn">
+        <property name="text">
+         <string>Get Image</string>
+        </property>
+       </widget>
+      </item>
+      <item>
+       <spacer name="horizontalSpacer">
+        <property name="orientation">
+         <enum>Qt::Horizontal</enum>
+        </property>
+        <property name="sizeHint" stdset="0">
+         <size>
+          <width>40</width>
+          <height>20</height>
+         </size>
+        </property>
+       </spacer>
+      </item>
+      <item>
+       <widget class="QPushButton" name="cleanBtn">
+        <property name="text">
+         <string>Clean Point</string>
+        </property>
+       </widget>
+      </item>
+      <item>
+       <spacer name="horizontalSpacer_2">
+        <property name="orientation">
+         <enum>Qt::Horizontal</enum>
+        </property>
+        <property name="sizeHint" stdset="0">
+         <size>
+          <width>40</width>
+          <height>20</height>
+         </size>
+        </property>
+       </spacer>
+      </item>
+      <item>
+       <widget class="QPushButton" name="dragBtn">
+        <property name="text">
+         <string>Drag</string>
+        </property>
+       </widget>
+      </item>
+     </layout>
+    </item>
+    <item>
+     <widget class="QLabel" name="show">
+      <property name="text">
+       <string>TextLabel</string>
+      </property>
+      <property name="alignment">
+       <set>Qt::AlignCenter</set>
+      </property>
+     </widget>
+    </item>
+   </layout>
+  </widget>
+  <widget class="QMenuBar" name="menuBar">
+   <property name="geometry">
+    <rect>
+     <x>0</x>
+     <y>0</y>
+     <width>1024</width>
+     <height>26</height>
+    </rect>
+   </property>
+  </widget>
+  <widget class="QToolBar" name="mainToolBar">
+   <attribute name="toolBarArea">
+    <enum>TopToolBarArea</enum>
+   </attribute>
+   <attribute name="toolBarBreak">
+    <bool>false</bool>
+   </attribute>
+  </widget>
+  <widget class="QStatusBar" name="statusBar"/>
+ </widget>
+ <resources/>
+ <connections/>
+</ui>