Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

[CI] enable clang tidy #29

Merged
merged 14 commits into from
Jan 10, 2024
Merged
65 changes: 65 additions & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
Checks: >
-*,
# readability-identifier-naming,
airMeng marked this conversation as resolved.
Show resolved Hide resolved
readability-const-return-type,
readability-redundant-smartptr-get,
readability-misleading-indentation,
readability-redundant-control-flow,
readability-redundant-member-init,
readability-redundant-string-cstr,
readability-redundant-string-init,
readability-simplify-subscript-expr,
readability-static-accessed-through-instance,
readability-static-definition-in-anonymous-namespace,
readability-uniqueptr-delete-release,
readability-container-size-empty,
# readability-delete-null-pointer,  # not applicable for gcc/msvc
readability-make-member-function-const,
readability-redundant-access-specifiers,
performance-for-range-copy,
performance-implicit-conversion-in-loop,
performance-inefficient-algorithm,
performance-inefficient-string-concatenation,
# performance-inefficient-vector-operation,
airMeng marked this conversation as resolved.
Show resolved Hide resolved
performance-move-const-arg,
performance-unnecessary-copy-initialization,
performance-unnecessary-value-param,
performance-no-automatic-move,
performance-trivially-destructible,
modernize-make-shared,
modernize-use-bool-literals,
modernize-use-emplace,
modernize-use-equals-default,
modernize-use-override,
modernize-use-nullptr,
modernize-use-using,
bugprone-assert-side-effect,
bugprone-copy-constructor-init,
bugprone-forward-declaration-namespace,
bugprone-move-forwarding-reference,
bugprone-parent-virtual-call,
bugprone-too-small-loop-variable,
bugprone-undefined-memory-manipulation,
bugprone-unhandled-self-assignment,
bugprone-multiple-statement-macro,
bugprone-macro-parentheses,
# google-default-arguments,
misc-misplaced-const,
misc-definitions-in-headers,
misc-redundant-expression,
misc-uniqueptr-reset-release,
misc-unused-alias-decls,
misc-unused-using-decls,
cppcoreguidelines-prefer-member-initializer,

CheckOptions:
- key: readability-identifier-naming.ClassCase
value: lower_case
- key: readability-identifier-naming.StructCase
value: lower_case
- key: readability-identifier-naming.ClassSuffix
value: _t
- key: readability-identifier-naming.StructSuffix
value: _t
2 changes: 1 addition & 1 deletion .github/workflows/format_scan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
"bandit",
"clangformat",
"cloc",
"cpplint",
"clangtidy",
# "pydocstyle",
#"pyspelling",
]
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/scripts/formatScan/clangformat.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ log_path=${log_dir}/clangformat.log
cd /neural-speed
git config --global --add safe.directory "*"

cd /neural-speed/neural_speed
python scripts/clang-format.py
cd /neural-speed
python clang-format.py

echo "run git diff"
git diff 2>&1 | tee -a ${log_path}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,19 @@

source /neural-speed/.github/workflows/scripts/change_color.sh

pip install cpplint
pip install cmake ninja clang-tidy==16.0.4
REPO_DIR=/neural-speed
log_dir=/neural-speed/.github/workflows/scripts/formatScan
log_path=${log_dir}/cpplint.log
cpplint --extensions cpp,hpp --filter=-build/include_subdir,-build/header_guard --recursive --quiet --linelength=120 ${REPO_DIR}/neural_speed 2>&1 | tee ${log_path}
if [[ ! -f ${log_path} ]] || [[ $(grep -c "Total errors found:" ${log_path}) != 0 ]]; then
log_path=${log_dir}/clangtidy.log

# compile binary
cd ${REPO_DIR}
mkdir build
cd build
cmake .. -G Ninja -DNS_USE_CLANG_TIDY=CHECK -DBTLA_USE_OPENMP=OFF
ninja 2>&1 | tee ${log_path}

if [[ ! -f ${log_path} ]] || [[ $(grep -c "warning:" ${log_path}) != 0 ]]; then
exit 1
fi
$BOLD_PURPLE && echo "Congratulations, check passed!" && $LIGHT_PURPLE && echo "You can click on the artifact button to see the log details." && $RESET
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/scripts/install_binary.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
source /neural-speed/.github/workflows/scripts/change_color.sh

cd /neural-speed
export CMAKE_ARGS="-DNE_DNNL_CACHE_DIR=/cache"
$BOLD_YELLOW && echo "---------------- git submodule update --init --recursive -------------" && $RESET
git config --global --add safe.directory "*"
git submodule update --init --recursive
Expand Down
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,8 @@
.vscode/*
bestla/build/
bestla/build/*
neural_speed.egg-info/
build/
debug/
.eggs/
dist/
103 changes: 49 additions & 54 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,73 +27,68 @@ endif()
#

# general
option(NE_STATIC "neural_engine: static link libraries" OFF)
option(NE_NATIVE "neural_engine: enable -march=native flag" OFF)
option(NE_LTO "neural_engine: enable link time optimization" OFF)
option(NE_BUILD_APPLICATIONS "neural_engine: build applications" ON)
option(NS_STATIC "neural_speed: static link libraries" OFF)
option(NS_NATIVE "neural_speed: enable -march=native flag" OFF)
option(NS_LTO "neural_speed: enable link time optimization" OFF)
option(NS_BUILD_APPLICATIONS "neural_speed: build applications" ON)

# GPU
option(NE_GPU "neural_engine: enable GPU inference" OFF)
option(NS_GPU "neural_speed: enable GPU inference" OFF)

# debug
option(NE_ALL_WARNINGS "neural_engine: enable all compiler warnings" ON)
option(NE_ALL_WARNINGS_3RD_PARTY "neural_engine: enable all compiler warnings in 3rd party libs" OFF)
option(NE_GPROF "neural_engine: enable gprof" OFF)
option(NS_ALL_WARNINGS "neural_speed: enable all compiler warnings" ON)
option(NS_ALL_WARNINGS_3RD_PARTY "neural_speed: enable all compiler warnings in 3rd party libs" OFF)
option(NS_GPROF "neural_speed: enable gprof" OFF)

# tensor parallelism
option(NE_TP "neural_engine: enable tensor parallism" OFF)
if (NE_TP)
add_compile_definitions(NE_TP_MODEL)
option(NS_TP "neural_speed: enable tensor parallelism" OFF)
if (NS_TP)
add_compile_definitions(NS_TP_MODEL)
endif()

# sanitizers
option(NE_SANITIZE_THREAD "neural_engine: enable thread sanitizer" OFF)
option(NE_SANITIZE_ADDRESS "neural_engine: enable address sanitizer" OFF)
option(NE_SANITIZE_UNDEFINED "neural_engine: enable undefined sanitizer" OFF)
option(NS_SANITIZE_THREAD "neural_speed: enable thread sanitizer" OFF)
option(NS_SANITIZE_ADDRESS "neural_speed: enable address sanitizer" OFF)
option(NS_SANITIZE_UNDEFINED "neural_speed: enable undefined sanitizer" OFF)

# instruction set specific
option(NE_AVX "neural_engine: enable AVX" ON)
option(NE_AVX2 "neural_engine: enable AVX2" ON)
option(NE_AVX512 "neural_engine: enable AVX512" OFF)
option(NE_AVX512_VBMI "neural_engine: enable AVX512-VBMI" OFF)
option(NE_AVX512_VNNI "neural_engine: enable AVX512-VNNI" OFF)
option(NE_FMA "neural_engine: enable FMA" ON)
option(NE_AMX "neural_engine: enable AMX" OFF)
option(NE_F16C "neural_engine: enable F16C" ON)

# 3rd party libs
option(NE_ONEDNN "neural_engine: use oneDNN" ON)
option(NE_LIBXSMM "neural_engine: use libxsmm" OFF)
option(NE_XETLA "neural_engine: use XeTLA" OFF)
if (NE_GPU)
set(NE_XETLA ON)
option(NS_AVX "neural_speed: enable AVX" ON)
option(NS_AVX2 "neural_speed: enable AVX2" ON)
option(NS_AVX512 "neural_speed: enable AVX512" OFF)
option(NS_AVX512_VBMI "neural_speed: enable AVX512-VBMI" OFF)
option(NS_AVX512_VNNI "neural_speed: enable AVX512-VNNI" OFF)
option(NS_FMA "neural_speed: enable FMA" ON)
option(NS_AMX "neural_speed: enable AMX" OFF)
option(NS_F16C "neural_speed: enable F16C" ON)

option(NS_BUILD_TESTS "neural_speed: build tests" ${NS_STANDALONE})
option(NS_BTLA_UT "enable BesTLA's unit tests" OFF)
option(NS_BUILD_EXAMPLES "neural_speed: build examples" ${NS_STANDALONE})
option(NS_USE_CLANG_TIDY "neural_speed: clang-tidy check" OFF)


if(NS_BUILD_TESTS)
add_compile_definitions(NS_BUILD_TESTS)
endif()

option(NE_BUILD_TESTS "neural_engine: build tests" ${NE_STANDALONE})
option(NE_BTLA_UT "enable BesTLA's unit tests" OFF)
option(NE_BUILD_EXAMPLES "neural_engine: build examples" ${NE_STANDALONE})
if(NE_BUILD_TESTS)
add_compile_definitions(NE_BUILD_TESTS)
add_compile_definitions(NS_PERF)
option(NS_BEAM_SEARCH_VERBOSE "neural_speed: print beam search processing log" OFF)
if (NS_BEAM_SEARCH_VERBOSE)
add_compile_definitions(NS_BEAM_SEARCH_VERBOSE_ON)
endif()

add_compile_definitions(NE_PERF)
option(NE_BEAM_SEARCH_VERBOSE "neural_engine: print beam search processing log" OFF)
if (NE_BEAM_SEARCH_VERBOSE)
add_compile_definitions(NE_BEAM_SEARCH_VERBOSE_ON)
endif()
option(NE_GELU_VEC "neural_engine: enable vec in gelu" ON)
if (NE_GELU_VEC)
add_compile_definitions(NE_GELU_USE_VEC)
option(NS_GELU_VEC "neural_speed: enable vec in gelu" ON)
if (NS_GELU_VEC)
add_compile_definitions(NS_GELU_USE_VEC)
endif()
option(NE_PYTHON_API "neural_engine: use python api" OFF)
option(NE_SIMD_VEC_DOT_F16 "neural_engine: enable vec_dot_fp16 SIMD optimization" ON)
option(NS_PYTHON_API "neural_speed: use python api" OFF)
option(NS_SIMD_VEC_DOT_F16 "neural_speed: enable vec_dot_fp16 SIMD optimization" ON)
option(BUILD_SHARED_LIBS "If build as shared libs" ON)

if (NE_SIMD_VEC_DOT_F16)
add_compile_definitions(NE_SIMD_VEC_DOT_F16)
if (NS_SIMD_VEC_DOT_F16)
add_compile_definitions(NS_SIMD_VEC_DOT_F16)
endif()

if(NE_BUILD_TESTS)
if(NS_BUILD_TESTS)
enable_testing()
endif()

Expand All @@ -104,7 +99,7 @@ if (MSVC)
endif()
endif()

if (NE_LTO)
if (NS_LTO)
include(CheckIPOSupported)
check_ipo_supported(RESULT result OUTPUT output)
if (result)
Expand All @@ -115,16 +110,16 @@ if (NE_LTO)
endif()

if (NOT MSVC)
if (NE_STATIC)
if (NS_STATIC)
add_link_options(-static)
if (MINGW)
add_link_options(-static-libgcc -static-libstdc++)
endif()
endif()
if (NE_GPROF)
if (NS_GPROF)
add_compile_options(-pg)
endif()
if (NE_NATIVE)
if (NS_NATIVE)
add_compile_options(-march=native)
endif()
endif()
Expand All @@ -133,11 +128,11 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)

if (NE_PYTHON_API)
if (NS_PYTHON_API)
add_subdirectory(third_party/pybind11)
endif()

if (NE_BTLA_UT)
if (NS_BTLA_UT)
set(BTLA_UT_ALL ON)
endif()
include(FindOpenMP)
Expand Down
2 changes: 1 addition & 1 deletion CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
"inherits": "x64-debug",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release",
"NE_BTLA_UT": "ON"
"NS_BTLA_UT": "ON"
}
}
]
Expand Down
9 changes: 8 additions & 1 deletion bestla/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ project(bestla LANGUAGES CXX VERSION 0.1.0)
file(GLOB headers ${PROJECT_NAME}/*.h ${PROJECT_NAME}/*.hpp)
file(GLOB xbyak_headers ${PROJECT_NAME}/xbyak/*.h ${PROJECT_NAME}/xbyak/*.hpp)

option(BTLA_USE_OPENMP "Enable OpenMP thread pool" ON)

option(BTLA_UT_ALL "Enable all unit tests" OFF)
option(BTLA_UT_DEBUG "Enable debug unit tests" OFF)
option(BTLA_UT_EPILOGUE "Enable unit test for epilogue" OFF)
Expand Down Expand Up @@ -41,7 +43,12 @@ endif()
include(GNUInstallDirs)
add_library(${PROJECT_NAME} INTERFACE)
add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME})


if(BTLA_USE_OPENMP)
message(STATUS "BesTLA using OpenMP")
target_compile_definitions(${PROJECT_NAME} INTERFACE BTLA_USE_OPENMP)
airMeng marked this conversation as resolved.
Show resolved Hide resolved
endif(BTLA_USE_OPENMP)

target_include_directories(
${PROJECT_NAME} INTERFACE
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
Expand Down
6 changes: 3 additions & 3 deletions bestla/bestla/bestla_parallel.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <functional>
#include <thread>
#include <vector>
#ifdef _OPENMP
#if BTLA_OPENMP
#include <omp.h>
#endif
#include "bestla_utils.h"
Expand Down Expand Up @@ -588,14 +588,14 @@ class IThreading {
public:
explicit IThreading(int nthreads) : mThreadNum(nthreads) {}
virtual void parallel_for(const thread_func& func) const = 0;
virtual inline void sync() const = 0;
virtual inline void sync() const { assert(0); };
virtual int num_threads() const { return mThreadNum; };
virtual void set_threads(int nthreads) = 0;

protected:
int mThreadNum;
};
#ifdef _OPENMP
#if BTLA_OPENMP
class OMPThreading : public IThreading {
public:
explicit OMPThreading(int nthreads) : IThreading(nthreads) { omp_set_num_threads(nthreads); }
Expand Down
23 changes: 11 additions & 12 deletions bestla/bestla/bestla_prologue_b.h
Original file line number Diff line number Diff line change
Expand Up @@ -382,8 +382,7 @@ class WeightKBlockNInteger {
void packQWeight(const int N, const int K, const int8_t* B, const int ldb, const float* scales,
const int8_t* zero_points, StorageWeight* stor, parallel::IThreading* threading) {
setQuantCorrection(N, K, zero_points, scales, stor, threading);
if (stor->mDType == BTLA_DTYPE::S8 || stor->mDType == BTLA_DTYPE::F8_E4M3 ||
stor->mDType == BTLA_DTYPE::F8_E5M2) {
if (stor->mDType == BTLA_DTYPE::S8 || stor->mDType == BTLA_DTYPE::F8_E4M3 || stor->mDType == BTLA_DTYPE::F8_E5M2) {
reorderWeight(N, K, B, ldb, stor->WPtr<int8_t>(), threading);
} else {
auto reorded = utils::amalloc<int8_t>((size_t)stor->mKPad * stor->mNPad);
Expand Down Expand Up @@ -774,8 +773,8 @@ class WeightKBlockNInteger {
auto ptr = reinterpret_cast<StorageWeight*>(stor);
auto quant_dtype = ptr->mDType;
if (quant_dtype == BTLA_DTYPE::S8) {
kernel::wrapper::QuantizeSignIntRowBlock::forward<ISA_T, BTLA_DTYPE::S8>(
srcptr, dstptr, row, col, ld_src, ld_dst, scales, zero_points, ptr->mBlockSize);
kernel::wrapper::QuantizeSignIntRowBlock::forward<ISA_T, BTLA_DTYPE::S8>(srcptr, dstptr, row, col, ld_src, ld_dst,
scales, zero_points, ptr->mBlockSize);
} else if (quant_dtype == BTLA_DTYPE::S4_FULLRANGE) {
kernel::wrapper::QuantizeSignIntRowBlock::forward<ISA_T, BTLA_DTYPE::S4_FULLRANGE>(
srcptr, dstptr, row, col, ld_src, ld_dst, scales, zero_points, ptr->mBlockSize);
Expand Down Expand Up @@ -953,20 +952,20 @@ class WeightKBlockNFloat : public WeightKBlockNInteger<_GemmCore_T, ISA_T> {
auto ptr = reinterpret_cast<StorageWeight*>(stor);
auto quant_dtype = ptr->mDType;
if (quant_dtype == BTLA_DTYPE::F8_E4M3) {
kernel::wrapper::QuantizeF8RowBlock::forward<ISA_T, BTLA_DTYPE::F8_E4M3>(
srcptr, dstptr, row, col, ld_src, ld_dst, scales, ptr->mBlockSize, ptr->SDtype());
kernel::wrapper::QuantizeF8RowBlock::forward<ISA_T, BTLA_DTYPE::F8_E4M3>(srcptr, dstptr, row, col, ld_src, ld_dst,
scales, ptr->mBlockSize, ptr->SDtype());
} else if (quant_dtype == BTLA_DTYPE::F8_E5M2) {
kernel::wrapper::QuantizeF8RowBlock::forward<ISA_T, BTLA_DTYPE::F8_E5M2>(
srcptr, dstptr, row, col, ld_src, ld_dst, scales, ptr->mBlockSize, ptr->SDtype());
kernel::wrapper::QuantizeF8RowBlock::forward<ISA_T, BTLA_DTYPE::F8_E5M2>(srcptr, dstptr, row, col, ld_src, ld_dst,
scales, ptr->mBlockSize, ptr->SDtype());
} else if (quant_dtype == BTLA_DTYPE::F4_BNB) {
kernel::wrapper::QuantizeF4RowBlock::forward<ISA_T, BTLA_DTYPE::F4_BNB>(srcptr, dstptr, row, col, ld_src, ld_dst,
scales, zero_points, ptr->mBlockSize);
scales, zero_points, ptr->mBlockSize);
} else if (quant_dtype == BTLA_DTYPE::F4_E2M1) {
kernel::wrapper::QuantizeF4RowBlock::forward<ISA_T, BTLA_DTYPE::F4_E2M1>(
srcptr, dstptr, row, col, ld_src, ld_dst, scales, zero_points, ptr->mBlockSize);
kernel::wrapper::QuantizeF4RowBlock::forward<ISA_T, BTLA_DTYPE::F4_E2M1>(srcptr, dstptr, row, col, ld_src, ld_dst,
scales, zero_points, ptr->mBlockSize);
} else if (quant_dtype == BTLA_DTYPE::F4_NF4) {
kernel::wrapper::QuantizeF4RowBlock::forward<ISA_T, BTLA_DTYPE::F4_NF4>(srcptr, dstptr, row, col, ld_src, ld_dst,
scales, zero_points, ptr->mBlockSize);
scales, zero_points, ptr->mBlockSize);
} else {
assert(0);
}
Expand Down
Loading