diff --git a/.gitignore b/.gitignore index 0411936535b39..6e9cfa29dc756 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,9 @@ torch/lib/*.lib torch/lib/*.dylib* torch/lib/*.h torch/lib/build +torch/lib/cmake +torch/lib/pkgconfig +torch/lib/protoc torch/lib/tmp_install torch/lib/include torch/lib/torch_shm_manager @@ -150,7 +153,7 @@ build build_host_protoc build_android build_ios -build_* +/build_* .build_debug/* .build_release/* distribute/* diff --git a/.jenkins/caffe2/build.sh b/.jenkins/caffe2/build.sh index 49ff3619abd4c..da20f748bafb4 100755 --- a/.jenkins/caffe2/build.sh +++ b/.jenkins/caffe2/build.sh @@ -96,9 +96,12 @@ if [[ "${BUILD_ENVIRONMENT}" == conda* ]]; then exit 0 fi -# Run cmake from ./build directory -mkdir -p ./build -cd ./build +# Run cmake from ./build_caffe2 directory so it doesn't conflict with +# standard PyTorch build directory. Eventually these won't need to +# be separate. +rm -rf build_caffe2 +mkdir build_caffe2 +cd ./build_caffe2 INSTALL_PREFIX="/usr/local/caffe2" CMAKE_ARGS+=("-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}") diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh index 0545ad004e018..2a04fe4752a2f 100755 --- a/.jenkins/pytorch/build.sh +++ b/.jenkins/pytorch/build.sh @@ -35,8 +35,8 @@ fi WERROR=1 python setup.py install -# Add the ATen test binaries so that they won't be git clean'ed away -git add -f aten/build/src/ATen/test +# Add the test binaries so that they won't be git clean'ed away +git add -f build/bin # Testing ATen install if [[ "$BUILD_ENVIRONMENT" != *cuda* ]]; then diff --git a/.jenkins/pytorch/dirty.sh b/.jenkins/pytorch/dirty.sh index 73a43f26ae488..f62fdafa93765 100755 --- a/.jenkins/pytorch/dirty.sh +++ b/.jenkins/pytorch/dirty.sh @@ -3,4 +3,4 @@ set -ex upstream="$1" pr="$2" git diff --name-only "$upstream" "$pr" -git diff --name-only "$upstream" "$pr" | grep -Eq 
'^(aten/|.jenkins/pytorch|docs/(make.bat|Makefile|requirements.txt|source)|mypy|requirements.txt|setup.py|test/|third_party/|tools/|\.gitmodules|torch/)' +git diff --name-only "$upstream" "$pr" | grep -Eq '^(aten/|caffe2/|.jenkins/pytorch|docs/(make.bat|Makefile|requirements.txt|source)|mypy|requirements.txt|setup.py|test/|third_party/|tools/|\.gitmodules|torch/)' diff --git a/.jenkins/pytorch/macos-build-test.sh b/.jenkins/pytorch/macos-build-test.sh index 05fd0bbe59b23..97d3f8c9ffd39 100755 --- a/.jenkins/pytorch/macos-build-test.sh +++ b/.jenkins/pytorch/macos-build-test.sh @@ -28,8 +28,18 @@ python test/run_test.py --verbose # NB: Install outside of source directory (at the same level as the root # pytorch folder) so that it doesn't get cleaned away prior to docker push. +# But still clean it before we perform our own build. +# CPP_BUILD="$PWD/../cpp-build" +rm -rf $CPP_BUILD +mkdir -p $CPP_BUILD WERROR=1 VERBOSE=1 tools/cpp_build/build_all.sh "$CPP_BUILD" -python tools/download_mnist.py --quiet -d test/cpp/api/mnist -"$CPP_BUILD"/libtorch/bin/test_api +# TODO; Enable tests on Mac as soon as possible +#python tools/download_mnist.py --quiet -d test/cpp/api/mnist +# +# # Unfortunately it seems like the test can't load from miniconda3 +# # without these paths being set +# export DYLD_LIBRARY_PATH="$DYLD_LIBRARY_PATH:$PWD/miniconda3/lib" +# export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD/miniconda3/lib" +# "$CPP_BUILD"/libtorch/bin/test_api diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh index c09005ba93917..499ef6fef2730 100755 --- a/.jenkins/pytorch/test.sh +++ b/.jenkins/pytorch/test.sh @@ -31,13 +31,14 @@ time python test/run_test.py --verbose # Test ATen if [[ "$BUILD_ENVIRONMENT" != *asan* ]]; then - echo "Testing ATen" + echo "Running ATen tests with pytorch lib" TORCH_LIB_PATH=$(python -c "import site; print(site.getsitepackages()[0])")/torch/lib # NB: the ATen test binaries don't have RPATH set, so it's necessary to # put the dynamic 
libraries somewhere were the dynamic linker can find them. # This is a bit of a hack. - ln -s "$TORCH_LIB_PATH"/libATen*.so aten/build/src/ATen - aten/tools/run_tests.sh aten/build + ln -s "$TORCH_LIB_PATH"/libcaffe2* build/bin + ls build/bin + aten/tools/run_tests.sh build/bin fi rm -rf ninja diff --git a/.travis.aten.yml b/.travis.aten.yml index 61a7aefa6d3d4..0e9d8022aaec4 100644 --- a/.travis.aten.yml +++ b/.travis.aten.yml @@ -16,7 +16,7 @@ script: - cd aten - mkdir build install - cd build - - cmake .. -DNO_CUDA=1 -DCMAKE_INSTALL_PREFIX=../install + - cmake .. -DUSE_CUDA=OFF -DCMAKE_INSTALL_PREFIX=../install - make install - ../tools/run_tests.sh . - cd .. diff --git a/CMakeLists.txt b/CMakeLists.txt index 072eee596220b..69078663cab3b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,10 +44,11 @@ option(USE_ACL "Use ARM Compute Library" OFF) option(USE_ASAN "Use Address Sanitizer" OFF) option(USE_ATEN "Use ATen" OFF) option(USE_CUDA "Use CUDA" ON) +option(USE_ROCM "Use ROCm" OFF) option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF) cmake_dependent_option( USE_CUDNN "Use cuDNN" ON - "USE_CUDA" OFF) # New option + "USE_CUDA" OFF) option(USE_FFMPEG "Use ffmpeg" OFF) cmake_dependent_option( USE_GFLAGS "Use GFLAGS" ON @@ -58,7 +59,7 @@ cmake_dependent_option( cmake_dependent_option( USE_GLOO "Use Gloo" ON "BUILD_CAFFE2" OFF) -option(USE_GLOO_IBVERBS "Use Gloo IB verbs for distributed support" OFF) # New option +option(USE_GLOO_IBVERBS "Use Gloo IB verbs for distributed support" OFF) cmake_dependent_option( USE_LEVELDB "Use LEVELDB" ON "BUILD_CAFFE2" OFF) @@ -77,7 +78,7 @@ cmake_dependent_option( "BUILD_CAFFE2" OFF) option(USE_NATIVE_ARCH "Use -march=native" OFF) option(USE_NCCL "Use NCCL" ON) -option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF) # New option +option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF) option(USE_NERVANA_GPU "Use Nervana GPU backend" OFF) option(USE_NNAPI "Use NNAPI" OFF) option(USE_NNPACK "Use NNPACK" ON) @@ -104,13 
+105,16 @@ cmake_dependent_option( cmake_dependent_option( USE_MKLML "Use MKLML interface in MKL BLAS" ON "BUILD_CAFFE2" OFF) -option(USE_DISTRIBUTED "Use THD (distributed)" OFF) # New option -option(USE_DISTRIBUTED_MW "Use THD (distributed) master worker" OFF) # New option +option(USE_DISTRIBUTED "Use THD (distributed)" OFF) +option(USE_DISTRIBUTED_MW "Use THD (distributed) master worker" OFF) # Legacy options, which we will eventually remove cmake_dependent_option( WITH_CUDA "Legacy CUDA" ON "USE_CUDA" OFF) +cmake_dependent_option( + WITH_ROCM "Legacy ROCm" ON + "USE_ROCM" OFF) cmake_dependent_option( NO_CUDA "Legacy no CUDA" OFF "USE_CUDA" ON) @@ -158,9 +162,18 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) enable_testing() +# ---[ Build variables set within the cmake tree +include(cmake/BuildVariables.cmake) +set(CAFFE2_WHITELIST "" CACHE STRING "A whitelist file of files that one should build.") + +# Set default build type +if(NOT CMAKE_BUILD_TYPE) + message(STATUS "Build type not set - defaulting to Release") + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." FORCE) +endif() + # ---[ Misc checks to cope with various compiler modes include(cmake/MiscCheck.cmake) -include(cmake/BuildVariables.cmake) # External projects include(ExternalProject) @@ -170,14 +183,6 @@ include(ExternalProject) include(cmake/Utils.cmake) include(cmake/public/utils.cmake) -set(CAFFE2_WHITELIST "" CACHE STRING "A whitelist file of files that one should build.") - -# Set default build type -if(NOT CMAKE_BUILD_TYPE) - message(STATUS "Build type not set - defaulting to Release") - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." 
FORCE) -endif() - # ---[ Dependencies include(cmake/Dependencies.cmake) @@ -249,7 +254,9 @@ include_directories(BEFORE ${PROJECT_SOURCE_DIR}) include_directories(BEFORE ${PROJECT_BINARY_DIR}) # ---[ Old caffe protobuf -add_subdirectory(caffe/proto) +if(BUILD_CAFFE2) + add_subdirectory(caffe/proto) +endif() # ---[ Main build add_subdirectory(caffe2) @@ -313,7 +320,7 @@ if ((NOT USE_GLOG) OR (NOT USE_GFLAGS) OR BUILD_CUSTOM_PROTOBUF) "generate files that are not well tested.") endif() -if (USE_CUDA) +if (USE_CUDA OR USE_ROCM) # TODO: check if we should include other cuda dependency libraries # to the interface as well. diff --git a/aten/CMakeLists.txt b/aten/CMakeLists.txt index fc6811ddf6a9e..35554efb47f2e 100644 --- a/aten/CMakeLists.txt +++ b/aten/CMakeLists.txt @@ -1,16 +1,19 @@ if (CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) - if (NOT USE_ATEN) + if (NOT BUILD_ATEN) return() endif() else() cmake_minimum_required(VERSION 3.0 FATAL_ERROR) include(CMakeDependentOption) option(USE_CUDA "Use CUDA" ON) + option(USE_ROCM "Use ROCm" OFF) option(USE_CUDNN "Use cuDNN" ON) option(USE_MKLDNN "Use MKLDNN" ON) cmake_dependent_option( USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF) + option(ATEN_NO_TEST "Do not build ATen test binaries" OFF) + option(ATEN_NO_CONTRIB "Do not build ATen contrib" OFF) # Legacy options, which we will eventually remove cmake_dependent_option( @@ -22,10 +25,19 @@ else() cmake_dependent_option( NO_MKLDNN "Legacy no MKLDNN" OFF "USE_MKLDNN" ON) + cmake_dependent_option( + WITH_ROCM "Legacy ROCm" ON + "USE_ROCM" OFF) + + # Flag for shared dependencies + set(BUILD_ATEN ON) endif() if (NOT USE_CUDA) set(NO_CUDA ON) endif() +if (WITH_ROCM) + set(USE_ROCM ON) +endif() # Create the project in all cases project(ATen CXX C) @@ -40,444 +52,57 @@ list(APPEND CMAKE_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/) cmake_policy(SET CMP0012 NEW) -# For caffe2_interface_library -include(utils) - # Polyfill for upstream FindCUDA include(CMakeInitializeConfigs) -# RPATH stuff -# see 
https://cmake.org/Wiki/CMake_RPATH_handling -if(APPLE) - set(CMAKE_MACOSX_RPATH ON) -endif() -set(CMAKE_SKIP_BUILD_RPATH FALSE) -set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) -set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") -set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) -set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) -list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) -if("${isSystemDir}" STREQUAL "-1") - set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") -endif() - -if(NOT MSVC) - set(CMAKE_CXX_FLAGS "--std=c++11 ${CMAKE_CXX_FLAGS}") -endif() - -INCLUDE(CheckCXXSourceCompiles) - -# disable some verbose warnings -IF (MSVC) - set(CMAKE_CXX_FLAGS "/wd4267 /wd4251 /wd4522 /wd4522 /wd4838 /wd4305 /wd4244 /wd4190 /wd4101 /wd4996 /wd4275 ${CMAKE_CXX_FLAGS}") -ENDIF(MSVC) - -# windef.h will define max/min macros if NOMINMAX is not defined -IF(MSVC) - add_definitions(/DNOMINMAX) -ENDIF(MSVC) - -#Check if certain std functions are supported. Sometimes -#_GLIBCXX_USE_C99 macro is not defined and some functions are missing. -CHECK_CXX_SOURCE_COMPILES(" -#include -#include - -int main() { - int a = std::isinf(3.0); - int b = std::isnan(0.0); - std::string s = std::to_string(1); - - return 0; - }" SUPPORT_GLIBCXX_USE_C99) - -if(NOT SUPPORT_GLIBCXX_USE_C99) - message(FATAL_ERROR - "The C++ compiler does not support required functions. " - "This is very likely due to a known bug in GCC 5 " - "(and maybe other versions) on Ubuntu 17.10 and newer. 
" - "For more information, see: " - "https://github.com/pytorch/pytorch/issues/5229" - ) -endif() - -# Top-level build config -############################################ -# Flags -# When using MSVC - -# Detect CUDA architecture and get best NVCC flags -# finding cuda must be first because other things depend on the result -# -# NB: We MUST NOT run this find_package if NO_CUDA is set, because upstream -# FindCUDA has a bug where it will still attempt to make use of NOTFOUND -# compiler variables to run various probe tests. We could try to fix -# this, but since FindCUDA upstream is subsumed by first-class support -# for CUDA language, it seemed not worth fixing. -IF(NOT CUDA_FOUND AND NOT NO_CUDA) - FIND_PACKAGE(CUDA 5.5) -ENDIF() - -# Find the HIP package, set the HIP paths, load the HIP CMake. -IF(WITH_ROCM) - include(LoadHIP) -ENDIF() - -IF(MSVC) - # we want to respect the standard, and we are bored of those **** . - ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1) - LIST(APPEND CUDA_NVCC_FLAGS "-Xcompiler /wd4819 -Xcompiler /wd4503 -Xcompiler /wd4190 -Xcompiler /wd4244 -Xcompiler /wd4251 -Xcompiler /wd4275 -Xcompiler /wd4522") -ENDIF(MSVC) - -IF (NOT MSVC) - IF (CMAKE_VERSION VERSION_LESS "3.1") - SET(CMAKE_C_FLAGS "-std=c11 ${CMAKE_C_FLAGS}") - ELSE () - SET(CMAKE_C_STANDARD 11) - ENDIF () -ENDIF(NOT MSVC) - -if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.9") - if(CUDA_VERSION VERSION_LESS "8.0") - MESSAGE(STATUS "Found gcc >=5 and CUDA <= 7.5, adding workaround C++ flags") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_FORCE_INLINES -D_MWAITXINTRIN_H_INCLUDED -D__STRICT_ANSI__") - endif(CUDA_VERSION VERSION_LESS "8.0") - endif(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.9") -endif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - -LIST(APPEND CUDA_NVCC_FLAGS -Wno-deprecated-gpu-targets) -LIST(APPEND CUDA_NVCC_FLAGS --expt-extended-lambda) - -if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - SET(CMAKE_CXX_STANDARD 11) -endif() - 
-IF(NOT COMMAND CUDA_SELECT_NVCC_ARCH_FLAGS) - INCLUDE(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules_CUDA_fix/FindCUDA/select_compute_arch.cmake) -ENDIF() -LIST(APPEND CUDA_NVCC_FLAGS $ENV{TORCH_NVCC_FLAGS}) -CUDA_SELECT_NVCC_ARCH_FLAGS(NVCC_FLAGS_EXTRA $ENV{TORCH_CUDA_ARCH_LIST}) -LIST(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA}) -IF(CMAKE_POSITION_INDEPENDENT_CODE AND NOT MSVC) - LIST(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC") -ENDIF() - -IF(CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5) - MESSAGE(STATUS "Found CUDA with FP16 support, compiling with torch.CudaHalfTensor") - LIST(APPEND CUDA_NVCC_FLAGS "-DCUDA_HAS_FP16=1 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF2_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__") - add_compile_options(-DCUDA_HAS_FP16=1) -ELSE(CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5) - MESSAGE(STATUS "Could not find CUDA with FP16 support, compiling without torch.CudaHalfTensor") -ENDIF(CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5) - -OPTION(NDEBUG "disable asserts (WARNING: this may result in silent UB e.g. with out-of-bound indices)") -IF(NOT NDEBUG) - MESSAGE(STATUS "Removing -DNDEBUG from compile flags") - STRING(REGEX REPLACE "[-/]DNDEBUG" "" CMAKE_C_FLAGS "" ${CMAKE_C_FLAGS}) - STRING(REGEX REPLACE "[-/]DNDEBUG" "" CMAKE_C_FLAGS_DEBUG "" ${CMAKE_C_FLAGS_DEBUG}) - STRING(REGEX REPLACE "[-/]DNDEBUG" "" CMAKE_C_FLAGS_RELEASE "" ${CMAKE_C_FLAGS_RELEASE}) - STRING(REGEX REPLACE "[-/]DNDEBUG" "" CMAKE_CXX_FLAGS "" ${CMAKE_CXX_FLAGS}) - STRING(REGEX REPLACE "[-/]DNDEBUG" "" CMAKE_CXX_FLAGS_DEBUG "" ${CMAKE_CXX_FLAGS_DEBUG}) - STRING(REGEX REPLACE "[-/]DNDEBUG" "" CMAKE_CXX_FLAGS_RELEASE "" ${CMAKE_CXX_FLAGS_RELEASE}) -ENDIF() - -# OpenMP support? 
-SET(WITH_OPENMP ON CACHE BOOL "OpenMP support if available?") -IF (APPLE AND CMAKE_COMPILER_IS_GNUCC) - EXEC_PROGRAM (uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION) - STRING (REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION}) - MESSAGE (STATUS "MAC OS Darwin Version: ${DARWIN_VERSION}") - IF (DARWIN_VERSION GREATER 9) - SET(APPLE_OPENMP_SUCKS 1) - ENDIF (DARWIN_VERSION GREATER 9) - EXECUTE_PROCESS (COMMAND ${CMAKE_C_COMPILER} -dumpversion - OUTPUT_VARIABLE GCC_VERSION) - IF (APPLE_OPENMP_SUCKS AND GCC_VERSION VERSION_LESS 4.6.2) - MESSAGE(STATUS "Warning: Disabling OpenMP (unstable with this version of GCC)") - MESSAGE(STATUS " Install GCC >= 4.6.2 or change your OS to enable OpenMP") - add_compile_options(-Wno-unknown-pragmas) - SET(WITH_OPENMP OFF CACHE BOOL "OpenMP support if available?" FORCE) - ENDIF () -ENDIF () - -IF (WITH_OPENMP AND NOT CHECKED_OPENMP) - FIND_PACKAGE(OpenMP) - SET(CHECKED_OPENMP ON CACHE BOOL "already checked for OpenMP") - - # OPENMP_FOUND is not cached in FindOpenMP.cmake (all other variables are cached) - # see https://github.com/Kitware/CMake/blob/master/Modules/FindOpenMP.cmake - SET(OPENMP_FOUND ${OPENMP_FOUND} CACHE BOOL "OpenMP Support found") -ENDIF (WITH_OPENMP AND NOT CHECKED_OPENMP) - -IF(OPENMP_FOUND) - MESSAGE(STATUS "Compiling with OpenMP support") - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -ENDIF(OPENMP_FOUND) - - -SET(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF) - -FIND_PACKAGE(MAGMA) -IF(NOT NO_CUDA AND MAGMA_FOUND) - INCLUDE_DIRECTORIES("${MAGMA_INCLUDE_DIR}") - SET(CMAKE_REQUIRED_INCLUDES "${MAGMA_INCLUDE_DIR};${CUDA_INCLUDE_DIRS}") - INCLUDE(CheckPrototypeDefinition) - check_prototype_definition(magma_get_sgeqrf_nb - "magma_int_t magma_get_sgeqrf_nb( magma_int_t m, magma_int_t n );" - "0" - "magma.h" - MAGMA_V2) - IF (MAGMA_V2) - add_definitions(-DMAGMA_V2) - - SET(USE_MAGMA 1) - MESSAGE(STATUS "Compiling with MAGMA V2 support") - 
MESSAGE(STATUS "MAGMA INCLUDE DIRECTORIES: ${MAGMA_INCLUDE_DIR}") - MESSAGE(STATUS "MAGMA LIBRARIES: ${MAGMA_LIBRARIES}") - ELSE() - MESSAGE(STATUS "PyTorch only supports MAGMA 2, but a different version \ - of MAGMA was detected with includes here: ${MAGMA_INCLUDE_DIR}") - MESSAGE(STATUS "Compiling without MAGMA support") - ENDIF() -ELSE() - MESSAGE(STATUS "MAGMA not found. Compiling without MAGMA support") -ENDIF() - -# ARM specific flags -FIND_PACKAGE(ARM) -IF (ASIMD_FOUND) - MESSAGE(STATUS "asimd/Neon found with compiler flag : -D__NEON__") - add_compile_options(-D__NEON__) -ELSEIF (NEON_FOUND) - MESSAGE(STATUS "Neon found with compiler flag : -mfpu=neon -D__NEON__") - add_compile_options(-mfpu=neon -D__NEON__) -ENDIF (ASIMD_FOUND) -IF (CORTEXA8_FOUND) - MESSAGE(STATUS "Cortex-A8 Found with compiler flag : -mcpu=cortex-a8") - add_compile_options(-mcpu=cortex-a8 -fprefetch-loop-arrays) -ENDIF (CORTEXA8_FOUND) -IF (CORTEXA9_FOUND) - MESSAGE(STATUS "Cortex-A9 Found with compiler flag : -mcpu=cortex-a9") - add_compile_options(-mcpu=cortex-a9) -ENDIF (CORTEXA9_FOUND) - -IF(UNIX) - # prevent Unknown CMake command "check_function_exists". - INCLUDE(CheckFunctionExists) -ENDIF(UNIX) - -INCLUDE (CheckIncludeFile) -INCLUDE (CheckCSourceCompiles) -INCLUDE (CheckCSourceRuns) - -# Check that our programs run. This is different from the native CMake compiler -# check, which just tests if the program compiles and links. This is important -# because with ASAN you might need to help the compiled library find some -# dynamic libraries. -CHECK_C_SOURCE_RUNS(" -int main() { return 0; } -" COMPILER_WORKS) -IF(NOT COMPILER_WORKS) - # Force cmake to retest next time around - unset(COMPILER_WORKS CACHE) - MESSAGE(FATAL_ERROR - "Could not run a simple program built with your compiler. 
" - "If you are trying to use -fsanitize=address, make sure " - "libasan is properly installed on your system (you can confirm " - "if the problem is this by attempting to build and run a " - "small program.)") -ENDIF() - -CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H) -# Check for a cpuid intrinsic -IF(HAVE_CPUID_H) - CHECK_C_SOURCE_COMPILES("#include - int main() - { - unsigned int eax, ebx, ecx, edx; - return __get_cpuid(0, &eax, &ebx, &ecx, &edx); - }" HAVE_GCC_GET_CPUID) -ENDIF() -IF(HAVE_GCC_GET_CPUID) - add_compile_options(-DHAVE_GCC_GET_CPUID) -ENDIF(HAVE_GCC_GET_CPUID) - -CHECK_C_SOURCE_COMPILES("#include - static inline void cpuid(uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) - { - uint32_t a = *eax, b, c = *ecx, d; - asm volatile ( \"cpuid\" : \"+a\"(a), \"=b\"(b), \"+c\"(c), \"=d\"(d) ); - *eax = a; *ebx = b; *ecx = c; *edx = d; - } - int main() { - uint32_t a,b,c,d; - cpuid(&a, &b, &c, &d); - return 0; - }" NO_GCC_EBX_FPIC_BUG) - -IF(NOT NO_GCC_EBX_FPIC_BUG) - add_compile_options(-DUSE_GCC_GET_CPUID) -ENDIF(NOT NO_GCC_EBX_FPIC_BUG) - -FIND_PACKAGE(SSE) # checks SSE, AVX and AVX2 -IF(C_SSE2_FOUND) - MESSAGE(STATUS "SSE2 Found") - # TODO: Work out correct way to do this. 
Note that C_SSE2_FLAGS is often - # empty, in which case it expands to " " flag which is bad - SET(CMAKE_C_FLAGS "${C_SSE2_FLAGS} ${CMAKE_C_FLAGS}") - SET(CMAKE_CXX_FLAGS "${C_SSE2_FLAGS} ${CMAKE_CXX_FLAGS}") - add_compile_options(-DUSE_SSE2) -ENDIF(C_SSE2_FOUND) -IF(C_SSE4_1_FOUND AND C_SSE4_2_FOUND) - SET(CMAKE_C_FLAGS "${C_SSE4_1_FLAGS} ${C_SSE4_2_FLAGS} ${CMAKE_C_FLAGS}") - SET(CMAKE_CXX_FLAGS "${C_SSE4_1_FLAGS} ${C_SSE4_2_FLAGS} ${CMAKE_CXX_FLAGS}") - add_compile_options(-DUSE_SSE4_1 -DUSE_SSE4_2) -ENDIF() -IF(C_SSE3_FOUND) - MESSAGE(STATUS "SSE3 Found") - SET(CMAKE_C_FLAGS "${C_SSE3_FLAGS} ${CMAKE_C_FLAGS}") - SET(CMAKE_CXX_FLAGS "${C_SSE3_FLAGS} ${CMAKE_CXX_FLAGS}") - add_compile_options(-DUSE_SSE3) -ENDIF(C_SSE3_FOUND) - -# we don't set -mavx and -mavx2 flags globally, but only for specific files -# however, we want to enable the AVX codepaths, so we still need to -# add USE_AVX and USE_AVX2 macro defines -IF(C_AVX_FOUND) - MESSAGE(STATUS "AVX Found") - add_compile_options(-DUSE_AVX) -ENDIF(C_AVX_FOUND) -IF(C_AVX2_FOUND) - MESSAGE(STATUS "AVX2 Found") - add_compile_options(-DUSE_AVX2) -ENDIF(C_AVX2_FOUND) - -CHECK_C_SOURCE_RUNS(" -#include -// ATOMIC_INT_LOCK_FREE is flaky on some older gcc versions -// so if this define is not usable a preprocessor definition -// we fail this check and fall back to GCC atomics -#if ATOMIC_INT_LOCK_FREE == 2 -#define TH_ATOMIC_IPC_REFCOUNT 1 -#endif -int main() -{ - int a; - int oa; - atomic_store(&a, 1); - atomic_fetch_add(&a, 1); - oa = atomic_load(&a); - if(!atomic_compare_exchange_strong(&a, &oa, 3)) - return -1; - return 0; -} -" HAS_C11_ATOMICS) - -IF(NOT HAS_C11_ATOMICS) - CHECK_C_SOURCE_RUNS(" -#include -int main() -{ - long a; - _InterlockedExchange(&a, 1); - _InterlockedExchangeAdd(&a, 1); - if(_InterlockedCompareExchange(&a, 3, 2) != 2) - return -1; - return 0; -} -" HAS_MSC_ATOMICS) - - CHECK_C_SOURCE_RUNS(" -int main() -{ - int a; - __sync_lock_test_and_set(&a, 1); - __sync_fetch_and_add(&a, 1); - 
if(!__sync_bool_compare_and_swap(&a, 2, 3)) - return -1; - return 0; -} -" HAS_GCC_ATOMICS) -ENDIF() - -IF(HAS_C11_ATOMICS) - ADD_DEFINITIONS(-DUSE_C11_ATOMICS=1) - MESSAGE(STATUS "Atomics: using C11 intrinsics") -ELSEIF(HAS_MSC_ATOMICS) - ADD_DEFINITIONS(-DUSE_MSC_ATOMICS=1) - MESSAGE(STATUS "Atomics: using MSVC intrinsics") -ELSEIF(HAS_GCC_ATOMICS) - ADD_DEFINITIONS(-DUSE_GCC_ATOMICS=1) - MESSAGE(STATUS "Atomics: using GCC intrinsics") -ELSE() - SET(CMAKE_THREAD_PREFER_PTHREAD TRUE) - FIND_PACKAGE(Threads) - IF(THREADS_FOUND) - ADD_DEFINITIONS(-DUSE_PTHREAD_ATOMICS=1) - TARGET_LINK_LIBRARIES(TH ${CMAKE_THREAD_LIBS_INIT}) - MESSAGE(STATUS "Atomics: using pthread") - ENDIF() -ENDIF() - -IF (WIN32 AND NOT CYGWIN) - SET(BLAS_INSTALL_LIBRARIES "OFF" - CACHE BOOL "Copy the required BLAS DLLs into the TH install dirs") -ENDIF (WIN32 AND NOT CYGWIN) - -MACRO(Install_Required_Library ln) - get_filename_component(libpath ${ln} PATH) - get_filename_component(libname ${ln} NAME_WE) - file(GLOB libdlls "${libpath}/${libname}*.dll") - install(PROGRAMS ${libdlls} - DESTINATION "${TH_INSTALL_BIN_SUBDIR}") -ENDMACRO(Install_Required_Library libname) - -FIND_PACKAGE(BLAS) -SET(AT_MKL_ENABLED 0) -SET(AT_MKL_MT 0) -IF(BLAS_FOUND) - SET(USE_BLAS 1) - IF(BLAS_INFO STREQUAL "mkl") - ADD_DEFINITIONS(-DTH_BLAS_MKL) - IF(NOT BLAS_INCLUDE_DIR) - MESSAGE(FATAL_ERROR "MKL is used, but MKL header files are not found. \ - You can get them by `conda install mkl-include` if using conda (if \ - it is missing, run `conda upgrade -n root conda` first), and \ - `pip install mkl-devel` if using pip. 
If build fails with header files \ - available in the system, please make sure that CMake will search the \ - directory containing them, e.g., by setting CMAKE_INCLUDE_PATH.") - ENDIF() - IF(MSVC AND MKL_LIBRARIES MATCHES ".*libiomp5md\\.lib.*") - ADD_DEFINITIONS(-D_OPENMP_NOFORCE_MANIFEST) - SET(AT_MKL_MT 1) - ENDIF() - INCLUDE_DIRECTORIES(${BLAS_INCLUDE_DIR}) # include MKL headers - SET(AT_MKL_ENABLED 1) - ENDIF() -ENDIF(BLAS_FOUND) - -FIND_PACKAGE(LAPACK) -IF(LAPACK_FOUND) - SET(USE_LAPACK 1) -ENDIF(LAPACK_FOUND) - ############################################# set(ATen_CPU_SRCS) +set(ATen_CPU_TEST_SRCS) set(ATen_CPU_INCLUDE) set(ATen_CUDA_SRCS) +set(ATen_CUDA_TEST_SRCS) set(ATen_CUDA_INCLUDE) +set(ATen_CPU_DEPENDENCY_LIBS) +set(ATen_CUDA_DEPENDENCY_LIBS) +set(ATen_PUBLIC_CUDA_DEPENDENCY_LIBS) SET(ATEN_INSTALL_BIN_SUBDIR "bin" CACHE PATH "ATen install binary subdirectory") SET(ATEN_INSTALL_LIB_SUBDIR "lib" CACHE PATH "ATen install library subdirectory") SET(ATEN_INSTALL_INCLUDE_SUBDIR "include" CACHE PATH "ATen install include subdirectory") -add_definitions(-DTH_INDEX_BASE=0) +if (NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) + # ---[ Build variables set within the cmake tree + include(../cmake/BuildVariables.cmake) + set(CAFFE2_WHITELIST "" CACHE STRING "A whitelist file of files that one should build.") + + # ---[ Misc checks to cope with various compiler modes + include(../cmake/MiscCheck.cmake) + + # External projects + include(ExternalProject) + + # ---[ Utils + # TODO: merge the following 3 files into cmake/public/utils.cmake. 
+ include(../cmake/Utils.cmake) + include(../cmake/public/utils.cmake) + + # ---[ Dependencies + include(../cmake/Dependencies.cmake) + list(APPEND ATen_CPU_INCLUDE ${Caffe2_CPU_INCLUDE}) + list(APPEND ATen_CUDA_INCLUDE ${Caffe2_GPU_INCLUDE}) + list(APPEND ATen_CPU_DEPENDENCY_LIBS ${Caffe2_DEPENDENCY_LIBS}) + list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${Caffe2_CUDA_DEPENDENCY_LIBS}) + list(APPEND ATen_PUBLIC_CUDA_DEPENDENCY_LIBS + ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS}) +endif() + +if(NOT NO_CUDA) + list(APPEND ATen_CUDA_INCLUDE ${CUDA_INCLUDE_DIRS}) +endif() + set(TH_LINK_STYLE STATIC) add_subdirectory(src/TH) -include_directories( +set(TH_CPU_INCLUDE # dense ${CMAKE_CURRENT_SOURCE_DIR}/src/TH ${CMAKE_CURRENT_SOURCE_DIR}/src/THC @@ -490,10 +115,23 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/src/THCS ${CMAKE_CURRENT_SOURCE_DIR}/src - ${CMAKE_CURRENT_BINARY_DIR}/src) + ${CMAKE_CURRENT_BINARY_DIR}/src + ${CMAKE_BINARY_DIR}/aten/src) +list(APPEND ATen_CPU_INCLUDE ${TH_CPU_INCLUDE}) add_subdirectory(src/THNN) add_subdirectory(src/THS) +# Find the HIP package, set the HIP paths, load the HIP CMake. +IF(WITH_ROCM) + include(LoadHIP) +ENDIF() + +IF(MSVC) + # we want to respect the standard, and we are bored of those **** . + ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1) + LIST(APPEND CUDA_NVCC_FLAGS "-Xcompiler /wd4819 -Xcompiler /wd4503 -Xcompiler /wd4190 -Xcompiler /wd4244 -Xcompiler /wd4251 -Xcompiler /wd4275 -Xcompiler /wd4522") +ENDIF(MSVC) + if(WITH_ROCM) SET(AT_CUDA_ENABLED 1) add_subdirectory(src/THC) @@ -502,8 +140,6 @@ if(WITH_ROCM) message("ROCm is enabled.") elseif(NOT NO_CUDA) SET(AT_CUDA_ENABLED 1) - INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS}) - find_package(CUDA 5.5 REQUIRED) add_subdirectory(src/THC) add_subdirectory(src/THCUNN) add_subdirectory(src/THCS) @@ -512,57 +148,15 @@ else() SET(AT_CUDA_ENABLED 0) endif() -IF (NOT NO_CUDA) - find_package(CuDNN) -ENDIF() -IF(NOT AT_CUDA_ENABLED OR NOT CUDNN_FOUND) - MESSAGE(STATUS "CuDNN not found. 
Compiling without CuDNN support") - set(AT_CUDNN_ENABLED 0) -ELSE() - INCLUDE_DIRECTORIES(BEFORE ${CUDNN_INCLUDE_DIRS}) - set(AT_CUDNN_ENABLED 1) -ENDIF() - -if(NO_MKLDNN) - message("disabling MKLDNN because NO_MKLDNN is set") - set(AT_MKLDNN_ENABLED 0) -else() - find_package(MKLDNN) - if(NOT MKLDNN_FOUND) - message(STATUS "MKLDNN not found. Compiling without MKLDNN support") - set(AT_MKLDNN_ENABLED 0) - else() - INCLUDE_DIRECTORIES(${MKLDNN_INCLUDE_DIRS}) - set(AT_MKLDNN_ENABLED 1) - endif() -endif() - -set(cwrap_files - ${CMAKE_CURRENT_SOURCE_DIR}/src/ATen/Declarations.cwrap - ${CMAKE_CURRENT_SOURCE_DIR}/src/THNN/generic/THNN.h - ${CMAKE_CURRENT_SOURCE_DIR}/src/THCUNN/generic/THCUNN.h - ${CMAKE_CURRENT_SOURCE_DIR}/src/ATen/nn.yaml - ${CMAKE_CURRENT_SOURCE_DIR}/src/ATen/native/native_functions.yaml -) - -include_directories( -${CMAKE_CURRENT_SOURCE_DIR}/src/THNN -${CMAKE_CURRENT_SOURCE_DIR}/src/THCUNN) +list(APPEND ATen_CPU_INCLUDE + ${CMAKE_CURRENT_SOURCE_DIR}/src/THNN + ${CMAKE_CURRENT_SOURCE_DIR}/src/THCUNN) +list(APPEND ATen_CPU_INCLUDE + ${CMAKE_CURRENT_SOURCE_DIR}/src + ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/catch/single_include + ${CMAKE_CURRENT_BINARY_DIR}/src/ATen) add_subdirectory(src/ATen) -include_directories( -${CMAKE_CURRENT_SOURCE_DIR}/src -${CMAKE_CURRENT_SOURCE_DIR}/../third_party/catch/single_include -${CMAKE_CURRENT_BINARY_DIR}/src/ATen) -if(NOT NO_CUDA AND NOT WITH_ROCM) - include_directories(${CUDA_INCLUDE_DIRS}) -endif() - -if(ATEN_NO_TEST) - message("disable test because ATEN_NO_TEST is set") -else() - add_subdirectory(src/ATen/test) -endif() if(ATEN_NO_CONTRIB) message("disable contrib because ATEN_NO_CONTRIB is set") @@ -570,3 +164,15 @@ else() add_subdirectory(contrib/data) add_subdirectory(contrib/meter) endif() + +if (CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) + # Pass source, includes, and libs to parent + set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE) + set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} PARENT_SCOPE) + set(ATen_CPU_TEST_SRCS 
${ATen_CPU_TEST_SRCS} PARENT_SCOPE) + set(ATen_CUDA_TEST_SRCS ${ATen_CUDA_TEST_SRCS} PARENT_SCOPE) + set(ATen_CPU_INCLUDE ${ATen_CPU_INCLUDE} PARENT_SCOPE) + set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE} PARENT_SCOPE) + set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE) + set(ATen_CUDA_DEPENDENCY_LIBS ${ATen_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE) +endif() diff --git a/aten/README.md b/aten/README.md index 64b395917fecd..409a667e27852 100644 --- a/aten/README.md +++ b/aten/README.md @@ -48,7 +48,7 @@ sudo pip install pyyaml mkdir build cd build cmake .. -DCMAKE_INSTALL_PREFIX=/where/you/want # specify your dest directory -# cmake .. -DNO_CUDA=true # for CPU only machines +# cmake .. -DUSE_CUDA=OFF # for CPU only machines make install ``` diff --git a/aten/contrib/data/CMakeLists.txt b/aten/contrib/data/CMakeLists.txt index 29fa51d840c68..07bda18fbdef7 100644 --- a/aten/contrib/data/CMakeLists.txt +++ b/aten/contrib/data/CMakeLists.txt @@ -23,8 +23,9 @@ set(src ) add_library(xtdata ${TH_LINK_STYLE} ${src}) -target_link_libraries(xtdata ATen_cpu) +target_link_libraries(xtdata ATen_cpu_library) +target_include_directories(xtdata PRIVATE ${ATen_CPU_INCLUDE}) +target_include_directories(xtdata PRIVATE .) -include_directories(.) 
# add_executable(test-data test/basic.cc) # target_link_libraries(test-data xtdata) diff --git a/aten/contrib/meter/CMakeLists.txt b/aten/contrib/meter/CMakeLists.txt index 2533ee3859c47..1f23ff50e6450 100644 --- a/aten/contrib/meter/CMakeLists.txt +++ b/aten/contrib/meter/CMakeLists.txt @@ -17,9 +17,11 @@ set(src MSEMeter.cc ) -add_library(xtmeter ${TH_LINK_STYLE} ${src}) -target_link_libraries(xtmeter ATen_cpu) +add_library(xtmeter SHARED ${src}) +target_link_libraries(xtmeter ATen_cpu_library) +target_include_directories(xtmeter PRIVATE ${ATen_CPU_INCLUDE}) add_executable(test-meter test/basic.cc ${BACKWARD_ENABLE}) # add_backward(test-meter) target_link_libraries(test-meter xtmeter) +target_include_directories(test-meter PRIVATE ${ATen_CPU_INCLUDE}) diff --git a/aten/src/ATen/ATenGeneral.h b/aten/src/ATen/ATenGeneral.h index ab212f83041f5..88c58a06b0c97 100644 --- a/aten/src/ATen/ATenGeneral.h +++ b/aten/src/ATen/ATenGeneral.h @@ -1,7 +1,7 @@ #pragma once #ifdef _WIN32 -# ifdef ATen_cpu_EXPORTS +# if defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS) # define AT_API __declspec(dllexport) # else # define AT_API __declspec(dllimport) diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt index 54c2d96e3e193..af6cd71707f86 100644 --- a/aten/src/ATen/CMakeLists.txt +++ b/aten/src/ATen/CMakeLists.txt @@ -1,37 +1,10 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 2.8) +cmake_minimum_required(VERSION 3.0 FATAL_ERROR) SET(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH}) -IF(WITH_ROCM) - INCLUDE_DIRECTORIES(${HIP_PATH}/include) - INCLUDE_DIRECTORIES(${HIPBLAS_PATH}/include) - INCLUDE_DIRECTORIES(${HIPSPARSE_PATH}/include) - INCLUDE_DIRECTORIES(${HIPRNG_PATH}/include) - INCLUDE_DIRECTORIES(${THRUST_PATH}) - - # load HIP cmake module and load platform id - EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig -P OUTPUT_VARIABLE PLATFORM) - EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig --cpp_config OUTPUT_VARIABLE HIP_CXX_FLAGS) - - # Link 
with HIPCC https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md#linking-with-hipcc - SET(CMAKE_CXX_LINK_EXECUTABLE ${HIP_HIPCC_EXECUTABLE}) - - # Show message that we're using ROCm. - MESSAGE(STATUS "ROCM TRUE:") - MESSAGE(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER}) -ENDIF(WITH_ROCM) - -# avoid some cmake warnings -IF(POLICY CMP0026) - CMAKE_POLICY(SET CMP0026 OLD) -ENDIF() - -IF(MSVC AND NOT "${CMAKE_BUILD_TYPE}" MATCHES "Debug") - SET(MSVC_OPT_FLAG "/Ox /fp:strict ") - SET(VCOMP_LIB "vcomp") -ELSE() - SET(MSVC_OPT_FLAG " ") - SET(VCOMP_LIB "vcompd") -ENDIF() +if (NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) + # ---[ Generate and install header and cpp files + include(../../../cmake/Codegen.cmake) +endif() IF(NOT MSVC) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-ignored-qualifiers") @@ -40,102 +13,10 @@ IF(NOT MSVC) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-absolute-value") ENDIF(NOT MSVC) -######################## -# SET_SOURCE_FILES_PROPERTIES must be in the same CMakeLists.txt file as the target that includes the file -# so we need to set these commands here rather than in src/TH -IF(C_SSE4_1_FOUND AND C_SSE4_2_FOUND) - IF(MSVC) - SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/../TH/generic/simd/convolve5x5_sse.cpp PROPERTIES COMPILE_FLAGS "${MSVC_OPT_FLAG}/fp:fast") - ELSE(MSVC) - SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/../TH/generic/simd/convolve5x5_sse.cpp PROPERTIES COMPILE_FLAGS "-O3 -ffast-math") - ENDIF(MSVC) -ENDIF(C_SSE4_1_FOUND AND C_SSE4_2_FOUND) -IF(C_AVX_FOUND) - IF(MSVC) - SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/../TH/generic/simd/convolve5x5_avx.cpp PROPERTIES COMPILE_FLAGS "${MSVC_OPT_FLAG}/fp:fast ${CXX_AVX_FLAGS}") - SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/../TH/vector/AVX.cpp PROPERTIES COMPILE_FLAGS "${MSVC_OPT_FLAG}/arch:AVX ${CXX_AVX_FLAGS}") - ELSE(MSVC) - 
SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/../TH/generic/simd/convolve5x5_avx.cpp PROPERTIES COMPILE_FLAGS "-O3 -ffast-math ${CXX_AVX_FLAGS}") - SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/../TH/vector/AVX.cpp PROPERTIES COMPILE_FLAGS "-O3 ${CXX_AVX_FLAGS}") - ENDIF(MSVC) -ENDIF(C_AVX_FOUND) - -IF(C_AVX2_FOUND) - IF(MSVC) - SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/../TH/vector/AVX2.cpp PROPERTIES COMPILE_FLAGS "${MSVC_OPT_FLAG}/arch:AVX2 ${CXX_AVX2_FLAGS}") - ELSE(MSVC) - SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/../TH/vector/AVX2.cpp PROPERTIES COMPILE_FLAGS "-O3 ${CXX_AVX2_FLAGS}") - ENDIF(MSVC) -ENDIF(C_AVX2_FOUND) - -IF(NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" MATCHES "Clang") - SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/../TH/THAllocator.cpp PROPERTIES COMPILE_FLAGS "-fno-openmp") -ENDIF() - -FILE(GLOB cpu_kernel_cpp_in RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "native/cpu/*.cpp") - -LIST(APPEND CPU_CAPABILITY_NAMES "DEFAULT") -IF(MSVC) - LIST(APPEND CPU_CAPABILITY_FLAGS "${MSVC_OPT_FLAG}") -ELSE(MSVC) - LIST(APPEND CPU_CAPABILITY_FLAGS "-O3") -ENDIF(MSVC) - -IF(CXX_AVX_FOUND) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX_CPU_DEFINITION") - LIST(APPEND CPU_CAPABILITY_NAMES "AVX") - IF(MSVC) - LIST(APPEND CPU_CAPABILITY_FLAGS "${MSVC_OPT_FLAG}/arch:AVX") - ELSE(MSVC) - LIST(APPEND CPU_CAPABILITY_FLAGS "-O3 -mavx") - ENDIF(MSVC) -ENDIF(CXX_AVX_FOUND) - -IF(CXX_AVX2_FOUND) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX2_CPU_DEFINITION") - LIST(APPEND CPU_CAPABILITY_NAMES "AVX2") - IF(MSVC) - LIST(APPEND CPU_CAPABILITY_FLAGS "${MSVC_OPT_FLAG}/arch:AVX2") - ELSE(MSVC) - LIST(APPEND CPU_CAPABILITY_FLAGS "-O3 -mavx2") - ENDIF(MSVC) -ENDIF(CXX_AVX2_FOUND) - -list(LENGTH CPU_CAPABILITY_NAMES NUM_CPU_CAPABILITY_NAMES) -math(EXPR NUM_CPU_CAPABILITY_NAMES "${NUM_CPU_CAPABILITY_NAMES}-1") - -FOREACH(i RANGE ${NUM_CPU_CAPABILITY_NAMES}) - FOREACH(IMPL ${cpu_kernel_cpp_in}) - LIST(GET 
CPU_CAPABILITY_NAMES ${i} CPU_CAPABILITY) - SET(NEW_IMPL ${CMAKE_CURRENT_BINARY_DIR}/${IMPL}.${CPU_CAPABILITY}.cpp) - CONFIGURE_FILE(${IMPL} ${NEW_IMPL} COPYONLY) - SET(cpu_kernel_cpp ${NEW_IMPL} ${cpu_kernel_cpp}) # Create list of copies - LIST(GET CPU_CAPABILITY_FLAGS ${i} FLAGS) - IF(MSVC) - SET(MACRO_FLAG "/DCPU_CAPABILITY=${CPU_CAPABILITY} /DCPU_CAPABILITY_${CPU_CAPABILITY}") - ELSE(MSVC) - SET(MACRO_FLAG "-DCPU_CAPABILITY=${CPU_CAPABILITY} -DCPU_CAPABILITY_${CPU_CAPABILITY}") - ENDIF(MSVC) - SET_SOURCE_FILES_PROPERTIES(${NEW_IMPL} PROPERTIES COMPILE_FLAGS "${FLAGS} ${MACRO_FLAG}") - ENDFOREACH() -ENDFOREACH() - ################################################################################ # Helper functions ################################################################################ -FUNCTION(EXCLUDE_DIR list_name dir_name) - # A helper that excludes all files that contain dir_name in their file path - SET(local_list ${${list_name}}) - FOREACH(source ${local_list}) - IF(${source} MATCHES ${dir_name}) - MESSAGE(STATUS "Excluding " ${source} " from the build") - LIST(REMOVE_ITEM local_list ${source}) - ENDIF() - ENDFOREACH() - SET(${list_name} ${local_list} PARENT_SCOPE) -ENDFUNCTION() - function(filter_list output input) unset(result) foreach(filename ${${input}}) @@ -157,74 +38,26 @@ IF(NOT AT_INSTALL_BIN_DIR OR NOT AT_INSTALL_LIB_DIR OR NOT AT_INSTALL_INCLUDE_DI SET(AT_INSTALL_SHARE_DIR "share" CACHE PATH "AT install include subdirectory") ENDIF() -# TODO: Maybe put these in the generated files directory CONFIGURE_FILE(Config.h.in "${CMAKE_CURRENT_SOURCE_DIR}/Config.h") CONFIGURE_FILE(cuda/CUDAConfig.h.in "${CMAKE_CURRENT_SOURCE_DIR}/cuda/CUDAConfig.h") # NB: If you edit these globs, you'll have to update setup.py package_data as well -FILE(GLOB base_h RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.h" "detail/*.h") -FILE(GLOB base_cpp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cpp" "detail/*.cpp") - -FILE(GLOB cuda_h RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} 
"cuda/*.h" "cuda/detail/*.h" "cuda/*.cuh" "cuda/detail/*.cuh") -FILE(GLOB cuda_cpp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "cuda/*.cpp" "cuda/detail/*.cpp") -FILE(GLOB cuda_cu RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "cuda/*.cu" "cuda/detail/*.cu") -FILE(GLOB cudnn_h RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "cudnn/*.h" "cudnn/*.cuh") -FILE(GLOB cudnn_cpp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "cudnn/*.cpp") -FILE(GLOB mkl_cpp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "mkl/*.cpp") -FILE(GLOB mkldnn_cpp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "mkldnn/*.cpp") - -FILE(GLOB native_cpp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "native/*.cpp") -FILE(GLOB native_cudnn_cpp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "native/cudnn/*.cpp") -FILE(GLOB native_cuda_cu RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "native/cuda/*.cu") -FILE(GLOB native_cuda_cpp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "native/cuda/*.cpp") -FILE(GLOB native_mkl_cpp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "native/mkl/*.cpp") -FILE(GLOB native_mkldnn_cpp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "native/mkldnn/*.cpp") - -FILE(GLOB all_python RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.py") - -IF(DEFINED ENV{PYTORCH_PYTHON}) - message(STATUS "Using python found in $ENV{PYTORCH_PYTHON}") - SET(PYCMD "$ENV{PYTORCH_PYTHON}") -ELSE() - SET(PYCMD "python") -ENDIF() - -SET(GEN_COMMAND - ${PYCMD} ${CMAKE_CURRENT_SOURCE_DIR}/gen.py - -s ${CMAKE_CURRENT_SOURCE_DIR} - ${cwrap_files} -) - -EXECUTE_PROCESS( - COMMAND ${GEN_COMMAND} - --output-dependencies ${CMAKE_CURRENT_BINARY_DIR}/generated_cpp.txt - RESULT_VARIABLE RETURN_VALUE -) -if (NOT RETURN_VALUE EQUAL 0) - message(STATUS ${generated_cpp}) - message(FATAL_ERROR "Failed to get generated_cpp list") -endif() -file(READ ${CMAKE_CURRENT_BINARY_DIR}/generated_cpp.txt generated_cpp) -file(READ ${CMAKE_CURRENT_BINARY_DIR}/generated_cpp.txt-cuda cuda_generated_cpp) - -FILE(GLOB_RECURSE all_templates "templates/*") - -FILE(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ATen) - -ADD_CUSTOM_COMMAND(OUTPUT ${generated_cpp} 
${cuda_generated_cpp} -COMMAND ${GEN_COMMAND} -DEPENDS ${all_python} ${all_templates} ${cwrap_files}) - -# Generated headers used from a cuda (.cu) file are -# not tracked correctly in cmake . We make the libATen.so depend explicitly -# on building the generated aten files to workaround. -ADD_CUSTOM_TARGET(aten_files_are_generated - DEPENDS ${generated_cpp} -) - -ADD_CUSTOM_TARGET(cuda_aten_files_are_generated - DEPENDS ${cuda_generated_cpp} -) +FILE(GLOB base_h "*.h" "detail/*.h") +FILE(GLOB base_cpp "*.cpp" "detail/*.cpp") +FILE(GLOB cuda_h "cuda/*.h" "cuda/detail/*.h" "cuda/*.cuh" "cuda/detail/*.cuh") +FILE(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp") +FILE(GLOB cuda_cu "cuda/*.cu" "cuda/detail/*.cu") +FILE(GLOB cudnn_h "cudnn/*.h" "cudnn/*.cuh") +FILE(GLOB cudnn_cpp "cudnn/*.cpp") +FILE(GLOB mkl_cpp "mkl/*.cpp") +FILE(GLOB mkldnn_cpp "mkldnn/*.cpp") + +FILE(GLOB native_cpp "native/*.cpp") +FILE(GLOB native_cudnn_cpp "native/cudnn/*.cpp") +FILE(GLOB native_cuda_cu "native/cuda/*.cu") +FILE(GLOB native_cuda_cpp "native/cuda/*.cpp") +FILE(GLOB native_mkl_cpp "native/mkl/*.cpp") +FILE(GLOB native_mkldnn_cpp "native/mkldnn/*.cpp") set(all_cpu_cpp ${base_cpp} ${native_cpp} ${native_mkl_cpp} ${native_mkldnn_cpp} ${generated_cpp} ${ATen_CPU_SRCS} ${cpu_kernel_cpp}) if(AT_MKL_ENABLED) @@ -234,10 +67,8 @@ if(AT_MKLDNN_ENABLED) set(all_cpu_cpp ${all_cpu_cpp} ${mkldnn_cpp}) endif() -INCLUDE_DIRECTORIES(${ATen_CPU_INCLUDE}) IF(NOT NO_CUDA OR WITH_ROCM) - INCLUDE_DIRECTORIES(${ATen_CUDA_INCLUDE}) - INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/cuda") + list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/cuda) set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} ${cuda_cu} ${native_cuda_cu}) set(all_cuda_cpp ${native_cudnn_cpp} ${cuda_cpp} ${native_cuda_cpp} ${cuda_generated_cpp} ${ATen_CUDA_SRCS}) IF(CUDNN_FOUND) @@ -248,52 +79,13 @@ endif() filter_list(generated_h generated_cpp "\\.h$") filter_list(cuda_generated_h cuda_generated_cpp "\\.h$") 
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/..) +list(APPEND ATen_CPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/..) # so the build can find the generated header files -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +list(APPEND ATen_CPU_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}) IF(NOT AT_LINK_STYLE) SET(AT_LINK_STYLE SHARED) ENDIF() -# We have two libraries: libATen_cpu.so and libATen_cuda.so, -# with libATen_cuda.so depending on libATen_cpu.so. The CPU library -# contains CPU code only. libATen_cpu.so is invariant to the setting -# of NO_CUDA (it always builds the same way); libATen_cuda.so is only -# built when NO_CUDA=0 and CUDA is available. - -add_library(ATen_cpu ${AT_LINK_STYLE} ${all_cpu_cpp}) -add_dependencies(ATen_cpu aten_files_are_generated) -if(NOT NO_CUDA OR WITH_ROCM) - TORCH_CUDA_BASED_ADD_LIBRARY(ATen_cuda ${AT_LINK_STYLE} ${all_cuda_cpp}) - target_link_libraries(ATen_cuda ATen_cpu) - add_dependencies(ATen_cuda cuda_aten_files_are_generated) - caffe2_interface_library(ATen_cuda ATen_cuda_library) -endif() - -function(aten_compile_options libname) - target_compile_options(${libname} - PRIVATE - -Wall - -Wextra - -fexceptions - -Wno-missing-field-initializers - -Wno-type-limits - -Wno-unused-parameter - -Wno-unknown-warning-option - -Wno-unknown-pragmas) - - if ($ENV{WERROR}) - target_compile_options(${libname} PRIVATE -Werror) - endif() -endfunction() - -if(NOT MSVC) - aten_compile_options(ATen_cpu) - if(NOT NO_CUDA OR WITH_ROCM) - aten_compile_options(ATen_cuda) - endif() -endif() - set(TBB_ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/tbb") set(TBB_BUILD_STATIC ON CACHE BOOL " " FORCE) set(TBB_BUILD_SHARED OFF CACHE BOOL " " FORCE) @@ -302,33 +94,28 @@ set(TBB_BUILD_TBBMALLOC_PROXY OFF CACHE BOOL " " FORCE) set(TBB_BUILD_TESTS OFF CACHE BOOL " " FORCE) add_subdirectory(cpu/tbb) set_property(TARGET tbb_static tbb_def_files PROPERTY FOLDER "dependencies") -target_include_directories(tbb_static PUBLIC ${TBB_ROOT_DIR}/include) 
-target_link_libraries(ATen_cpu tbb_static) - -if(NOT ${CMAKE_VERSION} VERSION_LESS "3.1") - set_property(TARGET ATen_cpu PROPERTY CXX_STANDARD 11) - if(NOT NO_CUDA OR WITH_ROCM) - set_property(TARGET ATen_cuda PROPERTY CXX_STANDARD 11) - endif() -endif() +list(APPEND ATen_CPU_INCLUDE ${TBB_ROOT_DIR}/include) +list(APPEND ATen_CPU_DEPENDENCY_LIBS tbb_static) IF(BLAS_FOUND) IF ($ENV{TH_BINARY_BUILD}) MESSAGE(STATUS "TH_BINARY_BUILD detected. Enabling special linkage.") - TARGET_LINK_LIBRARIES(ATen_cpu "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}") + list(APPEND ATen_CPU_DEPENDENCY_LIBS + "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}") if(NOT NO_CUDA OR WITH_ROCM) - TARGET_LINK_LIBRARIES(ATen_cuda "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}") + list(APPEND ATen_CUDA_DEPENDENCY_LIBS + "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}") endif() ELSE ($ENV{TH_BINARY_BUILD}) - TARGET_LINK_LIBRARIES(ATen_cpu ${BLAS_LIBRARIES}) + list(APPEND ATen_CPU_DEPENDENCY_LIBS ${BLAS_LIBRARIES}) if(NOT NO_CUDA OR WITH_ROCM) - TARGET_LINK_LIBRARIES(ATen_cuda "${BLAS_LIBRARIES}") + list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${BLAS_LIBRARIES}") endif() ENDIF ($ENV{TH_BINARY_BUILD}) ENDIF(BLAS_FOUND) IF(LAPACK_FOUND) - TARGET_LINK_LIBRARIES(ATen_cpu ${LAPACK_LIBRARIES}) + list(APPEND ATen_CPU_DEPENDENCY_LIBS ${LAPACK_LIBRARIES}) if(NOT NO_CUDA OR WITH_ROCM) # Although Lapack provides CPU (and thus, one might expect that ATen_cuda # would not need this at all), some of our libraries (magma in particular) @@ -336,7 +123,7 @@ IF(LAPACK_FOUND) # we get the *right* implementation, because even if the symbols are the # same, LAPACK implementions may have different calling conventions. 
# This caused https://github.com/pytorch/pytorch/issues/7353 - TARGET_LINK_LIBRARIES(ATen_cuda ${LAPACK_LIBRARIES}) + list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${LAPACK_LIBRARIES}) endif() ENDIF(LAPACK_FOUND) @@ -345,7 +132,7 @@ IF (UNIX AND NOT APPLE) # https://github.com/libgit2/libgit2/issues/2128#issuecomment-35649830 CHECK_LIBRARY_EXISTS(rt clock_gettime "time.h" NEED_LIBRT) IF(NEED_LIBRT) - TARGET_LINK_LIBRARIES(ATen_cpu rt) + list(APPEND ATen_CPU_DEPENDENCY_LIBS rt) SET(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} rt) ENDIF(NEED_LIBRT) ENDIF(UNIX AND NOT APPLE) @@ -372,79 +159,35 @@ IF(UNIX) ENDIF(HAVE_MALLOC_USABLE_SIZE) ENDIF(UNIX) -IF(NOT MSVC) - TARGET_LINK_LIBRARIES(ATen_cpu m) -ELSE(NOT MSVC) - IF(AT_MKL_MT) - set_target_properties(ATen_cpu PROPERTIES LINK_FLAGS_RELEASE "/NODEFAULTLIB:${VCOMP_LIB}") - set_target_properties(ATen_cpu PROPERTIES LINK_FLAGS_DEBUG "/NODEFAULTLIB:${VCOMP_LIB}") - set_target_properties(ATen_cpu PROPERTIES STATIC_LIBRARY_FLAGS "/NODEFAULTLIB:${VCOMP_LIB}") - if(NOT NO_CUDA OR WITH_ROCM) - set_target_properties(ATen_cuda PROPERTIES LINK_FLAGS_RELEASE "/NODEFAULTLIB:${VCOMP_LIB}") - set_target_properties(ATen_cuda PROPERTIES LINK_FLAGS_DEBUG "/NODEFAULTLIB:${VCOMP_LIB}") - set_target_properties(ATen_cuda PROPERTIES STATIC_LIBRARY_FLAGS "/NODEFAULTLIB:${VCOMP_LIB}") - endif() - ENDIF(AT_MKL_MT) -ENDIF(NOT MSVC) - -# Is __thread supported? 
-IF(NOT MSVC) - CHECK_C_SOURCE_COMPILES("static __thread int x = 1; int main() { return x; }" C_HAS_THREAD) -ELSE(NOT MSVC) - CHECK_C_SOURCE_COMPILES("static __declspec( thread ) int x = 1; int main() { return x; }" C_HAS_THREAD) -ENDIF(NOT MSVC) -IF(NOT C_HAS_THREAD) - MESSAGE(STATUS "Warning: __thread is not supported, generating thread-unsafe code") -ELSE(NOT C_HAS_THREAD) - add_compile_options(-DTH_HAVE_THREAD) -ENDIF(NOT C_HAS_THREAD) +if(NOT MSVC) + list(APPEND ATen_CPU_DEPENDENCY_LIBS m) +endif() if(MKLDNN_FOUND) - target_link_libraries(ATen_cpu ${MKLDNN_LIBRARIES}) + list(APPEND ATen_CPU_DEPENDENCY_LIBS ${MKLDNN_LIBRARIES}) endif(MKLDNN_FOUND) -# Directory where cpuinfo will download and build all dependencies -set(CONFU_DEPENDENCIES_BINARY_DIR ${PROJECT_BINARY_DIR}/confu-deps - CACHE PATH "Confu-style dependencies binary directory") - -# ---[ Configure cpuinfo -if (NOT TARGET cpuinfo) - if (NOT DEFINED CPUINFO_SOURCE_DIR) - set(CPUINFO_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cpuinfo" CACHE STRING "cpuinfo source directory") - endif() +list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo) - set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL "") - set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE BOOL "") - set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "") - set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL "") - set(CPUINFO_LIBRARY_TYPE "static" CACHE STRING "") - if(MSVC) - # ATen is built with default MSVC runtime linking, which is /MD (DLL) - # so no need to check *_USE_MSVC_STATIC_RUNTIME or some equivalent. - set(CPUINFO_RUNTIME_TYPE "shared" CACHE STRING "") - endif() - add_subdirectory( - "${CPUINFO_SOURCE_DIR}" - "${CONFU_DEPENDENCIES_BINARY_DIR}/cpuinfo") - # We build static version of cpuinfo but link - # them into a shared library for Caffe2, so they need PIC. 
- set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON) -endif() -TARGET_LINK_LIBRARIES(ATen_cpu cpuinfo) +if(NOT MSVC) + # Preserve values for the main build + set(__aten_sleef_build_shared_libs ${BUILD_SHARED_LIBS}) + set(__aten_sleef_build_tests ${BUILD_TESTS}) -if(MSVC) - set(BUILD_SHARED_LIBS ON CACHE BOOL "Build sleef shared" FORCE) -else() set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build sleef static" FORCE) + set(BUILD_DFT OFF CACHE BOOL "Don't build sleef DFT lib" FORCE) + set(BUILD_GNUABI_LIBS OFF CACHE BOOL "Don't build sleef gnuabi libs" FORCE) + set(BUILD_TESTS OFF CACHE BOOL "Don't build sleef tests" FORCE) + add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/sleef" ${CMAKE_BINARY_DIR}/sleef) + set_property(TARGET sleef PROPERTY FOLDER "dependencies") + list(APPEND ATen_CPU_INCLUDE SYSTEM ${CMAKE_BINARY_DIR}/include) + link_directories(${CMAKE_BINARY_DIR}/sleef/lib) + list(APPEND ATen_CPU_DEPENDENCY_LIBS sleef) + + # Set these back. TODO: Use SLEEF_ to pass these instead + set(BUILD_SHARED_LIBS ${__aten_sleef_build_shared_libs} CACHE BOOL "Build shared libs" FORCE) + set(BUILD_TESTS ${__aten_sleef_build_tests} CACHE BOOL "Build tests" FORCE) endif() -set(BUILD_DFT OFF CACHE BOOL "Don't build sleef DFT lib" FORCE) -set(BUILD_GNUABI_LIBS OFF CACHE BOOL "Don't build sleef gnuabi libs" FORCE) -set(BUILD_TESTS OFF CACHE BOOL "Don't build sleef tests" FORCE) -add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/sleef" ${CMAKE_BINARY_DIR}/sleef) -set_property(TARGET sleef PROPERTY FOLDER "dependencies") -include_directories(SYSTEM ${CMAKE_BINARY_DIR}/include) -link_directories(${CMAKE_BINARY_DIR}/sleef/lib) -TARGET_LINK_LIBRARIES(ATen_cpu sleef) IF(NOT NO_CUDA) IF ($ENV{ATEN_STATIC_CUDA}) @@ -480,7 +223,7 @@ IF(NOT NO_CUDA) add_dependencies(FAKELINKED_CUFFT FAKELINKED_CUFFT_TARGET) set_target_properties(FAKELINKED_CUFFT PROPERTIES IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/cufft_static_library.a) - 
TARGET_LINK_LIBRARIES(ATen_cuda + list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${CUDA_LIBRARIES} ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcusparse_static.a ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcurand_static.a @@ -489,26 +232,25 @@ IF(NOT NO_CUDA) ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcufft_static.a ) ELSE() - TARGET_LINK_LIBRARIES(ATen_cuda + list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${CUDA_LIBRARIES} ${CUDA_cusparse_LIBRARY} ${CUDA_curand_LIBRARY}) - CUDA_ADD_CUBLAS_TO_TARGET(ATen_cuda) - CUDA_ADD_CUFFT_TO_TARGET(ATen_cuda) ENDIF() if(CUDNN_FOUND) - target_link_libraries(ATen_cuda ${CUDNN_LIBRARIES}) + list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${CUDNN_LIBRARIES}) endif(CUDNN_FOUND) IF(USE_MAGMA) - TARGET_LINK_LIBRARIES(ATen_cuda ${MAGMA_LIBRARIES}) + list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${MAGMA_LIBRARIES}) IF ($ENV{TH_BINARY_BUILD}) - TARGET_LINK_LIBRARIES(ATen_cuda "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}") + list(APPEND ATen_CUDA_DEPENDENCY_LIBS + "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}") ENDIF($ENV{TH_BINARY_BUILD}) ENDIF(USE_MAGMA) IF ($ENV{ATEN_STATIC_CUDA}) - TARGET_LINK_LIBRARIES(ATen_cuda "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a") + list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a") ENDIF($ENV{ATEN_STATIC_CUDA}) ENDIF() @@ -517,36 +259,128 @@ IF(WITH_ROCM) FIND_LIBRARY(HIPBLAS_LIBRARY hipblas HINTS ${HIPBLAS_PATH}/lib) FIND_LIBRARY(HIPRNG_LIBRARY hiprng HINTS ${HIPRNG_PATH}/lib) - TARGET_LINK_LIBRARIES(ATen_cuda ${HIPBLAS_LIBRARY} ${HIPRNG_LIBRARY}) + list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${HIPBLAS_LIBRARY} ${HIPRNG_LIBRARY}) ENDIF() -install(TARGETS ATen_cpu - RUNTIME DESTINATION "${AT_INSTALL_BIN_DIR}" - LIBRARY DESTINATION "${AT_INSTALL_LIB_DIR}" - ARCHIVE DESTINATION "${AT_INSTALL_LIB_DIR}") +# Include CPU paths for CUDA as well +list(APPEND ATen_CUDA_INCLUDE ${ATen_CPU_INCLUDE}) -if(NOT NO_CUDA OR WITH_ROCM) - install(TARGETS ATen_cuda - RUNTIME DESTINATION "${AT_INSTALL_BIN_DIR}" - LIBRARY DESTINATION 
"${AT_INSTALL_LIB_DIR}" - ARCHIVE DESTINATION "${AT_INSTALL_LIB_DIR}") +# We have two libraries: libATen_cpu.so and libATen_cuda.so, +# with libATen_cuda.so depending on libATen_cpu.so. The CPU library +# contains CPU code only. libATen_cpu.so is invariant to the setting +# of NO_CUDA (it always builds the same way); libATen_cuda.so is only +# built when NO_CUDA=0 and CUDA is available. +set(ATen_CPU_SRCS ${all_cpu_cpp}) +if(AT_LINK_STYLE STREQUAL "INTERFACE") + # Source code can't be added to an interface library, so it is + # passed back to be compiled into the containing library + add_library(ATen_cpu INTERFACE) + list(APPEND ATen_CPU_DEPENDENCY_LIBS ATEN_CPU_FILES_GEN_LIB) +else() + add_library(ATen_cpu ${AT_LINK_STYLE} ${ATen_CPU_SRCS}) + target_include_directories(ATen_cpu INTERFACE $<INSTALL_INTERFACE:include>) + target_include_directories(ATen_cpu PRIVATE ${ATen_CPU_INCLUDE}) + target_link_libraries(ATen_cpu PUBLIC ${ATen_CPU_DEPENDENCY_LIBS}) + target_link_libraries(ATen_cpu PRIVATE ATEN_CPU_FILES_GEN_LIB) + caffe2_interface_library(ATen_cpu ATen_cpu_library) + # Set standard properties on the target + aten_set_target_props(ATen_cpu) + + # Make sure these don't get built by parent + set(ATen_CPU_SRCS) endif() -get_target_property(ATEN_CPU_OUTPUT_NAME ATen_cpu LOCATION) -get_filename_component(ATEN_CPU_OUTPUT_NAME ${ATEN_CPU_OUTPUT_NAME} NAME) - if(NOT NO_CUDA OR WITH_ROCM) - get_target_property(ATEN_CUDA_OUTPUT_NAME ATen_cuda LOCATION) - get_filename_component(ATEN_CUDA_OUTPUT_NAME ${ATEN_CUDA_OUTPUT_NAME} NAME) + if(AT_LINK_STYLE STREQUAL "INTERFACE") + # Source code can't be added to an interface library, so it is + # passed back to be compiled into the containing library + add_library(ATen_cuda INTERFACE) + list(APPEND ATen_CUDA_DEPENDENCY_LIBS ATEN_CUDA_FILES_GEN_LIB) + set(ATen_CUDA_SRCS ${all_cuda_cpp}) + else() + # A hack to deal with cuda library dependencies and modern CMake: the + # CUDA_ADD_LIBRARY includes a target_link_libraries, and as a result, + # one cannot use 
PUBLIC/PRIVATE/INTERFACE for the target anymore. This + # hack adds the PRIVATE keywords to CUDA_LIBRARIES so we can deal with + # it. We will then manually add the cudart library as interface libs. + set(__tmp ${CUDA_LIBRARIES}) + set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) + torch_cuda_based_add_library(ATen_cuda ${AT_LINK_STYLE} ${ATen_CUDA_SRCS}) + set(CUDA_LIBRARIES ${__tmp}) + target_link_libraries(ATen_cuda INTERFACE caffe2::cudart) + + target_include_directories( + ATen_cuda INTERFACE $<INSTALL_INTERFACE:include>) + target_include_directories( + ATen_cuda PRIVATE ${ATen_CUDA_INCLUDE}) + target_link_libraries( + ATen_cuda PRIVATE ${ATen_CUDA_DEPENDENCY_LIBS} ATEN_CUDA_FILES_GEN_LIB) + + # These public dependencies must go after the previous dependencies, as the + # order of the libraries in the linker call matters here when statically + # linking; libculibos and cublas must be last. + target_link_libraries( + ATen_cuda PUBLIC ATen_cpu ${ATen_PUBLIC_CUDA_DEPENDENCY_LIBS}) + + # Set standard properties on the target + aten_set_target_props(ATen_cuda) + + caffe2_interface_library(ATen_cuda ATen_cuda_library) + + # Make sure these don't get built by parent + set(ATen_CUDA_SRCS) + endif() endif() -set(ATEN_LIBRARIES - "${CMAKE_INSTALL_PREFIX}/${AT_INSTALL_LIB_DIR}/${ATEN_CPU_OUTPUT_NAME}") +if(NOT AT_LINK_STYLE STREQUAL "INTERFACE") + if(NOT NO_CUDA) + if (NOT $ENV{ATEN_STATIC_CUDA}) + cuda_add_cublas_to_target(ATen_cuda) + cuda_add_cufft_to_target(ATen_cuda) + endif() + endif() + + if(NOT MSVC) + aten_compile_options(ATen_cpu) + if(NOT NO_CUDA OR WITH_ROCM) + aten_compile_options(ATen_cuda) + endif() + endif() -if(NOT NO_CUDA) + if(NOT ${CMAKE_VERSION} VERSION_LESS "3.1") + set_property(TARGET ATen_cpu PROPERTY CXX_STANDARD 11) + if(NOT NO_CUDA OR WITH_ROCM) + set_property(TARGET ATen_cuda PROPERTY CXX_STANDARD 11) + endif() + endif() +endif() + +if (NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) + # Eventually replace this use of LOCATION with use of + # $<TARGET_FILE:ATen_cpu>, but generators only work in some 
cases + cmake_policy(SET CMP0026 OLD) + get_target_property(ATEN_CPU_OUTPUT_NAME ATen_cpu LOCATION) + get_filename_component(ATEN_CPU_OUTPUT_NAME ${ATEN_CPU_OUTPUT_NAME} NAME) set(ATEN_LIBRARIES - ${ATEN_LIBRARIES} - "${CMAKE_INSTALL_PREFIX}/${AT_INSTALL_LIB_DIR}/${ATEN_CUDA_OUTPUT_NAME}") + "${CMAKE_INSTALL_PREFIX}/${AT_INSTALL_LIB_DIR}/${ATEN_CPU_OUTPUT_NAME}") + if(NOT NO_CUDA OR WITH_ROCM) + get_target_property(ATEN_CUDA_OUTPUT_NAME ATen_cuda LOCATION) + get_filename_component(ATEN_CUDA_OUTPUT_NAME ${ATEN_CUDA_OUTPUT_NAME} NAME) + list(APPEND ATEN_LIBRARIES + "${CMAKE_INSTALL_PREFIX}/${AT_INSTALL_LIB_DIR}/${ATEN_CUDA_OUTPUT_NAME}") + endif() + + install(TARGETS ATen_cpu + RUNTIME DESTINATION "${AT_INSTALL_BIN_DIR}" + LIBRARY DESTINATION "${AT_INSTALL_LIB_DIR}" + ARCHIVE DESTINATION "${AT_INSTALL_LIB_DIR}") + + if(NOT NO_CUDA OR WITH_ROCM) + install(TARGETS ATen_cuda + RUNTIME DESTINATION "${AT_INSTALL_BIN_DIR}" + LIBRARY DESTINATION "${AT_INSTALL_LIB_DIR}" + ARCHIVE DESTINATION "${AT_INSTALL_LIB_DIR}") + endif() endif() SET(ATEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${AT_INSTALL_INCLUDE_DIR}") @@ -556,20 +390,64 @@ INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake" # https://stackoverflow.com/questions/11096471/how-can-i-install-a-hierarchy-of-files-using-cmake FOREACH(HEADER ${base_h}) - GET_FILENAME_COMPONENT(DIR ${HEADER} DIRECTORY) + string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" HEADER_SUB ${HEADER}) + GET_FILENAME_COMPONENT(DIR ${HEADER_SUB} DIRECTORY) INSTALL(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/${DIR}) ENDFOREACH() FOREACH(HEADER ${generated_h} ${cuda_generated_h}) # NB: Assumed to be flat - INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen) + INSTALL(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen) ENDFOREACH() FOREACH(HEADER ${cuda_h}) - GET_FILENAME_COMPONENT(DIR ${HEADER} DIRECTORY) - INSTALL(FILES ${HEADER} DESTINATION 
${AT_INSTALL_INCLUDE_DIR}/ATen/${DIR}) -ENDFOREACH() -FOREACH(HEADER ${cudnn_h}) - GET_FILENAME_COMPONENT(DIR ${HEADER} DIRECTORY) + string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" HEADER_SUB ${HEADER}) + GET_FILENAME_COMPONENT(DIR ${HEADER_SUB} DIRECTORY) INSTALL(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/${DIR}) ENDFOREACH() -INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/ATen/Declarations.yaml +INSTALL(FILES ${CMAKE_BINARY_DIR}/aten/src/ATen/Declarations.yaml DESTINATION ${AT_INSTALL_SHARE_DIR}/ATen) + +if(ATEN_NO_TEST) + message("disable test because ATEN_NO_TEST is set") +else() + add_subdirectory(test) +endif() + +if (NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) + foreach(test_src ${ATen_CPU_TEST_SRCS}) + get_filename_component(test_name ${test_src} NAME_WE) + add_executable(${test_name} "${test_src}") + target_include_directories( + ${test_name} PRIVATE $<INSTALL_INTERFACE:include>) + target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE}) + target_link_libraries(${test_name} ATen_cpu) + add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) + install(TARGETS ${test_name} DESTINATION test) + endforeach() + + if(NOT NO_CUDA OR WITH_ROCM) + foreach(test_src ${ATen_CUDA_TEST_SRCS}) + get_filename_component(test_name ${test_src} NAME_WE) + torch_cuda_based_add_executable(${test_name} "${test_src}") + target_include_directories( + ${test_name} PRIVATE $<INSTALL_INTERFACE:include>) + target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE}) + target_link_libraries(${test_name} ATen_cpu ATen_cuda) + add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) + install(TARGETS ${test_name} DESTINATION test) + endforeach() + endif() + + # Make sure these don't get built by parent + set(ATen_CPU_TEST_SRCS) + set(ATen_CUDA_TEST_SRCS) +endif() + +# Pass source, includes, and libs to parent +set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE) +set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} PARENT_SCOPE) +set(ATen_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS} PARENT_SCOPE) +set(ATen_CUDA_TEST_SRCS ${ATen_CUDA_TEST_SRCS} PARENT_SCOPE) 
+set(ATen_CPU_INCLUDE ${ATen_CPU_INCLUDE} PARENT_SCOPE) +set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE} PARENT_SCOPE) +set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE) +set(ATen_CUDA_DEPENDENCY_LIBS ${ATen_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE) diff --git a/aten/src/ATen/cpu/vec256/vec256_double.h b/aten/src/ATen/cpu/vec256/vec256_double.h index 5bc11755117ae..cfbed1977fb7f 100644 --- a/aten/src/ATen/cpu/vec256/vec256_double.h +++ b/aten/src/ATen/cpu/vec256/vec256_double.h @@ -2,7 +2,7 @@ #include "intrinsics.h" #include "vec256_base.h" -#ifdef __AVX__ +#if defined(__AVX__) && !defined(_MSC_VER) #include <sleef.h> #endif @@ -10,7 +10,7 @@ namespace at { namespace vec256 { namespace { -#ifdef __AVX__ +#if defined(__AVX__) && !defined(_MSC_VER) template <> class Vec256<double> { public: diff --git a/aten/src/ATen/cpu/vec256/vec256_float.h b/aten/src/ATen/cpu/vec256/vec256_float.h index e8c9c680a73f5..726afcec0480c 100644 --- a/aten/src/ATen/cpu/vec256/vec256_float.h +++ b/aten/src/ATen/cpu/vec256/vec256_float.h @@ -2,7 +2,7 @@ #include "intrinsics.h" #include "vec256_base.h" -#ifdef __AVX__ +#if defined(__AVX__) && !defined(_MSC_VER) #include <sleef.h> #endif @@ -10,7 +10,7 @@ namespace at { namespace vec256 { namespace { -#ifdef __AVX__ +#if defined(__AVX__) && !defined(_MSC_VER) template <> class Vec256<float> { public: diff --git a/aten/src/ATen/cuda/ATenCUDAGeneral.h b/aten/src/ATen/cuda/ATenCUDAGeneral.h index 95b81023c492b..4dade5e3a131d 100644 --- a/aten/src/ATen/cuda/ATenCUDAGeneral.h +++ b/aten/src/ATen/cuda/ATenCUDAGeneral.h @@ -1,7 +1,7 @@ #pragma once #ifdef _WIN32 -# ifdef ATen_cuda_EXPORTS +# if defined(ATen_cuda_EXPORTS) || defined(caffe2_gpu_EXPORTS) # define AT_CUDA_API __declspec(dllexport) # else # define AT_CUDA_API __declspec(dllimport) diff --git a/aten/src/ATen/native/cpu/ReduceOpsKernel.cpp b/aten/src/ATen/native/cpu/ReduceOpsKernel.cpp index 939b28ff4eb0c..2f1bf85956f85 100644 --- a/aten/src/ATen/native/cpu/ReduceOpsKernel.cpp +++ 
b/aten/src/ATen/native/cpu/ReduceOpsKernel.cpp @@ -127,7 +127,9 @@ struct Reduction { }); if (cols_rounded != cols) { +#if !defined(__APPLE__) #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif scalar_t buf[WIDTH]; // Initializes the entire (tiny) array to avoid uninitialized warnings diff --git a/aten/src/ATen/test/CMakeLists.txt b/aten/src/ATen/test/CMakeLists.txt index 41f1de88a25d9..f7a40f58d17ef 100644 --- a/aten/src/ATen/test/CMakeLists.txt +++ b/aten/src/ATen/test/CMakeLists.txt @@ -4,61 +4,30 @@ IF (MSVC) ENDIF() ENDIF(MSVC) -ADD_EXECUTABLE(scalar_test scalar_test.cpp) -target_link_libraries(scalar_test ATen_cpu) - -ADD_EXECUTABLE(apply_utils_test apply_utils_test.cpp) -target_link_libraries(apply_utils_test ATen_cpu) - -ADD_EXECUTABLE(basic basic.cpp) -target_link_libraries(basic ATen_cpu) - -add_executable(atest atest.cpp) -target_link_libraries(atest ATen_cpu) - -add_executable(broadcast_test broadcast_test.cpp) -target_link_libraries(broadcast_test ATen_cpu) - -add_executable(wrapdim_test wrapdim_test.cpp) -target_link_libraries(wrapdim_test ATen_cpu) - -add_executable(dlconvertor_test dlconvertor_test.cpp) -target_link_libraries(dlconvertor_test ATen_cpu) - -add_executable(native_test native_test.cpp) -target_link_libraries(native_test ATen_cpu) - -add_executable(scalar_tensor_test scalar_tensor_test.cpp) -target_link_libraries(scalar_tensor_test ATen_cpu) - -add_executable(test_parallel test_parallel.cpp) -target_link_libraries(test_parallel ATen_cpu) - -add_executable(undefined_tensor_test undefined_tensor_test.cpp) -target_link_libraries(undefined_tensor_test ATen_cpu) - -add_executable(verify_api_visibility verify_api_visibility.cpp) -target_link_libraries(verify_api_visibility ATen_cpu) - -add_executable(tbb_init_test tbb_init_test.cpp) -target_link_libraries(tbb_init_test ATen_cpu) - -if(NOT NO_CUDA OR WITH_ROCM) - torch_cuda_based_add_executable(integer_divider_test integer_divider_test.cu) - 
target_link_libraries(integer_divider_test ATen_cpu ATen_cuda_library) -endif() - -if(NOT NO_CUDA OR WITH_ROCM) - torch_cuda_based_add_executable(cuda_rng_test cuda_rng_test.cpp) - target_link_libraries(cuda_rng_test ATen_cpu ATen_cuda_library) -endif() - -if(NOT NO_CUDA OR WITH_ROCM) - torch_cuda_based_add_executable(apply_test apply_test.cpp) - target_link_libraries(apply_test ATen_cpu ATen_cuda_library) -endif() - +list(APPEND ATen_CPU_TEST_SRCS + ${CMAKE_CURRENT_SOURCE_DIR}/scalar_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/apply_utils_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/basic.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/atest.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/broadcast_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/wrapdim_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dlconvertor_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/native_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/scalar_tensor_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_parallel.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/undefined_tensor_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/verify_api_visibility.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tbb_init_test.cpp) + +list(APPEND ATen_CUDA_TEST_SRCS + ${CMAKE_CURRENT_SOURCE_DIR}/integer_divider_test.cu + ${CMAKE_CURRENT_SOURCE_DIR}/cuda_rng_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/apply_test.cpp) if (CUDNN_FOUND) - add_executable(cudnn_test cudnn_test.cpp) - target_link_libraries(cudnn_test ATen_cpu ATen_cuda_library) + list(APPEND ATen_CUDA_TEST_SRCS + ${CMAKE_CURRENT_SOURCE_DIR}/cudnn_test.cpp) endif() + +# ---[ Send the lists to the parent scope. 
+set(ATen_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS} PARENT_SCOPE) +set(ATen_CUDA_TEST_SRCS ${ATen_CUDA_TEST_SRCS} PARENT_SCOPE) diff --git a/aten/src/TH/CMakeLists.txt b/aten/src/TH/CMakeLists.txt index 6026c58dc4684..b9fe27676f098 100644 --- a/aten/src/TH/CMakeLists.txt +++ b/aten/src/TH/CMakeLists.txt @@ -48,7 +48,13 @@ set(ATen_CPU_SRCS ${ATen_CPU_SRCS} ${ATen_TH_SRCS} PARENT_SCOPE) set(ATen_CPU_INCLUDE ${ATen_CPU_INCLUDE} - "${CMAKE_CURRENT_BINARY_DIR}" + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} +PARENT_SCOPE) + +set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE} + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} PARENT_SCOPE) CONFIGURE_FILE(THGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h") diff --git a/aten/src/TH/THAllocator.cpp b/aten/src/TH/THAllocator.cpp index 0174fbbc0f05a..9de65974caac3 100644 --- a/aten/src/TH/THAllocator.cpp +++ b/aten/src/TH/THAllocator.cpp @@ -552,7 +552,7 @@ static void THRefcountedMapAllocator_free(void* ctx_, void* data) { #ifdef _WIN32 THMapInfo *info = (THMapInfo*)(((char*)data) - TH_ALLOC_ALIGNMENT); if (--info->refcount == 0) { - SetEvent(ctx->event); + SetEvent(ctx->event); } if(UnmapViewOfFile(((char*)data) - TH_ALLOC_ALIGNMENT) == 0) THError("could not unmap the shared memory file"); diff --git a/aten/src/TH/THGeneral.h.in b/aten/src/TH/THGeneral.h.in index c07cad7ced4fb..16b55307f7205 100644 --- a/aten/src/TH/THGeneral.h.in +++ b/aten/src/TH/THGeneral.h.in @@ -29,7 +29,7 @@ #endif #ifdef _WIN32 -# ifdef ATen_cpu_EXPORTS +# if defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS) # define TH_API TH_EXTERNC __declspec(dllexport) # else # define TH_API TH_EXTERNC __declspec(dllimport) diff --git a/aten/src/THC/THCGeneral.h.in b/aten/src/THC/THCGeneral.h.in index f63db70d00750..1b4e115a1fab4 100644 --- a/aten/src/THC/THCGeneral.h.in +++ b/aten/src/THC/THCGeneral.h.in @@ -23,7 +23,7 @@ #endif #ifdef _WIN32 -# ifdef ATen_cuda_EXPORTS +# if defined(ATen_cuda_EXPORTS) || defined(caffe2_gpu_EXPORTS) # define 
THC_API THC_EXTERNC __declspec(dllexport) # define THC_CLASS __declspec(dllexport) # else diff --git a/aten/src/THCUNN/CMakeLists.txt b/aten/src/THCUNN/CMakeLists.txt index cb4caeb766828..79b11c2db9b64 100644 --- a/aten/src/THCUNN/CMakeLists.txt +++ b/aten/src/THCUNN/CMakeLists.txt @@ -83,9 +83,6 @@ set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE} "${CMAKE_CURRENT_SOURCE_DIR}" PARENT_SCOPE) -FILE(GLOB_RECURSE thcunn_h RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cuh" "*.h") -FOREACH(HEADER ${thcunn_h}) - # https://stackoverflow.com/questions/11096471/how-can-i-install-a-hierarchy-of-files-using-cmake - GET_FILENAME_COMPONENT(DIR ${HEADER} DIRECTORY) - INSTALL(FILES ${HEADER} DESTINATION ${ATEN_INSTALL_INCLUDE_SUBDIR}/THCUNN) -ENDFOREACH() +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + DESTINATION ${ATEN_INSTALL_INCLUDE_SUBDIR} + FILES_MATCHING PATTERN "*.h" PATTERN "*.cuh") diff --git a/aten/tools/run_tests.sh b/aten/tools/run_tests.sh index eb9c7d6377838..e26d5cf4ab805 100755 --- a/aten/tools/run_tests.sh +++ b/aten/tools/run_tests.sh @@ -2,28 +2,32 @@ set -x set -e +VALGRIND_SUP="${PWD}/`dirname $0`/valgrind.sup" +pushd $1 + VALGRIND=${VALGRIND:=ON} -BUILD_ROOT=$1 -$BUILD_ROOT/src/ATen/test/basic -$BUILD_ROOT/src/ATen/test/atest -$BUILD_ROOT/src/ATen/test/scalar_test -$BUILD_ROOT/src/ATen/test/broadcast_test -$BUILD_ROOT/src/ATen/test/wrapdim_test -$BUILD_ROOT/src/ATen/test/apply_utils_test -$BUILD_ROOT/src/ATen/test/dlconvertor_test -$BUILD_ROOT/src/ATen/test/native_test -$BUILD_ROOT/src/ATen/test/scalar_tensor_test -$BUILD_ROOT/src/ATen/test/undefined_tensor_test -if [[ -x $BUILD_ROOT/src/ATen/test/cudnn_test ]]; then - $BUILD_ROOT/src/ATen/test/cudnn_test +./basic +./atest +./scalar_test +./broadcast_test +./wrapdim_test +./apply_utils_test +./dlconvertor_test +./native_test +./scalar_tensor_test +./undefined_tensor_test +if [[ -x ./cudnn_test ]]; then + ./cudnn_test fi -if [[ -x $BUILD_ROOT/src/ATen/test/cuda_rng_test ]]; then - 
$BUILD_ROOT/src/ATen/test/cuda_rng_test +if [[ -x ./cuda_rng_test ]]; then + ./cuda_rng_test fi -if [[ -x $BUILD_ROOT/src/ATen/test/apply_test ]]; then - $BUILD_ROOT/src/ATen/test/apply_test +if [[ -x ./apply_test ]]; then + ./apply_test fi if [ "$VALGRIND" == "ON" ] then - valgrind --suppressions=`dirname $0`/valgrind.sup --error-exitcode=1 $BUILD_ROOT/src/ATen/test/basic "[cpu]" + valgrind --suppressions="$VALGRIND_SUP" --error-exitcode=1 ./basic "[cpu]" fi + +popd diff --git a/aten/tools/test_install.sh b/aten/tools/test_install.sh index 3dc22ba21e383..381d1e73b7858 100755 --- a/aten/tools/test_install.sh +++ b/aten/tools/test_install.sh @@ -3,6 +3,7 @@ set -x set -e INSTALL_ROOT=$1 SRC_ROOT=$2 +rm -rf test_build mkdir test_build cd test_build cmake -DCMAKE_PREFIX_PATH=$INSTALL_ROOT $SRC_ROOT/src/ATen/test/test_install diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 75d289200b043..6291416e67509 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1,28 +1,55 @@ -# ---[ Declare source file lists - -# ---[ Add respective subdirectories -# Note: the folders that are being commented out have not been properly -# addressed yet. +# ---[ Generate and install header and cpp files +include(../cmake/Codegen.cmake) -# ---[ Shared build -add_subdirectory(proto) +# ---[ Declare source file lists # ---[ ATen build if(BUILD_ATEN) - # Unfortunately we need to have this built in contrib/aten/ in the - # case of BUILD_CAFFE2, so gate it here. We should combine these. 
- if(NOT BUILD_CAFFE2) - set(__caffe2_CMAKE_POSITION_INDEPENDENT_CODE ${CMAKE_POSITION_INDEPENDENT_CODE}) - set(CMAKE_POSITION_INDEPENDENT_CODE ON) - set(ATEN_NO_TEST ON) - set(AT_LINK_STYLE STATIC) - add_subdirectory(../aten aten) - set(CMAKE_POSITION_INDEPENDENT_CODE ${__caffe2_CMAKE_POSITION_INDEPENDENT_CODE}) + set(__caffe2_CMAKE_POSITION_INDEPENDENT_CODE ${CMAKE_POSITION_INDEPENDENT_CODE}) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + set(AT_LINK_STYLE INTERFACE) + # Disable contrib for root-level build + set(ATEN_NO_CONTRIB ON) + add_subdirectory(../aten aten) + set(CMAKE_POSITION_INDEPENDENT_CODE ${__caffe2_CMAKE_POSITION_INDEPENDENT_CODE}) + + if(BUILD_CAFFE2) + # Generate the headers wrapped by our operator + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/contrib/aten/aten_op.h + COMMAND + ${PYCMD} ${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/gen_op.py + --aten_root=${CMAKE_CURRENT_SOURCE_DIR}/../aten + --template_dir=${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten + --yaml_dir=${CMAKE_CURRENT_BINARY_DIR}/../aten/src/ATen + --install_dir=${CMAKE_CURRENT_BINARY_DIR}/contrib/aten + DEPENDS + ATen_cpu + ${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/gen_op.py + ${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/aten_op_template.h) + + add_custom_target(__aten_op_header_gen + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/contrib/aten/aten_op.h) + add_library(aten_op_header_gen INTERFACE) + add_dependencies(aten_op_header_gen __aten_op_header_gen) endif() + + # Add source, includes, and libs to lists + list(APPEND Caffe2_CPU_SRCS ${ATen_CPU_SRCS}) + list(APPEND Caffe2_GPU_SRCS ${ATen_CUDA_SRCS}) + # ATen tests use catch instead of gtest so keep separate for now + # list(APPEND Caffe2_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS}) + # list(APPEND Caffe2_GPU_TEST_SRCS ${ATen_CUDA_TEST_SRCS}) + list(APPEND Caffe2_CPU_INCLUDE ${ATen_CPU_INCLUDE}) + list(APPEND Caffe2_GPU_INCLUDE ${ATen_CUDA_INCLUDE}) + list(APPEND Caffe2_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS}) + list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS 
${ATen_CUDA_DEPENDENCY_LIBS}) endif() # ---[ Caffe2 build if(BUILD_CAFFE2) + # Note: the folders that are being commented out have not been properly + # addressed yet. + add_subdirectory(proto) add_subdirectory(core) add_subdirectory(utils) add_subdirectory(contrib) @@ -71,6 +98,16 @@ if (FALSE) message(STATUS " " ${tmp}) endforeach() + message(STATUS "CPU include: ") + foreach(tmp ${Caffe2_CPU_INCLUDE}) + message(STATUS " " ${tmp}) + endforeach() + + message(STATUS "GPU include: ") + foreach(tmp ${Caffe2_GPU_INCLUDE}) + message(STATUS " " ${tmp}) + endforeach() + message(STATUS "CPU test sources: ") foreach(tmp ${Caffe2_CPU_TEST_SRCS}) message(STATUS " " ${tmp}) @@ -80,109 +117,124 @@ if (FALSE) foreach(tmp ${Caffe2_GPU_TEST_SRCS}) message(STATUS " " ${tmp}) endforeach() -endif() - -# ---[ Generate and install header files. -# Write the macros file. -configure_file( - ${PROJECT_SOURCE_DIR}/caffe2/core/macros.h.in - ${PROJECT_BINARY_DIR}/caffe2/core/macros.h) - -# Installing the header files -install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} - DESTINATION include - FILES_MATCHING PATTERN "*.h") -install(FILES ${PROJECT_BINARY_DIR}/caffe2/core/macros.h - DESTINATION include/caffe2/core) + message(STATUS "ATen CPU test sources: ") + foreach(tmp ${ATen_CPU_TEST_SRCS}) + message(STATUS " " ${tmp}) + endforeach() + message(STATUS "ATen CUDA test sources: ") + foreach(tmp ${ATen_CUDA_TEST_SRCS}) + message(STATUS " " ${tmp}) + endforeach() +endif() # ---[ List of libraries to link with -add_library(caffe2_protos STATIC $ $) -add_dependencies(caffe2_protos Caffe_PROTO Caffe2_PROTO) -# If we are going to link protobuf locally inside caffe2 libraries, what we will do is -# to create a helper static library that always contains libprotobuf source files, and -# link the caffe2 related dependent libraries to it. 
-target_include_directories(caffe2_protos INTERFACE $) -# Reason for this public dependency is as follows: -# (1) Strictly speaking, we should not expose any Protobuf related functions. We should -# only use function interfaces wrapped with our own public API, and link protobuf -# locally. -# (2) However, currently across the Caffe2 codebase, we have extensive use of protobuf -# functionalities. For example, not only libcaffe2.so uses it, but also other -# binaries such as python extensions etc. As a result, we will have to have a -# transitive dependency to libprotobuf. -# -# Good thing is that, if we specify CAFFE2_LINK_LOCAL_PROTOBUF, then we do not need to -# separately deploy protobuf binaries - libcaffe2.so will contain all functionalities -# one needs. One can verify this via ldd. -# -# TODO item in the future includes: -# (1) Enable using lite protobuf -# (2) Properly define public API that do not directly depend on protobuf itself. -# (3) Expose the libprotobuf.a file for dependent libraries to link to. -# -# What it means for users/developers? -# (1) Users: nothing affecting the users, other than the fact that CAFFE2_LINK_LOCAL_PROTOBUF -# avoids the need to deploy protobuf. -# (2) Developers: if one simply uses core caffe2 functionality without using protobuf, -# nothing changes. If one has a dependent library that uses protobuf, then one needs to -# have the right protobuf version as well as linking to libprotobuf.a. -target_link_libraries(caffe2_protos PUBLIC protobuf::libprotobuf) +if(BUILD_CAFFE2) + add_library(caffe2_protos STATIC $ $) + add_dependencies(caffe2_protos Caffe_PROTO Caffe2_PROTO) + # If we are going to link protobuf locally inside caffe2 libraries, what we will do is + # to create a helper static library that always contains libprotobuf source files, and + # link the caffe2 related dependent libraries to it. 
+ target_include_directories(caffe2_protos INTERFACE $) + # Reason for this public dependency is as follows: + # (1) Strictly speaking, we should not expose any Protobuf related functions. We should + # only use function interfaces wrapped with our own public API, and link protobuf + # locally. + # (2) However, currently across the Caffe2 codebase, we have extensive use of protobuf + # functionalities. For example, not only libcaffe2.so uses it, but also other + # binaries such as python extensions etc. As a result, we will have to have a + # transitive dependency to libprotobuf. + # + # Good thing is that, if we specify CAFFE2_LINK_LOCAL_PROTOBUF, then we do not need to + # separately deploy protobuf binaries - libcaffe2.so will contain all functionalities + # one needs. One can verify this via ldd. + # + # TODO item in the future includes: + # (1) Enable using lite protobuf + # (2) Properly define public API that do not directly depend on protobuf itself. + # (3) Expose the libprotobuf.a file for dependent libraries to link to. + # + # What it means for users/developers? + # (1) Users: nothing affecting the users, other than the fact that CAFFE2_LINK_LOCAL_PROTOBUF + # avoids the need to deploy protobuf. + # (2) Developers: if one simply uses core caffe2 functionality without using protobuf, + # nothing changes. If one has a dependent library that uses protobuf, then one needs to + # have the right protobuf version as well as linking to libprotobuf.a. + target_link_libraries(caffe2_protos PUBLIC protobuf::libprotobuf) +endif() # Compile exposed libraries. 
add_library(caffe2 ${Caffe2_CPU_SRCS}) -caffe2_interface_library(caffe2_protos caffe2_protos_whole) -target_link_libraries(caffe2 PRIVATE caffe2_protos_whole) -if (${CAFFE2_LINK_LOCAL_PROTOBUF}) - target_link_libraries(caffe2 INTERFACE protobuf::libprotobuf) -else() - target_link_libraries(caffe2 PUBLIC protobuf::libprotobuf) +if (BUILD_CAFFE2) + caffe2_interface_library(caffe2_protos caffe2_protos_whole) + target_link_libraries(caffe2 PRIVATE caffe2_protos_whole) + if (${CAFFE2_LINK_LOCAL_PROTOBUF}) + target_link_libraries(caffe2 INTERFACE protobuf::libprotobuf) + else() + target_link_libraries(caffe2 PUBLIC protobuf::libprotobuf) + endif() endif() target_link_libraries(caffe2 PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS}) target_link_libraries(caffe2 PRIVATE ${Caffe2_DEPENDENCY_LIBS}) target_link_libraries(caffe2 PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS}) target_include_directories(caffe2 INTERFACE $) +target_include_directories(caffe2 PRIVATE ${Caffe2_CPU_INCLUDE}) +# Set standard properties on the target +aten_set_target_props(caffe2) target_compile_options(caffe2 INTERFACE "-std=c++11") target_compile_options(caffe2 PRIVATE "-DCAFFE2_BUILD_MAIN_LIB") # Use -O2 for release builds (-O3 doesn't improve perf, and -Os results in perf regression) target_compile_options(caffe2 PRIVATE "$<$,$>:-O2>") +set_target_properties(caffe2 PROPERTIES VERSION 1 SOVERSION 1) install(TARGETS caffe2 EXPORT Caffe2Targets DESTINATION lib) caffe2_interface_library(caffe2 caffe2_library) list(APPEND Caffe2_MAIN_LIBS caffe2_library) # ---[ CUDA library. -if(BUILD_CAFFE2) - if(USE_CUDA) - # A hack to deal with cuda library dependencies and modern CMake: the - # CUDA_ADD_LIBRARY includes a target_link_libraries, and as a result, - # one cannot use PUBLIC/PRIVATE/INTERFACE for the target anymore. This - # hack adds the PRIVATE keywords to CUDA_LIBRARIES so we can deal with - # it. We will then manually add the cudart library as interface libs. 
- set(__tmp ${CUDA_LIBRARIES}) - set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) - if(CAFFE2_STATIC_LINK_CUDA) - CUDA_ADD_LIBRARY(caffe2_gpu STATIC ${Caffe2_GPU_SRCS}) - else() - CUDA_ADD_LIBRARY(caffe2_gpu ${Caffe2_GPU_SRCS}) - endif() - set(CUDA_LIBRARIES ${__tmp}) - target_link_libraries(caffe2_gpu INTERFACE caffe2::cudart) +if(USE_CUDA OR USE_ROCM) + # A hack to deal with cuda library dependencies and modern CMake: the + # CUDA_ADD_LIBRARY includes a target_link_libraries, and as a result, + # one cannot use PUBLIC/PRIVATE/INTERFACE for the target anymore. This + # hack adds the PRIVATE keywords to CUDA_LIBRARIES so we can deal with + # it. We will then manually add the cudart library as interface libs. + set(__tmp ${CUDA_LIBRARIES}) + set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) + if(CAFFE2_STATIC_LINK_CUDA) + torch_cuda_based_add_library(caffe2_gpu STATIC ${Caffe2_GPU_SRCS}) + else() + torch_cuda_based_add_library(caffe2_gpu ${Caffe2_GPU_SRCS}) + endif() + set(CUDA_LIBRARIES ${__tmp}) + target_link_libraries(caffe2_gpu INTERFACE caffe2::cudart) - target_include_directories( - caffe2_gpu INTERFACE $) - target_link_libraries( - caffe2_gpu PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS}) + target_include_directories( + caffe2_gpu INTERFACE $) + target_include_directories( + caffe2_gpu PRIVATE ${Caffe2_GPU_INCLUDE}) + target_link_libraries( + caffe2_gpu PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS}) - # These public dependencies must go after the previous dependencies, as the - # order of the libraries in the linker call matters here when statically - # linking; libculibos and cublas must be last. - target_link_libraries( - caffe2_gpu PUBLIC caffe2 ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS}) + # These public dependencies must go after the previous dependencies, as the + # order of the libraries in the linker call matters here when statically + # linking; libculibos and cublas must be last. 
+ target_link_libraries( + caffe2_gpu PUBLIC caffe2 ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS}) + + # Set standard properties on the target + aten_set_target_props(caffe2_gpu) + set_target_properties(caffe2_gpu PROPERTIES VERSION 1 SOVERSION 1) + + install(TARGETS caffe2_gpu EXPORT Caffe2Targets DESTINATION lib) + caffe2_interface_library(caffe2_gpu caffe2_gpu_library) + list(APPEND Caffe2_MAIN_LIBS caffe2_gpu_library) +endif() - caffe2_interface_library(caffe2_gpu caffe2_gpu_library) - list(APPEND Caffe2_MAIN_LIBS caffe2_gpu_library) - install(TARGETS caffe2_gpu EXPORT Caffe2Targets DESTINATION lib) +# ---[ Check if warnings should be errors. +if ($ENV{WERROR}) + target_compile_options(caffe2 PRIVATE -Werror) + if(USE_CUDA OR USE_ROCM) + target_compile_options(caffe2_gpu PRIVATE -Werror) endif() endif() @@ -190,7 +242,7 @@ endif() if(BUILD_CAFFE2) if (BUILD_TEST) set(Caffe2_ALL_TEST_SRCS ${Caffe2_CPU_TEST_SRCS}) - if (USE_CUDA) + if (USE_CUDA OR USE_ROCM) list(APPEND Caffe2_ALL_TEST_SRCS ${Caffe2_GPU_TEST_SRCS}) endif() @@ -206,6 +258,28 @@ if(BUILD_CAFFE2) endforeach() endif() endif() +if(BUILD_ATEN) + foreach(test_src ${ATen_CPU_TEST_SRCS}) + get_filename_component(test_name ${test_src} NAME_WE) + add_executable(${test_name} "${test_src}") + target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) + target_link_libraries(${test_name} ${Caffe2_MAIN_LIBS}) + add_test(NAME ${test_name} COMMAND $) + install(TARGETS ${test_name} DESTINATION test) + endforeach() + + if(USE_CUDA OR USE_ROCM) + foreach(test_src ${ATen_CUDA_TEST_SRCS}) + get_filename_component(test_name ${test_src} NAME_WE) + torch_cuda_based_add_executable(${test_name} "${test_src}") + target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) + target_link_libraries(${test_name} ${Caffe2_MAIN_LIBS}) + add_test(NAME ${test_name} COMMAND $) + install(TARGETS ${test_name} DESTINATION test) + endforeach() + endif() + +endif() if(BUILD_CAFFE2) if (BUILD_PYTHON) @@ -253,7 +327,7 @@ 
if(BUILD_CAFFE2) endif(WIN32) install(TARGETS caffe2_pybind11_state DESTINATION "${PYTHON_LIB_REL_PATH}/caffe2/python") - if(USE_CUDA) + if(USE_CUDA OR USE_ROCM) add_library(caffe2_pybind11_state_gpu MODULE ${Caffe2_GPU_PYTHON_SRCS}) set_target_properties(caffe2_pybind11_state_gpu PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") set_target_properties(caffe2_pybind11_state_gpu PROPERTIES PREFIX "") @@ -283,7 +357,7 @@ if(BUILD_CAFFE2) COMMAND ${CMAKE_COMMAND} -E copy $ ${CMAKE_BINARY_DIR}/caffe2/python) - if (USE_CUDA) + if (USE_CUDA OR USE_ROCM) add_dependencies(windows_python_copy_lib caffe2_pybind11_state_gpu) add_custom_command( TARGET windows_python_copy_lib POST_BUILD diff --git a/caffe2/contrib/CMakeLists.txt b/caffe2/contrib/CMakeLists.txt index 7becad2b0af08..be8c0bd5bec29 100644 --- a/caffe2/contrib/CMakeLists.txt +++ b/caffe2/contrib/CMakeLists.txt @@ -8,14 +8,19 @@ add_subdirectory(script) if (USE_TENSORRT) add_subdirectory(tensorrt) endif() -# Finally pass the src lists back to the parent -# CPU source, test sources, binary sources +# Pass the src lists back to the parent + +# CPU source, include, deps, test sources, binary sources set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE) +set(Caffe2_CPU_INCLUDE ${Caffe2_CPU_INCLUDE} PARENT_SCOPE) +set(Caffe2_DEPENDENCY_LIBS ${Caffe2_DEPENDENCY_LIBS} PARENT_SCOPE) set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} PARENT_SCOPE) set(Caffe2_CPU_BINARY_SRCS ${Caffe2_CPU_BINARY_SRCS} PARENT_SCOPE) -# GPU source, test sources, binary sources +# GPU source, include, deps, test sources, binary sources set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} PARENT_SCOPE) +set(Caffe2_GPU_INCLUDE ${Caffe2_GPU_INCLUDE} PARENT_SCOPE) +set(Caffe2_CUDA_DEPENDENCY_LIBS ${Caffe2_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE) set(Caffe2_GPU_TEST_SRCS ${Caffe2_GPU_TEST_SRCS} PARENT_SCOPE) set(Caffe2_GPU_BINARY_SRCS ${Caffe2_GPU_BINARY_SRCS} PARENT_SCOPE) diff --git a/caffe2/contrib/aten/CMakeLists.txt b/caffe2/contrib/aten/CMakeLists.txt index 
b51b13671550f..5bc2341e3d288 100644 --- a/caffe2/contrib/aten/CMakeLists.txt +++ b/caffe2/contrib/aten/CMakeLists.txt @@ -1,27 +1,7 @@ if(BUILD_ATEN) - # TODO: Combine this with caffe2/ version of the aten target - # Unfortunately the custom command below requires this to be here - set(__caffe2_CMAKE_POSITION_INDEPENDENT_CODE ${CMAKE_POSITION_INDEPENDENT_CODE}) - set(CMAKE_POSITION_INDEPENDENT_CODE ON) - set(ATEN_NO_TEST ON) - set(AT_LINK_STYLE STATIC) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../../../aten aten) - set(CMAKE_POSITION_INDEPENDENT_CODE ${__caffe2_CMAKE_POSITION_INDEPENDENT_CODE}) - - add_custom_command(OUTPUT aten_op.h - COMMAND - python ${CMAKE_CURRENT_SOURCE_DIR}/gen_op.py - --aten_root=${CMAKE_CURRENT_SOURCE_DIR}/../../../aten - --template_dir=${CMAKE_CURRENT_SOURCE_DIR} - DEPENDS - ATen_cpu - ${CMAKE_CURRENT_SOURCE_DIR}/gen_op.py - ${CMAKE_CURRENT_SOURCE_DIR}/aten_op_template.h) - - add_custom_target(__aten_op_header_gen DEPENDS aten_op.h) - add_library(aten_op_header_gen INTERFACE) - add_dependencies(aten_op_header_gen __aten_op_header_gen) - - set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/aten_op.cc PARENT_SCOPE) - set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/aten_op_cuda.cc PARENT_SCOPE) + # Add source generated by Codegen.cmake and pass to parent + list(APPEND Caffe2_CPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/aten_op.cc) + list(APPEND Caffe2_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/aten_op_cuda.cc) + set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE) + set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} PARENT_SCOPE) endif() diff --git a/cmake/BuildVariables.cmake b/cmake/BuildVariables.cmake index 89edb63b599ee..7897f63cffa40 100644 --- a/cmake/BuildVariables.cmake +++ b/cmake/BuildVariables.cmake @@ -17,6 +17,11 @@ set(Caffe2_GPU_SRCS) set(Caffe2_CPU_TEST_SRCS) set(Caffe2_GPU_TEST_SRCS) +# Caffe2_{CPU,GPU}_INCLUDE is the list that will have all the include +# directories for CPU and GPU respectively. 
+set(Caffe2_CPU_INCLUDE) +set(Caffe2_GPU_INCLUDE) + # Caffe2_MAIN_LIBS is a list of the libraries that a dependent library should # depend on when it links against Caffe2. set(Caffe2_MAIN_LIBS) @@ -25,13 +30,13 @@ set(Caffe2_MAIN_LIBS) set(Caffe2_DEPENDENCY_LIBS "") set(Caffe2_CUDA_DEPENDENCY_LIBS "") # This variable contains dependency libraries of Caffe2 which requires whole -# symbol linkage. One example is the onnx lib where we need all its schema -# symbols. However, if the lib is whole linked in caffe2 lib, we don't want +# symbol linkage. One example is the onnx lib where we need all its schema +# symbols. However, if the lib is whole linked in caffe2 lib, we don't want # it to be linked in binaries that will link caffe2 lib. Because if caffe2 lib -# is built as dynamic library, it will result in two copied of symbols of +# is built as dynamic library, it will result in two copied of symbols of # Caffe2_DEPENDENCY_WHOLE_LINK_LIBS existing in caffe2.so and the binary, which # will cause issues. Therefore Caffe2_DEPENDENCY_WHOLE_LINK_LIBS will only -# be linked by caffe2 lib. +# be linked by caffe2 lib. set(Caffe2_DEPENDENCY_WHOLE_LINK_LIBS "") # Lists for Caffe2 public dependency libraries. These libraries will be diff --git a/cmake/Caffe2Config.cmake.in b/cmake/Caffe2Config.cmake.in index fde0e1ad5ae8b..3b9bb04afa213 100644 --- a/cmake/Caffe2Config.cmake.in +++ b/cmake/Caffe2Config.cmake.in @@ -74,9 +74,17 @@ if (@USE_CUDA@) include("${CMAKE_CURRENT_LIST_DIR}/public/cuda.cmake") if (NOT CAFFE2_FOUND_CUDA) message(FATAL_ERROR - "Your installed Caffe2 version uses cuda but I cannot find the cuda " - "libraries. Please set the proper cuda prefixes and / or install " - "cuda.") + "Your installed Caffe2 version uses CUDA but I cannot find the CUDA " + "libraries. 
Please set the proper CUDA prefixes and / or install " + "CUDA.") + endif() + if (@BUILD_CAFFE2@) + if (NOT CAFFE2_FOUND_CUDNN) + message(FATAL_ERROR + "Your installed Caffe2 version uses cuDNN but I cannot find the cuDNN " + "libraries. Please set the proper cuDNN prefixes and / or install " + "cuDNN.") + endif() endif() endif() diff --git a/cmake/Codegen.cmake b/cmake/Codegen.cmake new file mode 100644 index 0000000000000..8f11fb0c97d83 --- /dev/null +++ b/cmake/Codegen.cmake @@ -0,0 +1,158 @@ +if (DEFINED ENV{PYTORCH_PYTHON}) + message(STATUS "Using python found in $ENV{PYTORCH_PYTHON}") + set(PYCMD "$ENV{PYTORCH_PYTHON}") +else() + SET(PYCMD "python") +endif() + +# ---[ Write the macros file +configure_file( + ${CMAKE_CURRENT_LIST_DIR}/../caffe2/core/macros.h.in + ${CMAKE_BINARY_DIR}/caffe2/core/macros.h) + +# ---[ Installing the header files +install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../caffe2 + DESTINATION include + FILES_MATCHING PATTERN "*.h") +install(FILES ${CMAKE_BINARY_DIR}/caffe2/core/macros.h + DESTINATION include/caffe2/core) + +# ---[ ATen specific +if (BUILD_ATEN) + # SET_SOURCE_FILES_PROPERTIES must be in the same CMakeLists.txt file as the target that includes the file + # so we need to set these commands here rather than in src/TH + IF(C_SSE4_1_FOUND AND C_SSE4_2_FOUND) + IF(MSVC) + SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_LIST_DIR}/../aten/src/TH/generic/simd/convolve5x5_sse.cpp PROPERTIES COMPILE_FLAGS "${MSVC_OPT_FLAG}/fp:fast") + ELSE(MSVC) + SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_LIST_DIR}/../aten/src/TH/generic/simd/convolve5x5_sse.cpp PROPERTIES COMPILE_FLAGS "-O3 -ffast-math") + ENDIF(MSVC) + ENDIF(C_SSE4_1_FOUND AND C_SSE4_2_FOUND) + IF(C_AVX_FOUND) + IF(MSVC) + SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_LIST_DIR}/../aten/src/TH/generic/simd/convolve5x5_avx.cpp PROPERTIES COMPILE_FLAGS "${MSVC_OPT_FLAG}/fp:fast ${CXX_AVX_FLAGS}") + SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_LIST_DIR}/../aten/src/TH/vector/AVX.cpp PROPERTIES 
COMPILE_FLAGS "${MSVC_OPT_FLAG}/arch:AVX ${CXX_AVX_FLAGS}") + ELSE(MSVC) + SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_LIST_DIR}/../aten/src/TH/generic/simd/convolve5x5_avx.cpp PROPERTIES COMPILE_FLAGS "-O3 -ffast-math ${CXX_AVX_FLAGS}") + SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_LIST_DIR}/../aten/src/TH/vector/AVX.cpp PROPERTIES COMPILE_FLAGS "-O3 ${CXX_AVX_FLAGS}") + ENDIF(MSVC) + ENDIF(C_AVX_FOUND) + + IF(C_AVX2_FOUND) + IF(MSVC) + SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_LIST_DIR}/../aten/src/TH/vector/AVX2.cpp PROPERTIES COMPILE_FLAGS "${MSVC_OPT_FLAG}/arch:AVX2 ${CXX_AVX2_FLAGS}") + ELSE(MSVC) + SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_LIST_DIR}/../aten/src/TH/vector/AVX2.cpp PROPERTIES COMPILE_FLAGS "-O3 ${CXX_AVX2_FLAGS}") + ENDIF(MSVC) + ENDIF(C_AVX2_FOUND) + + IF(NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" MATCHES "Clang") + SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_LIST_DIR}/../aten/src/TH/THAllocator.cpp PROPERTIES COMPILE_FLAGS "-fno-openmp") + ENDIF() + + FILE(GLOB cpu_kernel_cpp_in "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cpu/*.cpp") + + IF(MSVC AND NOT "${CMAKE_BUILD_TYPE}" MATCHES "Debug") + SET(MSVC_OPT_FLAG "/Ox /fp:strict ") + SET(VCOMP_LIB "vcomp") + ELSE() + SET(MSVC_OPT_FLAG " ") + SET(VCOMP_LIB "vcompd") + ENDIF() + + LIST(APPEND CPU_CAPABILITY_NAMES "DEFAULT") + IF(MSVC) + LIST(APPEND CPU_CAPABILITY_FLAGS "${MSVC_OPT_FLAG}") + ELSE(MSVC) + LIST(APPEND CPU_CAPABILITY_FLAGS "-O3") + ENDIF(MSVC) + + IF(CXX_AVX_FOUND) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX_CPU_DEFINITION") + LIST(APPEND CPU_CAPABILITY_NAMES "AVX") + IF(MSVC) + LIST(APPEND CPU_CAPABILITY_FLAGS "${MSVC_OPT_FLAG}/arch:AVX") + ELSE(MSVC) + LIST(APPEND CPU_CAPABILITY_FLAGS "-O3 -mavx") + ENDIF(MSVC) + ENDIF(CXX_AVX_FOUND) + + IF(CXX_AVX2_FOUND) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX2_CPU_DEFINITION") + LIST(APPEND CPU_CAPABILITY_NAMES "AVX2") + IF(MSVC) + LIST(APPEND CPU_CAPABILITY_FLAGS "${MSVC_OPT_FLAG}/arch:AVX2") + ELSE(MSVC) + 
LIST(APPEND CPU_CAPABILITY_FLAGS "-O3 -mavx2") + ENDIF(MSVC) + ENDIF(CXX_AVX2_FOUND) + + list(LENGTH CPU_CAPABILITY_NAMES NUM_CPU_CAPABILITY_NAMES) + math(EXPR NUM_CPU_CAPABILITY_NAMES "${NUM_CPU_CAPABILITY_NAMES}-1") + + FOREACH(i RANGE ${NUM_CPU_CAPABILITY_NAMES}) + FOREACH(IMPL ${cpu_kernel_cpp_in}) + string(REPLACE "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/" "" NAME ${IMPL}) + LIST(GET CPU_CAPABILITY_NAMES ${i} CPU_CAPABILITY) + SET(NEW_IMPL ${CMAKE_BINARY_DIR}/aten/src/ATen/${NAME}.${CPU_CAPABILITY}.cpp) + CONFIGURE_FILE(${IMPL} ${NEW_IMPL} COPYONLY) + SET(cpu_kernel_cpp ${NEW_IMPL} ${cpu_kernel_cpp}) # Create list of copies + LIST(GET CPU_CAPABILITY_FLAGS ${i} FLAGS) + IF(MSVC) + SET(MACRO_FLAG "/DCPU_CAPABILITY=${CPU_CAPABILITY} /DCPU_CAPABILITY_${CPU_CAPABILITY}") + ELSE(MSVC) + SET(MACRO_FLAG "-DCPU_CAPABILITY=${CPU_CAPABILITY} -DCPU_CAPABILITY_${CPU_CAPABILITY}") + ENDIF(MSVC) + SET_SOURCE_FILES_PROPERTIES(${NEW_IMPL} PROPERTIES COMPILE_FLAGS "${FLAGS} ${MACRO_FLAG}") + ENDFOREACH() + ENDFOREACH() + list(APPEND ATen_CPU_SRCS ${cpu_kernel_cpp}) + + set(cwrap_files + ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/Declarations.cwrap + ${CMAKE_CURRENT_LIST_DIR}/../aten/src/THNN/generic/THNN.h + ${CMAKE_CURRENT_LIST_DIR}/../aten/src/THCUNN/generic/THCUNN.h + ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/nn.yaml + ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml) + + FILE(GLOB all_python "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/*.py") + + SET(GEN_COMMAND + ${PYCMD} ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/gen.py + --source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen + --output-dir ${CMAKE_BINARY_DIR}/aten/src/ATen + ${cwrap_files} + ) + + EXECUTE_PROCESS( + COMMAND ${GEN_COMMAND} + --output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_cpp.txt + --output-dir ${CMAKE_BINARY_DIR}/aten/src/ATen + RESULT_VARIABLE RETURN_VALUE + ) + if (NOT RETURN_VALUE EQUAL 0) + message(STATUS ${generated_cpp}) + message(FATAL_ERROR "Failed to 
get generated_cpp list") + endif() + file(READ ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_cpp.txt generated_cpp) + file(READ ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_cpp.txt-cuda cuda_generated_cpp) + + file(GLOB_RECURSE all_templates "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*") + + file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/aten/src/ATen) + + add_custom_command(OUTPUT ${generated_cpp} ${cuda_generated_cpp} + COMMAND ${GEN_COMMAND} + --output-dir ${CMAKE_BINARY_DIR}/aten/src/ATen + DEPENDS ${all_python} ${all_templates} ${cwrap_files}) + + # Generated headers used from a CUDA (.cu) file are + # not tracked correctly in CMake. We make the libATen.so depend explicitly + # on building the generated ATen files to workaround. + add_custom_target(ATEN_CPU_FILES_GEN_TARGET DEPENDS ${generated_cpp}) + add_custom_target(ATEN_CUDA_FILES_GEN_TARGET DEPENDS ${cuda_generated_cpp}) + add_library(ATEN_CPU_FILES_GEN_LIB INTERFACE) + add_library(ATEN_CUDA_FILES_GEN_LIB INTERFACE) + add_dependencies(ATEN_CPU_FILES_GEN_LIB ATEN_CPU_FILES_GEN_TARGET) + add_dependencies(ATEN_CUDA_FILES_GEN_LIB ATEN_CUDA_FILES_GEN_TARGET) +endif() diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 56beb830e1a06..bad405ed92b52 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1,9 +1,11 @@ # ---[ Custom Protobuf -include("cmake/ProtoBuf.cmake") +if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) + include(${CMAKE_CURRENT_LIST_DIR}/ProtoBuf.cmake) +endif() # ---[ Threads if(BUILD_CAFFE2) - include(cmake/public/threads.cmake) + include(${CMAKE_CURRENT_LIST_DIR}/public/threads.cmake) if (TARGET Threads::Threads) list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS Threads::Threads) else() @@ -13,30 +15,37 @@ if(BUILD_CAFFE2) endif() # ---[ protobuf -if(USE_LITE_PROTO) - set(CAFFE2_USE_LITE_PROTO 1) +if(BUILD_CAFFE2) + if(USE_LITE_PROTO) + set(CAFFE2_USE_LITE_PROTO 1) + endif() endif() # ---[ git: used to generate git build string. 
-find_package(Git) -if(GIT_FOUND) - execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --always --dirty - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" - OUTPUT_VARIABLE CAFFE2_GIT_VERSION - RESULT_VARIABLE __git_result) - if(NOT ${__git_result} EQUAL 0) - set(CAFFE2_GIT_VERSION "unknown") +if(BUILD_CAFFE2) + find_package(Git) + if(GIT_FOUND) + execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --always --dirty + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/.." + OUTPUT_VARIABLE CAFFE2_GIT_VERSION + RESULT_VARIABLE __git_result) + if(NOT ${__git_result} EQUAL 0) + set(CAFFE2_GIT_VERSION "unknown") + endif() + else() + message( + WARNING + "Cannot find git, so Caffe2 won't have any git build info available") endif() -else() - message( - WARNING - "Cannot find git, so Caffe2 won't have any git build info available") endif() - # ---[ BLAS -set(BLAS "Eigen" CACHE STRING "Selected BLAS library") +if(BUILD_ATEN) + set(BLAS "MKL" CACHE STRING "Selected BLAS library") +else() + set(BLAS "Eigen" CACHE STRING "Selected BLAS library") +endif() set_property(CACHE BLAS PROPERTY STRINGS "Eigen;ATLAS;OpenBLAS;MKL;vecLib") message(STATUS "The BLAS backend of choice:" ${BLAS}) @@ -72,7 +81,7 @@ set(CONFU_DEPENDENCIES_BINARY_DIR ${PROJECT_BINARY_DIR}/confu-deps # ---[ NNPACK if(USE_NNPACK) - include("cmake/External/nnpack.cmake") + include(${CMAKE_CURRENT_LIST_DIR}/External/nnpack.cmake) if(NNPACK_FOUND) if(TARGET nnpack) # ---[ NNPACK is being built together with Caffe2: explicitly specify dependency @@ -90,7 +99,7 @@ endif() # ---[ Caffe2 uses cpuinfo library in the thread pool if (NOT TARGET cpuinfo) if (NOT DEFINED CPUINFO_SOURCE_DIR) - set(CPUINFO_SOURCE_DIR "${PROJECT_SOURCE_DIR}/third_party/cpuinfo" CACHE STRING "cpuinfo source directory") + set(CPUINFO_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../third_party/cpuinfo" CACHE STRING "cpuinfo source directory") endif() 
set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL "") @@ -116,7 +125,7 @@ list(APPEND Caffe2_DEPENDENCY_LIBS cpuinfo) # ---[ gflags if(USE_GFLAGS) - include(cmake/public/gflags.cmake) + include(${CMAKE_CURRENT_LIST_DIR}/public/gflags.cmake) if (TARGET gflags) set(CAFFE2_USE_GFLAGS 1) list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS gflags) @@ -131,7 +140,7 @@ endif() # ---[ Google-glog if(USE_GLOG) - include(cmake/public/glog.cmake) + include(${CMAKE_CURRENT_LIST_DIR}/public/glog.cmake) if (TARGET glog::glog) set(CAFFE2_USE_GOOGLE_GLOG 1) list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS glog::glog) @@ -160,15 +169,15 @@ if(BUILD_TEST) if (NOT CAFFE2_USE_MSVC_STATIC_RUNTIME) set(gtest_force_shared_crt ON) endif() - add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/googletest) - include_directories(${PROJECT_SOURCE_DIR}/third_party/googletest/googletest/include) + add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest) + include_directories(${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googletest/include) # We will not need to test benchmark lib itself. set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.") # We will not need to install benchmark since we link it statically. set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "Disable benchmark install to avoid overwriting vendor install.") - add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/benchmark) - include_directories(${PROJECT_SOURCE_DIR}/third_party/benchmark/include) + add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/benchmark) + include_directories(${CMAKE_CURRENT_LIST_DIR}/../third_party/benchmark/include) # Recover the build shared libs option. 
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) @@ -190,7 +199,7 @@ if (USE_OPENCL) message(INFO "USING OPENCL") find_package(OpenCL REQUIRED) include_directories(${OpenCL_INCLUDE_DIRS}) - include_directories(${PROJECT_SOURCE_DIR}/caffe2/contrib/opencl) + include_directories(${CMAKE_CURRENT_LIST_DIR}/../caffe2/contrib/opencl) list(APPEND Caffe2_DEPENDENCY_LIBS ${OpenCL_LIBRARIES}) endif() @@ -284,16 +293,18 @@ if(USE_FFMPEG) endif() # ---[ EIGEN -# Due to license considerations, we will only use the MPL2 parts of Eigen. -set(EIGEN_MPL2_ONLY 1) -find_package(Eigen3) -if(EIGEN3_FOUND) - message(STATUS "Found system Eigen at " ${EIGEN3_INCLUDE_DIR}) -else() - message(STATUS "Did not find system Eigen. Using third party subdirectory.") - set(EIGEN3_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/third_party/eigen) +if(BUILD_CAFFE2) + # Due to license considerations, we will only use the MPL2 parts of Eigen. + set(EIGEN_MPL2_ONLY 1) + find_package(Eigen3) + if(EIGEN3_FOUND) + message(STATUS "Found system Eigen at " ${EIGEN3_INCLUDE_DIR}) + else() + message(STATUS "Did not find system Eigen. Using third party subdirectory.") + set(EIGEN3_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/../third_party/eigen) + endif() + include_directories(${EIGEN3_INCLUDE_DIR}) endif() -include_directories(${EIGEN3_INCLUDE_DIR}) # ---[ Python + Numpy if(BUILD_PYTHON) @@ -316,7 +327,7 @@ find_package(pybind11) if(pybind11_FOUND) include_directories(${pybind11_INCLUDE_DIRS}) else() - include_directories(${PROJECT_SOURCE_DIR}/third_party/pybind11/include) + include_directories(${CMAKE_CURRENT_LIST_DIR}/../third_party/pybind11/include) endif() # ---[ MPI @@ -372,22 +383,42 @@ endif() # ---[ CUDA if(USE_CUDA) - include(cmake/public/cuda.cmake) + include(${CMAKE_CURRENT_LIST_DIR}/public/cuda.cmake) if(CAFFE2_FOUND_CUDA) # A helper variable recording the list of Caffe2 dependent libraries # caffe2::cudart is dealt with separately, due to CUDA_ADD_LIBRARY # design reason (it adds CUDA_LIBRARIES itself). 
set(Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS - caffe2::cuda caffe2::curand caffe2::cudnn caffe2::nvrtc) - if(CAFFE2_STATIC_LINK_CUDA) - # When statically linking, this must be the order of the libraries - LIST(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS - "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" caffe2::cublas) + caffe2::cuda caffe2::cufft caffe2::curand caffe2::nvrtc) + if(CAFFE2_FOUND_CUDNN) + LIST(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn) else() - LIST(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cublas) + if(BUILD_CAFFE2) + # TODO: Get rid of special case for Caffe2 where we require + # CUDA *and* cuDNN to be installed. + message(WARNING + "Not compiling with CUDA since cuDNN is missing. Suppress " + "this warning with -DUSE_CUDA=OFF.") + caffe2_update_option(USE_CUDA OFF) + caffe2_update_option(USE_CUDNN OFF) + else() + message(WARNING + "Not compiling with cuDNN. Suppress this warning with " + "-DUSE_CUDNN=OFF.") + caffe2_update_option(USE_CUDNN OFF) + endif() endif() - if(USE_TENSORRT) - list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::tensorrt) + if(USE_CUDA) + if(CAFFE2_STATIC_LINK_CUDA) + # When statically linking, this must be the order of the libraries + LIST(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS + "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" caffe2::cublas) + else() + LIST(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cublas) + endif() + if(USE_TENSORRT) + list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::tensorrt) + endif() endif() else() message(WARNING @@ -397,27 +428,51 @@ if(USE_CUDA) endif() endif() +# ---[ ROCm +if(USE_ROCM) + include_directories(${HIP_PATH}/include) + include_directories(${HIPBLAS_PATH}/include) + include_directories(${HIPSPARSE_PATH}/include) + include_directories(${HIPRNG_PATH}/include) + include_directories(${THRUST_PATH}) + + # load HIP cmake module and load platform id + EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig -P OUTPUT_VARIABLE PLATFORM) + EXECUTE_PROCESS(COMMAND
${HIP_PATH}/bin/hipconfig --cpp_config OUTPUT_VARIABLE HIP_CXX_FLAGS) + + # Link with HIPCC https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md#linking-with-hipcc + SET(CMAKE_CXX_LINK_EXECUTABLE ${HIP_HIPCC_EXECUTABLE}) + + # Show message that we're using ROCm. + MESSAGE(STATUS "ROCM TRUE:") + MESSAGE(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER}) +endif() + # ---[ NCCL if(USE_NCCL) if(NOT USE_CUDA) - message(WARNING "If not using cuda, one should not use NCCL either.") + message(WARNING + "Not using CUDA, so disabling NCCL. Suppress this warning with " + "-DUSE_NCCL=OFF.") caffe2_update_option(USE_NCCL OFF) elseif(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") message(WARNING "NCCL is currently only supported under Linux.") caffe2_update_option(USE_NCCL OFF) else() - include("cmake/External/nccl.cmake") + include(${CMAKE_CURRENT_LIST_DIR}/External/nccl.cmake) list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS __caffe2_nccl) endif() endif() # ---[ CUB -if(USE_CUDA) - find_package(CUB) - if(CUB_FOUND) - include_directories(${CUB_INCLUDE_DIRS}) - else() - include_directories(${PROJECT_SOURCE_DIR}/third_party/cub) +if(BUILD_CAFFE2) + if(USE_CUDA) + find_package(CUB) + if(CUB_FOUND) + include_directories(${CUB_INCLUDE_DIRS}) + else() + include_directories(${CMAKE_CURRENT_LIST_DIR}/../third_party/cub) + endif() endif() endif() @@ -443,11 +498,11 @@ if(USE_GLOO) set(__BUILD_BENCHMARK ${BUILD_BENCHMARK}) set(BUILD_TEST OFF) set(BUILD_BENCHMARK OFF) - add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gloo) + add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/gloo) # Here is a little bit hacky. We have to put PROJECT_BINARY_DIR in front # of PROJECT_SOURCE_DIR with/without conda system. The reason is that # gloo generates a new config.h in the binary diretory. 
- include_directories(BEFORE SYSTEM ${PROJECT_SOURCE_DIR}/third_party/gloo) + include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/gloo) include_directories(BEFORE SYSTEM ${PROJECT_BINARY_DIR}/third_party/gloo) set(BUILD_TEST ${__BUILD_TEST}) set(BUILD_BENCHMARK ${__BUILD_BENCHMARK}) @@ -513,7 +568,7 @@ if (USE_ACL) list(APPEND ARM_COMPUTE_INCLUDE_DIRS "third_party/ComputeLibrary/include") include_directories(${ARM_COMPUTE_INCLUDE_DIRS}) string (REPLACE ";" " -I" ANDROID_STL_INCLUDE_FLAGS "-I${ANDROID_STL_INCLUDE_DIRS}") - set (ARM_COMPUTE_SRC_DIR "${PROJECT_SOURCE_DIR}/third_party/ComputeLibrary/") + set (ARM_COMPUTE_SRC_DIR "${CMAKE_CURRENT_LIST_DIR}/../third_party/ComputeLibrary/") set (ARM_COMPUTE_LIB "${CMAKE_CURRENT_BINARY_DIR}/libarm_compute.a") set (ARM_COMPUTE_CORE_LIB "${CMAKE_CURRENT_BINARY_DIR}/libarm_compute_core.a") set (ARM_COMPUTE_LIBS ${ARM_COMPUTE_LIB} ${ARM_COMPUTE_CORE_LIB}) @@ -578,52 +633,522 @@ if (BUILD_ATEN) include_directories(${PROJECT_BINARY_DIR}/caffe2/contrib/aten/aten/src/ATen) include_directories(${PROJECT_BINARY_DIR}/caffe2/contrib/aten) endif() - - list(APPEND Caffe2_DEPENDENCY_LIBS ATen_cpu) - if (USE_CUDA) - list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ATen_cuda) - endif() - include_directories(${PROJECT_SOURCE_DIR}/aten/src) endif() if (USE_ZSTD) list(APPEND Caffe2_DEPENDENCY_LIBS libzstd_static) - include_directories(${PROJECT_SOURCE_DIR}/third_party/zstd/lib) - add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/zstd/build/cmake) + include_directories(${CMAKE_CURRENT_LIST_DIR}/../third_party/zstd/lib) + add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/zstd/build/cmake) set_property(TARGET libzstd_static PROPERTY POSITION_INDEPENDENT_CODE ON) endif() # ---[ Onnx -if (NOT DEFINED ONNX_NAMESPACE) - SET(ONNX_NAMESPACE "onnx_c2") -endif() -if(EXISTS "${CAFFE2_CUSTOM_PROTOC_EXECUTABLE}") - set(ONNX_CUSTOM_PROTOC_EXECUTABLE ${CAFFE2_CUSTOM_PROTOC_EXECUTABLE}) -endif() -set(TEMP_BUILD_SHARED_LIBS 
${BUILD_SHARED_LIBS}) -# We will build onnx as static libs and embed it directly into the binary. -set(BUILD_SHARED_LIBS OFF) -set(ONNX_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}) -add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/onnx) -include_directories(${ONNX_INCLUDE_DIRS}) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DONNX_NAMESPACE=${ONNX_NAMESPACE}") -# In mobile build we care about code size, and so we need drop -# everything (e.g. checker, optimizer) in onnx but the pb definition. -if (ANDROID OR IOS) - caffe2_interface_library(onnx_proto onnx_library) -else() - caffe2_interface_library(onnx onnx_library) +if (BUILD_CAFFE2) + if (NOT DEFINED ONNX_NAMESPACE) + SET(ONNX_NAMESPACE "onnx_c2") + endif() + if(EXISTS "${CAFFE2_CUSTOM_PROTOC_EXECUTABLE}") + set(ONNX_CUSTOM_PROTOC_EXECUTABLE ${CAFFE2_CUSTOM_PROTOC_EXECUTABLE}) + endif() + set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) + # We will build onnx as static libs and embed it directly into the binary. + set(BUILD_SHARED_LIBS OFF) + set(ONNX_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}) + add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/onnx) + include_directories(${ONNX_INCLUDE_DIRS}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DONNX_NAMESPACE=${ONNX_NAMESPACE}") + # In mobile build we care about code size, and so we need drop + # everything (e.g. checker, optimizer) in onnx but the pb definition. + if (ANDROID OR IOS) + caffe2_interface_library(onnx_proto onnx_library) + else() + caffe2_interface_library(onnx onnx_library) + endif() + list(APPEND Caffe2_DEPENDENCY_WHOLE_LINK_LIBS onnx_library) + # Recover the build shared libs option. + set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) endif() -list(APPEND Caffe2_DEPENDENCY_WHOLE_LINK_LIBS onnx_library) -# Recover the build shared libs option. 
-set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) # --[ TensorRT integration with onnx-trt -if (USE_TENSORRT) - set(CMAKE_CUDA_COMPILER ${CUDA_NVCC_EXECUTABLE}) - add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/onnx-tensorrt) - include_directories("${PROJECT_SOURCE_DIR}/third_party/onnx-tensorrt") - caffe2_interface_library(nvonnxparser_static onnx_trt_library) - list(APPEND Caffe2_DEPENDENCY_WHOLE_LINK_LIBS onnx_trt_library) - set(CAFFE2_USE_TRT 1) +if (BUILD_CAFFE2) + if (USE_TENSORRT) + set(CMAKE_CUDA_COMPILER ${CUDA_NVCC_EXECUTABLE}) + add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/onnx-tensorrt) + include_directories("${CMAKE_CURRENT_LIST_DIR}/../third_party/onnx-tensorrt") + caffe2_interface_library(nvonnxparser_static onnx_trt_library) + list(APPEND Caffe2_DEPENDENCY_WHOLE_LINK_LIBS onnx_trt_library) + set(CAFFE2_USE_TRT 1) + endif() +endif() + +# --[ ATen checks +if (BUILD_ATEN) + set(TORCH_CUDA_ARCH_LIST $ENV{TORCH_CUDA_ARCH_LIST}) + set(TORCH_NVCC_FLAGS $ENV{TORCH_NVCC_FLAGS}) + + add_definitions(-DTH_INDEX_BASE=0) + + # RPATH stuff + # see https://cmake.org/Wiki/CMake_RPATH_handling + if (APPLE) + set(CMAKE_MACOSX_RPATH ON) + endif() + set(CMAKE_SKIP_BUILD_RPATH FALSE) + set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) + set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") + set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) + list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) + if ("${isSystemDir}" STREQUAL "-1") + set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") + endif() + + if (NOT MSVC) + set(CMAKE_CXX_FLAGS "--std=c++11 ${CMAKE_CXX_FLAGS}") + endif() + + INCLUDE(CheckCXXSourceCompiles) + + # disable some verbose warnings + IF (MSVC) + set(CMAKE_CXX_FLAGS "/wd4267 /wd4251 /wd4522 /wd4522 /wd4838 /wd4305 /wd4244 /wd4190 /wd4101 /wd4996 /wd4275 ${CMAKE_CXX_FLAGS}") + ENDIF() + + # windef.h will define max/min macros if NOMINMAX is not defined + IF (MSVC) + 
add_definitions(/DNOMINMAX) + ENDIF() + + #Check if certain std functions are supported. Sometimes + #_GLIBCXX_USE_C99 macro is not defined and some functions are missing. + CHECK_CXX_SOURCE_COMPILES(" + #include <cmath> + #include <string> + + int main() { + int a = std::isinf(3.0); + int b = std::isnan(0.0); + std::string s = std::to_string(1); + + return 0; + }" SUPPORT_GLIBCXX_USE_C99) + + if (NOT SUPPORT_GLIBCXX_USE_C99) + message(FATAL_ERROR + "The C++ compiler does not support required functions. " + "This is very likely due to a known bug in GCC 5 " + "(and maybe other versions) on Ubuntu 17.10 and newer. " + "For more information, see: " + "https://github.com/pytorch/pytorch/issues/5229" + ) + endif() + + # Top-level build config + ############################################ + # Flags + # When using MSVC + + # Detect CUDA architecture and get best NVCC flags + # finding cuda must be first because other things depend on the result + # + # NB: We MUST NOT run this find_package if NOT USE_CUDA is set, because upstream + # FindCUDA has a bug where it will still attempt to make use of NOTFOUND + # compiler variables to run various probe tests. We could try to fix + # this, but since FindCUDA upstream is subsumed by first-class support + # for CUDA language, it seemed not worth fixing. + + IF (MSVC) + # we want to respect the standard, and we are bored of those **** .
+ ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1) + LIST(APPEND CUDA_NVCC_FLAGS "-Xcompiler /wd4819 -Xcompiler /wd4503 -Xcompiler /wd4190 -Xcompiler /wd4244 -Xcompiler /wd4251 -Xcompiler /wd4275 -Xcompiler /wd4522") + ENDIF() + + IF (NOT MSVC) + IF (CMAKE_VERSION VERSION_LESS "3.1") + SET(CMAKE_C_FLAGS "-std=c11 ${CMAKE_C_FLAGS}") + ELSE () + SET(CMAKE_C_STANDARD 11) + ENDIF () + ENDIF() + + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.9") + if (CUDA_VERSION VERSION_LESS "8.0") + MESSAGE(STATUS "Found gcc >=5 and CUDA <= 7.5, adding workaround C++ flags") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_FORCE_INLINES -D_MWAITXINTRIN_H_INCLUDED -D__STRICT_ANSI__") + endif() + endif() + endif() + + LIST(APPEND CUDA_NVCC_FLAGS -Wno-deprecated-gpu-targets) + LIST(APPEND CUDA_NVCC_FLAGS --expt-extended-lambda) + + if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + SET(CMAKE_CXX_STANDARD 11) + endif() + + LIST(APPEND CUDA_NVCC_FLAGS ${TORCH_NVCC_FLAGS}) + LIST(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA}) + IF (CMAKE_POSITION_INDEPENDENT_CODE AND NOT MSVC) + LIST(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC") + ENDIF() + + IF (CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5) + MESSAGE(STATUS "Found CUDA with FP16 support, compiling with torch.CudaHalfTensor") + LIST(APPEND CUDA_NVCC_FLAGS "-DCUDA_HAS_FP16=1 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__") + add_compile_options(-DCUDA_HAS_FP16=1) + ELSE() + MESSAGE(STATUS "Could not find CUDA with FP16 support, compiling without torch.CudaHalfTensor") + ENDIF() + + OPTION(NDEBUG "disable asserts (WARNING: this may result in silent UB e.g. 
with out-of-bound indices)") + IF (NOT NDEBUG) + MESSAGE(STATUS "Removing -DNDEBUG from compile flags") + STRING(REPLACE "-DNDEBUG" "" CMAKE_C_FLAGS "" ${CMAKE_C_FLAGS}) + STRING(REPLACE "-DNDEBUG" "" CMAKE_C_FLAGS_DEBUG "" ${CMAKE_C_FLAGS_DEBUG}) + STRING(REPLACE "-DNDEBUG" "" CMAKE_C_FLAGS_RELEASE "" ${CMAKE_C_FLAGS_RELEASE}) + STRING(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS "" ${CMAKE_CXX_FLAGS}) + STRING(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_DEBUG "" ${CMAKE_CXX_FLAGS_DEBUG}) + STRING(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELEASE "" ${CMAKE_CXX_FLAGS_RELEASE}) + ENDIF() + + # OpenMP support? + SET(WITH_OPENMP ON CACHE BOOL "OpenMP support if available?") + IF (APPLE AND CMAKE_COMPILER_IS_GNUCC) + EXEC_PROGRAM (uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION) + STRING (REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION}) + MESSAGE (STATUS "MAC OS Darwin Version: ${DARWIN_VERSION}") + IF (DARWIN_VERSION GREATER 9) + SET(APPLE_OPENMP_SUCKS 1) + ENDIF (DARWIN_VERSION GREATER 9) + EXECUTE_PROCESS (COMMAND ${CMAKE_C_COMPILER} -dumpversion + OUTPUT_VARIABLE GCC_VERSION) + IF (APPLE_OPENMP_SUCKS AND GCC_VERSION VERSION_LESS 4.6.2) + MESSAGE(STATUS "Warning: Disabling OpenMP (unstable with this version of GCC)") + MESSAGE(STATUS " Install GCC >= 4.6.2 or change your OS to enable OpenMP") + add_compile_options(-Wno-unknown-pragmas) + SET(WITH_OPENMP OFF CACHE BOOL "OpenMP support if available?" 
FORCE) + ENDIF() + ENDIF() + + IF (WITH_OPENMP AND NOT CHECKED_OPENMP) + FIND_PACKAGE(OpenMP) + SET(CHECKED_OPENMP ON CACHE BOOL "already checked for OpenMP") + + # OPENMP_FOUND is not cached in FindOpenMP.cmake (all other variables are cached) + # see https://github.com/Kitware/CMake/blob/master/Modules/FindOpenMP.cmake + SET(OPENMP_FOUND ${OPENMP_FOUND} CACHE BOOL "OpenMP Support found") + ENDIF() + + IF (OPENMP_FOUND) + MESSAGE(STATUS "Compiling with OpenMP support") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + ENDIF() + + + SET(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF) + + FIND_PACKAGE(MAGMA) + IF (USE_CUDA AND MAGMA_FOUND) + INCLUDE_DIRECTORIES(${MAGMA_INCLUDE_DIR}) + SET(CMAKE_REQUIRED_INCLUDES "${MAGMA_INCLUDE_DIR};${CUDA_INCLUDE_DIRS}") + INCLUDE(CheckPrototypeDefinition) + check_prototype_definition(magma_get_sgeqrf_nb + "magma_int_t magma_get_sgeqrf_nb( magma_int_t m, magma_int_t n );" + "0" + "magma.h" + MAGMA_V2) + IF (MAGMA_V2) + add_definitions(-DMAGMA_V2) + ENDIF (MAGMA_V2) + + SET(USE_MAGMA 1) + MESSAGE(STATUS "Compiling with MAGMA support") + MESSAGE(STATUS "MAGMA INCLUDE DIRECTORIES: ${MAGMA_INCLUDE_DIR}") + MESSAGE(STATUS "MAGMA LIBRARIES: ${MAGMA_LIBRARIES}") + MESSAGE(STATUS "MAGMA V2 check: ${MAGMA_V2}") + ELSE() + MESSAGE(STATUS "MAGMA not found. 
Compiling without MAGMA support") + ENDIF() + + # ARM specific flags + FIND_PACKAGE(ARM) + IF (ASIMD_FOUND) + MESSAGE(STATUS "asimd/Neon found with compiler flag : -D__NEON__") + add_compile_options(-D__NEON__) + ELSEIF (NEON_FOUND) + MESSAGE(STATUS "Neon found with compiler flag : -mfpu=neon -D__NEON__") + add_compile_options(-mfpu=neon -D__NEON__) + ENDIF () + IF (CORTEXA8_FOUND) + MESSAGE(STATUS "Cortex-A8 Found with compiler flag : -mcpu=cortex-a8") + add_compile_options(-mcpu=cortex-a8 -fprefetch-loop-arrays) + ENDIF () + IF (CORTEXA9_FOUND) + MESSAGE(STATUS "Cortex-A9 Found with compiler flag : -mcpu=cortex-a9") + add_compile_options(-mcpu=cortex-a9) + ENDIF() + + # Check that our programs run. This is different from the native CMake compiler + # check, which just tests if the program compiles and links. This is important + # because with ASAN you might need to help the compiled library find some + # dynamic libraries. + CHECK_C_SOURCE_RUNS(" + int main() { return 0; } + " COMPILER_WORKS) + IF (NOT COMPILER_WORKS) + # Force cmake to retest next time around + unset(COMPILER_WORKS CACHE) + MESSAGE(FATAL_ERROR + "Could not run a simple program built with your compiler.
" + "If you are trying to use -fsanitize=address, make sure " + "libasan is properly installed on your system (you can confirm " + "if the problem is this by attempting to build and run a " + "small program.)") + ENDIF() + + CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H) + # Check for a cpuid intrinsic + IF (HAVE_CPUID_H) + CHECK_C_SOURCE_COMPILES("#include <cpuid.h> + int main() + { + unsigned int eax, ebx, ecx, edx; + return __get_cpuid(0, &eax, &ebx, &ecx, &edx); + }" HAVE_GCC_GET_CPUID) + ENDIF() + IF (HAVE_GCC_GET_CPUID) + add_compile_options(-DHAVE_GCC_GET_CPUID) + ENDIF() + + CHECK_C_SOURCE_COMPILES("#include <stdint.h> + static inline void cpuid(uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) + { + uint32_t a = *eax, b, c = *ecx, d; + asm volatile ( \"cpuid\" : \"+a\"(a), \"=b\"(b), \"+c\"(c), \"=d\"(d) ); + *eax = a; *ebx = b; *ecx = c; *edx = d; + } + int main() { + uint32_t a,b,c,d; + cpuid(&a, &b, &c, &d); + return 0; + }" NO_GCC_EBX_FPIC_BUG) + + IF (NOT NO_GCC_EBX_FPIC_BUG) + add_compile_options(-DUSE_GCC_GET_CPUID) + ENDIF() + + FIND_PACKAGE(SSE) # checks SSE, AVX and AVX2 + IF (C_SSE2_FOUND) + MESSAGE(STATUS "SSE2 Found") + # TODO: Work out correct way to do this.
Note that C_SSE2_FLAGS is often + # empty, in which case it expands to " " flag which is bad + SET(CMAKE_C_FLAGS "${C_SSE2_FLAGS} ${CMAKE_C_FLAGS}") + SET(CMAKE_CXX_FLAGS "${C_SSE2_FLAGS} ${CMAKE_CXX_FLAGS}") + add_compile_options(-DUSE_SSE2) + ENDIF() + IF (C_SSE4_1_FOUND AND C_SSE4_2_FOUND) + SET(CMAKE_C_FLAGS "${C_SSE4_1_FLAGS} ${C_SSE4_2_FLAGS} ${CMAKE_C_FLAGS}") + SET(CMAKE_CXX_FLAGS "${C_SSE4_1_FLAGS} ${C_SSE4_2_FLAGS} ${CMAKE_CXX_FLAGS}") + add_compile_options(-DUSE_SSE4_1 -DUSE_SSE4_2) + ENDIF() + IF (C_SSE3_FOUND) + MESSAGE(STATUS "SSE3 Found") + SET(CMAKE_C_FLAGS "${C_SSE3_FLAGS} ${CMAKE_C_FLAGS}") + SET(CMAKE_CXX_FLAGS "${C_SSE3_FLAGS} ${CMAKE_CXX_FLAGS}") + add_compile_options(-DUSE_SSE3) + ENDIF() + + # we don't set -mavx and -mavx2 flags globally, but only for specific files + # however, we want to enable the AVX codepaths, so we still need to + # add USE_AVX and USE_AVX2 macro defines + IF (C_AVX_FOUND) + MESSAGE(STATUS "AVX Found") + add_compile_options(-DUSE_AVX) + ENDIF() + IF (C_AVX2_FOUND) + MESSAGE(STATUS "AVX2 Found") + add_compile_options(-DUSE_AVX2) + ENDIF() + + CHECK_C_SOURCE_RUNS(" + #include <stdatomic.h> + // ATOMIC_INT_LOCK_FREE is flaky on some older gcc versions + // so if this define is not usable a preprocessor definition + // we fail this check and fall back to GCC atomics + #if ATOMIC_INT_LOCK_FREE == 2 + #define TH_ATOMIC_IPC_REFCOUNT 1 + #endif + int main() + { + int a; + int oa; + atomic_store(&a, 1); + atomic_fetch_add(&a, 1); + oa = atomic_load(&a); + if(!atomic_compare_exchange_strong(&a, &oa, 3)) + return -1; + return 0; + } + " HAS_C11_ATOMICS) + + IF (NOT HAS_C11_ATOMICS) + CHECK_C_SOURCE_RUNS(" + #include <intrin.h> + int main() + { + long a; + _InterlockedExchange(&a, 1); + _InterlockedExchangeAdd(&a, 1); + if(_InterlockedCompareExchange(&a, 3, 2) != 2) + return -1; + return 0; + } + " HAS_MSC_ATOMICS) + + CHECK_C_SOURCE_RUNS(" + int main() + { + int a; + __sync_lock_test_and_set(&a, 1); + __sync_fetch_and_add(&a, 1); +
if(!__sync_bool_compare_and_swap(&a, 2, 3)) + return -1; + return 0; + } + " HAS_GCC_ATOMICS) + ENDIF() + + IF (HAS_C11_ATOMICS) + ADD_DEFINITIONS(-DUSE_C11_ATOMICS=1) + MESSAGE(STATUS "Atomics: using C11 intrinsics") + ELSEIF (HAS_MSC_ATOMICS) + ADD_DEFINITIONS(-DUSE_MSC_ATOMICS=1) + MESSAGE(STATUS "Atomics: using MSVC intrinsics") + ELSEIF (HAS_GCC_ATOMICS) + ADD_DEFINITIONS(-DUSE_GCC_ATOMICS=1) + MESSAGE(STATUS "Atomics: using GCC intrinsics") + ELSE() + SET(CMAKE_THREAD_PREFER_PTHREAD TRUE) + FIND_PACKAGE(Threads) + IF(THREADS_FOUND) + ADD_DEFINITIONS(-DUSE_PTHREAD_ATOMICS=1) + TARGET_LINK_LIBRARIES(TH ${CMAKE_THREAD_LIBS_INIT}) + MESSAGE(STATUS "Atomics: using pthread") + ENDIF() + ENDIF() + + IF (WIN32 AND NOT CYGWIN) + SET(BLAS_INSTALL_LIBRARIES "OFF" + CACHE BOOL "Copy the required BLAS DLLs into the TH install dirs") + ENDIF() + + FIND_PACKAGE(BLAS) + SET(AT_MKL_ENABLED 0) + SET(AT_MKL_MT 0) + IF (BLAS_FOUND) + SET(USE_BLAS 1) + IF (BLAS_INFO STREQUAL "mkl") + ADD_DEFINITIONS(-DTH_BLAS_MKL) + IF(NOT BLAS_INCLUDE_DIR) + MESSAGE(FATAL_ERROR "MKL is used, but MKL header files are not found. \ + You can get them by `conda install mkl-include` if using conda (if \ + it is missing, run `conda upgrade -n root conda` first), and \ + `pip install mkl-devel` if using pip. 
If build fails with header files \ + available in the system, please make sure that CMake will search the \ + directory containing them, e.g., by setting CMAKE_INCLUDE_PATH.") + ENDIF() + IF (MSVC AND MKL_LIBRARIES MATCHES ".*libiomp5md\\.lib.*") + ADD_DEFINITIONS(-D_OPENMP_NOFORCE_MANIFEST) + SET(AT_MKL_MT 1) + ENDIF() + INCLUDE_DIRECTORIES(${BLAS_INCLUDE_DIR}) # include MKL headers + SET(AT_MKL_ENABLED 1) + ENDIF() + ENDIF() + + FIND_PACKAGE(LAPACK) + IF (LAPACK_FOUND) + SET(USE_LAPACK 1) + ENDIF() + + if (NOT USE_CUDA) + message("disabling CUDA because NOT USE_CUDA is set") + SET(AT_CUDA_ENABLED 0) + else() + SET(AT_CUDA_ENABLED 1) + find_package(CUDA 5.5 REQUIRED) + endif() + + IF (NOT AT_CUDA_ENABLED OR NOT CUDNN_FOUND) + MESSAGE(STATUS "CuDNN not found. Compiling without CuDNN support") + set(AT_CUDNN_ENABLED 0) + ELSE() + include_directories(${CUDNN_INCLUDE_DIRS}) + set(AT_CUDNN_ENABLED 1) + ENDIF() + + if (NO_MKLDNN) + message("disabling MKLDNN because NO_MKLDNN is set") + set(AT_MKLDNN_ENABLED 0) + else() + find_package(MKLDNN) + if(NOT MKLDNN_FOUND) + message(STATUS "MKLDNN not found. 
Compiling without MKLDNN support") + set(AT_MKLDNN_ENABLED 0) + else() + include_directories(${MKLDNN_INCLUDE_DIRS}) + set(AT_MKLDNN_ENABLED 1) + endif() + endif() + + IF(UNIX AND NOT APPLE) + INCLUDE(CheckLibraryExists) + # https://github.com/libgit2/libgit2/issues/2128#issuecomment-35649830 + CHECK_LIBRARY_EXISTS(rt clock_gettime "time.h" NEED_LIBRT) + IF(NEED_LIBRT) + list(APPEND Caffe2_DEPENDENCY_LIBS rt) + SET(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} rt) + ENDIF(NEED_LIBRT) + ENDIF(UNIX AND NOT APPLE) + + IF(UNIX) + SET(CMAKE_EXTRA_INCLUDE_FILES "sys/mman.h") + CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP) + IF(HAVE_MMAP) + ADD_DEFINITIONS(-DHAVE_MMAP=1) + ENDIF(HAVE_MMAP) + # done for lseek: https://www.gnu.org/software/libc/manual/html_node/File-Position-Primitive.html + ADD_DEFINITIONS(-D_FILE_OFFSET_BITS=64) + CHECK_FUNCTION_EXISTS(shm_open HAVE_SHM_OPEN) + IF(HAVE_SHM_OPEN) + ADD_DEFINITIONS(-DHAVE_SHM_OPEN=1) + ENDIF(HAVE_SHM_OPEN) + CHECK_FUNCTION_EXISTS(shm_unlink HAVE_SHM_UNLINK) + IF(HAVE_SHM_UNLINK) + ADD_DEFINITIONS(-DHAVE_SHM_UNLINK=1) + ENDIF(HAVE_SHM_UNLINK) + CHECK_FUNCTION_EXISTS(malloc_usable_size HAVE_MALLOC_USABLE_SIZE) + IF(HAVE_MALLOC_USABLE_SIZE) + ADD_DEFINITIONS(-DHAVE_MALLOC_USABLE_SIZE=1) + ENDIF(HAVE_MALLOC_USABLE_SIZE) + ENDIF(UNIX) + + # Is __thread supported? 
+ IF(NOT MSVC) + CHECK_C_SOURCE_COMPILES("static __thread int x = 1; int main() { return x; }" C_HAS_THREAD) + ELSE(NOT MSVC) + CHECK_C_SOURCE_COMPILES("static __declspec( thread ) int x = 1; int main() { return x; }" C_HAS_THREAD) + ENDIF(NOT MSVC) + IF(NOT C_HAS_THREAD) + MESSAGE(STATUS "Warning: __thread is not supported, generating thread-unsafe code") + ELSE(NOT C_HAS_THREAD) + add_compile_options(-DTH_HAVE_THREAD) + ENDIF(NOT C_HAS_THREAD) endif() + +# +# End ATen checks +# diff --git a/cmake/External/nccl.cmake b/cmake/External/nccl.cmake index 5725c1cc95f4c..99b4e783a40a5 100644 --- a/cmake/External/nccl.cmake +++ b/cmake/External/nccl.cmake @@ -14,7 +14,7 @@ if (NOT __NCCL_INCLUDED) # we build nccl statically, but want to link it into the caffe shared library # this requires position-independent code if (UNIX) - set(NCCL_EXTRA_COMPILER_FLAGS "-fPIC") + set(NCCL_EXTRA_COMPILER_FLAGS "-Xcompiler -fPIC") endif() set(NCCL_CXX_FLAGS ${CMAKE_CXX_FLAGS} ${NCCL_EXTRA_COMPILER_FLAGS}) @@ -29,6 +29,7 @@ if (NOT __NCCL_INCLUDED) "CXX=${CMAKE_CXX_COMPILER}" "CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}" "NVCC=${CUDA_NVCC_EXECUTABLE}" + "VERBOSE=0" BUILD_BYPRODUCTS "${nccl_PREFIX}/build/lib/libnccl_static.a" INSTALL_COMMAND "" ) diff --git a/cmake/MiscCheck.cmake b/cmake/MiscCheck.cmake index a32dc59cf75ef..023d4861bd96c 100644 --- a/cmake/MiscCheck.cmake +++ b/cmake/MiscCheck.cmake @@ -1,3 +1,10 @@ +if (UNIX) + # prevent Unknown CMake command "check_function_exists". 
+ include(CheckFunctionExists) +endif() +include(CheckIncludeFile) +include(CheckCSourceCompiles) +include(CheckCSourceRuns) include(CheckCCompilerFlag) include(CheckCXXSourceCompiles) include(CheckCXXCompilerFlag) diff --git a/cmake/Modules/FindMKL.cmake b/cmake/Modules/FindMKL.cmake index f2bea702ed5b2..8aa6cd1681784 100644 --- a/cmake/Modules/FindMKL.cmake +++ b/cmake/Modules/FindMKL.cmake @@ -18,207 +18,6 @@ # MKL_SOLVER_LIBRARIES - list of libraries to add for the solvers # MKL_CDFT_LIBRARIES - list of libraries to add for the solvers -# ---[ Special code for IDEEP or MKLML flows -if(USE_MKL_IDEEP_OR_MKLML) - if(USE_IDEEP) - set(IDEEP_ROOT "${PROJECT_SOURCE_DIR}/third_party/ideep") - set(MKLDNN_ROOT "${IDEEP_ROOT}/mkl-dnn") - set(__ideep_looked_for IDEEP_ROOT) - - find_path(IDEEP_INCLUDE_DIR ideep.hpp PATHS ${IDEEP_ROOT} PATH_SUFFIXES include) - find_path(MKLDNN_INCLUDE_DIR mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include) - if (NOT MKLDNN_INCLUDE_DIR) - execute_process(COMMAND git submodule update --init mkl-dnn WORKING_DIRECTORY ${IDEEP_ROOT}) - find_path(MKLDNN_INCLUDE_DIR mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include) - endif() - - if (MKLDNN_INCLUDE_DIR) - # to avoid adding conflicting submodels - set(ORIG_WITH_TEST ${WITH_TEST}) - set(WITH_TEST OFF) - add_subdirectory(${IDEEP_ROOT}) - set(WITH_TEST ${ORIG_WITH_TEST}) - - file(GLOB_RECURSE MKLML_INNER_INCLUDE_DIR ${MKLDNN_ROOT}/external/*/mkl_vsl.h) - if(MKLML_INNER_INCLUDE_DIR) - # if user has multiple version under external/ then guess last - # one alphabetically is "latest" and warn - list(LENGTH MKLML_INNER_INCLUDE_DIR MKLINCLEN) - if(MKLINCLEN GREATER 1) - list(SORT MKLML_INNER_INCLUDE_DIR) - list(REVERSE MKLML_INNER_INCLUDE_DIR) - list(GET MKLML_INNER_INCLUDE_DIR 0 MKLINCLST) - set(MKLML_INNER_INCLUDE_DIR "${MKLINCLST}") - endif() - get_filename_component(MKLML_INNER_INCLUDE_DIR ${MKLML_INNER_INCLUDE_DIR} DIRECTORY) - list(APPEND IDEEP_INCLUDE_DIR ${MKLDNN_INCLUDE_DIR} 
${MKLML_INNER_INCLUDE_DIR}) - list(APPEND __ideep_looked_for IDEEP_INCLUDE_DIR) - - if(APPLE) - set(__mklml_inner_libs mklml iomp5) - else() - set(__mklml_inner_libs mklml_intel iomp5) - endif() - - set(IDEEP_LIBRARIES "") - foreach (__mklml_inner_lib ${__mklml_inner_libs}) - string(TOUPPER ${__mklml_inner_lib} __mklml_inner_lib_upper) - find_library(${__mklml_inner_lib_upper}_LIBRARY - NAMES ${__mklml_inner_lib} - PATHS "${MKLML_INNER_INCLUDE_DIR}/../lib" - DOC "The path to Intel(R) MKLML ${__mklml_inner_lib} library") - mark_as_advanced(${__mklml_inner_lib_upper}_LIBRARY) - list(APPEND IDEEP_LIBRARIES ${${__mklml_inner_lib_upper}_LIBRARY}) - list(APPEND __ideep_looked_for ${__mklml_inner_lib_upper}_LIBRARY) - endforeach() - - include(FindPackageHandleStandardArgs) - find_package_handle_standard_args(IDEEP DEFAULT_MSG ${__ideep_looked_for}) - - if(IDEEP_FOUND) - set(MKLDNN_LIB "${CMAKE_SHARED_LIBRARY_PREFIX}mkldnn${CMAKE_SHARED_LIBRARY_SUFFIX}") - list(APPEND IDEEP_LIBRARIES "${PROJECT_BINARY_DIR}/lib/${MKLDNN_LIB}") - set(CAFFE2_USE_IDEEP 1) - message(STATUS "Found IDEEP (include: ${IDEEP_INCLUDE_DIR}, lib: ${IDEEP_LIBRARIES})") - endif() - - caffe_clear_vars(__ideep_looked_for __mklml_inner_libs) - endif() - endif() - - if(NOT IDEEP_FOUND) - message(FATAL_ERROR "Did not find IDEEP files!") - endif() - endif() - - if(USE_MKLML) - - # ---[ Options - option(MKLML_USE_SINGLE_DYNAMIC_LIBRARY "Use single dynamic library interface" ON) - cmake_dependent_option( - MKLML_USE_STATIC_LIBS "Use static libraries" OFF - "NOT MKLML_USE_SINGLE_DYNAMIC_LIBRARY" OFF) - cmake_dependent_option( - MKLML_MULTI_THREADED "Use multi-threading" ON - "NOT MKLML_USE_SINGLE_DYNAMIC_LIBRARY" OFF) - - # ---[ Root folders - if(MSVC) - set(INTEL_ROOT_DEFAULT "C:/Program Files (x86)/IntelSWTools/compilers_and_libraries/windows") - else() - set(INTEL_ROOT_DEFAULT "/opt/intel") - endif() - set(INTEL_ROOT ${INTEL_ROOT_DEFAULT} CACHE PATH "Folder contains intel libs") - find_path(MKLML_ROOT 
include/mkl.h PATHS $ENV{MKLMLROOT} ${INTEL_ROOT}/mkl - DOC "Folder contains MKLML") - - # ---[ Find include dir - find_path(MKLML_INCLUDE_DIR mkl.h PATHS ${MKLML_ROOT} PATH_SUFFIXES include) - set(__looked_for MKLML_INCLUDE_DIR) - - # ---[ Find libraries - if(CMAKE_SIZEOF_VOID_P EQUAL 4) - set(__path_suffixes lib lib/ia32) - else() - set(__path_suffixes lib lib/intel64) - endif() - - set(__mklml_libs "") - if(MKLML_USE_SINGLE_DYNAMIC_LIBRARY) - list(APPEND __mklml_libs rt) - else() - if(CMAKE_SIZEOF_VOID_P EQUAL 4) - if(WIN32) - list(APPEND __mklml_libs intel_c) - else() - list(APPEND __mklml_libs intel gf) - endif() - else() - list(APPEND __mklml_libs intel_lp64 gf_lp64) - endif() - - if(MKLML_MULTI_THREADED) - list(APPEND __mklml_libs intel_thread) - else() - list(APPEND __mklml_libs sequential) - endif() - - list(APPEND __mklml_libs core) - endif() - - foreach (__lib ${__mklml_libs}) - set(__mklml_lib "mkl_${__lib}") - string(TOUPPER ${__mklml_lib} __mklml_lib_upper) - - if(MKLML_USE_STATIC_LIBS) - set(__mklml_lib "lib${__mklml_lib}.a") - endif() - - find_library(${__mklml_lib_upper}_LIBRARY - NAMES ${__mklml_lib} - PATHS ${MKLML_ROOT} "${MKLML_INCLUDE_DIR}/.." - PATH_SUFFIXES ${__path_suffixes} - DOC "The path to Intel(R) MKLML ${__mklml_lib} library") - mark_as_advanced(${__mklml_lib_upper}_LIBRARY) - - list(APPEND __looked_for ${__mklml_lib_upper}_LIBRARY) - list(APPEND MKLML_LIBRARIES ${${__mklml_lib_upper}_LIBRARY}) - endforeach() - - if(NOT MKLML_USE_SINGLE_DYNAMIC_LIBRARY) - if (MKLML_USE_STATIC_LIBS) - set(__iomp5_libs iomp5 libiomp5mt.lib) - else() - set(__iomp5_libs iomp5 libiomp5md.lib) - endif() - - if(WIN32) - find_path(INTEL_INCLUDE_DIR omp.h PATHS ${INTEL_ROOT} PATH_SUFFIXES include) - list(APPEND __looked_for INTEL_INCLUDE_DIR) - endif() - - find_library(MKLML_RTL_LIBRARY ${__iomp5_libs} - PATHS ${INTEL_RTL_ROOT} ${INTEL_ROOT}/compiler ${MKLML_ROOT}/.. 
${MKLML_ROOT}/../compiler - PATH_SUFFIXES ${__path_suffixes} - DOC "Path to OpenMP runtime library") - - list(APPEND __looked_for MKLML_RTL_LIBRARY) - list(APPEND MKLML_LIBRARIES ${MKLML_RTL_LIBRARY}) - endif() - - include(FindPackageHandleStandardArgs) - find_package_handle_standard_args(MKLML DEFAULT_MSG ${__looked_for}) - - if(MKLML_FOUND) - set(CAFFE2_USE_MKL 1) - message(STATUS "Found MKLML (include: ${MKLML_INCLUDE_DIR}, lib: ${MKLML_LIBRARIES})") - endif() - - caffe_clear_vars(__looked_for __mklml_libs __path_suffixes __iomp5_libs) - - endif() - - if(IDEEP_FOUND OR MKLML_FOUND) - set(USE_MKL ON) - set(MKL_FOUND True) - if (IDEEP_FOUND) - list(APPEND MKL_INCLUDE_DIR ${IDEEP_INCLUDE_DIR}) - list(APPEND MKL_LIBRARIES ${IDEEP_LIBRARIES}) - endif() - if (MKLML_FOUND) - list(APPEND MKL_INCLUDE_DIR ${MKLML_INCLUDE_DIR}) - list(APPEND MKL_LIBRARIES ${MKLML_LIBRARIES}) - endif() - else() - set(USE_MKL OFF) - set(USE_IDEEP OFF) - set(USE_MKLML OFF) - endif() - - # Return early for this special flow - return() -endif() - # Do nothing if MKL_FOUND was set before! 
IF (NOT MKL_FOUND) @@ -235,9 +34,9 @@ INCLUDE(CheckTypeSize) INCLUDE(CheckFunctionExists) # Intel Compiler Suite -SET(INTEL_COMPILER_DIR CACHE STRING +SET(INTEL_COMPILER_DIR "/opt/intel" CACHE STRING "Root directory of the Intel Compiler Suite (contains ipp, mkl, etc.)") -SET(INTEL_MKL_DIR CACHE STRING +SET(INTEL_MKL_DIR "/opt/intel/mkl" CACHE STRING "Root directory of the Intel MKL (standalone)") SET(INTEL_MKL_SEQUENTIAL OFF CACHE BOOL "Force using the sequential (non threaded) libraries") @@ -274,7 +73,7 @@ SET(mklseq) # Paths SET(saved_CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH}) SET(saved_CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH}) -IF (INTEL_COMPILER_DIR) +IF (EXISTS ${INTEL_COMPILER_DIR}) # TODO: diagnostic if dir does not exist SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${INTEL_COMPILER_DIR}/lib/${iccvers}") @@ -282,7 +81,7 @@ IF (INTEL_COMPILER_DIR) SET(INTEL_MKL_DIR "${INTEL_COMPILER_DIR}/mkl") ENDIF (NOT INTEL_MKL_DIR) ENDIF (INTEL_COMPILER_DIR) -IF (INTEL_MKL_DIR) +IF (EXISTS ${INTEL_MKL_DIR}) # TODO: diagnostic if dir does not exist SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "${INTEL_MKL_DIR}/include") @@ -486,5 +285,79 @@ IF(NOT MKL_FIND_QUIETLY) ENDIF(MKL_FOUND) ENDIF(NOT MKL_FIND_QUIETLY) +# MKLML is included in the MKL package +if (USE_MKL AND USE_MKLML) + set(CAFFE2_USE_MKL 1) +endif() + +if (USE_MKL AND USE_IDEEP) + set(IDEEP_ROOT "${PROJECT_SOURCE_DIR}/third_party/ideep") + set(MKLDNN_ROOT "${IDEEP_ROOT}/mkl-dnn") + find_path(IDEEP_INCLUDE_DIR ideep.hpp PATHS ${IDEEP_ROOT} PATH_SUFFIXES include) + find_path(MKLDNN_INCLUDE_DIR mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include) + if (NOT MKLDNN_INCLUDE_DIR) + execute_process(COMMAND git submodule update --init mkl-dnn WORKING_DIRECTORY ${IDEEP_ROOT}) + find_path(MKLDNN_INCLUDE_DIR mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include) + endif() + + if (MKLDNN_INCLUDE_DIR) + # to avoid adding conflicting submodels + set(ORIG_WITH_TEST ${WITH_TEST}) + set(WITH_TEST OFF) + 
add_subdirectory(${IDEEP_ROOT}) + set(WITH_TEST ${ORIG_WITH_TEST}) + + file(GLOB_RECURSE MKLML_INNER_INCLUDE_DIR ${MKLDNN_ROOT}/external/*/mkl_vsl.h) + if(MKLML_INNER_INCLUDE_DIR) + # if user has multiple version under external/ then guess last + # one alphabetically is "latest" and warn + list(LENGTH MKLML_INNER_INCLUDE_DIR MKLINCLEN) + if(MKLINCLEN GREATER 1) + list(SORT MKLML_INNER_INCLUDE_DIR) + list(REVERSE MKLML_INNER_INCLUDE_DIR) + list(GET MKLML_INNER_INCLUDE_DIR 0 MKLINCLST) + set(MKLML_INNER_INCLUDE_DIR "${MKLINCLST}") + endif() + get_filename_component(MKLML_INNER_INCLUDE_DIR ${MKLML_INNER_INCLUDE_DIR} DIRECTORY) + list(APPEND IDEEP_INCLUDE_DIR ${MKLDNN_INCLUDE_DIR} ${MKLML_INNER_INCLUDE_DIR}) + list(APPEND __ideep_looked_for IDEEP_INCLUDE_DIR) + + if(APPLE) + set(__mklml_inner_libs mklml iomp5) + else() + set(__mklml_inner_libs mklml_intel iomp5) + endif() + + set(IDEEP_LIBRARIES "") + foreach (__mklml_inner_lib ${__mklml_inner_libs}) + string(TOUPPER ${__mklml_inner_lib} __mklml_inner_lib_upper) + find_library(${__mklml_inner_lib_upper}_LIBRARY + NAMES ${__mklml_inner_lib} + PATHS "${MKLML_INNER_INCLUDE_DIR}/../lib" + DOC "The path to Intel(R) MKLML ${__mklml_inner_lib} library") + mark_as_advanced(${__mklml_inner_lib_upper}_LIBRARY) + list(APPEND IDEEP_LIBRARIES ${${__mklml_inner_lib_upper}_LIBRARY}) + list(APPEND __ideep_looked_for ${__mklml_inner_lib_upper}_LIBRARY) + endforeach() + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(IDEEP DEFAULT_MSG ${__ideep_looked_for}) + + if(IDEEP_FOUND) + set(MKLDNN_LIB "${CMAKE_SHARED_LIBRARY_PREFIX}mkldnn${CMAKE_SHARED_LIBRARY_SUFFIX}") + list(APPEND IDEEP_LIBRARIES "${PROJECT_BINARY_DIR}/lib/${MKLDNN_LIB}") + message(STATUS "Found IDEEP (include: ${IDEEP_INCLUDE_DIR}, lib: ${IDEEP_LIBRARIES})") + set(CAFFE2_USE_IDEEP 1) + list(APPEND MKL_INCLUDE_DIR ${IDEEP_INCLUDE_DIR}) + list(APPEND MKL_LIBRARIES ${IDEEP_LIBRARIES}) + else() + message(FATAL_ERROR "Did not find IDEEP files!") + 
endif() + + caffe_clear_vars(__ideep_looked_for __mklml_inner_libs) + endif() # MKLML_INNER_INCLUDE_DIR + endif() # MKLDNN_INCLUDE_DIR +endif() # USE_IDEEP + # Do nothing if MKL_FOUND was set before! ENDIF (NOT MKL_FOUND) diff --git a/cmake/Modules/FindNCCL.cmake b/cmake/Modules/FindNCCL.cmake index 1a9d214e794ca..705fa17633283 100644 --- a/cmake/Modules/FindNCCL.cmake +++ b/cmake/Modules/FindNCCL.cmake @@ -14,7 +14,7 @@ # install NCCL in the same location as the CUDA toolkit. # See https://github.com/caffe2/caffe2/issues/1601 -set(NCCL_ROOT_DIR "" CACHE PATH "Folder contains NVIDIA NCCL") +set(NCCL_ROOT_DIR $ENV{NCCL_ROOT_DIR} CACHE PATH "Folder contains NVIDIA NCCL") find_path(NCCL_INCLUDE_DIRS NAMES nccl.h diff --git a/cmake/ProtoBuf.cmake b/cmake/ProtoBuf.cmake index f6c3c3972d9db..a60804a8f9d7c 100644 --- a/cmake/ProtoBuf.cmake +++ b/cmake/ProtoBuf.cmake @@ -21,9 +21,6 @@ macro(custom_protobuf_find) endif() # We will make sure that protobuf and caffe2 uses the same msvc runtime. set(protobuf_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}) - if (MSVC AND BUILD_SHARED_LIBS) - add_definitions(-DPROTOBUF_USE_DLLS) - endif() if (${CAFFE2_LINK_LOCAL_PROTOBUF}) set(__caffe2_CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ${CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS}) @@ -41,7 +38,7 @@ macro(custom_protobuf_find) set(__caffe2_CMAKE_POSITION_INDEPENDENT_CODE ${CMAKE_POSITION_INDEPENDENT_CODE}) set(CMAKE_POSITION_INDEPENDENT_CODE ON) - add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/protobuf/cmake) + add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/protobuf/cmake) set(CMAKE_POSITION_INDEPENDENT_CODE ${__caffe2_CMAKE_POSITION_INDEPENDENT_CODE}) diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 187ceef5115b8..940b5da609328 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -44,7 +44,9 @@ function (caffe2_print_configuration_summary) message(STATUS " CUDA static link : ${CAFFE2_STATIC_LINK_CUDA}") message(STATUS " USE_CUDNN : ${USE_CUDNN}") message(STATUS " 
CUDA version : ${CUDA_VERSION}") - message(STATUS " cuDNN version : ${CUDNN_VERSION}") + if(${USE_CUDNN}) + message(STATUS " cuDNN version : ${CUDNN_VERSION}") + endif() message(STATUS " CUDA root directory : ${CUDA_TOOLKIT_ROOT_DIR}") get_target_property(__tmp caffe2::cuda IMPORTED_LOCATION) message(STATUS " CUDA library : ${__tmp}") @@ -52,10 +54,14 @@ function (caffe2_print_configuration_summary) message(STATUS " cudart library : ${__tmp}") get_target_property(__tmp caffe2::cublas INTERFACE_LINK_LIBRARIES) message(STATUS " cublas library : ${__tmp}") + get_target_property(__tmp caffe2::cufft INTERFACE_LINK_LIBRARIES) + message(STATUS " cufft library : ${__tmp}") get_target_property(__tmp caffe2::curand IMPORTED_LOCATION) message(STATUS " curand library : ${__tmp}") - get_target_property(__tmp caffe2::cudnn IMPORTED_LOCATION) - message(STATUS " CuDNN library : ${__tmp}") + if(${USE_CUDNN}) + get_target_property(__tmp caffe2::cudnn IMPORTED_LOCATION) + message(STATUS " cuDNN library : ${__tmp}") + endif() get_target_property(__tmp caffe2::nvrtc IMPORTED_LOCATION) message(STATUS " nvrtc : ${__tmp}") message(STATUS " CUDA include path : ${CUDA_INCLUDE_DIRS}") @@ -67,6 +73,7 @@ function (caffe2_print_configuration_summary) message(STATUS " TensorRT include path : ${TENSORRT_INCLUDE_DIR}") endif() endif() + message(STATUS " USE_ROCM : ${USE_ROCM}") message(STATUS " USE_EIGEN_FOR_BLAS : ${CAFFE2_USE_EIGEN_FOR_BLAS}") message(STATUS " USE_FFMPEG : ${USE_FFMPEG}") message(STATUS " USE_GFLAGS : ${USE_GFLAGS}") diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake index 78cc4411b9d34..611229cc29e11 100644 --- a/cmake/public/cuda.cmake +++ b/cmake/public/cuda.cmake @@ -1,18 +1,20 @@ # ---[ cuda set(CAFFE2_FOUND_CUDA FALSE) +set(CAFFE2_FOUND_CUDNN FALSE) -# Find Cuda. +# Find CUDA. find_package(CUDA 7.0) if(NOT CUDA_FOUND) message(WARNING - "Caffe2: Cuda cannot be found. 
Depending on whether you are building " - "Caffe2 or a Caffe2 dependent library, the next warning / error will " - "give you more info.") + "Caffe2: CUDA cannot be found. Depending on whether you are building " + "Caffe2 or a Caffe2 dependent library, the next warning / error will " + "give you more info.") return() endif() +set(CAFFE2_FOUND_CUDA TRUE) -# Find cudnn. +# Find cuDNN. if(CAFFE2_STATIC_LINK_CUDA) SET(CUDNN_LIBNAME "libcudnn_static.a") else() @@ -28,12 +30,12 @@ find_library(CUDNN_LIBRARY ${CUDNN_LIBNAME} PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64) find_package_handle_standard_args( CUDNN DEFAULT_MSG CUDNN_INCLUDE_DIR CUDNN_LIBRARY) - if(NOT CUDNN_FOUND) message(WARNING - "Caffe2: cudnn cannot be found. Caffe2 CUDA depends explicitly " - "on cudnn so you should consider installing it.") - return() + "Caffe2: Cannot find cuDNN library. Turning the option off") + set(USE_CUDNN OFF) +else() + set(CAFFE2_FOUND_CUDNN TRUE) endif() # Optionally, find TensorRT @@ -47,39 +49,40 @@ if (${USE_TENSORRT}) find_package_handle_standard_args( TENSORRT DEFAULT_MSG TENSORRT_INCLUDE_DIR TENSORRT_LIBRARY) if(NOT TENSORRT_FOUND) - message(WARNING + message(WARNING "Caffe2: Cannot find TensorRT library. Turning the option off") set(USE_TENSORRT OFF) endif() endif() -# After both cuda and cudnn are found, we can safely proceed. 
-set(CAFFE2_FOUND_CUDA TRUE) +# ---[ Extract versions message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION}) -# get cuDNN version -file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_HEADER_CONTENTS) -string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" - CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}") -string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" - CUDNN_VERSION_MAJOR "${CUDNN_VERSION_MAJOR}") -string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" - CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}") -string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" - CUDNN_VERSION_MINOR "${CUDNN_VERSION_MINOR}") -string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" - CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}") -string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" - CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}") -# Assemble cuDNN version -if(NOT CUDNN_VERSION_MAJOR) - set(CUDNN_VERSION "?") -else() - set(CUDNN_VERSION - "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}") +if (CAFFE2_FOUND_CUDNN) + # Get cuDNN version + file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_HEADER_CONTENTS) + string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" + CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" + CUDNN_VERSION_MAJOR "${CUDNN_VERSION_MAJOR}") + string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" + CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" + CUDNN_VERSION_MINOR "${CUDNN_VERSION_MINOR}") + string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" + CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}") + string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" + CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}") + # Assemble cuDNN version + if(NOT CUDNN_VERSION_MAJOR) + set(CUDNN_VERSION "?") + else() + set(CUDNN_VERSION + "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}") + endif() + message(STATUS
"Found cuDNN: v${CUDNN_VERSION} (include: ${CUDNN_INCLUDE_DIR}, library: ${CUDNN_LIBRARY})") endif() -message(STATUS "Found cuDNN: v${CUDNN_VERSION} (include: ${CUDNN_INCLUDE_DIR}, library: ${CUDNN_LIBRARY})") -# ---[ Cuda Libraries wrapper +# ---[ CUDA libraries wrapper # find libcuda.so and lbnvrtc.so # For libcuda.so, we will find it under lib, lib64, and then the @@ -126,13 +129,15 @@ set_property( # cudnn # static linking is handled by USE_STATIC_CUDNN environment variable -add_library(caffe2::cudnn UNKNOWN IMPORTED) -set_property( - TARGET caffe2::cudnn PROPERTY IMPORTED_LOCATION - ${CUDNN_LIBRARY}) -set_property( - TARGET caffe2::cudnn PROPERTY INTERFACE_INCLUDE_DIRECTORIES - ${CUDNN_INCLUDE_DIR}) +if(${USE_CUDNN}) + add_library(caffe2::cudnn UNKNOWN IMPORTED) + set_property( + TARGET caffe2::cudnn PROPERTY IMPORTED_LOCATION + ${CUDNN_LIBRARY}) + set_property( + TARGET caffe2::cudnn PROPERTY INTERFACE_INCLUDE_DIRECTORIES + ${CUDNN_INCLUDE_DIR}) +endif() # curand add_library(caffe2::curand UNKNOWN IMPORTED) @@ -149,6 +154,22 @@ set_property( TARGET caffe2::curand PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS}) +# cufft. CUDA_CUFFT_LIBRARIES is actually a list, so we will make an +# interface library similar to cudart. +add_library(caffe2::cufft INTERFACE IMPORTED) +if(CAFFE2_STATIC_LINK_CUDA) + set_property( + TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES + "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcufft_static.a") +else() + set_property( + TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES + ${CUDA_CUFFT_LIBRARIES}) +endif() +set_property( + TARGET caffe2::cufft PROPERTY INTERFACE_INCLUDE_DIRECTORIES + ${CUDA_INCLUDE_DIRS}) + # TensorRT if(${USE_TENSORRT}) add_library(caffe2::tensorrt UNKNOWN IMPORTED) @@ -192,7 +213,7 @@ set_property( # ---[ Cuda flags # Known NVIDIA GPU achitectures Caffe2 can be compiled for. -# Default is set to cuda 9. If we detect the cuda architectores to be less than +# Default is set to cuda 9. 
If we detect the cuda architectures to be less than # 9, we will lower it to the corresponding known archs. set(Caffe2_known_gpu_archs "30 35 50 52 60 61 70") # for CUDA 9.x set(Caffe2_known_gpu_archs8 "30 35 50 52 60 61") # for CUDA 8.x @@ -251,9 +272,9 @@ function(caffe2_select_nvcc_arch_flags out_variable) # List of arch names set(__archs_names "Kepler" "Maxwell" "Pascal" "Volta" "All" "Manual") set(__archs_name_default "All") - if(NOT CMAKE_CROSSCOMPILING) - list(APPEND __archs_names "Auto") - set(__archs_name_default "Auto") + if(NOT CMAKE_CROSSCOMPILING) + list(APPEND __archs_names "Auto") + set(__archs_name_default "Auto") endif() # Set CUDA_ARCH_NAME strings (so it will be seen as dropbox in the CMake GUI) @@ -278,7 +299,11 @@ function(caffe2_select_nvcc_arch_flags out_variable) unset(CUDA_ARCH_PTX CACHE) endif() - if(${CUDA_ARCH_NAME} STREQUAL "Kepler") + if($ENV{TORCH_CUDA_ARCH_LIST}) + # Pass CUDA architecture directly + set(__cuda_arch_bin $ENV{TORCH_CUDA_ARCH_LIST}) + message(STATUS "Set CUDA arch from TORCH_CUDA_ARCH_LIST: ${__cuda_arch_bin}") + elseif(${CUDA_ARCH_NAME} STREQUAL "Kepler") set(__cuda_arch_bin "30 35") elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") set(__cuda_arch_bin "50") diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake index 03e942437c5ce..3137a5e93818f 100644 --- a/cmake/public/utils.cmake +++ b/cmake/public/utils.cmake @@ -27,9 +27,8 @@ macro(caffe2_interface_library SRC DST) ${DST} INTERFACE -WHOLEARCHIVE:$) else() # Assume everything else is like gcc - target_link_libraries( - ${DST} INTERFACE - "-Wl,--whole-archive $ -Wl,--no-whole-archive") + target_link_libraries(${DST} INTERFACE + "-Wl,--whole-archive,$ -Wl,--no-whole-archive") endif() # Link all interface link libraries of the src target as well. 
# For static library, we need to explicitly depend on all the libraries @@ -52,8 +51,8 @@ macro(caffe2_interface_library SRC DST) $) elseif(${__src_target_type} STREQUAL "SHARED_LIBRARY") if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU") - target_link_libraries( - ${DST} INTERFACE -Wl,--no-as-needed ${SRC} -Wl,--as-needed) + target_link_libraries(${DST} INTERFACE + "-Wl,--no-as-needed,$ -Wl,--as-needed") else() target_link_libraries(${DST} INTERFACE ${SRC}) endif() @@ -106,32 +105,67 @@ function(caffe2_binary_target target_name_or_src) install(TARGETS ${__target} DESTINATION bin) endfunction() + ############################################################################## # Multiplex between loading executables for CUDA versus HIP (AMD Software Stack). # Usage: # torch_cuda_based_add_executable(cuda_target) # macro(torch_cuda_based_add_executable cuda_target) - IF (WITH_ROCM) + IF (USE_ROCM) hip_add_executable(${cuda_target} ${ARGN}) - ELSEIF(NOT NO_CUDA) + ELSEIF(USE_CUDA) cuda_add_executable(${cuda_target} ${ARGN}) ELSE() ENDIF() endmacro() + ############################################################################## # Multiplex between adding libraries for CUDA versus HIP (AMD Software Stack). # Usage: # torch_cuda_based_add_library(cuda_target) # macro(torch_cuda_based_add_library cuda_target) - IF (WITH_ROCM) + IF (USE_ROCM) hip_add_library(${cuda_target} ${ARGN}) - ELSEIF(NOT NO_CUDA) + ELSEIF(USE_CUDA) cuda_add_library(${cuda_target} ${ARGN}) ELSE() - ENDIF() endmacro() + + +############################################################################## +# Add ATen compile options. 
+# Usage: +# aten_compile_options(lib_name) +function(aten_compile_options libname) + target_compile_options(${libname} + PRIVATE + -Wall + -Wextra + -fexceptions + -Wno-missing-field-initializers + -Wno-type-limits + -Wno-unused-parameter + -Wno-unknown-warning-option + -Wno-unknown-pragmas) + if ($ENV{WERROR}) + target_compile_options(${libname} PRIVATE -Werror) + endif() +endfunction() + + +############################################################################## +# Set ATen target properties. +# Usage: +# aten_set_target_props(lib_name) +function(aten_set_target_props libname) + if(MSVC AND AT_MKL_MT) + set_target_properties(${libname} PROPERTIES LINK_FLAGS_RELEASE "/NODEFAULTLIB:${VCOMP_LIB}") + set_target_properties(${libname} PROPERTIES LINK_FLAGS_DEBUG "/NODEFAULTLIB:${VCOMP_LIB}") + set_target_properties(${libname} PROPERTIES STATIC_LIBRARY_FLAGS "/NODEFAULTLIB:${VCOMP_LIB}") + endif() +endfunction() diff --git a/setup.py b/setup.py index 7010f93876eeb..ed52426baf993 100644 --- a/setup.py +++ b/setup.py @@ -188,7 +188,7 @@ def patched_link(self, *args, **kwargs): ################################################################################ dep_libs = [ - 'nccl', 'ATen', + 'nccl', 'caffe2', 'libshm', 'libshm_windows', 'gloo', 'THD', 'nanopb', ] @@ -242,6 +242,7 @@ def build_libs(libs): build_libs_cmd += ['--with-distributed-mw'] if subprocess.call(build_libs_cmd + libs, env=my_env) != 0: + print("Failed to run '{}'".format(' '.join(build_libs_cmd + libs))) sys.exit(1) missing_pydep = ''' @@ -286,7 +287,7 @@ def check_file(f): libs = [] if WITH_NCCL and not WITH_SYSTEM_NCCL: libs += ['nccl'] - libs += ['ATen', 'nanopb'] + libs += ['caffe2', 'nanopb'] if IS_WINDOWS: libs += ['libshm_windows'] else: @@ -571,9 +572,9 @@ def run(self): library_dirs.append(lib_path) # we specify exact lib names to avoid conflict with lua-torch installs -ATEN_LIBS = [os.path.join(lib_path, 'libATen_cpu.so')] +CAFFE2_LIBS = [os.path.join(lib_path, 'libcaffe2.so')] if 
WITH_CUDA or WITH_ROCM: - ATEN_LIBS.extend(['-Wl,--no-as-needed', os.path.join(lib_path, 'libATen_cuda.so'), '-Wl,--as-needed']) + CAFFE2_LIBS.extend(['-Wl,--no-as-needed', os.path.join(lib_path, 'libcaffe2_gpu.so'), '-Wl,--as-needed']) THD_LIB = os.path.join(lib_path, 'libTHD.a') NCCL_LIB = os.path.join(lib_path, 'libnccl.so.1') @@ -581,15 +582,15 @@ def run(self): NANOPB_STATIC_LIB = os.path.join(lib_path, 'libprotobuf-nanopb.a') if IS_DARWIN: - ATEN_LIBS = [os.path.join(lib_path, 'libATen_cpu.dylib')] + CAFFE2_LIBS = [os.path.join(lib_path, 'libcaffe2.dylib')] if WITH_CUDA or WITH_ROCM: - ATEN_LIBS.append(os.path.join(lib_path, 'libATen_cuda.dylib')) + CAFFE2_LIBS.append(os.path.join(lib_path, 'libcaffe2_gpu.dylib')) NCCL_LIB = os.path.join(lib_path, 'libnccl.1.dylib') if IS_WINDOWS: - ATEN_LIBS = [os.path.join(lib_path, 'ATen_cpu.lib')] + CAFFE2_LIBS = [os.path.join(lib_path, 'caffe2.lib')] if WITH_CUDA or WITH_ROCM: - ATEN_LIBS.append(os.path.join(lib_path, 'ATen_cuda.lib')) + CAFFE2_LIBS.append(os.path.join(lib_path, 'caffe2_gpu.lib')) if DEBUG: NANOPB_STATIC_LIB = os.path.join(lib_path, 'protobuf-nanopbd.lib') else: @@ -597,7 +598,7 @@ def run(self): main_compile_args = ['-D_THP_CORE'] main_libraries = ['shm'] -main_link_args = ATEN_LIBS + [NANOPB_STATIC_LIB] +main_link_args = CAFFE2_LIBS + [NANOPB_STATIC_LIB] main_sources = [ "torch/csrc/PtrWrapper.cpp", "torch/csrc/Module.cpp", diff --git a/tools/build_pytorch_libs.bat b/tools/build_pytorch_libs.bat index a2b6485cef98e..07683866995f7 100755 --- a/tools/build_pytorch_libs.bat +++ b/tools/build_pytorch_libs.bat @@ -19,16 +19,20 @@ mkdir torch/lib/tmp_install IF "%~1"=="--with-cuda" ( set /a NO_CUDA=0 + set /a USE_CUDA=1 shift ) ELSE ( set /a NO_CUDA=1 + set /a USE_CUDA=0 ) IF "%~1"=="--with-nnpack" ( set /a NO_NNPACK=0 + set /a USE_NNPACK=1 shift ) ELSE ( set /a NO_NNPACK=1 + set /a USE_NNPACK=0 ) set BUILD_TYPE=Release @@ -60,10 +64,8 @@ IF "%CMAKE_GENERATOR%"=="" ( :read_loop if "%1"=="" goto after_loop 
-if "%1"=="ATen" ( - cd aten - call:build_aten %~1 - cd .. +if "%1"=="caffe2" ( + call:build_caffe2 %~1 ) ELSE ( set "IS_OURS=" IF "%1"=="THD" set IS_OURS=1 @@ -113,13 +115,13 @@ goto:eof -Dcwrap_files="%CWRAP_FILES%" ^ -DTH_INCLUDE_PATH="%INSTALL_DIR%/include" ^ -DTH_LIB_PATH="%INSTALL_DIR%/lib" ^ - -DTH_LIBRARIES="%INSTALL_DIR%/lib/ATen_cpu.lib" ^ - -DTHS_LIBRARIES="%INSTALL_DIR%/lib/ATen_cpu.lib" ^ - -DTHC_LIBRARIES="%INSTALL_DIR%/lib/ATen_cuda.lib" ^ - -DTHCS_LIBRARIES="%INSTALL_DIR%/lib/ATen_cuda.lib" ^ - -DATEN_LIBRARIES="%INSTALL_DIR%/lib/ATen_cpu.lib" ^ - -DTHNN_LIBRARIES="%INSTALL_DIR%/lib/ATen_cpu.lib" ^ - -DTHCUNN_LIBRARIES="%INSTALL_DIR%/lib/ATen_cuda.lib" ^ + -DTH_LIBRARIES="%INSTALL_DIR%/lib/caffe2.lib" ^ + -DTHS_LIBRARIES="%INSTALL_DIR%/lib/caffe2.lib" ^ + -DTHC_LIBRARIES="%INSTALL_DIR%/lib/caffe2_gpu.lib" ^ + -DTHCS_LIBRARIES="%INSTALL_DIR%/lib/caffe2_gpu.lib" ^ + -DCAFFE2_LIBRARIES="%INSTALL_DIR%/lib/caffe2.lib" ^ + -DTHNN_LIBRARIES="%INSTALL_DIR%/lib/caffe2.lib" ^ + -DTHCUNN_LIBRARIES="%INSTALL_DIR%/lib/caffe2_gpu.lib" ^ -DTH_SO_VERSION=1 ^ -DTHC_SO_VERSION=1 ^ -DTHNN_SO_VERSION=1 ^ @@ -136,15 +138,19 @@ goto:eof goto:eof -:build_aten +:build_caffe2 @setlocal IF NOT "%PREBUILD_COMMAND%"=="" call "%PREBUILD_COMMAND%" %PREBUILD_COMMAND_ARGS% mkdir build cd build cmake .. 
%CMAKE_GENERATOR_COMMAND% ^ -DCMAKE_INSTALL_PREFIX="%INSTALL_DIR%" ^ - -DNO_CUDA=%NO_CUDA% ^ - -DNO_NNPACK=%NO_NNPACK% ^ + -DBUILD_CAFFE2=OFF ^ + -DBUILD_ATEN=ON ^ + -DBUILD_PYTHON=OFF ^ + -DBUILD_BINARY=OFF ^ + -DUSE_CUDA=%USE_CUDA% ^ + -DUSE_NNPACK=%USE_NNPACK% ^ -DCUDNN_INCLUDE_DIR="%CUDNN_INCLUDE_DIR%" ^ -DCUDNN_LIB_DIR="%CUDNN_LIB_DIR%" ^ -DCUDNN_LIBRARY="%CUDNN_LIBRARY%" ^ diff --git a/tools/build_pytorch_libs.sh b/tools/build_pytorch_libs.sh index f557250d33ebc..6f71cb6ec43e6 100755 --- a/tools/build_pytorch_libs.sh +++ b/tools/build_pytorch_libs.sh @@ -1,51 +1,48 @@ #!/usr/bin/env bash -# Shell script used to build the aten/* and third_party/* dependencies prior to -# linking the libraries and passing the headers to the Python extension -# compilation stage. This file is used from setup.py, but can also be +# Shell script used to build the aten/*, caffe2/*, and third_party/* +# dependencies prior to linking libraries and passing headers to the Python +# extension compilation stage. This file is used from setup.py, but can also be # called standalone to compile the libraries outside of the overall PyTorch # build process.
# -# TODO: Replace this with a CMakeLists.txt +# TODO: Replace this with the root-level CMakeLists.txt set -ex # Options for building only a subset of the libraries WITH_CUDA=0 -if [[ "$1" == "--with-cuda" ]]; then - WITH_CUDA=1 - shift -fi - WITH_ROCM=0 -if [[ "$1" == "--with-rocm" ]]; then - WITH_ROCM=1 - shift -fi - WITH_NNPACK=0 -if [[ "$1" == "--with-nnpack" ]]; then - WITH_NNPACK=1 - shift -fi - WITH_MKLDNN=0 -if [[ "$1" == "--with-mkldnn" ]]; then - WITH_MKLDNN=1 - shift -fi - WITH_GLOO_IBVERBS=0 -if [[ "$1" == "--with-gloo-ibverbs" ]]; then - WITH_GLOO_IBVERBS=1 - shift -fi - WITH_DISTRIBUTED_MW=0 -if [[ "$1" == "--with-distributed-mw" ]]; then - WITH_DISTRIBUTED_MW=1 - shift -fi +while [[ $# -gt 0 ]]; do + case "$1" in + --with-cuda) + WITH_CUDA=1 + ;; + --with-rocm) + WITH_ROCM=1 + ;; + --with-nnpack) + WITH_NNPACK=1 + ;; + --with-mkldnn) + WITH_MKLDNN=1 + ;; + --with-gloo-ibverbs) + WITH_GLOO_IBVERBS=1 + ;; + --with-distributed-mw) + WITH_DISTRIBUTED_MW=1 + ;; + *) + break + ;; + esac + shift +done CMAKE_INSTALL=${CMAKE_INSTALL-make install} @@ -157,7 +154,7 @@ function build() { -DTH_INCLUDE_PATH="$INSTALL_DIR/include" \ -DTH_LIB_PATH="$INSTALL_DIR/lib" \ -DTH_LIBRARIES="$INSTALL_DIR/lib/libTH$LD_POSTFIX" \ - -DATEN_LIBRARIES="$INSTALL_DIR/lib/libATen$LD_POSTFIX" \ + -DCAFFE2_LIBRARIES="$INSTALL_DIR/lib/libcaffe2$LD_POSTFIX" \ -DTHNN_LIBRARIES="$INSTALL_DIR/lib/libTHNN$LD_POSTFIX" \ -DTHCUNN_LIBRARIES="$INSTALL_DIR/lib/libTHCUNN$LD_POSTFIX" \ -DTHS_LIBRARIES="$INSTALL_DIR/lib/libTHS$LD_POSTFIX" \ @@ -213,40 +210,44 @@ function build_nccl() { popd } -# purpusefully not using build() because we need ATen to build the same -# regardless of whether it is inside pytorch or not, so it +# purposefully not using build() because we need Caffe2 to build the same +# regardless of whether it is inside PyTorch or not, so it # cannot take any special flags -# special flags need to be part of the ATen build itself +# special flags need to be part of the Caffe2 build 
itself # # However, we do explicitly pass library paths when setup.py has already # detected them (to ensure that we have a consistent view between the -# PyTorch and ATen builds.) -function build_aten() { +# PyTorch and Caffe2 builds.) +function build_caffe2() { mkdir -p build pushd build ${CMAKE_VERSION} .. \ ${CMAKE_GENERATOR} \ -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ - -DNO_CUDA=$((1-$WITH_CUDA)) \ - -DNO_NNPACK=$((1-$WITH_NNPACK)) \ + -DBUILD_CAFFE2=OFF \ + -DBUILD_ATEN=ON \ + -DBUILD_PYTHON=OFF \ + -DBUILD_BINARY=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DUSE_CUDA=$WITH_CUDA \ + -DUSE_ROCM=$WITH_ROCM \ + -DUSE_NNPACK=$WITH_NNPACK \ -DCUDNN_INCLUDE_DIR=$CUDNN_INCLUDE_DIR \ -DCUDNN_LIB_DIR=$CUDNN_LIB_DIR \ -DCUDNN_LIBRARY=$CUDNN_LIBRARY \ - -DNO_MKLDNN=$((1-$WITH_MKLDNN)) \ + -DUSE_MKLDNN=$WITH_MKLDNN \ -DMKLDNN_INCLUDE_DIR=$MKLDNN_INCLUDE_DIR \ -DMKLDNN_LIB_DIR=$MKLDNN_LIB_DIR \ -DMKLDNN_LIBRARY=$MKLDNN_LIBRARY \ - -DATEN_NO_CONTRIB=1 \ -DCMAKE_INSTALL_PREFIX="$INSTALL_DIR" \ -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \ -DCMAKE_C_FLAGS="$USER_CFLAGS" \ -DCMAKE_CXX_FLAGS="$USER_CFLAGS" \ -DCMAKE_EXE_LINKER_FLAGS="$USER_LDFLAGS" \ - -DCMAKE_SHARED_LINKER_FLAGS="$USER_LDFLAGS" \ - -DWITH_ROCM="$WITH_ROCM" + -DCMAKE_SHARED_LINKER_FLAGS="$USER_LDFLAGS" # STOP!!! Are you trying to add a C or CXX flag? Add it - # to aten/CMakeLists.txt, not here. We need the vanilla - # cmake build to work. + # to CMakeLists.txt and aten/CMakeLists.txt, not here. + # We need the vanilla cmake build to work. ${CMAKE_INSTALL} -j"$NUM_JOBS" popd } @@ -264,9 +265,9 @@ for arg in "$@"; do pushd "$THIRD_PARTY_DIR" build gloo $GLOO_FLAGS popd - elif [[ "$arg" == "ATen" ]]; then - pushd "$BASE_DIR/aten" - build_aten + elif [[ "$arg" == "caffe2" ]]; then + pushd $BASE_DIR + build_caffe2 popd elif [[ "$arg" == "THD" ]]; then pushd "$TORCH_LIB_DIR" @@ -289,15 +290,15 @@ pushd torch/lib # binaries to torch/lib rm -rf "$INSTALL_DIR/lib/cmake" rm -rf "$INSTALL_DIR/lib/python" -cp "$INSTALL_DIR/lib"/* . 
+cp -r "$INSTALL_DIR/lib"/* . if [ -d "$INSTALL_DIR/lib64/" ]; then - cp "$INSTALL_DIR/lib64"/* . + cp -r "$INSTALL_DIR/lib64"/* . fi cp ../../aten/src/THNN/generic/THNN.h . cp ../../aten/src/THCUNN/generic/THCUNN.h . cp -r "$INSTALL_DIR/include" . if [ -d "$INSTALL_DIR/bin/" ]; then - cp "$INSTALL_DIR/bin/"/* . + cp -r "$INSTALL_DIR/bin/"/* . fi popd diff --git a/tools/cpp_build/build_all.sh b/tools/cpp_build/build_all.sh index fed4b9a9ddced..b11d59de2514a 100755 --- a/tools/cpp_build/build_all.sh +++ b/tools/cpp_build/build_all.sh @@ -2,6 +2,6 @@ set -ex SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" -source $SCRIPTPATH/build_aten.sh +source $SCRIPTPATH/build_caffe2.sh source $SCRIPTPATH/build_nanopb.sh source $SCRIPTPATH/build_libtorch.sh diff --git a/tools/cpp_build/build_aten.sh b/tools/cpp_build/build_caffe2.sh similarity index 50% rename from tools/cpp_build/build_aten.sh rename to tools/cpp_build/build_caffe2.sh index 95d57e6e5a1d0..6ea82fd65d1af 100755 --- a/tools/cpp_build/build_aten.sh +++ b/tools/cpp_build/build_caffe2.sh @@ -7,19 +7,23 @@ SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" pushd $SCRIPTPATH source ./build_common.sh -echo "Building ATen" +echo "Building Caffe2" -mkdir -p $ATEN_BUILDPATH -pushd $ATEN_BUILDPATH +mkdir -p $CAFFE2_BUILDPATH +pushd $CAFFE2_BUILDPATH -cmake -DNO_CUDA:BOOL=$NO_CUDA \ - -DAT_LINK_STYLE:STRING=SHARED \ +cmake -DUSE_CUDA=$((1-$NO_CUDA)) \ + -DBUILD_CAFFE2=OFF \ + -DBUILD_ATEN=ON \ + -DBUILD_PYTHON=OFF \ + -DBUILD_BINARY=OFF \ + -DBUILD_SHARED_LIBS=ON \ -DCMAKE_BUILD_TYPE:STRING=$BUILD_TYPE \ -DCMAKE_INSTALL_PREFIX:STRING=$INSTALL_PREFIX \ -DCMAKE_INSTALL_MESSAGE=NEVER \ -G "$GENERATE" \ - $PYTORCHPATH/aten -$MAKE -j "$JOBS" + $PYTORCHPATH/ +$MAKE -j "$JOBS" install popd popd diff --git a/tools/cpp_build/build_common.sh b/tools/cpp_build/build_common.sh index e16e87bf5704c..1b113191fb234 100755 --- a/tools/cpp_build/build_common.sh +++ b/tools/cpp_build/build_common.sh @@ -4,12 +4,14 @@ 
BUILD_PATH="${1:-$SCRIPTPATH/build}" INSTALL_PREFIX="$BUILD_PATH/install" PYTORCHPATH="$SCRIPTPATH/../.." -NO_CUDA=ON +NO_CUDA=1 +USE_CUDA=0 if [ -x "$(command -v nvcc)" ]; then - NO_CUDA=OFF + NO_CUDA=0 + USE_CUDA=1 fi -ATEN_BUILDPATH="$BUILD_PATH/aten" +CAFFE2_BUILDPATH="$BUILD_PATH/caffe2" NANOPB_BUILDPATH="$BUILD_PATH/nanopb" LIBTORCH_BUILDPATH="$BUILD_PATH/libtorch" diff --git a/tools/cpp_build/build_libtorch.sh b/tools/cpp_build/build_libtorch.sh index 72dabd1245ea7..b7ea07da8cd01 100755 --- a/tools/cpp_build/build_libtorch.sh +++ b/tools/cpp_build/build_libtorch.sh @@ -12,11 +12,12 @@ echo "Building Torch" mkdir -p $LIBTORCH_BUILDPATH pushd $LIBTORCH_BUILDPATH -cmake -DNO_CUDA:BOOL=${NO_CUDA:0} \ +cmake -DUSE_CUDA:BOOL=$USE_CUDA \ -DNO_API:BOOL=${NO_API:0} \ - -DATEN_PATH=$PYTORCHPATH/aten/ \ - -DATEN_BUILD_PATH=$ATEN_BUILDPATH \ + -DCAFFE2_PATH=$PYTORCHPATH/ \ + -DCAFFE2_BUILD_PATH=$CAFFE2_BUILDPATH \ -DNANOPB_BUILD_PATH=$NANOPB_BUILDPATH \ + -DINSTALL_PREFIX=$INSTALL_PREFIX \ -DCMAKE_BUILD_TYPE:STRING=$BUILD_TYPE \ -DCMAKE_INSTALL_PREFIX:STRING=$INSTALL_PREFIX \ -DCMAKE_INSTALL_MESSAGE=NEVER \ diff --git a/tools/cpp_build/libtorch/CMakeLists.txt b/tools/cpp_build/libtorch/CMakeLists.txt index 79fc9cdebf073..22d82d9bb12ed 100644 --- a/tools/cpp_build/libtorch/CMakeLists.txt +++ b/tools/cpp_build/libtorch/CMakeLists.txt @@ -1,4 +1,31 @@ -cmake_minimum_required(VERSION 3.0 FATAL_ERROR) +if (CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) + if (NOT BUILD_TORCH) + return() + endif() +else() + cmake_minimum_required(VERSION 3.0 FATAL_ERROR) + include(CMakeDependentOption) + option(USE_CUDA "Use CUDA" ON) + + # Legacy options, which we will eventually remove + cmake_dependent_option( + NO_CUDA "Legacy no CUDA" OFF + "USE_CUDA" ON) + + # Flag for shared dependencies + set(BUILD_TORCH ON) +endif() +if (NOT USE_CUDA) + set(NO_CUDA ON) +endif() + +# TODO: Enable tests on Mac as soon as possible +if (APPLE) + set(TORCH_BUILD_TEST OFF) +else() + set(TORCH_BUILD_TEST ON) +endif() 
+ cmake_policy(VERSION 3.0) set(CMAKE_CXX_STANDARD 11) @@ -7,17 +34,26 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) if (VERBOSE) - message(STATUS "ATEN_PATH is ${ATEN_PATH}") - message(STATUS "ATEN_BUILD_PATH is ${ATEN_BUILD_PATH}") + message(STATUS "CAFFE2_PATH is ${CAFFE2_PATH}") + message(STATUS "CAFFE2_BUILD_PATH is ${CAFFE2_BUILD_PATH}") + message(STATUS "INSTALL_PREFIX is ${INSTALL_PREFIX}") endif() -set(ATEN_INCLUDE_DIR "${ATEN_PATH}/src/") -set(ATEN_BUILD_INCLUDE_DIR "${ATEN_BUILD_PATH}/src/ATen") +set(CAFFE2_INCLUDE_DIR "${CAFFE2_PATH}") +set(CAFFE2_BUILD_INCLUDE_DIR "${CAFFE2_BUILD_PATH}") +set(CAFFE2_INSTALL_INCLUDE_DIR "${INSTALL_PREFIX}/include") +set(CAFFE2_INSTALL_SHARE_DIR "${INSTALL_PREFIX}/share") +set(CAFFE2_INSTALL_LIB_DIR "${INSTALL_PREFIX}/lib") set(TORCH_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../torch") -find_library(ATEN_CPU_LIBRARY ATen_cpu PATHS "${ATEN_BUILD_PATH}/src/ATen" NO_DEFAULT_PATH) -find_library(ATEN_CUDA_LIBRARY ATen_cuda PATHS "${ATEN_BUILD_PATH}/src/ATen" NO_DEFAULT_PATH) -find_library(NANOPB_LIBRARY protobuf-nanopb PATHS "${NANOPB_BUILD_PATH}" NO_DEFAULT_PATH) +find_library(CAFFE2_LIBRARY caffe2 + NAMES libcaffe2.so libcaffe2.dylib caffe2.lib + PATHS ${CAFFE2_INSTALL_LIB_DIR} NO_DEFAULT_PATH) +find_library(CAFFE2_GPU_LIBRARY caffe2_gpu + NAMES libcaffe2_gpu.so libcaffe2_gpu.dylib caffe2_gpu.lib + PATHS ${CAFFE2_INSTALL_LIB_DIR} NO_DEFAULT_PATH) +find_library(NANOPB_LIBRARY protobuf-nanopb + PATHS ${NANOPB_BUILD_PATH} NO_DEFAULT_PATH) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) @@ -25,15 +61,17 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) if(NOT NO_CUDA) set(CMAKE_MODULE_PATH - ${TORCH_SRC_DIR}/../aten/cmake - ${TORCH_SRC_DIR}/../aten/cmake/FindCUDA + ${INSTALL_PREFIX}/share/cmake + ${TORCH_SRC_DIR}/../cmake/Modules + ${TORCH_SRC_DIR}/../cmake/public + ${TORCH_SRC_DIR}/../cmake/Modules_CUDA_fix 
/usr/lib/x86_64-linux-gnu/ ${CMAKE_CURRENT_SOURCE_DIR}/src/TH/cmake ${CMAKE_MODULE_PATH}) set(CMAKE_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/ ${CMAKE_LIBRARY_PATH}) if(NOT CUDA_FOUND) - find_package(CUDA 5.5) + find_package(CUDA 7.0) endif() find_package(MAGMA) @@ -85,10 +123,8 @@ if(NOT NO_CUDA) ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libnvToolsExt.so ${CUDA_LIBRARIES}) - set(CUDA_INCLUDE_DIRS - ${CUDA_INCLUDE_DIRS} - "${ATEN_BUILD_PATH}/src/THC" - "${ATEN_PATH}/src/THC") + list(APPEND CUDA_INCLUDE_DIRS + ${CAFFE2_INSTALL_INCLUDE_DIR}/THC) endif() # RPATH stuff @@ -110,7 +146,7 @@ endif() # Generate files set(TOOLS_PATH "${TORCH_SRC_DIR}/../tools") -configure_file("${ATEN_PATH}/src/ATen/common_with_cwrap.py" +configure_file("${CAFFE2_PATH}/aten/src/ATen/common_with_cwrap.py" "${TOOLS_PATH}/shared/cwrap_common.py" COPYONLY) @@ -136,12 +172,11 @@ add_custom_command( "${TORCH_SRC_DIR}/csrc/jit/generated/aten_interned_strings.h" COMMAND python tools/setup_helpers/generate_code.py - --declarations-path "${ATEN_BUILD_PATH}/src/ATen/ATen/Declarations.yaml" + --declarations-path "${CAFFE2_INSTALL_SHARE_DIR}/ATen/Declarations.yaml" --nn-path "aten/src/" DEPENDS - "${ATEN_BUILD_PATH}/src/ATen/ATen/Declarations.yaml" - "${ATEN_PATH}/src/THNN/generic/THNN.h" - "${ATEN_PATH}/src/THCUNN/generic/THCUNN.h" + "${CAFFE2_INSTALL_SHARE_DIR}/ATen/Declarations.yaml" + "${CAFFE2_INSTALL_INCLUDE_DIR}/THNN/generic/THNN.h" "${TOOLS_PATH}/autograd/templates/VariableType.h" "${TOOLS_PATH}/autograd/templates/VariableType.cpp" "${TOOLS_PATH}/autograd/templates/Functions.h" @@ -256,23 +291,23 @@ endif() target_link_libraries(torch ${TORCH_CUDA_LIBRARIES} - ${ATEN_CPU_LIBRARY} + ${CAFFE2_LIBRARY} ${NANOPB_LIBRARY} ) if(NOT NO_CUDA) if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU") - target_link_libraries(torch -Wl,--no-as-needed ${ATEN_CUDA_LIBRARY} -Wl,--as-needed) + target_link_libraries(torch -Wl,--no-as-needed ${CAFFE2_GPU_LIBRARY} -Wl,--as-needed) else() - target_link_libraries(torch ${ATEN_CUDA_LIBRARY}) 
+ target_link_libraries(torch ${CAFFE2_GPU_LIBRARY}) endif() endif() target_include_directories(torch PUBLIC - "${ATEN_INCLUDE_DIR}" - "${ATEN_INCLUDE_DIR}/TH" - "${ATEN_BUILD_INCLUDE_DIR}" - "${ATEN_BUILD_PATH}/src/TH" + "${CAFFE2_INCLUDE_DIR}" + "${CAFFE2_BUILD_INCLUDE_DIR}" + "${CAFFE2_INSTALL_INCLUDE_DIR}" + "${CAFFE2_INSTALL_INCLUDE_DIR}/TH" "${TORCH_SRC_DIR}/.." "${CMAKE_CURRENT_SOURCE_DIR}") @@ -306,33 +341,35 @@ install(TARGETS torch LIBRARY DESTINATION "${TORCH_INSTALL_LIB_DIR}" ARCHIVE DESTINATION "${TORCH_INSTALL_LIB_DIR}") -# JIT Tests. TODO: Put into test/cpp/jit folder +if (TORCH_BUILD_TEST) + # JIT Tests. TODO: Put into test/cpp/jit folder -add_executable(test_jit ${TORCH_SRC_DIR}/csrc/jit/test_jit.cpp) + add_executable(test_jit ${TORCH_SRC_DIR}/csrc/jit/test_jit.cpp) -target_link_libraries(test_jit torch) + target_link_libraries(test_jit torch) -target_include_directories(test_jit PUBLIC - "${TORCH_SRC_DIR}/../third_party/catch/single_include") + target_include_directories(test_jit PUBLIC + "${TORCH_SRC_DIR}/../third_party/catch/single_include") -# API Tests + # API Tests -if (NOT NO_API) - set(TORCH_API_TEST_DIR "${TORCH_SRC_DIR}/../test/cpp/api") - - add_executable(test_api - ${TORCH_API_TEST_DIR}/container.cpp - ${TORCH_API_TEST_DIR}/integration.cpp - ${TORCH_API_TEST_DIR}/main.cpp - ${TORCH_API_TEST_DIR}/misc.cpp - ${TORCH_API_TEST_DIR}/module.cpp - ${TORCH_API_TEST_DIR}/optim.cpp - ${TORCH_API_TEST_DIR}/rnn.cpp - ${TORCH_API_TEST_DIR}/serialization.cpp) - - target_include_directories(test_api - PUBLIC - "${TORCH_SRC_DIR}/../third_party/catch/single_include") + if (NOT NO_API) + set(TORCH_API_TEST_DIR "${TORCH_SRC_DIR}/../test/cpp/api") - target_link_libraries(test_api torch) + add_executable(test_api + ${TORCH_API_TEST_DIR}/container.cpp + ${TORCH_API_TEST_DIR}/integration.cpp + ${TORCH_API_TEST_DIR}/main.cpp + ${TORCH_API_TEST_DIR}/misc.cpp + ${TORCH_API_TEST_DIR}/module.cpp + ${TORCH_API_TEST_DIR}/optim.cpp + ${TORCH_API_TEST_DIR}/rnn.cpp + 
${TORCH_API_TEST_DIR}/serialization.cpp) + + target_include_directories(test_api + PUBLIC + "${TORCH_SRC_DIR}/../third_party/catch/single_include") + + target_link_libraries(test_api torch) + endif() endif() diff --git a/tools/test_aten_install.sh b/tools/test_aten_install.sh index 21a9b4026284f..d2d5723550577 100755 --- a/tools/test_aten_install.sh +++ b/tools/test_aten_install.sh @@ -1,8 +1,10 @@ #!/bin/sh set -xe +rm -rf aten_build +rm -rf aten_install mkdir aten_build aten_install cd aten_build -cmake ../aten -DNO_CUDA=1 -DCMAKE_INSTALL_PREFIX=../aten_install +cmake ../aten -DUSE_CUDA=OFF -DCMAKE_INSTALL_PREFIX=../aten_install NUM_JOBS="$(getconf _NPROCESSORS_ONLN)" make -j"$NUM_JOBS" install cd .. diff --git a/torch/lib/THD/CMakeLists.txt b/torch/lib/THD/CMakeLists.txt index d317f8d62987e..a5536ceb7d8f9 100644 --- a/torch/lib/THD/CMakeLists.txt +++ b/torch/lib/THD/CMakeLists.txt @@ -44,8 +44,8 @@ FIND_PACKAGE(MPI) FIND_PACKAGE(Gloo) -FIND_PACKAGE(ATen REQUIRED) -INCLUDE_DIRECTORIES(${ATEN_INCLUDE_DIR}) +FIND_PACKAGE(Caffe2 REQUIRED) +INCLUDE_DIRECTORIES(${CAFFE2_INCLUDE_DIR}) IF(NO_CUDA) MESSAGE(STATUS "ignoring CUDA") @@ -149,7 +149,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) ADD_LIBRARY(THD STATIC ${all_cpp}) set_property(TARGET THD PROPERTY POSITION_INDEPENDENT_CODE ON) -FILE(WRITE "${CMAKE_INSTALL_PREFIX}/THD_deps.txt" "${ATEN_LIBRARIES};") +FILE(WRITE "${CMAKE_INSTALL_PREFIX}/THD_deps.txt" "${CAFFE2_LIBRARIES};") IF(MPI_FOUND) INCLUDE_DIRECTORIES(${MPI_INCLUDE_PATH}) @@ -182,7 +182,7 @@ IF(THD_WITH_TESTS) SET(test_executable_name "test_${test_name}") ADD_EXECUTABLE(${test_executable_name} "test/${test_source_file}") - TARGET_LINK_LIBRARIES(${test_executable_name} THD ${ATEN_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) + TARGET_LINK_LIBRARIES(${test_executable_name} THD ${CAFFE2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) SET_PROPERTY(TARGET ${test_executable_name} PROPERTY CXX_STANDARD 11) ADD_TEST(${test_name} ${test_executable_name}) ENDFOREACH() diff 
--git a/torch/lib/libshm/CMakeLists.txt b/torch/lib/libshm/CMakeLists.txt index 74ce9d1c328e1..ffeacedfbc603 100644 --- a/torch/lib/libshm/CMakeLists.txt +++ b/torch/lib/libshm/CMakeLists.txt @@ -1,8 +1,8 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR) CMAKE_POLICY(VERSION 2.6) -FIND_PACKAGE(ATen REQUIRED) -INCLUDE_DIRECTORIES(${ATEN_INCLUDE_DIR}) +FIND_PACKAGE(Caffe2 REQUIRED) +INCLUDE_DIRECTORIES(${CAFFE2_INCLUDE_DIR}) IF(NOT LIBSHM_INSTALL_LIB_SUBDIR) SET(LIBSHM_INSTALL_LIB_SUBDIR "lib" CACHE PATH "libshm install library directory") @@ -28,7 +28,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) SET_TARGET_PROPERTIES(shm PROPERTIES PREFIX "lib" IMPORT_PREFIX "lib") -TARGET_LINK_LIBRARIES(shm ${ATEN_LIBRARIES}) +TARGET_LINK_LIBRARIES(shm ${CAFFE2_LIBRARIES}) IF (UNIX AND NOT APPLE) INCLUDE(CheckLibraryExists) diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py index 96add112e59b8..6d2791e2b646a 100644 --- a/torch/utils/cpp_extension.py +++ b/torch/utils/cpp_extension.py @@ -300,7 +300,7 @@ def CppExtension(name, sources, *args, **kwargs): kwargs['library_dirs'] = library_dirs libraries = kwargs.get('libraries', []) - libraries.append('ATen_cpu') + libraries.append('caffe2') libraries.append('_C') kwargs['libraries'] = libraries @@ -343,8 +343,8 @@ def CUDAExtension(name, sources, *args, **kwargs): libraries = kwargs.get('libraries', []) libraries.append('cudart') if sys.platform == 'win32': - libraries.append('ATen_cpu') - libraries.append('ATen_cuda') + libraries.append('caffe2') + libraries.append('caffe2_gpu') libraries.append('_C') kwargs['libraries'] = libraries @@ -675,9 +675,9 @@ def _prepare_ldflags(extra_ldflags, with_cuda, verbose): torch_path = os.path.dirname(os.path.dirname(here)) lib_path = os.path.join(torch_path, 'lib') - extra_ldflags.append('ATen_cpu.lib') + extra_ldflags.append('caffe2.lib') if with_cuda: - extra_ldflags.append('ATen_cuda.lib') + extra_ldflags.append('caffe2_gpu.lib') extra_ldflags.append('_C.lib') 
extra_ldflags.append('/LIBPATH:{}'.format(python_lib_path)) extra_ldflags.append('/LIBPATH:{}'.format(lib_path))