Skip to content

Commit

Permalink
Merge pull request #475 from drnikolaev/caffe-0.17
Browse files Browse the repository at this point in the history
0.16.6 release
  • Loading branch information
drnikolaev authored Jan 31, 2018
2 parents 52ccc5c + cc79125 commit 1431f46
Show file tree
Hide file tree
Showing 211 changed files with 1,723 additions and 6,042 deletions.
12 changes: 4 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,16 @@ env:
# envvar defaults:
# WITH_CMAKE: false
# WITH_PYTHON3: false
# WITH_IO: true
# WITH_CUDA: false
# WITH_CUDA: const true since v0.17
# WITH_CUDNN: false
- BUILD_NAME="default-make"
# - BUILD_NAME="python3-make" WITH_PYTHON3=true
- BUILD_NAME="no-io-make" WITH_IO=false
- BUILD_NAME="cuda-make" WITH_CUDA=true
- BUILD_NAME="cudnn-make" WITH_CUDA=true WITH_CUDNN=true
- BUILD_NAME="cudnn-make" WITH_CUDNN=true

- BUILD_NAME="default-cmake" WITH_CMAKE=true
- BUILD_NAME="python3-cmake" WITH_CMAKE=true WITH_PYTHON3=true
- BUILD_NAME="no-io-cmake" WITH_CMAKE=true WITH_IO=false
- BUILD_NAME="cuda-cmake" WITH_CMAKE=true WITH_CUDA=true
- BUILD_NAME="cudnn-cmake" WITH_CMAKE=true WITH_CUDA=true WITH_CUDNN=true
- BUILD_NAME="cudnn-cmake" WITH_CMAKE=true WITH_CUDNN=true
- BUILD_NAME="cudnn-python3-cmake" WITH_CMAKE=true WITH_CUDNN=true WITH_PYTHON3=true

cache:
apt: true
Expand Down
2 changes: 1 addition & 1 deletion 3rdparty/half_float/half.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@
#include "caffe/util/gpu_math_functions.cuh"
#endif

#if !defined(CPU_ONLY) && defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__)
#define CAFFE_UTIL_HD __host__ __device__
#define CAFFE_UTIL_IHD __inline__ __host__ __device__
#else
Expand Down
17 changes: 8 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,20 @@ include(cmake/Summary.cmake)
include(cmake/ConfigGen.cmake)

# ---[ Options
caffe_option(CPU_ONLY "Build Caffe without CUDA support" OFF) # TODO: rename to USE_CUDA
caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON IF NOT CPU_ONLY)
caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON)

# USE_NCCL: Build Caffe with NCCL Library support
# Regular ON/OFF option doesn't work here because we need to recognize 3 states:
# 1. User didn't set USE_NCCL option =>
# 1.1 If CPU_ONLY is ON we do nothing.
# 1.2 If CPU_ONLY is OFF we *quietly* try to find it and use if found; do nothing otherwise.
# 1.1 We *quietly* try to find it and use if found; do nothing otherwise.
# 2. User explicitly set USE_NCCL=ON option =>
# 1.1 If CPU_ONLY is ON we do nothing (it's higher priority).
# 2.1 If CPU_ONLY is OFF we try to find it with *required* option, thus CMake fails if not found.
# 2.1 We try to find it with *required* option, thus CMake fails if not found.
# 3. User explicitly set USE_NCCL=OFF option => we do nothing.
SET(USE_NCCL)
if(DEFINED USE_NCCL)
STRING(TOUPPER "${USE_NCCL}" USE_NCCL)
set(USE_NCCL "NONE" CACHE STRING "Link Caffe with NCCL Library for Multi-GPU support")
if(USE_NCCL STREQUAL "NONE")
set(USE_NCCL_SET OFF)
else()
set(USE_NCCL_SET ON)
endif()

caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON)
Expand Down
68 changes: 33 additions & 35 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ THIRDPARTY_DIR := ./3rdparty

# All of the directories containing code.
SRC_DIRS := $(shell find * -type d -exec bash -c "find {} -maxdepth 1 \
\( -name '*.cpp' -o -name '*.proto' \) | grep -q ." \; -print)
\( -name '*.cpp' -o -name '*.proto' \) | grep -q ." \; -print 2>/dev/null)

# The target shared library name
LIBRARY_NAME := $(PROJECT)$(LIBRARY_NAME_SUFFIX)
Expand Down Expand Up @@ -179,19 +179,17 @@ CUDA_LIB_DIR :=
# add <cuda>/lib64 only if it exists
ifneq ("$(wildcard $(CUDA_DIR)/lib64)","")
CUDA_LIB_DIR += $(CUDA_DIR)/lib64
CUDA_LIB_DIR += /usr/lib/nvidia-384 /usr/lib/nvidia-381 /usr/lib/nvidia-375 /usr/lib/nvidia-367 /usr/lib/nvidia-361 /usr/lib/nvidia-352
CUDA_LIB_DIR += /usr/lib/nvidia-396 /usr/lib/nvidia-390 /usr/lib/nvidia-387 /usr/lib/nvidia-384 /usr/lib/nvidia-381 /usr/lib/nvidia-375 /usr/lib/nvidia-367 /usr/lib/nvidia-361 /usr/lib/nvidia-352
endif
CUDA_LIB_DIR += $(CUDA_DIR)/lib

INCLUDE_DIRS += $(BUILD_INCLUDE_DIR) ./src ./include $(THIRDPARTY_DIR)
ifneq ($(CPU_ONLY), 1)
INCLUDE_DIRS += $(CUDA_INCLUDE_DIR)
LIBRARY_DIRS += $(CUDA_LIB_DIR)
LIBRARIES := cudart cublas curand
INCLUDE_DIRS += $(BUILD_INCLUDE_DIR) ./src ./include $(THIRDPARTY_DIR) /usr/include/hdf5/serial
INCLUDE_DIRS += $(CUDA_INCLUDE_DIR)
LIBRARY_DIRS += $(CUDA_LIB_DIR)
LIBRARIES := cudart cublas curand
ifneq ($(NO_NVML), 1)
LIBRARIES += nvidia-ml
endif
endif

# Note: libturbojpeg has a packaging bug. Workaround:
# $ sudo ln -s /usr/lib/x86_64-linux-gnu/libturbojpeg.so.0 /usr/lib/x86_64-linux-gnu/libturbojpeg.so
Expand Down Expand Up @@ -222,7 +220,16 @@ ifeq ($(USE_OPENCV), 1)
endif

endif
PYTHON_LIBRARIES ?= boost_python python2.7 boost_regex

# Detect the version of whichever `python` the shell resolves.
# `python --version` is expected to print something like "Python 2.7.12" (stderr is
# merged into stdout via 2>&1); replacing the dots with spaces and taking words 2-4
# leaves "<major> <minor> <patch>".
python_version_full := $(wordlist 2,4,$(subst ., ,$(shell python --version 2>&1)))
python_version_major := $(word 1,${python_version_full})
python_version_minor := $(word 2,${python_version_full})
# NOTE(review): the patch component is not referenced in the visible part of this file.
python_version_patch := $(word 3,${python_version_full})
# For Python 3 an "m" suffix is appended to the python library name below
# (e.g. python3.5m); for any other major version the suffix is not assigned here.
ifeq ($(python_version_major), 3)
python_lib_suffix := m
endif

# Default Python link libraries, derived from the detected version. `?=` only assigns
# when PYTHON_LIBRARIES has not already been defined.
PYTHON_LIBRARIES ?= boost_python-py${python_version_major}${python_version_minor} python${python_version_major}.${python_version_minor}${python_lib_suffix} boost_regex
WARNINGS := -Wall -Wno-sign-compare

##############################
Expand Down Expand Up @@ -294,23 +301,21 @@ endif
# libstdc++ for NVCC compatibility on OS X >= 10.9 with CUDA < 7.0
ifeq ($(OSX), 1)
CXX := /usr/bin/clang++
ifneq ($(CPU_ONLY), 1)
CUDA_VERSION := $(shell $(CUDA_DIR)/bin/nvcc -V | grep -o 'release [0-9.]*' | grep -o '[0-9.]*')
ifeq ($(shell echo | awk '{exit $(CUDA_VERSION) < 7.0;}'), 1)
CXXFLAGS += -stdlib=libstdc++
LINKFLAGS += -stdlib=libstdc++
endif
# clang throws this warning for cuda headers
WARNINGS += -Wno-unneeded-internal-declaration
# 10.11 strips DYLD_* env vars so link CUDA (rpath is available on 10.5+)
OSX_10_OR_LATER := $(shell [ $(OSX_MAJOR_VERSION) -ge 10 ] && echo true)
OSX_10_5_OR_LATER := $(shell [ $(OSX_MINOR_VERSION) -ge 5 ] && echo true)
ifeq ($(OSX_10_OR_LATER),true)
ifeq ($(OSX_10_5_OR_LATER),true)
LDFLAGS += -Wl,-rpath,$(CUDA_LIB_DIR)
endif
endif
endif
CUDA_VERSION := $(shell $(CUDA_DIR)/bin/nvcc -V | grep -o 'release [0-9.]*' | grep -o '[0-9.]*')
ifeq ($(shell echo | awk '{exit $(CUDA_VERSION) < 7.0;}'), 1)
CXXFLAGS += -stdlib=libstdc++
LINKFLAGS += -stdlib=libstdc++
endif
# clang throws this warning for cuda headers
WARNINGS += -Wno-unneeded-internal-declaration
# 10.11 strips DYLD_* env vars so link CUDA (rpath is available on 10.5+)
OSX_10_OR_LATER := $(shell [ $(OSX_MAJOR_VERSION) -ge 10 ] && echo true)
OSX_10_5_OR_LATER := $(shell [ $(OSX_MINOR_VERSION) -ge 5 ] && echo true)
ifeq ($(OSX_10_OR_LATER),true)
ifeq ($(OSX_10_5_OR_LATER),true)
LDFLAGS += -Wl,-rpath,$(CUDA_LIB_DIR)
endif
endif
# gtest needs to use its own tuple to not conflict with clang
COMMON_FLAGS += -DGTEST_USE_OWN_TR1_TUPLE=1
# boost::thread is called boost_thread-mt to mark multithreading on OS X
Expand Down Expand Up @@ -376,15 +381,8 @@ ifeq ($(ALLOW_LMDB_NOLOCK), 1)
endif
endif

# CPU-only configuration
ifeq ($(CPU_ONLY), 1)
OBJS := $(PROTO_OBJS) $(CXX_OBJS)
TEST_OBJS := $(TEST_CXX_OBJS)
TEST_BINS := $(TEST_CXX_BINS)
ALL_WARNS := $(ALL_CXX_WARNS)
TEST_FILTER := --gtest_filter="-*GPU*"
COMMON_FLAGS += -DCPU_ONLY
endif
# New place for HDF5
LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial

ifeq ($(NO_NVML), 1)
COMMON_FLAGS += -DNO_NVML=1
Expand Down
3 changes: 0 additions & 3 deletions Makefile.config.example
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@
# See https://github.com/NVIDIA/nccl
# USE_NCCL := 1

# CPU-only switch (uncomment to build without GPU support).
# Disables FP16 support.
# CPU_ONLY := 1
# Builds tests with 16 bit float support in addition to 32 and 64 bit.
# TEST_FP16 := 1

Expand Down
Binary file added NVCaffe-User-Guide.pdf
Binary file not shown.
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ Here are the major features:
* **Mixed-precision support**. It allows storing and/or computing data in either
64, 32 or 16 bit formats. Precision can be defined for every layer (forward and
backward passes might be different too), or it can be set for the whole Net.
* **Integration with [cuDNN](https://developer.nvidia.com/cudnn) v6**.
* **Integration with [cuDNN](https://developer.nvidia.com/cudnn) v7**.
* **Automatic selection of the best cuDNN convolution algorithm**.
* **Integration with v1.3.4 of [NCCL library](https://github.com/NVIDIA/nccl)**
* **Integration with v2.2 of [NCCL library](https://github.com/NVIDIA/nccl)**
for improved multi-GPU scaling.
* **Optimized GPU memory management** for data and parameters storage, I/O buffers
and workspace for convolutional layers.
Expand All @@ -40,3 +40,11 @@ Please cite Caffe in your publications if it helps your research:
Title = {Caffe: Convolutional Architecture for Fast Feature Embedding},
Year = {2014}
}

## Useful notes

The libturbojpeg library has been used since 0.16.5. Its package has a packaging bug; please run the following commands to work around it (required for the Makefile build, optional for CMake):
```
sudo apt-get install libturbojpeg libturbojpeg-dev
sudo ln -s /usr/lib/x86_64-linux-gnu/libturbojpeg.so.0.1.0 /usr/lib/x86_64-linux-gnu/libturbojpeg.so
```
4 changes: 0 additions & 4 deletions cmake/Cuda.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
if(CPU_ONLY)
return()
endif()

# Known NVIDIA GPU architectures Caffe can be compiled for.
# This list will be used for CUDA_ARCH_NAME = All option
set(Caffe_known_gpu_archs "30 35 50 52 60 61 70")
Expand Down
36 changes: 20 additions & 16 deletions cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
# This list is required for static linking and exported to CaffeConfig.cmake
set(Caffe_LINKER_LIBS "")

find_package(PythonInterp ${python_version})

# ---[ Boost
find_package(Boost 1.54 REQUIRED COMPONENTS system thread filesystem regex python-py${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR})
set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}_FOUND})
find_package(Boost 1.54 REQUIRED COMPONENTS system thread filesystem regex)
include_directories(SYSTEM ${Boost_INCLUDE_DIR})
list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})

Expand Down Expand Up @@ -64,12 +61,7 @@ list(APPEND Caffe_LINKER_LIBS ${JPEGTurbo_LIBRARIES})
# ---[ CUDA
include(cmake/Cuda.cmake)
if(NOT HAVE_CUDA)
if(CPU_ONLY)
message(STATUS "-- CUDA is disabled. Building without it...")
else()
message(WARNING "-- CUDA is not detected by cmake. Building without it...")
endif()

message(SEND_ERROR "-- CUDA is not detected by cmake. Building without it...")
# TODO: remove this not cross platform define in future. Use caffe_config.h instead.
add_definitions(-DCPU_ONLY)
endif()
Expand Down Expand Up @@ -113,6 +105,20 @@ endif()

# ---[ Python
if(BUILD_python)
find_package(PythonInterp ${python_version})

# Locate the Boost.Python library matching the detected Python interpreter version.
# Several spellings of the library name are tried, in the order listed.
# NOTE: despite its *_FOUND name, Boost_PYTHON_FOUND receives the find_library result
# (the library path on success) and is also used as the found-flag in the
# PYTHONLIBS/NUMPY check that follows.
find_library(Boost_PYTHON_FOUND
  NAMES
    python-py${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}
    boost_python-py${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}
    boost_python${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}
  PATHS ${LIBDIR})
# Test truthiness instead of comparing against the literal "<var>-NOTFOUND" string:
# if() already treats *-NOTFOUND as false, and this way an empty or otherwise false
# value is reported as "not found" rather than being appended to the linker libraries.
if(NOT Boost_PYTHON_FOUND)
  message(SEND_ERROR "Could NOT find Boost Python Library")
else()
  message(STATUS "Found Boost Python Library ${Boost_PYTHON_FOUND}")
  list(APPEND Caffe_LINKER_LIBS ${Boost_PYTHON_FOUND})
endif()

find_package(PythonLibs ${python_version})
find_package(NumPy 1.7.1)
if(PYTHONLIBS_FOUND AND NUMPY_FOUND AND Boost_PYTHON_FOUND)
Expand Down Expand Up @@ -147,14 +153,12 @@ if(BUILD_docs)
endif()

# ---[ NCCL
if(DEFINED USE_NCCL)
if(USE_NCCL AND NOT CPU_ONLY)
if(USE_NCCL_SET)
if(USE_NCCL)
find_package(NCCL REQUIRED)
endif()
else()
if(NOT CPU_ONLY)
find_package(NCCL)
endif()
find_package(NCCL)
endif()
if(NCCL_FOUND)
add_definitions(-DUSE_NCCL)
Expand All @@ -163,7 +167,7 @@ if(NCCL_FOUND)
endif()

# ---[ NVML
if(NOT CPU_ONLY AND NOT NO_NVML)
if(NOT NO_NVML)
find_package(NVML)
endif()
if(NVML_FOUND)
Expand Down
14 changes: 7 additions & 7 deletions cmake/Summary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ function(caffe_print_configuration_summary)
caffe_status(" BUILD_python : ${BUILD_python}")
caffe_status(" BUILD_matlab : ${BUILD_matlab}")
caffe_status(" BUILD_docs : ${BUILD_docs}")
caffe_status(" CPU_ONLY : ${CPU_ONLY}")
caffe_status(" USE_LEVELDB : ${USE_LEVELDB}")
caffe_status(" USE_LMDB : ${USE_LMDB}")
caffe_status(" ALLOW_LMDB_NOLOCK : ${ALLOW_LMDB_NOLOCK}")
Expand Down Expand Up @@ -145,13 +144,14 @@ function(caffe_print_configuration_summary)
else()
caffe_status(" cuDNN : Disabled")
endif()
if(NOT DEFINED USE_NCCL)
caffe_status(" NCCL : " NCCL_FOUND THEN "Yes (ver. ${NCCL_VERSION})" ELSE "Not
found")
elseif(USE_NCCL)
caffe_status(" NCCL : " NCCL_FOUND THEN "Yes (ver. ${NCCL_VERSION})" ELSE "Not found")
if(USE_NCCL_SET)
if(USE_NCCL)
caffe_status(" NCCL : " NCCL_FOUND THEN "Yes (ver. ${NCCL_VERSION})" ELSE "Not found")
else()
caffe_status(" NCCL : Disabled")
endif()
else()
caffe_status(" NCCL : Disabled")
caffe_status(" NCCL : " NCCL_FOUND THEN "Yes (ver. ${NCCL_VERSION})" ELSE "Not found (not requested)")
endif()

if(NVML_FOUND)
Expand Down
2 changes: 1 addition & 1 deletion docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ Pycaffe and Matcaffe interfaces have their own natural needs.

**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v4; older versions are supported in older Caffe.

**CPU-only Caffe**: for cold-brewed CPU-only Caffe uncomment the `CPU_ONLY := 1` flag in `Makefile.config` to configure and build Caffe without CUDA. This is helpful for cloud or cluster deployment.
**CPU-only Caffe**: not supported starting from v0.17. In older versions, uncomment the `CPU_ONLY := 1` flag in `Makefile.config` to configure and build cold-brewed CPU-only Caffe without CUDA. This is helpful for cloud or cluster deployment.

### CUDA and BLAS

Expand Down
5 changes: 0 additions & 5 deletions examples/cpp_classification/classification.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,7 @@ Classifier::Classifier(const string& model_file,
const string& trained_file,
const string& mean_file,
const string& label_file) {
#ifdef CPU_ONLY
Caffe::set_mode(Caffe::CPU);
#else
Caffe::set_mode(Caffe::GPU);
#endif

/* Load the network. */
net_.reset(new Net(model_file, TEST));
net_->CopyTrainedLayersFrom(trained_file);
Expand Down
Loading

0 comments on commit 1431f46

Please sign in to comment.