Skip to content

Commit

Permalink
Add Rocm support (#168)
Browse files Browse the repository at this point in the history
  • Loading branch information
windstamp authored Mar 30, 2021
1 parent cd828e5 commit c690fc5
Show file tree
Hide file tree
Showing 12 changed files with 471 additions and 46 deletions.
53 changes: 45 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ ELSE()
cmake_minimum_required(VERSION 2.8)
ENDIF()

set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

project(ctc_release)

include_directories(include)
Expand All @@ -19,6 +21,12 @@ option(WITH_TORCH "compile warp-ctc with Torch." ${Torch_FOUND})
option(WITH_OMP "compile warp-ctc with OpenMP." ON)
option(BUILD_TESTS "build warp-ctc unit tests." ON)
option(BUILD_SHARED "build warp-ctc shared library." ON)
# Opt-in switch: build the GPU code path with AMD ROCm/HIP instead of CUDA.
option(WITH_ROCM "compile warp-ctc with ROCm (HIP)." OFF)

if(WITH_ROCM)
  # WARPCTC_WITH_HIP selects the HIP stream typedef in include/ctc.h.
  add_definitions(-DWARPCTC_WITH_HIP)
  # Loads cmake/hip.cmake through the CMAKE_MODULE_PATH entry set near the top
  # of this file.
  include(hip)
endif()

if(BUILD_SHARED)
set(WARPCTC_SHARED "SHARED")
Expand Down Expand Up @@ -141,30 +149,59 @@ function(windows_symbolic TARGET)
endforeach()
endfunction()

IF (WITH_GPU)
IF (WITH_GPU OR WITH_ROCM)

MESSAGE(STATUS "Building shared library with GPU support")
MESSAGE(STATUS "NVCC_ARCH_FLAGS" ${CUDA_NVCC_FLAGS})

IF (WITH_GPU)
MESSAGE(STATUS "NVCC_ARCH_FLAGS" ${CUDA_NVCC_FLAGS})
ENDIF()

if (WIN32)
SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler \"/wd 4068 /wd 4244 /wd 4267 /wd 4305 /wd 4819\"")
windows_symbolic(ctc_entrypoint SRCS ctc_entrypoint.cu PATH src)
CUDA_ADD_LIBRARY(warpctc ${WARPCTC_SHARED} src/.ctc_entrypoint.cu src/reduce.cu)
else()
CUDA_ADD_LIBRARY(warpctc ${WARPCTC_SHARED} src/ctc_entrypoint.cu src/reduce.cu)
IF (WITH_GPU)
CUDA_ADD_LIBRARY(warpctc ${WARPCTC_SHARED} src/ctc_entrypoint.cu src/reduce.cu)
ELSE()
HIP_ADD_LIBRARY(warpctc ${WARPCTC_SHARED} src/ctc_entrypoint.cu src/reduce.cu)
TARGET_LINK_LIBRARIES(warpctc PUBLIC ${ROCM_HIPRTC_LIB})
ENDIF()
endif(WIN32)

IF (!WITH_TORCH)
TARGET_LINK_LIBRARIES(warpctc ${CUDA_curand_LIBRARY})
MESSAGE(STATUS "Link rand library")

IF (WITH_GPU)
MESSAGE(STATUS "Link cuda rand library: ${CUDA_curand_LIBRARY}")
TARGET_LINK_LIBRARIES(warpctc ${CUDA_curand_LIBRARY})
ELSE()
MESSAGE(STATUS "Link hip rand library: ${hiprand_LIBRARY_DIRS}")
TARGET_LINK_LIBRARIES(warpctc ${hiprand_LIBRARY_DIRS}/libhiprand.so)
ENDIF()
ENDIF()

if(BUILD_TESTS)
add_executable(test_cpu tests/test_cpu.cpp )
MESSAGE(STATUS "Build tests")

IF (WITH_GPU)
add_executable(test_cpu tests/test_cpu.cpp)
ELSE()
add_executable(test_cpu tests/test_cpu.cpp)
ENDIF()


TARGET_LINK_LIBRARIES(test_cpu warpctc)
SET_TARGET_PROPERTIES(test_cpu PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} --std=c++11")

cuda_add_executable(test_gpu tests/test_gpu.cu)
TARGET_LINK_LIBRARIES(test_gpu warpctc ${CUDA_curand_LIBRARY})
IF (WITH_GPU)
cuda_add_executable(test_gpu tests/test_gpu.cu)
TARGET_LINK_LIBRARIES(test_gpu warpctc ${CUDA_curand_LIBRARY})
ELSE()
hip_add_executable(test_gpu tests/test_gpu.cu)
TARGET_LINK_LIBRARIES(test_gpu warpctc ${hiprand_LIBRARY_DIRS}/libhiprand.so)
ENDIF()
endif(BUILD_TESTS)

INSTALL(TARGETS warpctc
Expand All @@ -174,7 +211,7 @@ IF (WITH_GPU)

INSTALL(FILES include/ctc.h DESTINATION "include")

IF (WITH_TORCH)
IF (WITH_TORCH AND WITH_GPU)
MESSAGE(STATUS "Building Torch Bindings with GPU support")
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS} "${CUDA_TOOLKIT_ROOT_DIR}/samples/common/inc")
INCLUDE_DIRECTORIES(${Torch_INSTALL_INCLUDE} ${Torch_INSTALL_INCLUDE}/TH ${Torch_INSTALL_INCLUDE}/THC)
Expand Down
80 changes: 80 additions & 0 deletions cmake/hip.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Nothing to configure unless the ROCm build was requested.
if(NOT WITH_ROCM)
  return()
endif()

# Pick the ROCm installation root: the ROCM_PATH environment variable wins when
# it is defined, otherwise fall back to /opt/rocm. These are cache entries set
# without FORCE, so a value already present in the cache is left untouched.
if(DEFINED ENV{ROCM_PATH})
  set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed")
else()
  set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed")
endif()

# HIP and its clang toolchain are derived from the ROCm root in both cases.
set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed")
set(HIP_CLANG_PATH ${ROCM_PATH}/llvm/bin CACHE PATH "Path to which clang has been installed")

# Put HIP's own cmake directory in front of any existing module search path.
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})

# HIP is mandatory for a ROCm build; configuration stops here if it is missing.
find_package(HIP REQUIRED)

# Report what was picked up so a misconfigured toolchain is easy to spot.
message(STATUS "HIP version: ${HIP_VERSION}")
message(STATUS "HIP_CLANG_PATH: ${HIP_CLANG_PATH}")
message(STATUS "HIP_ROOT_DIR: ${HIP_ROOT_DIR}")

# Make the ROCm-wide headers visible to every target in this directory.
include_directories("${ROCM_PATH}/include")

# find_package_and_include(<package>)
#
# Requires <package>, logs its version, and adds
# ${ROCM_PATH}/<package>/include to the directory-wide include path.
#
# This stays a macro rather than a function so that the variables set by
# find_package() remain visible to the including scope (the top-level
# CMakeLists.txt reads hiprand_LIBRARY_DIRS, for example).
macro(find_package_and_include PACKAGE_NAME)
  find_package(${PACKAGE_NAME} REQUIRED)
  message(STATUS "${PACKAGE_NAME} version: ${${PACKAGE_NAME}_VERSION}")
  include_directories("${ROCM_PATH}/${PACKAGE_NAME}/include")
endmacro()

# ROCm component packages required for the HIP build.
find_package_and_include(hiprand)
find_package_and_include(rocrand)
find_package_and_include(rocthrust)

# Host-compiler flags for HIP: define the HCC platform macro for both C and
# C++, and additionally point Thrust at its HIP device backend for C++.
string(APPEND CMAKE_C_FLAGS " -D__HIP_PLATFORM_HCC__")
string(APPEND CMAKE_CXX_FLAGS
       " -D__HIP_PLATFORM_HCC__"
       " -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP")
set(THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_HIP)

# Base flag list for HIP sources; HIP_HCC_FLAGS and HIP_CLANG_FLAGS are seeded
# from it further down in this file.
list(APPEND HIP_CXX_FLAGS
  -fPIC
  -D__HIP_PLATFORM_HCC__=1
  # Note(qili93): turn off HIP's implicit half conversions. They conflict with
  # float16.h, where platform::float16 overloads std::is_floating_point and
  # std::is_integer.
  -D__HIP_NO_HALF_CONVERSIONS__=1
  # Warnings silenced for the HIP compiler.
  -Wno-macro-redefined
  -Wno-inconsistent-missing-override
  -Wno-exceptions
  -Wno-shift-count-negative
  -Wno-shift-count-overflow
  -Wno-unused-command-line-argument
  -Wno-duplicate-decl-specifier
  -Wno-implicit-int-float-conversion
  -Wno-pass-failed
  -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP
  -std=c++14
)

# Debug builds: full debug info and no optimization for HIP sources, plus
# profiling-friendly debug info passed through HIP_HIPCC_FLAGS.
if(CMAKE_BUILD_TYPE MATCHES Debug)
  list(APPEND HIP_CXX_FLAGS -g2 -O0)
  list(APPEND HIP_HIPCC_FLAGS -fdebug-info-for-profiling)
endif()

# AMD GPU architectures to generate device code for. Defaults to gfx906, the
# value that used to be hard-coded here; override with e.g.
#   -DWARPCTC_AMDGPU_TARGETS="gfx906;gfx908"
set(WARPCTC_AMDGPU_TARGETS "gfx906" CACHE STRING
    "Semicolon-separated list of AMD GPU architectures to build for")

# Both compiler flavours start from the common HIP flag list.
set(HIP_HCC_FLAGS ${HIP_CXX_FLAGS})
set(HIP_CLANG_FLAGS ${HIP_CXX_FLAGS})

# Ask hcc to generate device code during compilation so we can use
# host linker to link.
list(APPEND HIP_HCC_FLAGS -fno-gpu-rdc)
list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc)

# One --amdgpu-target flag per requested architecture.
foreach(warpctc_amdgpu_target ${WARPCTC_AMDGPU_TARGETS})
  list(APPEND HIP_HCC_FLAGS --amdgpu-target=${warpctc_amdgpu_target})
  list(APPEND HIP_CLANG_FLAGS --amdgpu-target=${warpctc_amdgpu_target})
endforeach()


# The HIP runtime library is named amdhip64 when HIP_COMPILER is clang and
# hip_hcc for any other compiler value.
if(NOT HIP_COMPILER STREQUAL clang)
  set(hip_library_name hip_hcc)
else()
  set(hip_library_name amdhip64)
endif()
message(STATUS "HIP library name: ${hip_library_name}")

# Locate the HIP runtime library that the warpctc target links against.
# ${HIP_PATH}/lib is searched first; ${ROCM_PATH}/lib is an additional hint for
# installations that keep the library directly under the ROCm root.
find_library(ROCM_HIPRTC_LIB ${hip_library_name}
             HINTS ${HIP_PATH}/lib ${ROCM_PATH}/lib)

# Fail here with a clear message instead of letting a *-NOTFOUND value reach
# target_link_libraries() and surface later as an obscure error.
if(NOT ROCM_HIPRTC_LIB)
  message(FATAL_ERROR
          "Could not find the HIP runtime library '${hip_library_name}' "
          "(searched ${HIP_PATH}/lib and ${ROCM_PATH}/lib). "
          "Set ROCM_PATH to the ROCm installation root.")
endif()
message(STATUS "ROCM_HIPRTC_LIB: ${ROCM_HIPRTC_LIB}")
11 changes: 9 additions & 2 deletions include/ctc.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,20 @@
#define API_REFERENCE
#endif

#include <stdio.h>

#ifdef __cplusplus
#include <cstddef>
extern "C" {
#endif

#ifdef WARPCTC_WITH_HIP
//forward declare of HIP typedef to avoid needing to pull in HIP headers
typedef struct ihipStream_t* GPUstream;
#else
//forward declare of CUDA typedef to avoid needing to pull in CUDA headers
typedef struct CUstream_st* CUstream;
typedef struct CUstream_st* GPUstream;
#endif

typedef enum {
CTC_STATUS_SUCCESS = 0,
Expand Down Expand Up @@ -58,7 +65,7 @@ struct ctcOptions {
unsigned int num_threads;

/// used when loc == CTC_GPU, which stream the kernels should be launched in
CUstream stream;
GPUstream stream;
};

/// the label value/index that the CTC calculation should use as the blank label
Expand Down
1 change: 1 addition & 0 deletions include/detail/ctc_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <cmath>

#include "hostdevice.h"
#include "type_defs.h"

namespace ctc_helper {

Expand Down
Loading

0 comments on commit c690fc5

Please sign in to comment.