-
Notifications
You must be signed in to change notification settings - Fork 3.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ROCm] add support for ROCm/HIP device #6086
base: master
Are you sure you want to change the base?
Changes from all commits
01ff268
ac966a5
6ae6432
5b87bcd
3ad89f8
7b8b6a0
62aa30b
bb27c55
58ace9c
0bc1cfb
a7c9653
96e3a52
cb7623a
9ba27bb
5ba59b8
c0abd17
e7129a0
eb0036f
dbd972e
4cd0dea
3ad2482
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/bash | ||
# Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
for DIR in ./src ./include | ||
do | ||
for EXT in cpp h hpp cu | ||
do | ||
find ${DIR} -name "*.${EXT}" -exec sh -c ' | ||
echo "hipifying $1 in-place" | ||
hipify-perl "$1" -inplace & | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where do we get There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is installed when installing HIP. |
||
' sh {} \; | ||
done | ||
done | ||
|
||
echo "waiting for all hipify-perl invocations to finish" | ||
wait |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -463,3 +463,6 @@ dask-worker-space/ | |
*.pub | ||
*.rdp | ||
*_rsa | ||
|
||
# hipify-perl -inplace leaves behind *.prehip files | ||
*.prehip |
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -4,6 +4,7 @@ option(USE_GPU "Enable GPU-accelerated training" OFF) | |||
option(USE_SWIG "Enable SWIG to generate Java API" OFF) | ||||
option(USE_TIMETAG "Set to ON to output time costs" OFF) | ||||
option(USE_CUDA "Enable CUDA-accelerated training " OFF) | ||||
option(USE_ROCM "Enable ROCM-accelerated training " OFF) | ||||
option(USE_DEBUG "Set to ON for Debug mode" OFF) | ||||
option(USE_SANITIZER "Use sanitizer flags" OFF) | ||||
set( | ||||
|
@@ -160,6 +161,11 @@ if(USE_CUDA) | |||
set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE) | ||||
endif() | ||||
|
||||
if(USE_ROCM) | ||||
enable_language(HIP) | ||||
set(USE_OPENMP ON CACHE BOOL "ROCM requires OpenMP" FORCE) | ||||
endif() | ||||
|
||||
if(USE_OPENMP) | ||||
if(APPLE) | ||||
find_package(OpenMP) | ||||
|
@@ -302,6 +308,85 @@ if(USE_CUDA) | |||
endforeach() | ||||
endif() | ||||
|
||||
if(USE_ROCM) | ||||
find_package(HIP) | ||||
include_directories(${HIP_INCLUDE_DIRS}) | ||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__") | ||||
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} ${OpenMP_CXX_FLAGS} -fPIC -Wall") | ||||
|
||||
# avoid warning: unused variable 'mask' due to __shfl_down_sync work-around | ||||
set(DISABLED_WARNINGS "${DISABLED_WARNINGS} -Wno-unused-variable") | ||||
# avoid warning: 'hipHostAlloc' is deprecated: use hipHostMalloc instead | ||||
set(DISABLED_WARNINGS "${DISABLED_WARNINGS} -Wno-deprecated-declarations") | ||||
# avoid many warnings about missing overrides | ||||
set(DISABLED_WARNINGS "${DISABLED_WARNINGS} -Wno-inconsistent-missing-override") | ||||
# avoid warning: shift count >= width of type in feature_histogram.hpp | ||||
set(DISABLED_WARNINGS "${DISABLED_WARNINGS} -Wno-shift-count-overflow") | ||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${DISABLED_WARNINGS}") | ||||
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} ${DISABLED_WARNINGS}") | ||||
|
||||
if(USE_DEBUG) | ||||
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -g -O0") | ||||
else() | ||||
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -O3") | ||||
endif() | ||||
message(STATUS "CMAKE_HIP_FLAGS: ${CMAKE_HIP_FLAGS}") | ||||
|
||||
add_definitions(-DUSE_CUDA) | ||||
|
||||
set( | ||||
BASE_DEFINES | ||||
-DPOWER_FEATURE_WORKGROUPS=12 | ||||
-DUSE_CONSTANT_BUF=0 | ||||
) | ||||
set( | ||||
ALLFEATS_DEFINES | ||||
${BASE_DEFINES} | ||||
-DENABLE_ALL_FEATURES | ||||
) | ||||
set( | ||||
FULLDATA_DEFINES | ||||
${ALLFEATS_DEFINES} | ||||
-DIGNORE_INDICES | ||||
) | ||||
|
||||
message(STATUS "ALLFEATS_DEFINES: ${ALLFEATS_DEFINES}") | ||||
message(STATUS "FULLDATA_DEFINES: ${FULLDATA_DEFINES}") | ||||
|
||||
function(add_histogram hsize hname hadd hconst hdir) | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How this function differs from existing one for CUDA? Can we reuse it or merge these two functions into one? Line 275 in 480600b
|
||||
add_library(histo${hsize}${hname} OBJECT src/treelearner/kernels/histogram${hsize}.cu) | ||||
if(hadd) | ||||
list(APPEND histograms histo${hsize}${hname}) | ||||
set(histograms ${histograms} PARENT_SCOPE) | ||||
endif() | ||||
target_compile_definitions( | ||||
histo${hsize}${hname} | ||||
PRIVATE | ||||
-DCONST_HESSIAN=${hconst} | ||||
${hdir} | ||||
) | ||||
endfunction() | ||||
|
||||
foreach(hsize _16_64_256) | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same question as for |
||||
add_histogram("${hsize}" "_sp_const" "True" "1" "${BASE_DEFINES}") | ||||
add_histogram("${hsize}" "_sp" "True" "0" "${BASE_DEFINES}") | ||||
add_histogram("${hsize}" "-allfeats_sp_const" "False" "1" "${ALLFEATS_DEFINES}") | ||||
add_histogram("${hsize}" "-allfeats_sp" "False" "0" "${ALLFEATS_DEFINES}") | ||||
add_histogram("${hsize}" "-fulldata_sp_const" "True" "1" "${FULLDATA_DEFINES}") | ||||
add_histogram("${hsize}" "-fulldata_sp" "True" "0" "${FULLDATA_DEFINES}") | ||||
endforeach() | ||||
endif() | ||||
|
||||
if(USE_HDFS) | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. HDFS support was dropped some time ago. This |
||||
find_package(JNI REQUIRED) | ||||
find_path(HDFS_INCLUDE_DIR hdfs.h REQUIRED) | ||||
find_library(HDFS_LIB NAMES hdfs REQUIRED) | ||||
include_directories(${HDFS_INCLUDE_DIR}) | ||||
add_definitions(-DUSE_HDFS) | ||||
set(HDFS_CXX_LIBRARIES ${HDFS_LIB} ${JAVA_JVM_LIBRARY}) | ||||
endif() | ||||
|
||||
include(CheckCXXSourceCompiles) | ||||
check_cxx_source_compiles(" | ||||
#include <xmmintrin.h> | ||||
|
@@ -644,6 +729,20 @@ if(USE_CUDA) | |||
target_link_libraries(_lightgbm PRIVATE ${histograms}) | ||||
endif() | ||||
|
||||
if(USE_ROCM) | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we merge CUDA and HIP with |
||||
# histograms are list of object libraries. Linking object library to other | ||||
# object libraries only gets usage requirements, the linked objects won't be | ||||
# used. Thus we have to call target_link_libraries on final targets here. | ||||
if(BUILD_CLI) | ||||
target_link_libraries(lightgbm PRIVATE ${histograms}) | ||||
endif() | ||||
target_link_libraries(_lightgbm PRIVATE ${histograms}) | ||||
endif() | ||||
|
||||
if(USE_HDFS) | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove this. |
||||
target_link_libraries(lightgbm_objs PUBLIC ${HDFS_CXX_LIBRARIES}) | ||||
endif() | ||||
|
||||
if(WIN32) | ||||
if(MINGW OR CYGWIN) | ||||
target_link_libraries(lightgbm_objs PUBLIC ws2_32 iphlpapi) | ||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/*! | ||
* Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved. | ||
*/ | ||
#ifdef USE_CUDA | ||
|
||
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIP__) | ||
// ROCm doesn't have __shfl_down_sync, only __shfl_down without mask. | ||
// Since mask is full 0xffffffff, we can use __shfl_down instead. | ||
#define __shfl_down_sync(mask, val, offset) __shfl_down(val, offset) | ||
#define __shfl_up_sync(mask, val, offset) __shfl_up(val, offset) | ||
// ROCm warpSize is constexpr and is either 32 or 64 depending on gfx arch. | ||
#define WARPSIZE warpSize | ||
// ROCm doesn't have atomicAdd_block, but it should be semantically the same as atomicAdd | ||
#define atomicAdd_block atomicAdd | ||
#else | ||
// CUDA warpSize is not a constexpr, but always 32 | ||
#define WARPSIZE 32 | ||
#endif | ||
|
||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this file should be added in a follow-up PR in which we'll enable hipifying at our CI or will request users hipify localy before suggesting a CUDA-related PRs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree. We can postpone this to the next PR for ROCm.