-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add gpu intra-process-typeadaption demo.
- Loading branch information
1 parent
a0909c6
commit fd8043a
Showing
17 changed files
with
1,505 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# | ||
# Compute target flags macros by Anatoly Baksheev | ||
# | ||
# Usage in CMakeLists.txt: | ||
# include(CudaComputeTargetFlags.cmake) | ||
# APPEND_TARGET_ARCH_FLAGS() | ||
|
||
#compute flags macros | ||
# CUDA_COMPUTE_TARGET_FLAGS(<arch_bin> <arch_ptx> <cuda_nvcc_target_flags>)
#
# Turns two architecture lists into a list of nvcc "-gencode" options.
#
#   arch_bin               NAME of the variable listing the architectures to
#                          generate binaries for. An entry written as BIN(PTX)
#                          pairs sm_BIN with compute_PTX; a bare entry uses the
#                          same number for both.
#   arch_ptx               NAME of the variable listing the architectures to
#                          emit PTX intermediate code for.
#   cuda_nvcc_target_flags NAME of the variable that receives the result.
#
# This is a macro: the helper variables set below remain visible in the
# caller's scope after the call.
macro(CUDA_COMPUTE_TARGET_FLAGS arch_bin arch_ptx cuda_nvcc_target_flags)
  # Drop the version dots so that e.g. 6.1 becomes 61, the form used in the
  # compute_NN / sm_NN names below.
  string(REPLACE "." "" ARCH_BIN_WITHOUT_DOTS "${${arch_bin}}")
  string(REPLACE "." "" ARCH_PTX_WITHOUT_DOTS "${${arch_ptx}}")

  set(cuda_computer_target_flags_temp "")

  # Binary targets: one -gencode pair per requested GPU.
  string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_BIN_WITHOUT_DOTS}")
  foreach(ARCH IN LISTS ARCH_LIST)
    if(ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # BIN(PTX): the PTX architecture was given explicitly for this binary.
      list(APPEND cuda_computer_target_flags_temp
        -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
    else()
      # No explicit PTX architecture: assume PTX == BIN.
      list(APPEND cuda_computer_target_flags_temp
        -gencode arch=compute_${ARCH},code=sm_${ARCH})
    endif()
  endforeach()

  # PTX targets: embed intermediate code for each requested architecture.
  string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_WITHOUT_DOTS}")
  foreach(ARCH IN LISTS ARCH_LIST)
    list(APPEND cuda_computer_target_flags_temp
      -gencode arch=compute_${ARCH},code=compute_${ARCH})
  endforeach()

  # With no flags collected this expands to set(<name>), which unsets the
  # output variable.
  set(${cuda_nvcc_target_flags} ${cuda_computer_target_flags_temp})
endmacro()
|
||
# APPEND_TARGET_ARCH_FLAGS()
#
# Computes the -gencode options for the architectures named in CUDA_ARCH_BIN
# and CUDA_ARCH_PTX and, when any were produced, reports them and appends them
# to CUDA_NVCC_FLAGS in the caller's scope.
macro(APPEND_TARGET_ARCH_FLAGS)
  # Start from a defined, empty result before delegating.
  set(cuda_nvcc_target_flags "")
  CUDA_COMPUTE_TARGET_FLAGS(
    CUDA_ARCH_BIN
    CUDA_ARCH_PTX
    cuda_nvcc_target_flags
  )

  # Nothing is appended (and nothing is printed) when no architecture was
  # requested.
  if(cuda_nvcc_target_flags)
    message(STATUS "CUDA NVCC target flags: ${cuda_nvcc_target_flags}")
    list(APPEND CUDA_NVCC_FLAGS ${cuda_nvcc_target_flags})
  endif()
endmacro()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
# GA_CHECK_CUDA()
#
# Decides whether the CUDA demos are built, based on the
# GPUAC_COMPILE_WITH_CUDA environment variable as read at configure time.
#
# When the variable holds a true constant (1, ON, YES, TRUE, Y or a non-zero
# number):
#   * CUDA and Eigen3 are located with find_package(... REQUIRED QUIET);
#   * for CUDA newer than 9.1 combined with CMake older than 3.12.3, the
#     CUDA_cublas_device_LIBRARY cache entry is removed and the variable is
#     pointed at CUDA_cublas_LIBRARY, as is CUDA_CUBLAS_LIBRARIES;
#   * CMAKE_CUDA_STANDARD defaults to 14 (and is marked required) unless it is
#     already defined;
#   * USE_CUDA is set to ON.
# Otherwise a warning is printed and USE_CUDA is set to OFF.
#
# This is a macro, so every variable it sets lands in the caller's scope.
#
# NOTE: the former ROS Melodic constraints (CUDA <= 10.0, Eigen >= 3.3.7) are
# not enforced here.
macro(GA_CHECK_CUDA)
  # The expansion is quoted so that an unset variable, or a value containing
  # spaces or semicolons, evaluates to false instead of breaking the if()
  # argument parser or being dereferenced as a variable name.
  if("$ENV{GPUAC_COMPILE_WITH_CUDA}")
    find_package(CUDA REQUIRED QUIET)
    find_package(Eigen3 REQUIRED QUIET)

    # NOTE(review): presumably works around older FindCUDA modules searching
    # for a cublas_device library that newer toolkits no longer ship - confirm.
    if("${CUDA_VERSION}" VERSION_GREATER "9.1"
       AND "${CMAKE_VERSION}" VERSION_LESS "3.12.3")
      unset(CUDA_cublas_device_LIBRARY CACHE)
      set(CUDA_cublas_device_LIBRARY ${CUDA_cublas_LIBRARY})
      set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY})
    endif()

    # Respect a CUDA standard chosen by the including project.
    if(NOT DEFINED CMAKE_CUDA_STANDARD)
      set(CMAKE_CUDA_STANDARD 14)
      set(CMAKE_CUDA_STANDARD_REQUIRED ON)
    endif()

    set(USE_CUDA ON)
  else()
    message(WARNING "CUDA support is disabled. Set the GPUAC_COMPILE_WITH_CUDA environment variable and recompile to enable it")
    set(USE_CUDA OFF)
  endif()
endmacro()
|
||
# --- CUDA detection and global build settings --------------------------------

# Provides CUDA_COMPUTE_TARGET_FLAGS() and APPEND_TARGET_ARCH_FLAGS().
include(CudaComputeTargetFlags)

# Sets USE_CUDA according to the GPUAC_COMPILE_WITH_CUDA environment variable.
GA_CHECK_CUDA()

# Clear the directory-level compile definitions before the CUDA targets are
# declared.
# NOTE(review): this runs even when USE_CUDA is OFF - confirm that is intended.
set_directory_properties(
  PROPERTIES
    COMPILE_DEFINITIONS ""
)

# Extend CUDA_NVCC_FLAGS with -gencode options for CUDA_ARCH_BIN/CUDA_ARCH_PTX.
APPEND_TARGET_ARCH_FLAGS()

# Everything below needs CUDA; stop processing this file without it.
if(NOT USE_CUDA)
  return()
endif()

# Enable NVTX markers for improved profiling
add_definitions(-DUSE_NVTX)
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
link_libraries("nvToolsExt")
|
||
# --- cuda_hello_world: minimal CUDA executable -------------------------------
# nvcc -arch sm_50 ./cuda_hello_world.cu -o docker_hello
cuda_add_executable(cuda_hello_world
  cuda_hello_world.cu
)

target_include_directories(cuda_hello_world
  PUBLIC
    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
    "$<INSTALL_INTERFACE:include>"
    ${CUDA_INCLUDE_DIRS}
)

# Keyword-less signature on purpose: cuda_add_executable links with the plain
# signature by default, and the two forms cannot be mixed on one target.
target_link_libraries(cuda_hello_world
  ${PROJECT_NAME}_image
  ${CUDA_nvToolsExt_LIBRARY}
  ${CUDA_LIBRARIES}
)

install(
  TARGETS cuda_hello_world
  DESTINATION lib/${PROJECT_NAME}
)
|
||
# --- cuda_image_node: built from the node source plus its image container ----
cuda_add_executable(cuda_image_node
  cuda_image_node.cpp
  cuda_image_container.cpp
)

ament_target_dependencies(cuda_image_node
  "rclcpp"
  "cv_bridge"
  "sensor_msgs"
)

# Stores this package's C++ typesupport target in cpp_typesupport_target.
rosidl_get_typesupport_target(
  cpp_typesupport_target
  "${PROJECT_NAME}"
  "rosidl_typesupport_cpp"
)

target_include_directories(cuda_image_node
  PUBLIC
    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
    "$<INSTALL_INTERFACE:include>"
    ${CUDA_INCLUDE_DIRS}
)

# Keyword-less signature on purpose: cuda_add_executable links with the plain
# signature by default, and the two forms cannot be mixed on one target.
target_link_libraries(cuda_image_node
  ${PROJECT_NAME}_image
  opencv_highgui
  ${cpp_typesupport_target}
  ${CUDA_nvToolsExt_LIBRARY}
  ${CUDA_LIBRARIES}
)

install(
  TARGETS cuda_image_node
  DESTINATION lib/${PROJECT_NAME}
)
|
||
# --- intra_gmat_node: plain C++ executable linked against CUDA and OpenCV ----
add_executable(intra_gmat_node
  intra_gmat_node.cpp
)

ament_target_dependencies(intra_gmat_node
  "rclcpp"
  "cv_bridge"
  "sensor_msgs"
)

# Stores this package's C++ typesupport target in cpp_typesupport_target.
rosidl_get_typesupport_target(
  cpp_typesupport_target
  "${PROJECT_NAME}"
  "rosidl_typesupport_cpp"
)

target_include_directories(intra_gmat_node
  PUBLIC
    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
    "$<INSTALL_INTERFACE:include>"
    ${CUDA_INCLUDE_DIRS}
    ${OpenCV_INCLUDE_DIRS}
)

# Link order is kept exactly as before.
target_link_libraries(intra_gmat_node
  ${PROJECT_NAME}_image
  ${cpp_typesupport_target}
  ${CUDA_nvToolsExt_LIBRARY}
  ${CUDA_LIBRARIES}
  ${OpenCV_LIBS}
)

install(
  TARGETS intra_gmat_node
  DESTINATION lib/${PROJECT_NAME}
)
|
||
# --- gmat_image_node: built from the node source plus its image container ----
add_executable(gmat_image_node
  gmat_image_node.cpp
  gmat_image_container.cpp
)

ament_target_dependencies(gmat_image_node
  "rclcpp"
  "cv_bridge"
  "sensor_msgs"
)

# Stores this package's C++ typesupport target in cpp_typesupport_target.
rosidl_get_typesupport_target(
  cpp_typesupport_target
  "${PROJECT_NAME}"
  "rosidl_typesupport_cpp"
)

target_include_directories(gmat_image_node
  PUBLIC
    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
    "$<INSTALL_INTERFACE:include>"
    ${CUDA_INCLUDE_DIRS}
    ${OpenCV_INCLUDE_DIRS}
)

# Link order is kept exactly as before.
target_link_libraries(gmat_image_node
  ${PROJECT_NAME}_image
  ${cpp_typesupport_target}
  ${CUDA_nvToolsExt_LIBRARY}
  ${CUDA_LIBRARIES}
  ${OpenCV_LIBS}
)

install(
  TARGETS gmat_image_node
  DESTINATION lib/${PROJECT_NAME}
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#include <iostream> | ||
#include <stdlib.h> | ||
#include <stdio.h> | ||
|
||
// Device kernel: each thread prints a greeting followed by the product of its
// x thread index and the grid's x dimension.
__global__ void cuda_hello()
{
    const unsigned int scaled_index = threadIdx.x * gridDim.x;
    printf("Hello World from GPU! %d\n", scaled_index);
}
|
||
int main() { | ||
printf("Hello World from CPU!\n"); | ||
cudaSetDevice(0); | ||
cuda_hello<<<1,10>>>(); | ||
uint8_t *cuda_mem_; | ||
if (cudaMalloc(&cuda_mem_, 1024) != cudaSuccess) | ||
{ | ||
throw std::runtime_error("Failed to allocate device memory"); | ||
} | ||
|
||
cudaDeviceSynchronize(); | ||
return 0; | ||
} |
Oops, something went wrong.