Skip to content

Commit

Permalink
add gpu intra-process-typeadaption demo.
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhenshengLee committed Oct 17, 2022
1 parent a0909c6 commit fd8043a
Show file tree
Hide file tree
Showing 17 changed files with 1,505 additions and 15 deletions.
23 changes: 8 additions & 15 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ cmake_minimum_required(VERSION 3.5)

project(shm_msgs)

# Put this project's own cmake/ directory at the front of the module search
# path so helper modules stored there can be loaded by name with include().
list(INSERT CMAKE_MODULE_PATH 0 ${CMAKE_CURRENT_LIST_DIR}/cmake)

# Extra installation prefixes, inserted at the front of CMAKE_PREFIX_PATH.
# NOTE(review): the OpenCV prefix below is a machine-specific absolute path;
# confirm it should be committed rather than passed in with -DCMAKE_PREFIX_PATH.
# list(INSERT CMAKE_PREFIX_PATH 0 /opt/open3d/open3d)
list(INSERT CMAKE_PREFIX_PATH 0 /opt/opencv/opencv-4.6.0/)

if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17)
Expand Down Expand Up @@ -81,21 +84,9 @@ list(REMOVE_ITEM PCL_LIBRARIES
vtkRenderingOpenGL2
)

find_package(OpenCV 4 QUIET
COMPONENTS
opencv_core
opencv_imgproc
opencv_imgcodecs
CONFIG
)
find_package(OpenCV 4 QUIET)
if(NOT OpenCV_FOUND)
find_package(OpenCV 3 REQUIRED
COMPONENTS
opencv_core
opencv_imgproc
opencv_imgcodecs
CONFIG
)
find_package(OpenCV 3 REQUIRED)
endif()

find_package(iceoryx_posh CONFIG REQUIRED)
Expand Down Expand Up @@ -157,6 +148,8 @@ add_subdirectory(lib)
add_subdirectory(src)
# extra image intra-process demos
add_subdirectory(intra)
# extra type-adaption cuda demos
add_subdirectory(cuda)

if(BUILD_TESTING)
add_subdirectory(test)
Expand Down
43 changes: 43 additions & 0 deletions cmake/CudaComputeTargetFlags.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#
# Compute target flags macros by Anatoly Baksheev
#
# Usage in CMakeLists.txt (the directory holding this file must be on CMAKE_MODULE_PATH):
# include(CudaComputeTargetFlags)
# APPEND_TARGET_ARCH_FLAGS()

#compute flags macros
# CUDA_COMPUTE_TARGET_FLAGS(<arch_bin> <arch_ptx> <cuda_nvcc_target_flags>)
#
# Builds the list of nvcc "-gencode" options for a set of GPU architectures.
#
#   arch_bin               NAME of a variable holding the binary (SASS) targets,
#                          e.g. "5.0 6.1(6.0)". An entry of the form BIN(PTX)
#                          compiles sm_BIN from compute_PTX; a plain entry uses
#                          the same number for both.
#   arch_ptx               NAME of a variable holding the architectures for
#                          which PTX is embedded, e.g. "7.5".
#   cuda_nvcc_target_flags NAME of the variable that receives the resulting
#                          list. It ends up unset when no architecture is given,
#                          because set() is then called without a value.
#
# This is a macro, so every helper variable it sets (ARCH_BIN_WITHOUT_DOTS,
# ARCH_PTX_WITHOUT_DOTS, ARCH_LIST, cuda_computer_target_flags_temp) is also
# visible to the caller afterwards.
macro(CUDA_COMPUTE_TARGET_FLAGS arch_bin arch_ptx cuda_nvcc_target_flags)
  # "6.1" -> "61": nvcc expects architecture numbers without the dot.
  string(REPLACE "." "" ARCH_BIN_WITHOUT_DOTS "${${arch_bin}}")
  string(REPLACE "." "" ARCH_PTX_WITHOUT_DOTS "${${arch_ptx}}")

  set(cuda_computer_target_flags_temp "")

  # Binary code for the requested GPUs.
  string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_BIN_WITHOUT_DOTS}")
  foreach(ARCH IN LISTS ARCH_LIST)
    if(NOT ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # Plain entry: no PTX version given, so PTX = BIN.
      list(APPEND cuda_computer_target_flags_temp
        -gencode arch=compute_${ARCH},code=sm_${ARCH})
    else()
      # BIN(PTX) entry: CMAKE_MATCH_1 is BIN, CMAKE_MATCH_2 is the PTX version.
      list(APPEND cuda_computer_target_flags_temp
        -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
    endif()
  endforeach()

  # PTX intermediate code for the requested virtual architectures.
  string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_WITHOUT_DOTS}")
  foreach(ARCH IN LISTS ARCH_LIST)
    list(APPEND cuda_computer_target_flags_temp
      -gencode arch=compute_${ARCH},code=compute_${ARCH})
  endforeach()

  # Hand the result back through the caller-supplied variable name.
  set(${cuda_nvcc_target_flags} ${cuda_computer_target_flags_temp})
endmacro()

# APPEND_TARGET_ARCH_FLAGS()
#
# Derives the nvcc "-gencode" options from the CUDA_ARCH_BIN and CUDA_ARCH_PTX
# variables and appends them to CUDA_NVCC_FLAGS in the caller's scope.
# Does nothing (and prints nothing) when no options were produced.
# As a macro it also leaves cuda_nvcc_target_flags set in the caller's scope.
macro(APPEND_TARGET_ARCH_FLAGS)
  set(cuda_nvcc_target_flags "")
  CUDA_COMPUTE_TARGET_FLAGS(CUDA_ARCH_BIN CUDA_ARCH_PTX cuda_nvcc_target_flags)
  if(cuda_nvcc_target_flags)
    list(APPEND CUDA_NVCC_FLAGS ${cuda_nvcc_target_flags})
    message(STATUS "CUDA NVCC target flags: ${cuda_nvcc_target_flags}")
  endif()
endmacro()
105 changes: 105 additions & 0 deletions cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# GA_CHECK_CUDA()
#
# Decides whether the CUDA demos are built and reports the result in USE_CUDA
# (set in the caller's scope, since this is a macro).
#
# CUDA is enabled only when the GPUAC_COMPILE_WITH_CUDA environment variable
# evaluates to true at configure time. In that case the CUDA and Eigen3
# packages are required, and CMAKE_CUDA_STANDARD defaults to 14 unless the
# caller already defined it. Otherwise a warning is printed and USE_CUDA is OFF.
#
# The ROS Melodic version guards that used to live here (CUDA <= 10.0,
# Eigen >= 3.3.7) are intentionally not enforced.
#
# NOTE(review): the environment variable is expanded unquoted inside if(), so
# its value is interpreted as an if() expression; confirm that unusual values
# (anything other than a plain boolean constant) are not expected.
macro(GA_CHECK_CUDA)
  if ($ENV{GPUAC_COMPILE_WITH_CUDA})
    find_package(CUDA REQUIRED QUIET)
    find_package(Eigen3 REQUIRED QUIET)

    # With CUDA newer than 9.1 and CMake older than 3.12.3, point the
    # cublas_device variables at the regular cublas library instead.
    if(${CUDA_VERSION} VERSION_GREATER "9.1")
      if(${CMAKE_VERSION} VERSION_LESS "3.12.3")
        unset(CUDA_cublas_device_LIBRARY CACHE)
        set(CUDA_cublas_device_LIBRARY ${CUDA_cublas_LIBRARY})
        set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY})
      endif()
    endif()

    # Only pick a CUDA language standard when the caller has not chosen one.
    if(NOT DEFINED CMAKE_CUDA_STANDARD)
      set(CMAKE_CUDA_STANDARD_REQUIRED ON)
      set(CMAKE_CUDA_STANDARD 14)
    endif()

    set(USE_CUDA ON)
  else()
    set(USE_CUDA OFF)
    message(WARNING "CUDA support is disabled. Set the GPUAC_COMPILE_WITH_CUDA environment variable and recompile to enable it")
  endif()
endmacro()

# Build script for the CUDA / type-adaption demo executables:
#   cuda_hello_world  - minimal CUDA kernel launch
#   cuda_image_node   - image demo built through cuda_add_executable
#   intra_gmat_node   - intra-process demo (regular C++ executable)
#   gmat_image_node   - image-container demo (regular C++ executable)
include(CudaComputeTargetFlags)
GA_CHECK_CUDA()

# Without CUDA there is nothing to build here, so leave before touching any
# directory property or compiler flag.
if(NOT USE_CUDA)
  return()
endif()

# Start from an empty set of directory compile definitions, then add the
# -gencode options derived from CUDA_ARCH_BIN / CUDA_ARCH_PTX to CUDA_NVCC_FLAGS.
set_directory_properties(PROPERTIES COMPILE_DEFINITIONS "")
APPEND_TARGET_ARCH_FLAGS()

# Enable NVTX markers for improved profiling.
# These three commands are directory-scoped and therefore must stay ahead of
# the target declarations below, which they apply to.
add_definitions(-DUSE_NVTX)
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
link_libraries("nvToolsExt")

# Include directories shared by every demo target.
set(cuda_demo_include_dirs
  "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
  "$<INSTALL_INTERFACE:include>"
  ${CUDA_INCLUDE_DIRS}
)

# CUDA libraries shared by every demo target (kept in the original link order).
set(cuda_demo_cuda_libs
  ${CUDA_nvToolsExt_LIBRARY}
  ${CUDA_LIBRARIES}
)

# Resolve the C++ typesupport target of this package's messages once; the
# result is reused by every node that links against it.
rosidl_get_typesupport_target(cpp_typesupport_target "${PROJECT_NAME}" "rosidl_typesupport_cpp")

# NOTE(review): target_link_libraries() is used without PUBLIC/PRIVATE on
# purpose. The helper commands used below (cuda_add_executable,
# ament_target_dependencies) presumably use the plain signature as well, and
# CMake rejects mixing both signatures on one target; confirm before adding keywords.

# Equivalent manual build: nvcc -arch sm_50 ./cuda_hello_world.cu -o docker_hello
cuda_add_executable(cuda_hello_world cuda_hello_world.cu)
target_include_directories(cuda_hello_world PUBLIC
  ${cuda_demo_include_dirs}
)
target_link_libraries(cuda_hello_world ${PROJECT_NAME}_image
  ${cuda_demo_cuda_libs}
)

cuda_add_executable(cuda_image_node cuda_image_node.cpp cuda_image_container.cpp)
ament_target_dependencies(cuda_image_node "rclcpp" "cv_bridge" "sensor_msgs")
target_include_directories(cuda_image_node PUBLIC
  ${cuda_demo_include_dirs}
)
target_link_libraries(cuda_image_node ${PROJECT_NAME}_image
  opencv_highgui
  ${cpp_typesupport_target}
  ${cuda_demo_cuda_libs}
)

add_executable(intra_gmat_node intra_gmat_node.cpp)
ament_target_dependencies(intra_gmat_node "rclcpp" "cv_bridge" "sensor_msgs")
target_include_directories(intra_gmat_node PUBLIC
  ${cuda_demo_include_dirs}
  ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(intra_gmat_node ${PROJECT_NAME}_image
  ${cpp_typesupport_target}
  ${cuda_demo_cuda_libs}
  ${OpenCV_LIBS}
)

add_executable(gmat_image_node gmat_image_node.cpp gmat_image_container.cpp)
ament_target_dependencies(gmat_image_node "rclcpp" "cv_bridge" "sensor_msgs")
target_include_directories(gmat_image_node PUBLIC
  ${cuda_demo_include_dirs}
  ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(gmat_image_node ${PROJECT_NAME}_image
  ${cpp_typesupport_target}
  ${cuda_demo_cuda_libs}
  ${OpenCV_LIBS}
)

# All demos are installed to the same per-package executable directory.
install(
  TARGETS
    cuda_hello_world
    cuda_image_node
    intra_gmat_node
    gmat_image_node
  DESTINATION lib/${PROJECT_NAME}
)
21 changes: 21 additions & 0 deletions cuda/cuda_hello_world.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <stdio.h>
#include <stdlib.h>

// Kernel: each thread prints a greeting followed by its threadIdx.x
// multiplied by gridDim.x.
__global__ void cuda_hello(){
    const unsigned int scaled_thread_index = threadIdx.x * gridDim.x;
    printf("Hello World from GPU! %d\n", scaled_thread_index);
}

// Smoke test for the CUDA toolchain: prints a greeting from the host, launches
// cuda_hello with 1 block of 10 threads on device 0, and performs a 1 KiB
// device allocation.
//
// Returns 0 on success.
// Throws std::runtime_error when the device allocation fails.
int main() {
    printf("Hello World from CPU!\n");
    cudaSetDevice(0);
    cuda_hello<<<1,10>>>();

    // Initialised so the pointer never holds an indeterminate value.
    uint8_t *cuda_mem_ = nullptr;
    if (cudaMalloc(&cuda_mem_, 1024) != cudaSuccess)
    {
        throw std::runtime_error("Failed to allocate device memory");
    }

    // Wait for the kernel (and its printf output) before tearing down.
    cudaDeviceSynchronize();

    // Release the allocation instead of leaking it until process exit.
    cudaFree(cuda_mem_);
    return 0;
}
Loading

0 comments on commit fd8043a

Please sign in to comment.