Skip to content

Commit

Permalink
Merge pull request #81 from unisa-hpc/sycl2020
Browse files Browse the repository at this point in the history
Update SYCL-Bench to SYCL 2020
  • Loading branch information
Luigi-Crisci authored Jun 3, 2024
2 parents db1c70c + f60695f commit ae02405
Show file tree
Hide file tree
Showing 78 changed files with 5,170 additions and 4,163 deletions.
28 changes: 28 additions & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
---
#
# Clang-Tidy configuration for SYCL-Bench.
#
# There are three usage scenarios:
# 1. Automatic checks through an IDE (CLion, VsCode, ...)
# 2. Running manually on select files (not recommended)
# `clang-tidy -p path/to/compile_commands.json file1 [file2, ...]`
# Note: A script for running clang-tidy on all SYCL-Bench sources is provided in `ci/run-clang-tidy.sh`
# 3. Running on a diff (for CI)
# `git diff -U0 --no-color | clang-tidy-diff.py -p1 -path path/to/compile_commands.json`
#
InheritParentConfig: false
# See https://clang.llvm.org/extra/clang-tidy/checks/list.html for a full list of available checks.
# Start from an empty set (-*), enable all readability-* checks, then disable the ones listed below.
# Entries are comma-separated: every line except the last must end with a comma, otherwise two
# adjacent entries are folded into a single pattern that matches no check at all.
Checks: -*,
  readability-*,
  -readability-avoid-const-params-in-decls,
  -readability-function-cognitive-complexity,
  -readability-identifier-length,
  -readability-magic-numbers,
  -readability-uppercase-literal-suffix,
  -readability-convert-member-functions-to-static,
  -readability-qualified-auto

# Treat naming violations as errors
WarningsAsErrors: "readability-identifier-naming"
# Use .clang-format configuration for fixes
FormatStyle: file
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,10 @@
/build*
*.csv
img/

# Clangd
.cache/
.clangd

# Vscode
.vscode/
21 changes: 21 additions & 0 deletions CITATION.cff
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
conference-paper: "Proceedings of the 12th International Workshop on OpenCL and SYCL (IWOCL 24)"
# CFF convention: `family-names` holds the surname, `given-names` the first name.
authors:
  - family-names: "Crisci"
    given-names: "Luigi"
  - family-names: "Carpentieri"
    given-names: "Lorenzo"
  - family-names: "Thoman"
    given-names: "Peter"
  - family-names: "Alpay"
    given-names: "Aksel"
  - family-names: "Heuveline"
    given-names: "Vincent"
  - family-names: "Cosenza"
    given-names: "Biagio"
title: "SYCL-Bench 2020: Benchmarking SYCL 2020 on AMD, Intel, and NVIDIA GPUs"
version: 2.0.4
doi: 10.1145/3648115.3648120
date-released: 2024-04-08
url: "https://github.com/unisa-hpc/sycl-bench/"
133 changes: 89 additions & 44 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,24 +1,31 @@
cmake_minimum_required (VERSION 3.5)
cmake_minimum_required(VERSION 3.5)
project(sycl-bench)

set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake)
set_property(GLOBAL PROPERTY USE_FOLDERS ON)

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake Build Type" FORCE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake Build Type" FORCE)
endif()

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Due to CMake limitations, hipSYCL requires C++ standard to be set manually
set(CMAKE_SYCL_FLAGS "${CMAKE_SYCL_FLAGS} -std=c++17")
# Default build flags
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG -fno-omit-frame-pointer" CACHE STRING "Flags used by the C++ compiler during debug builds." FORCE)
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -march=native -ffast-math" CACHE STRING "Flags used by the C++ compiler during release builds." FORCE)
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -DNDEBUG -march=native -ffast-math -g -fno-omit-frame-pointer" CACHE STRING "Flags used by the C++ compiler during release builds with debug info." FORCE)


if(CMAKE_GENERATOR STREQUAL "Ninja")
set(CMAKE_SYCL_FLAGS "${CMAKE_SYCL_FLAGS} -fdiagnostics-color=always")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
set(COMPUTECPP_USER_FLAGS "${COMPUTECPP_USER_FLAGS} -fdiagnostics-color=always")
set(CMAKE_SYCL_FLAGS "${CMAKE_SYCL_FLAGS} -fdiagnostics-color=always")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
set(COMPUTECPP_USER_FLAGS "${COMPUTECPP_USER_FLAGS} -fdiagnostics-color=always")
endif()

if(SYCL_BENCH_ENABLE_QUEUE_PROFILING)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSYCL_BENCH_ENABLE_QUEUE_PROFILING")
endif()

include(InstallRequiredSystemLibraries)
Expand All @@ -28,31 +35,55 @@ include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/polybench/common)

set(supported_implementations
ComputeCpp
hipSYCL
LLVM
LLVM-CUDA
triSYCL
AdaptiveCpp
dpcpp
triSYCL
)

list(FIND supported_implementations ${SYCL_IMPL} impl_idx)

if(NOT SYCL_IMPL OR impl_idx EQUAL -1)
message(FATAL_ERROR "Please specify SYCL_IMPL (one of: ${supported_implementations})")
message(FATAL_ERROR "Please specify SYCL_IMPL (one of: ${supported_implementations})")
endif()

if(SYCL_IMPL STREQUAL "ComputeCpp")
find_package(ComputeCpp MODULE REQUIRED)
elseif(SYCL_IMPL STREQUAL "hipSYCL")
find_package(hipSYCL CONFIG REQUIRED)
elseif(SYCL_IMPL STREQUAL "LLVM")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl")
elseif(SYCL_IMPL STREQUAL "LLVM-CUDA")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -fsycl -fsycl-targets=nvptx64-nvidia-cuda-sycldevice")
if(SYCL_IMPL STREQUAL "AdaptiveCpp")
find_package(AdaptiveCpp REQUIRED)
elseif(SYCL_IMPL STREQUAL "dpcpp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl")

# Optional DPC++ device back-ends. Each back-end is switched on by its own
# DPCPP_WITH_<X>_BACKEND variable and exposes a cache entry for the device
# architecture; the matching compiler flags are appended to CMAKE_CXX_FLAGS.

# CUDA: the nvptx64 target is always added; the GPU architecture is only
# forwarded to the target back-end when CUDA_ARCH is non-empty.
if(DPCPP_WITH_CUDA_BACKEND)
  string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=nvptx64-nvidia-cuda")

  set(CUDA_ARCH "" CACHE STRING "CUDA device architecture e.g. sm_70")
  if(NOT "${CUDA_ARCH}" STREQUAL "")
    string(APPEND CMAKE_CXX_FLAGS " -Xsycl-target-backend --cuda-gpu-arch=${CUDA_ARCH}")
  endif()
endif()

# ROCm: no flag is added unless ROCM_ARCH is non-empty.
if(DPCPP_WITH_ROCM_BACKEND)
  set(ROCM_ARCH "" CACHE STRING "ROCm device architecture e.g. gfx908")
  if(NOT "${ROCM_ARCH}" STREQUAL "")
    string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=amd_gpu_${ROCM_ARCH}")
  endif()
endif()

# Level Zero: no flag is added unless LZ_ARCH is non-empty.
if(DPCPP_WITH_LZ_BACKEND)
  set(LZ_ARCH "" CACHE STRING "Level Zero device architecture e.g. acm-g10")
  if(NOT "${LZ_ARCH}" STREQUAL "")
    string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=${LZ_ARCH}")
  endif()
endif()

elseif(SYCL_IMPL STREQUAL "triSYCL")
find_package(TriSYCL MODULE REQUIRED)
find_package(TriSYCL MODULE REQUIRED)
endif()

# Check if SYCL implementation implements the required SYCL features
include(HasFeatures)

set(benchmarks
micro/arith.cpp
micro/DRAM.cpp
Expand Down Expand Up @@ -92,42 +123,56 @@ set(benchmarks
polybench/mvt.cpp
polybench/syr2k.cpp
polybench/syrk.cpp
#compiletime/compiletime.cpp
)

foreach(benchmark IN LISTS benchmarks)
get_filename_component(target ${benchmark} NAME_WE)
# compiletime/compiletime.cpp
sycl2020/atomics/atomic_reduction.cpp
sycl2020/USM/usm_accessors_latency.cpp
sycl2020/USM/usm_instr_mix.cpp
sycl2020/USM/usm_pinned_overhead.cpp
sycl2020/USM/usm_allocation_latency.cpp
)
# Selectively add benchmarks based on some SYCL 2020 features
if (SYCL_BENCH_HAS_SPEC_CONSTANTS)
list(APPEND benchmarks sycl2020/spec_constants/spec_constant_convolution.cpp)
endif()
if (SYCL_BENCH_HAS_KERNEL_REDUCTIONS)
list(APPEND benchmarks sycl2020/kernel_reduction/kernel_reduction.cpp)
endif()
if (SYCL_BENCH_HAS_GROUP_ALGORITHMS)
list(APPEND benchmarks sycl2020/group_algorithms/reduce_over_group.cpp)
endif()

add_executable(${target} ${benchmark})
# Setting variables
add_compile_definitions(SYCL_BENCH_HAS_FP64_SUPPORT=$<BOOL:${SYCL_BENCH_HAS_FP64_SUPPORT}>)

if(SYCL_IMPL STREQUAL "ComputeCpp" OR SYCL_IMPL STREQUAL "hipSYCL")
add_sycl_to_target(TARGET ${target} SOURCES ${benchmark})
endif()
foreach(benchmark IN LISTS benchmarks)
get_filename_component(target ${benchmark} NAME_WE)

if(SYCL_IMPL STREQUAL "ComputeCpp" AND COMPUTECPP_BITCODE STREQUAL "ptx64")
target_compile_definitions(${target} PRIVATE SYCL_BENCH_ENABLE_QUEUE_PROFILING)
endif()
add_executable(${target} ${benchmark})

if(SYCL_IMPL STREQUAL "LLVM")
target_compile_definitions(${target} PRIVATE __LLVM_SYCL__)
endif()
if(SYCL_IMPL STREQUAL "AdaptiveCpp")
add_sycl_to_target(TARGET ${target} SOURCES ${benchmark})
endif()

if(SYCL_IMPL STREQUAL "LLVM-CUDA")
target_compile_definitions(${target} PRIVATE __LLVM_SYCL_CUDA__)
endif()
if(SYCL_IMPL STREQUAL "dpcpp")
target_compile_definitions(${target} PRIVATE __DPCPP__)
endif()

if(SYCL_IMPL STREQUAL "triSYCL")
add_sycl_to_target(${target})
target_compile_definitions(${target} PRIVATE __TRISYCL__)
endif()


if(ENABLE_TIME_EVENT_PROFILING)
target_compile_definitions(${target} PUBLIC SYCL_BENCH_ENABLE_QUEUE_PROFILING=1)
endif()

install(TARGETS ${target} RUNTIME DESTINATION bin/benchmarks/)
get_filename_component(dir ${benchmark} DIRECTORY)
set_property(TARGET ${target} PROPERTY FOLDER ${dir})
endforeach(benchmark)

# The "compiletime" target should only be used in the context of the compile time evaluation script
#set_target_properties(compiletime PROPERTIES EXCLUDE_FROM_ALL 1)

# set_target_properties(compiletime PROPERTIES EXCLUDE_FROM_ALL 1)
install(PROGRAMS bin/run-suite DESTINATION bin/)
install(FILES ${PROJECT_SOURCE_DIR}/Brommy.bmp DESTINATION share/)
install(FILES ${PROJECT_SOURCE_DIR}/share/Brommy.bmp DESTINATION share/)
20 changes: 19 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ $ mkdir build && cd build

Compile with CMake
```
$ cmake -DSYCL_IMPL=[target SYCL implementation] [other compiler arguments] ..
$ cmake -DSYCL_IMPL=[target SYCL implementation] [-DSYCL_BENCH_HAS_FP64_SUPPORT=ON|OFF] [other compiler arguments] ..
$ cmake --build .
$ sudo make install
```
Expand Down Expand Up @@ -57,6 +57,24 @@ Packages built via the `package` target will contain all files contained in a SY

## Attribution
If you use SYCL-Bench, please cite the following papers:
```
@inproceedings{SYCL-Bench:IWOCL:2024,
author = {Crisci, Luigi and Carpentieri, Lorenzo and Thoman, Peter and Alpay, Aksel and Heuveline, Vincent and Cosenza, Biagio},
title = {SYCL-Bench 2020: Benchmarking SYCL 2020 on AMD, Intel, and NVIDIA GPUs},
year = {2024},
isbn = {9798400717901},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3648115.3648120},
doi = {10.1145/3648115.3648120},
booktitle = {Proceedings of the 12th International Workshop on OpenCL and SYCL},
articleno = {1},
numpages = {12},
keywords = {GPU, HPC, SYCL, benchmark, heterogeneous computing, portability},
location = {Chicago, IL, USA},
series = {IWOCL '24}
}
```

```
@inproceedings{SYCL-Bench:Euro-Par:2020,
Expand Down
36 changes: 32 additions & 4 deletions bin/run-suite
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,36 @@ default_profile = {
},
'mvt' : {
'--size' : create_log_range(2**14, 2**14)
},
},
'individual-benchmark-flags' : set([])
},
'usm_accessors_latency' : {
'--size' : create_log_range(2**20, 2**20)
},
'usm_allocation_latency' : {
'--size' : create_log_range(2**25, 2**25)
},
'usm_instr_mix' : {
'--size' : create_log_range(2**14, 2**14)
},
'usm_pinned_overhead' : {
'--size' : create_log_range(2**20, 2**20)
},
'spec_constant_convolution' : {
'--size' : create_log_range(2**11, 2**11)
},
'atomic_reduction' : {
'--size' : create_log_range(2**20, 2**20)
},
'reduce_over_group' : {
'--size' : create_log_range(2**20, 2**20)
},
'kernel_reduction' : {
'--size' : create_log_range(2**20, 2**20)
}
},
'individual-benchmark-flags' : {
'usm_instr_mix' : ['--instr-mix=6'],
'usm_pinned_overhead' : ['--num-copies=5'],
}
}

def construct_profile(overridden_options_dict,
Expand Down Expand Up @@ -222,8 +249,9 @@ if __name__ == '__main__':
if benchmark_name in individual_benchmark_options:
for param in individual_benchmark_options[benchmark_name]:
options[param] = individual_benchmark_options[benchmark_name][param]

if benchmark_name in individual_benchmark_flags:
for f in individual_benchmark_flags:
for f in individual_benchmark_flags[benchmark_name]:
flags.add(f)

max_runtime = 0.0
Expand Down
22 changes: 22 additions & 0 deletions cmake/HasFeatures.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# check_feature(<VAR> <FILENAME>)
#
# Compiles and runs the probe cmake/has-features/src/<FILENAME> with try_run()
# and publishes the outcome as SYCL_BENCH_HAS_<VAR> (ON/OFF) in the caller's
# scope, which is the variable the top-level CMakeLists.txt tests.
#
#   VAR      - feature suffix, e.g. FP64_SUPPORT -> SYCL_BENCH_HAS_FP64_SUPPORT
#   FILENAME - probe source file, relative to cmake/has-features/src/
#
# A probe succeeds when it compiles AND exits with status 0.
# The probe is skipped when RUN_RES_<VAR> is already defined, i.e. when a
# previous configure stored the try_run() result.
# A SYCL_BENCH_HAS_<VAR> that is already defined (for example passed with -D on
# the command line) takes precedence over the probe result.
#
# Example: check_feature(FP64_SUPPORT fp64_support_dummy.cpp)
macro(check_feature VAR FILENAME)
  if(NOT DEFINED RUN_RES_${VAR})
    # NOTE(review): CMAKE_FLAGS is documented to take -D<var>=<value> entries for
    # the generated probe project, and try_run() appears to forward
    # CMAKE_CXX_FLAGS on its own - confirm whether this argument is needed.
    try_run(RUN_RES_${VAR} COMPILE_RES_${VAR} ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/has-features/src/${FILENAME}
      CMAKE_FLAGS ${CMAKE_CXX_FLAGS}
      COMPILE_OUTPUT_VARIABLE OUTPUT_VAR
      RUN_OUTPUT_VARIABLE RUN_VAR
    )
  endif()

  if(COMPILE_RES_${VAR} AND RUN_RES_${VAR} EQUAL 0)
    set(RES ON)
  else()
    set(RES OFF)
  endif()

  # Export the result; previously RES was only printed and then discarded, so
  # the SYCL_BENCH_HAS_* variables stayed unset unless the user supplied them.
  if(NOT DEFINED SYCL_BENCH_HAS_${VAR})
    set(SYCL_BENCH_HAS_${VAR} ${RES})
  endif()
  message(STATUS "${VAR}: ${SYCL_BENCH_HAS_${VAR}}")
endmacro()

# Probe the features that are checked through SYCL_BENCH_HAS_<VAR>.
message(STATUS "Checking for SYCL features....")
check_feature(KERNEL_REDUCTIONS kernel_reduction_dummy.cpp)
check_feature(SPEC_CONSTANTS spec_constants_dummy.cpp)
check_feature(GROUP_ALGORITHMS group_algorithms_dummy.cpp)
check_feature(FP64_SUPPORT fp64_support_dummy.cpp)
15 changes: 15 additions & 0 deletions cmake/has-features/src/fp64_support_dummy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include <sycl/sycl.hpp>

int main() {
sycl::queue q;
sycl::buffer<double> x(1);

q.submit([&](sycl::handler& cgh) {
sycl::accessor a(x, cgh, sycl::read_write);
cgh.parallel_for<class dummy>(sycl::range<1>(1), [=](sycl::id<1> idx) { a[idx] = 0; });
});

sycl::host_accessor host{x};
assert(host[0] == 0);

}
17 changes: 17 additions & 0 deletions cmake/has-features/src/group_algorithms_dummy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include <sycl/sycl.hpp>
#include <iostream>


int main() {
sycl::queue q;
int* i = sycl::malloc_shared<int>(1, q);
q.submit([&](sycl::handler& cgh) {
cgh.parallel_for(sycl::nd_range<1>{{1}, {1}}, [=](sycl::nd_item<1> item) {
// call only the group algorithms used in SYCL-Bench
*i = sycl::reduce_over_group(item.get_group(), 1, sycl::plus<int>{});
});
}).wait();

assert(*i == 1);
sycl::free(i, q);
}
Loading

0 comments on commit ae02405

Please sign in to comment.