Skip to content

Commit

Permalink
[PTI-SDK] Fix data race(s) found in ptiViewEnable/Disable (intel#62)
Browse files Browse the repository at this point in the history
* Fix data race issues found by ThreadSanitizer when calling `ptiViewEnable`
  and `ptiViewDisable` from multiple threads.
* Improve ThreadSanitizer results by adding suppressions for third party
  libraries and adding additional compiler flags.
* Document and rename internal structure to denote that it is NOT "thread safe"
* Add ThreadSanitizer build to CI along with fixes to iso sample.

Signed-off-by: Schilling, Matthew <[email protected]>
  • Loading branch information
mschilling0 authored Feb 12, 2024
1 parent 6cd8f30 commit 1f6e151
Show file tree
Hide file tree
Showing 15 changed files with 223 additions and 79 deletions.
29 changes: 21 additions & 8 deletions .github/workflows/sdk_build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,38 +23,51 @@ jobs:
uses: actions/checkout@v4

- name: Build
working-directory: sdk
run: |
cd sdk
cmake --preset default
cmake --build --preset default -j $(($(nproc)/2))
- name: Test
working-directory: sdk
run: |
cd sdk
ctest --output-on-failure --preset default
- name: BuildSanitized
- name: Build AddressSanitizer
if: always()
working-directory: sdk
run: |
cd sdk
cmake --preset asan
cmake --build --preset asan --parallel $(($(nproc)/2))
- name: BuildFuzz
- name: Build ThreadSanitizer
if: always()
working-directory: sdk
run: |
cmake --preset tsan
cmake --build --preset tsan --parallel $(($(nproc)/2))
- name: Build libFuzzer
if: always()
working-directory: sdk
run: |
# To ensure it still builds, run build for fuzz targets until we have
# proper fuzz testing infrastructure in place.
cd sdk
cmake --preset fuzz
cmake --build --preset fuzz --parallel $(($(nproc)/2))
- name: TestSanitized
- name: Test AddressSanitizer
if: always()
working-directory: sdk
run: |
cd sdk
ctest --preset asan --output-on-failure -L samples
- name: Test ThreadSanitizer
if: always()
working-directory: sdk
run: |
ctest --preset tsan --output-on-failure -L samples
- name: Install SDK
working-directory: sdk
run: |
Expand Down
7 changes: 5 additions & 2 deletions sdk/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@
"environment": {
"NEOReadDebugKeys": "1",
"DisableDeepBind": "1",
"ASAN_OPTIONS": "suppressions=${sourceDir}/test/ASan.supp,detect_leaks=1,check_initialization_order=1,alloc_dealloc_mismatch=0,new_delete_type_mismatch=0,halt_on_error=1,use_sigaltstack=0",
"LSAN_OPTIONS": "suppressions=${sourceDir}/test/LSan.supp,use_unaligned=1",
"ASAN_OPTIONS": "suppressions=${sourceDir}/test/suppressions/ASan.supp,detect_leaks=1,check_initialization_order=1,alloc_dealloc_mismatch=0,new_delete_type_mismatch=0,halt_on_error=1,use_sigaltstack=0",
"LSAN_OPTIONS": "suppressions=${sourceDir}/test/suppressions/LSan.supp,use_unaligned=1",
"UBSAN_OPTIONS": "print_stacktrace=1"
}
},
Expand All @@ -70,6 +70,9 @@
"inherits": "asan",
"displayName": "ThreadSanitizer Test Config",
"description": "Build configuration for thread sanitizer.",
"environment": {
"TSAN_OPTIONS": "suppressions=${sourceDir}/test/suppressions/TSan.supp"
},
"cacheVariables": {
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/toolchains/icpx_tsan_toolchain.cmake"
}
Expand Down
2 changes: 1 addition & 1 deletion sdk/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.3.1
0.3.2
4 changes: 2 additions & 2 deletions sdk/cmake/toolchains/icpx_tsan_toolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ if (UNIX)
set(CMAKE_C_COMPILER icx)
set(CMAKE_CXX_COMPILER icpx)
endif()
set(CMAKE_CXX_FLAGS_DEBUG_INIT "-fsanitize=thread,undefined")
set(CMAKE_C_FLAGS_DEBUG_INIT "-fsanitize=thread,undefined")
set(CMAKE_CXX_FLAGS_DEBUG_INIT "-fsanitize=thread -fno-omit-frame-pointer -fsanitize-recover=all")
set(CMAKE_C_FLAGS_DEBUG_INIT "-fsanitize=thread -fno-omit-frame-pointer -fsanitize-recover=all")
3 changes: 3 additions & 0 deletions sdk/samples/iso3dfd_dpcpp/include/iso3dfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <sycl/sycl.hpp>
using namespace sycl;

#include <mutex>
#include <chrono>
#include <cmath>
#include <cstring>
Expand All @@ -23,6 +24,8 @@ constexpr float dt = 0.002f;
constexpr float dxyz = 50.0f;
constexpr unsigned int kHalfLength = 8;

extern std::mutex global_cout_mtx;

/*
* Padding to test and eliminate shared local memory bank conflicts for
* the shared local memory(slm) version of the kernel executing on GPU
Expand Down
61 changes: 41 additions & 20 deletions sdk/samples/iso3dfd_dpcpp/src/iso3dfd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,14 @@
#include "iso3dfd.h"
#include <iostream>
#include <string>
#include <mutex>
#include "device_selector.hpp"
#include <dpc_common.hpp>
#include "pti_view.h"
#include "samples_utils.h"

std::mutex global_cout_mtx;

namespace oneapi {}
using namespace oneapi;

Expand All @@ -54,14 +58,16 @@ void StopTracing() {
assert(ptiViewDisable(PTI_VIEW_SYCL_RUNTIME_CALLS) == pti_result::PTI_SUCCESS);
}


/*
* Host-Code
* Function used for initialization
*/
void Initialize(float* ptr_prev, float* ptr_next, float* ptr_vel, size_t n1,
size_t n2, size_t n3) {
std::cout << "Initializing ... \n";
{
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
std::cout << "Initializing ... \n";
}
size_t dim2 = n2 * n1;

for (size_t i = 0; i < n3; i++) {
Expand Down Expand Up @@ -212,6 +218,7 @@ int main(int argc, char* argv[]) {
while (true) {
auto buf_status =
ptiViewGetNextRecord(buf, valid_buf_size, &ptr);
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
if (buf_status == pti_result::PTI_STATUS_END_OF_BUFFER) {
std::cout << "Reached End of buffer" << '\n';
break;
Expand All @@ -231,7 +238,7 @@ int main(int argc, char* argv[]) {
<< '\n';
std::cout << "Found Sycl Runtime Record" << '\n';
samples_utils::dump_record(reinterpret_cast<pti_view_record_sycl_runtime *>(ptr));
break;
break;
}
case pti_view_kind:: PTI_VIEW_DEVICE_GPU_MEM_COPY: {
std::cout << "---------------------------------------------------"
Expand Down Expand Up @@ -278,15 +285,16 @@ int main(int argc, char* argv[]) {

((reinterpret_cast<pti_view_record_kernel *>(ptr) ->_start_timestamp) <=
(reinterpret_cast<pti_view_record_kernel *>(ptr) ->_end_timestamp))) {
std::cout << "------------> All Monotonic" << std::endl;
} else {
std::cout << "------------> Something wrong: NOT All monotonic" << std::endl;
};
if ( reinterpret_cast<pti_view_record_kernel *>(ptr)->_sycl_task_begin_timestamp == 0)
std::cout << "------------> Something wrong: Sycl Task Begin Time is 0" << std::endl;
if ( reinterpret_cast<pti_view_record_kernel *>(ptr)->_sycl_enqk_begin_timestamp == 0)
std::cout << "------------> All Monotonic" << std::endl;
} else {
std::cout << "------------> Something wrong: NOT All monotonic" << std::endl;
}
if (reinterpret_cast<pti_view_record_kernel *>(ptr)->_sycl_task_begin_timestamp == 0) {
std::cout << "------------> Something wrong: Sycl Task Begin Time is 0" << std::endl;
}
if ( reinterpret_cast<pti_view_record_kernel *>(ptr)->_sycl_enqk_begin_timestamp == 0) {
std::cout << "------------> Something wrong: Sycl Enq Launch Kernel Time is 0" << std::endl;

}
break;
}
default: {
Expand Down Expand Up @@ -368,18 +376,24 @@ int main(int argc, char* argv[]) {
coeff[i] = coeff[i] / (dxyz * dxyz);
}

std::cout << "Grid Sizes: " << n1 - 2 * kHalfLength << " "
<< n2 - 2 * kHalfLength << " " << n3 - 2 * kHalfLength << "\n";
std::cout << "Memory Usage: " << ((3 * nsize * sizeof(float)) / (1024 * 1024))
<< " MB\n";
{
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
std::cout << "Grid Sizes: " << n1 - 2 * kHalfLength << " "
<< n2 - 2 * kHalfLength << " " << n3 - 2 * kHalfLength << "\n";
std::cout << "Memory Usage: " << ((3 * nsize * sizeof(float)) / (1024 * 1024))
<< " MB\n";
}

// Check if running OpenMP OR Serial version on CPU
if (omp) {
{
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
#if defined(_OPENMP)
std::cout << " ***** Running OpenMP variant *****\n";
std::cout << " ***** Running OpenMP variant *****\n";
#else
std::cout << " ***** Running C++ Serial variant *****\n";
std::cout << " ***** Running C++ Serial variant *****\n";
#endif
}

// Initialize arrays and introduce initial conditions (source)
Initialize(prev_base, next_base, vel_base, n1, n2, n3);
Expand Down Expand Up @@ -409,7 +423,10 @@ int main(int argc, char* argv[]) {
// Check if running SYCL version
if (sycl) {
try {
std::cout << " ***** Running SYCL variant *****\n";
{
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
std::cout << " ***** Running SYCL variant *****\n";
}
// Initialize arrays and introduce initial conditions (source)
Initialize(prev_base, next_base, vel_base, n1, n2, n3);

Expand Down Expand Up @@ -473,13 +490,17 @@ int main(int argc, char* argv[]) {
error = WithinEpsilon(prev_base, temp, n1, n2, n3, kHalfLength, 0, 0.1f);
}
if (error) {
std::cout << "Final wavefields from SYCL device and CPU are not "
std::cerr << "Final wavefields from SYCL device and CPU are not "
<< "equivalent: Fail\n";
} else {
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
std::cout << "Final wavefields from SYCL device and CPU are equivalent:"
<< " Success\n";
}
std::cout << "--------------------------------------\n";
{
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
std::cout << "--------------------------------------\n";
}
delete[] temp;
}

Expand Down
5 changes: 4 additions & 1 deletion sdk/samples/iso3dfd_dpcpp/src/iso3dfd_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,10 @@ bool Iso3dfdDevice(sycl::queue &q, float *ptr_next, float *ptr_prev,
// Iterate over time steps
for (auto i = 0; i < nIterations; i += 1) {
// Submit command group for execution
std::cout << "Q Submitting at: " << i << ": " << std::dec << GetTime() << std::endl;
{
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
std::cout << "Q Submitting at: " << i << ": " << std::dec << GetTime() << std::endl;
}
q.submit([&](auto &h) {
// Create accessors
accessor next(b_ptr_next, h);
Expand Down
9 changes: 1 addition & 8 deletions sdk/src/levelzero/ze_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -396,14 +396,7 @@ class GlobalZeInitializer {
public:
inline static ze_result_t Initialize() {
utils::SetEnv("ZE_ENABLE_TRACING_LAYER", "1");
overhead::Init();
ze_result_t status = zeInit(ZE_INIT_FLAG_GPU_ONLY);
{
std::string o_api_string = "zeInit";
overhead::FiniLevel0(overhead::OverheadRuntimeType::OVERHEAD_RUNTIME_TYPE_L0,
o_api_string.c_str());
};
return status;
return zeInit(ZE_INIT_FLAG_GPU_ONLY);
}

inline static ze_result_t result_ = Initialize();
Expand Down
10 changes: 5 additions & 5 deletions sdk/src/overhead_kinds.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ typedef enum _pti_view_overhead_view_kind {
} pti_view_overhead_view_kind;

// TODO: redo this approach to enable/disable state tracking.
static std::atomic<bool> overhead_collection_enabled = false;
inline static std::atomic<bool> overhead_collection_enabled = false;

inline constexpr auto kOhThreshold =
1.00; // 1ns threshhold by default -- TODO -- make this setAttributable
Expand Down Expand Up @@ -144,8 +144,8 @@ inline void FiniLevel0(OverheadRuntimeType runtime_type,
ocallback_(&overhead_it->second, overhead_data);
}
ResetRecord();
};
};
}
}
}

inline void FiniSycl(OverheadRuntimeType runtime_type) {
Expand Down Expand Up @@ -173,8 +173,8 @@ inline void FiniSycl(OverheadRuntimeType runtime_type) {
ocallback_(&overhead_it->second, overhead_data);
}
ResetRecord();
};
};
}
}
}

} // namespace overhead
Expand Down
Loading

0 comments on commit 1f6e151

Please sign in to comment.