From 6c75eb15b649766ae127c2afe780e0293584b7fe Mon Sep 17 00:00:00 2001 From: Rafal Bielski Date: Tue, 5 Nov 2024 17:21:36 +0000 Subject: [PATCH 1/4] [UR][CUDA][HIP] Fix incorrect outputs and improve performance of queue::fill Update the UR tag to fix queue::fill for the CUDA and HIP backends, which was previously producing incorrect outputs for any pattern size other than 1, 2, or a multiple of 4 bytes. A new optimisation is also added which speeds up the fill greatly if the pattern equals to the first word repeated throughout (e.g. all zeros). Add a new e2e test to validate queue::fill outputs for any pattern size between 1 and 32 bytes. This test fails for CUDA and HIP before the UR change and passes with this PR. Other backends already worked correctly. --- sycl/test-e2e/USM/fill_any_size.cpp | 79 +++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 sycl/test-e2e/USM/fill_any_size.cpp diff --git a/sycl/test-e2e/USM/fill_any_size.cpp b/sycl/test-e2e/USM/fill_any_size.cpp new file mode 100644 index 0000000000000..9f2cc1de9a390 --- /dev/null +++ b/sycl/test-e2e/USM/fill_any_size.cpp @@ -0,0 +1,79 @@ +// RUN: %{build} -o %t1.out +// RUN: %{run} %t1.out + +/** + * Test of the queue::fill interface with a range of pattern sizes and values. + * + * Loops over pattern sizes from 1 to MaxPatternSize bytes and calls queue::fill + * with std::array for the pattern. Two pattern values are tested, + * all zeros and value=index+42. The output is copied back to host and + * validated. + */ + +#include +#include +#include +#include + +constexpr size_t MaxPatternSize{32}; // bytes +constexpr size_t NumElements{10}; +constexpr size_t NumRepeats{1}; +constexpr bool verbose{false}; + +template +int test(sycl::queue &q, uint8_t firstValue = 0) { + using T = std::array; + T value{}; + for (unsigned int i{0}; i < PatternSize; ++i) { + if constexpr (SameValue) { + value[0] = firstValue; + } else { + value[i] = firstValue + i; + } + } + + T *dptr{sycl::malloc_device(NumElements, q)}; + for (size_t repeat{0}; repeat < NumRepeats; ++repeat) { + q.fill(dptr, value, NumElements).wait(); + } + + std::array host{}; + q.copy(dptr, host.data(), NumElements).wait(); + bool pass{true}; + for (unsigned int i{0}; i < NumElements; ++i) { + for (unsigned int j{0}; j < PatternSize; ++j) { + if (host[i][j] != value[j]) { + pass = false; + } + } + } + sycl::free(dptr, q); + + if (!pass || verbose) { + printf("Pattern size %3lu bytes, %s values (initial %3u) %s\n", PatternSize, + (SameValue ? " equal" : "varied"), firstValue, + (pass ? "== PASS ==" : "== FAIL ==")); + } + + return !pass; +} + +template int testOneSize(sycl::queue &q) { + return test(q, 0) + test(q, 42); +} + +template +int testSizes(sycl::queue &q, std::index_sequence) { + return (testOneSize<1u + Sizes>(q) + ...); +} + +int main() { + sycl::queue q{}; + int failures = testSizes(q, std::make_index_sequence{}); + if (failures > 0) { + printf("%d / %lu tests failed\n", failures, 2 * MaxPatternSize); + } else { + printf("All %lu tests passed\n", 2 * MaxPatternSize); + } + return failures; +} From dbe6aa5659eff1eb3b8b4f03cb3878aeb46b8f24 Mon Sep 17 00:00:00 2001 From: Rafal Bielski Date: Fri, 8 Nov 2024 12:44:27 +0000 Subject: [PATCH 2/4] Apply review suggestions --- sycl/test-e2e/USM/fill_any_size.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sycl/test-e2e/USM/fill_any_size.cpp b/sycl/test-e2e/USM/fill_any_size.cpp index 9f2cc1de9a390..5dff6245fce9d 100644 --- a/sycl/test-e2e/USM/fill_any_size.cpp +++ b/sycl/test-e2e/USM/fill_any_size.cpp @@ -15,7 +15,7 @@ #include #include -constexpr size_t MaxPatternSize{32}; // bytes +constexpr size_t MaxPatternSize{32}; // Bytes. constexpr size_t NumElements{10}; constexpr size_t NumRepeats{1}; constexpr bool verbose{false}; @@ -24,9 +24,9 @@ template int test(sycl::queue &q, uint8_t firstValue = 0) { using T = std::array; T value{}; - for (unsigned int i{0}; i < PatternSize; ++i) { + for (size_t i{0}; i < PatternSize; ++i) { if constexpr (SameValue) { - value[0] = firstValue; + value[i] = firstValue; } else { value[i] = firstValue + i; } @@ -40,8 +40,8 @@ int test(sycl::queue &q, uint8_t firstValue = 0) { std::array host{}; q.copy(dptr, host.data(), NumElements).wait(); bool pass{true}; - for (unsigned int i{0}; i < NumElements; ++i) { - for (unsigned int j{0}; j < PatternSize; ++j) { + for (size_t i{0}; i < NumElements; ++i) { + for (size_t j{0}; j < PatternSize; ++j) { if (host[i][j] != value[j]) { pass = false; } From e86712f77e37d42b2c4e18c39cab50b4f11694f9 Mon Sep 17 00:00:00 2001 From: Rafal Bielski Date: Fri, 15 Nov 2024 15:25:01 +0000 Subject: [PATCH 3/4] Update printf format following variable type change to size_t --- sycl/test-e2e/USM/fill_any_size.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/test-e2e/USM/fill_any_size.cpp b/sycl/test-e2e/USM/fill_any_size.cpp index 5dff6245fce9d..abc06d6a2b941 100644 --- a/sycl/test-e2e/USM/fill_any_size.cpp +++ b/sycl/test-e2e/USM/fill_any_size.cpp @@ -50,7 +50,7 @@ int test(sycl::queue &q, uint8_t firstValue = 0) { sycl::free(dptr, q); if (!pass || verbose) { - printf("Pattern size %3lu bytes, %s values (initial %3u) %s\n", PatternSize, + printf("Pattern size %3zu bytes, %s values (initial %3u) %s\n", PatternSize, (SameValue ? " equal" : "varied"), firstValue, (pass ? "== PASS ==" : "== FAIL ==")); } @@ -71,9 +71,9 @@ int main() { sycl::queue q{}; int failures = testSizes(q, std::make_index_sequence{}); if (failures > 0) { - printf("%d / %lu tests failed\n", failures, 2 * MaxPatternSize); + printf("%d / %zu tests failed\n", failures, 2u * MaxPatternSize); } else { - printf("All %lu tests passed\n", 2 * MaxPatternSize); + printf("All %zu tests passed\n", 2u * MaxPatternSize); } return failures; } From af5b89b846baa95613116b8fac5e7facb3311e91 Mon Sep 17 00:00:00 2001 From: Rafal Bielski Date: Mon, 9 Dec 2024 14:27:31 +0000 Subject: [PATCH 4/4] Add XFAIL for OpenCL CPU --- sycl/test-e2e/USM/fill_any_size.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/test-e2e/USM/fill_any_size.cpp b/sycl/test-e2e/USM/fill_any_size.cpp index abc06d6a2b941..91f215d517633 100644 --- a/sycl/test-e2e/USM/fill_any_size.cpp +++ b/sycl/test-e2e/USM/fill_any_size.cpp @@ -1,5 +1,7 @@ // RUN: %{build} -o %t1.out // RUN: %{run} %t1.out +// XFAIL: (opencl && cpu) +// XFAIL-TRACKER: https://github.com/oneapi-src/unified-runtime/issues/2440 /** * Test of the queue::fill interface with a range of pattern sizes and values.