Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: [SYCL][COMPAT] Math-ops failures in gen12 CI #15961

Draft
wants to merge 18 commits into
base: sycl
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 23 additions & 23 deletions .github/workflows/sycl-linux-precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,35 +75,35 @@ jobs:
fail-fast: false
matrix:
include:
- name: NVIDIA/CUDA
runner: '["Linux", "cuda"]'
image: ghcr.io/intel/llvm/ubuntu2204_build:latest
image_options: -u 1001 --gpus all --cap-add SYS_ADMIN
target_devices: ext_oneapi_cuda:gpu
# - name: NVIDIA/CUDA
# runner: '["Linux", "cuda"]'
# image: ghcr.io/intel/llvm/ubuntu2204_build:latest
# image_options: -u 1001 --gpus all --cap-add SYS_ADMIN
# target_devices: ext_oneapi_cuda:gpu
- name: Intel
runner: '["Linux", "gen12"]'
image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest
image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
target_devices: level_zero:gpu;opencl:gpu;opencl:cpu
reset_intel_gpu: true
extra_lit_opts: --param gpu-intel-gen12=True
- name: E2E tests on Intel Arc A-Series Graphics
runner: '["Linux", "arc"]'
image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest
image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
target_devices: level_zero:gpu;opencl:gpu
reset_intel_gpu: true
extra_lit_opts: --param matrix-xmx8=True --param gpu-intel-dg2=True
env: '{"LIT_FILTER":${{ needs.determine_arc_tests.outputs.arc_tests }} }'
- name: E2E tests with dev igc on Intel Arc A-Series Graphics
runner: '["Linux", "arc"]'
image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:devigc
image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
target_devices: level_zero:gpu;opencl:gpu
reset_intel_gpu: true
extra_lit_opts: --param matrix-xmx8=True --param gpu-intel-dg2=True
env: '{"LIT_FILTER":${{ needs.determine_arc_tests.outputs.arc_tests }} }'
use_igc_dev: true
extra_lit_opts: --filter syclcompat --param gpu-intel-gen12=True
# - name: E2E tests on Intel Arc A-Series Graphics
# runner: '["Linux", "arc"]'
# image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest
# image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
# target_devices: level_zero:gpu;opencl:gpu
# reset_intel_gpu: true
# extra_lit_opts: --param matrix-xmx8=True --param gpu-intel-dg2=True
# env: '{"LIT_FILTER":${{ needs.determine_arc_tests.outputs.arc_tests }} }'
# - name: E2E tests with dev igc on Intel Arc A-Series Graphics
# runner: '["Linux", "arc"]'
# image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:devigc
# image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
# target_devices: level_zero:gpu;opencl:gpu
# reset_intel_gpu: true
# extra_lit_opts: --param matrix-xmx8=True --param gpu-intel-dg2=True
# env: '{"LIT_FILTER":${{ needs.determine_arc_tests.outputs.arc_tests }} }'
# use_igc_dev: true

uses: ./.github/workflows/sycl-linux-run-tests.yml
with:
Expand Down
44 changes: 22 additions & 22 deletions .github/workflows/sycl-windows-precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,25 +34,25 @@ jobs:
detect_changes:
uses: ./.github/workflows/sycl-detect-changes.yml

build:
needs: [detect_changes]
if: |
always() && success()
&& github.repository == 'intel/llvm'
uses: ./.github/workflows/sycl-windows-build.yml
with:
changes: ${{ needs.detect_changes.outputs.filters }}

e2e:
needs: build
# Continue if build was successful.
if: |
always()
&& !cancelled()
&& needs.build.outputs.build_conclusion == 'success'
uses: ./.github/workflows/sycl-windows-run-tests.yml
with:
name: Intel GEN12 Graphics with Level Zero
runner: '["Windows","gen12"]'
sycl_toolchain_archive: ${{ needs.build.outputs.artifact_archive_name }}
extra_lit_opts: --param gpu-intel-gen12=True
# build:
# needs: [detect_changes]
# if: |
# always() && success()
# && github.repository == 'intel/llvm'
# uses: ./.github/workflows/sycl-windows-build.yml
# with:
# changes: ${{ needs.detect_changes.outputs.filters }}
#
# e2e:
# needs: build
# # Continue if build was successful.
# if: |
# always()
# && !cancelled()
# && needs.build.outputs.build_conclusion == 'success'
# uses: ./.github/workflows/sycl-windows-run-tests.yml
# with:
# name: Intel GEN12 Graphics with Level Zero
# runner: '["Windows","gen12"]'
# sycl_toolchain_archive: ${{ needs.build.outputs.artifact_archive_name }}
# extra_lit_opts: --param gpu-intel-gen12=True
2 changes: 1 addition & 1 deletion sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
set(UNIFIED_RUNTIME_REPO "https://github.com/winstonzhang-intel/unified-runtime.git")
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
Expand Down
15 changes: 8 additions & 7 deletions sycl/cmake/modules/UnifiedRuntimeTag.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# commit b0a9e2be61ad42d3447f1f246120ab25119a03e0
# Merge: fa8cc8ec a0cf2ce2
# Author: Callum Fare <[email protected]>
# Date: Mon Nov 4 10:00:08 2024 +0000
# Merge pull request #2165 from aarongreig/aaron/makeUSMPoolsOptional
# Make USM pools optional with a device query to report support.
set(UNIFIED_RUNTIME_TAG b0a9e2be61ad42d3447f1f246120ab25119a03e0)
# commit fa8cc8ec16c1a2cf0926cc64026edc6a254ff0c2
# Merge: 3d58884b 1984ceb1
# Author: aarongreig <[email protected]>

# Date: Thu Oct 31 14:05:55 2024 +0000
# Merge pull request #2228 from nrspruit/copy_engine_refactor
# [L0] Refactor Copy Engine Usage checks for Performance
set(UNIFIED_RUNTIME_TAG 65ccdbd4355e4bc7b3e3e0f6d841a2d900871f0a)
17 changes: 12 additions & 5 deletions sycl/test-e2e/syclcompat/math/math_fixt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ template <typename T, typename U> struct container_common_type<T, U> {
template <typename T, typename U>
using container_common_type_t = typename container_common_type<T, U>::type;

template <typename ...ValueT> struct should_skip {
template <typename... ValueT> struct should_skip {
bool operator()(const sycl::device &dev) const {
if constexpr ((std::is_same_v<ValueT, double> || ...) ||
(contained_is_same_v<ValueT, double> || ...)) {
Expand All @@ -92,21 +92,28 @@ template <typename ...ValueT> struct should_skip {

#define CHECK(ResultT, RESULT, EXPECTED) \
if constexpr (std::is_integral_v<ResultT>) { \
assert(RESULT == EXPECTED); \
assert(RESULT == EXPECTED || \
!(std::cerr << "-- " << RESULT << " - " << EXPECTED << " --")); \
} else if constexpr (contained_is_integral_v<ResultT>) { \
for (size_t i = 0; i < RESULT.size(); i++) \
assert(RESULT[i] == EXPECTED[i]); \
assert(RESULT[i] == EXPECTED[i] || \
!(std::cerr << "-- " << RESULT[i] << " - " << EXPECTED[i] \
<< " --")); \
} else if constexpr (syclcompat::is_floating_point_v<ResultT>) { \
if (syclcompat::detail::isnan(RESULT)) \
assert(syclcompat::detail::isnan(EXPECTED)); \
else \
assert(fabs(RESULT - EXPECTED) < ERROR_TOLERANCE); \
assert(fabs(RESULT - EXPECTED) < ERROR_TOLERANCE || \
!(std::cerr << "-- " << RESULT << " - " << EXPECTED << " < " \
<< ERROR_TOLERANCE << "-- ")); \
} else if constexpr (contained_is_floating_point_v<ResultT>) { \
for (size_t i = 0; i < RESULT.size(); i++) { \
if (syclcompat::detail::isnan(RESULT[i])) { \
assert(syclcompat::detail::isnan(EXPECTED[i])); \
} else { \
assert(fabs(RESULT[i] - EXPECTED[i]) < ERROR_TOLERANCE); \
assert((fabs(RESULT[i] - EXPECTED[i]) < ERROR_TOLERANCE) || \
!(std::cerr << "-- " << RESULT[i] << " - " << EXPECTED[i] \
<< " < " << ERROR_TOLERANCE << "-- ")); \
} \
} \
} else { \
Expand Down
5 changes: 4 additions & 1 deletion sycl/test-e2e/syclcompat/math/math_length_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ class LengthLauncher {
float sum =
std::inner_product(result.begin(), result.end(), result.begin(), 0.0f);
float diff = fabs(sqrtf(sum)) - host_result_;
assert(diff <= 1.e-5);
assert(diff <= 1.e-5 || !(std::cerr << "-- " << fabs(sqrtf(sum)) << " - "
<< host_result_ << " < "
<< "1.e-5"
<< " --"));
}

template <auto F> void launch(std::vector<float> vec) {
Expand Down
74 changes: 74 additions & 0 deletions sycl/test-e2e/syclcompat/math/repro.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%}

// RUN: %{build} %{mathflags} -o %t.out
// RUN: %{run} %t.out

#include "sycl/detail/builtins/builtins.hpp"
#include <sycl/usm.hpp>

#include <syclcompat/dims.hpp>
#include <syclcompat/math.hpp>

inline void fmax_nan_kernel(float *a, float *b, sycl::vec<float, 2> *r) {
*r = syclcompat::fmax_nan(*a, *b);
}

void test_container_syclcompat_fmax_nan() {
std::cout << __PRETTY_FUNCTION__ << std::endl;
sycl::queue q;

sycl::range global{1};
sycl::range local{1};
sycl::nd_range ndr{global, local};

const sycl::vec<float, 2> op1 = {5.0f, 10.0f};
const sycl::vec<float, 2> op2 = {10.0f, 5.0f};
const sycl::vec<float, 2> expected{static_cast<float>(10),
static_cast<float>(10)};
sycl::vec<float, 2> res;

sycl::vec<float, 2> *op1_d = sycl::malloc_device<sycl::vec<float, 2>>(1, q);
sycl::vec<float, 2> *op2_d = sycl::malloc_device<sycl::vec<float, 2>>(1, q);
sycl::vec<float, 2> *res_d = sycl::malloc_device<sycl::vec<float, 2>>(1, q);

q.memcpy(op1_d, &op1, sizeof(sycl::vec<float, 2>));
q.memcpy(op2_d, &op2, sizeof(sycl::vec<float, 2>));
q.submit([&](sycl::handler &cgh) {
cgh.parallel_for(ndr, [=](sycl::nd_item<1> nd_item) {
*res_d = syclcompat::fmax_nan(*op1_d, *op2_d);
});
}).wait_and_throw();
q.memcpy(&res, res_d, sizeof(sycl::vec<float, 2>)).wait();

constexpr float ERROR_TOLERANCE = 1e-6;
for (size_t i = 0; i < 2; i++) {
assert((res[i] - expected[i]) < ERROR_TOLERANCE ||
!(std::cerr << "-- " << res[i] << " - " << expected[i] << " < "
<< ERROR_TOLERANCE << " --"));
}

const sycl::vec<float, 2> op3 = {sycl::nan(static_cast<unsigned int>(0)),
sycl::nan(static_cast<unsigned int>(0))};

q.memcpy(op2_d, &op3, sizeof(sycl::vec<float, 2>));
q.submit([&](sycl::handler &cgh) {
cgh.parallel_for(ndr, [=](sycl::nd_item<1> nd_item) {
*res_d = syclcompat::fmax_nan(*op1_d, *op2_d);
});
}).wait_and_throw();
q.memcpy(&res, res_d, sizeof(sycl::vec<float, 2>)).wait();

for (size_t i = 0; i < 2; i++) {
assert(sycl::isnan(res[i]));
}

sycl::free(op1_d, q);
sycl::free(op2_d, q);
sycl::free(res_d, q);
}

int main() {
test_container_syclcompat_fmax_nan();

return 0;
}
Loading