diff --git a/.github/workflows/sycl-linux-precommit.yml b/.github/workflows/sycl-linux-precommit.yml index a73324e33fec5..30936dc8a9554 100644 --- a/.github/workflows/sycl-linux-precommit.yml +++ b/.github/workflows/sycl-linux-precommit.yml @@ -75,35 +75,35 @@ jobs: fail-fast: false matrix: include: - - name: NVIDIA/CUDA - runner: '["Linux", "cuda"]' - image: ghcr.io/intel/llvm/ubuntu2204_build:latest - image_options: -u 1001 --gpus all --cap-add SYS_ADMIN - target_devices: ext_oneapi_cuda:gpu + # - name: NVIDIA/CUDA + # runner: '["Linux", "cuda"]' + # image: ghcr.io/intel/llvm/ubuntu2204_build:latest + # image_options: -u 1001 --gpus all --cap-add SYS_ADMIN + # target_devices: ext_oneapi_cuda:gpu - name: Intel runner: '["Linux", "gen12"]' image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN target_devices: level_zero:gpu;opencl:gpu;opencl:cpu reset_intel_gpu: true - extra_lit_opts: --param gpu-intel-gen12=True - - name: E2E tests on Intel Arc A-Series Graphics - runner: '["Linux", "arc"]' - image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest - image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN - target_devices: level_zero:gpu;opencl:gpu - reset_intel_gpu: true - extra_lit_opts: --param matrix-xmx8=True --param gpu-intel-dg2=True - env: '{"LIT_FILTER":${{ needs.determine_arc_tests.outputs.arc_tests }} }' - - name: E2E tests with dev igc on Intel Arc A-Series Graphics - runner: '["Linux", "arc"]' - image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:devigc - image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN - target_devices: level_zero:gpu;opencl:gpu - reset_intel_gpu: true - extra_lit_opts: --param matrix-xmx8=True --param gpu-intel-dg2=True - env: '{"LIT_FILTER":${{ needs.determine_arc_tests.outputs.arc_tests }} }' - use_igc_dev: true + extra_lit_opts: --filter syclcompat --param gpu-intel-gen12=True + # - name: E2E tests on Intel Arc A-Series Graphics + # runner: '["Linux", "arc"]' + # image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest + # image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + # target_devices: level_zero:gpu;opencl:gpu + # reset_intel_gpu: true + # extra_lit_opts: --param matrix-xmx8=True --param gpu-intel-dg2=True + # env: '{"LIT_FILTER":${{ needs.determine_arc_tests.outputs.arc_tests }} }' + # - name: E2E tests with dev igc on Intel Arc A-Series Graphics + # runner: '["Linux", "arc"]' + # image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:devigc + # image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + # target_devices: level_zero:gpu;opencl:gpu + # reset_intel_gpu: true + # extra_lit_opts: --param matrix-xmx8=True --param gpu-intel-dg2=True + # env: '{"LIT_FILTER":${{ needs.determine_arc_tests.outputs.arc_tests }} }' + # use_igc_dev: true uses: ./.github/workflows/sycl-linux-run-tests.yml with: diff --git a/.github/workflows/sycl-windows-precommit.yml b/.github/workflows/sycl-windows-precommit.yml index a82c2b7814d75..f2d3bdf56dbf2 100644 --- a/.github/workflows/sycl-windows-precommit.yml +++ b/.github/workflows/sycl-windows-precommit.yml @@ -34,25 +34,25 @@ jobs: detect_changes: uses: ./.github/workflows/sycl-detect-changes.yml - build: - needs: [detect_changes] - if: | - always() && success() - && github.repository == 'intel/llvm' - uses: ./.github/workflows/sycl-windows-build.yml - with: - changes: ${{ needs.detect_changes.outputs.filters }} - - e2e: - needs: build - # Continue if build was successful. - if: | - always() - && !cancelled() - && needs.build.outputs.build_conclusion == 'success' - uses: ./.github/workflows/sycl-windows-run-tests.yml - with: - name: Intel GEN12 Graphics with Level Zero - runner: '["Windows","gen12"]' - sycl_toolchain_archive: ${{ needs.build.outputs.artifact_archive_name }} - extra_lit_opts: --param gpu-intel-gen12=True + # build: + # needs: [detect_changes] + # if: | + # always() && success() + # && github.repository == 'intel/llvm' + # uses: ./.github/workflows/sycl-windows-build.yml + # with: + # changes: ${{ needs.detect_changes.outputs.filters }} + # + # e2e: + # needs: build + # # Continue if build was successful. + # if: | + # always() + # && !cancelled() + # && needs.build.outputs.build_conclusion == 'success' + # uses: ./.github/workflows/sycl-windows-run-tests.yml + # with: + # name: Intel GEN12 Graphics with Level Zero + # runner: '["Windows","gen12"]' + # sycl_toolchain_archive: ${{ needs.build.outputs.artifact_archive_name }} + # extra_lit_opts: --param gpu-intel-gen12=True diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 72841724fa01d..e83971a3582fe 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/winstonzhang-intel/unified-runtime.git") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 01bd0cd4d9586..fb8dbbec7a056 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,8 @@ -# commit b0a9e2be61ad42d3447f1f246120ab25119a03e0 -# Merge: fa8cc8ec a0cf2ce2 -# Author: Callum Fare -# Date: Mon Nov 4 10:00:08 2024 +0000 -# Merge pull request #2165 from aarongreig/aaron/makeUSMPoolsOptional -# Make USM pools optional with a device query to report support. -set(UNIFIED_RUNTIME_TAG b0a9e2be61ad42d3447f1f246120ab25119a03e0) +# commit fa8cc8ec16c1a2cf0926cc64026edc6a254ff0c2 +# Merge: 3d58884b 1984ceb1 +# Author: aarongreig + +# Date: Thu Oct 31 14:05:55 2024 +0000 +# Merge pull request #2228 from nrspruit/copy_engine_refactor +# [L0] Refactor Copy Engine Usage checks for Performance +set(UNIFIED_RUNTIME_TAG 65ccdbd4355e4bc7b3e3e0f6d841a2d900871f0a) diff --git a/sycl/test-e2e/syclcompat/math/math_fixt.hpp b/sycl/test-e2e/syclcompat/math/math_fixt.hpp index 4647142da6c61..4b0cbdb8cfdff 100644 --- a/sycl/test-e2e/syclcompat/math/math_fixt.hpp +++ b/sycl/test-e2e/syclcompat/math/math_fixt.hpp @@ -68,7 +68,7 @@ template struct container_common_type { template using container_common_type_t = typename container_common_type::type; -template struct should_skip { +template struct should_skip { bool operator()(const sycl::device &dev) const { if constexpr ((std::is_same_v || ...) || (contained_is_same_v || ...)) { @@ -92,21 +92,28 @@ template struct should_skip { #define CHECK(ResultT, RESULT, EXPECTED) \ if constexpr (std::is_integral_v) { \ - assert(RESULT == EXPECTED); \ + assert(RESULT == EXPECTED || \ + !(std::cerr << "-- " << RESULT << " - " << EXPECTED << " --")); \ } else if constexpr (contained_is_integral_v) { \ for (size_t i = 0; i < RESULT.size(); i++) \ - assert(RESULT[i] == EXPECTED[i]); \ + assert(RESULT[i] == EXPECTED[i] || \ + !(std::cerr << "-- " << RESULT[i] << " - " << EXPECTED[i] \ + << " --")); \ } else if constexpr (syclcompat::is_floating_point_v) { \ if (syclcompat::detail::isnan(RESULT)) \ assert(syclcompat::detail::isnan(EXPECTED)); \ else \ - assert(fabs(RESULT - EXPECTED) < ERROR_TOLERANCE); \ + assert(fabs(RESULT - EXPECTED) < ERROR_TOLERANCE || \ + !(std::cerr << "-- " << RESULT << " - " << EXPECTED << " < " \ + << ERROR_TOLERANCE << "-- ")); \ } else if constexpr (contained_is_floating_point_v) { \ for (size_t i = 0; i < RESULT.size(); i++) { \ if (syclcompat::detail::isnan(RESULT[i])) { \ assert(syclcompat::detail::isnan(EXPECTED[i])); \ } else { \ - assert(fabs(RESULT[i] - EXPECTED[i]) < ERROR_TOLERANCE); \ + assert((fabs(RESULT[i] - EXPECTED[i]) < ERROR_TOLERANCE) || \ + !(std::cerr << "-- " << RESULT[i] << " - " << EXPECTED[i] \ + << " < " << ERROR_TOLERANCE << "-- ")); \ } \ } \ } else { \ diff --git a/sycl/test-e2e/syclcompat/math/math_length_test.cpp b/sycl/test-e2e/syclcompat/math/math_length_test.cpp index 24effadfa5398..dd43994ea08a6 100644 --- a/sycl/test-e2e/syclcompat/math/math_length_test.cpp +++ b/sycl/test-e2e/syclcompat/math/math_length_test.cpp @@ -69,7 +69,10 @@ class LengthLauncher { float sum = std::inner_product(result.begin(), result.end(), result.begin(), 0.0f); float diff = fabs(sqrtf(sum)) - host_result_; - assert(diff <= 1.e-5); + assert(diff <= 1.e-5 || !(std::cerr << "-- " << fabs(sqrtf(sum)) << " - " + << host_result_ << " < " + << "1.e-5" + << " --")); } template void launch(std::vector vec) { diff --git a/sycl/test-e2e/syclcompat/math/repro.cpp b/sycl/test-e2e/syclcompat/math/repro.cpp new file mode 100644 index 0000000000000..714978280fa25 --- /dev/null +++ b/sycl/test-e2e/syclcompat/math/repro.cpp @@ -0,0 +1,74 @@ +// DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} + +// RUN: %{build} %{mathflags} -o %t.out +// RUN: %{run} %t.out + +#include "sycl/detail/builtins/builtins.hpp" +#include + +#include +#include + +inline void fmax_nan_kernel(float *a, float *b, sycl::vec *r) { + *r = syclcompat::fmax_nan(*a, *b); +} + +void test_container_syclcompat_fmax_nan() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + sycl::queue q; + + sycl::range global{1}; + sycl::range local{1}; + sycl::nd_range ndr{global, local}; + + const sycl::vec op1 = {5.0f, 10.0f}; + const sycl::vec op2 = {10.0f, 5.0f}; + const sycl::vec expected{static_cast(10), + static_cast(10)}; + sycl::vec res; + + sycl::vec *op1_d = sycl::malloc_device>(1, q); + sycl::vec *op2_d = sycl::malloc_device>(1, q); + sycl::vec *res_d = sycl::malloc_device>(1, q); + + q.memcpy(op1_d, &op1, sizeof(sycl::vec)); + q.memcpy(op2_d, &op2, sizeof(sycl::vec)); + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for(ndr, [=](sycl::nd_item<1> nd_item) { + *res_d = syclcompat::fmax_nan(*op1_d, *op2_d); + }); + }).wait_and_throw(); + q.memcpy(&res, res_d, sizeof(sycl::vec)).wait(); + + constexpr float ERROR_TOLERANCE = 1e-6; + for (size_t i = 0; i < 2; i++) { + assert((res[i] - expected[i]) < ERROR_TOLERANCE || + !(std::cerr << "-- " << res[i] << " - " << expected[i] << " < " + << ERROR_TOLERANCE << " --")); + } + + const sycl::vec op3 = {sycl::nan(static_cast(0)), + sycl::nan(static_cast(0))}; + + q.memcpy(op2_d, &op3, sizeof(sycl::vec)); + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for(ndr, [=](sycl::nd_item<1> nd_item) { + *res_d = syclcompat::fmax_nan(*op1_d, *op2_d); + }); + }).wait_and_throw(); + q.memcpy(&res, res_d, sizeof(sycl::vec)).wait(); + + for (size_t i = 0; i < 2; i++) { + assert(sycl::isnan(res[i])); + } + + sycl::free(op1_d, q); + sycl::free(op2_d, q); + sycl::free(res_d, q); +} + +int main() { + test_container_syclcompat_fmax_nan(); + + return 0; +}