Skip to content

Commit

Permalink
Merge branch 'sycl' into e2e-split-ci
Browse files Browse the repository at this point in the history
  • Loading branch information
ayylol committed Nov 29, 2024
2 parents 33cc264 + 73b99be commit 11360d3
Show file tree
Hide file tree
Showing 59 changed files with 482 additions and 180 deletions.
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1789,6 +1789,9 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
if (SyclOptReport.HasOptReportInfo(FD)) {
llvm::OptimizationRemarkEmitter ORE(Fn);
for (auto ORI : llvm::enumerate(SyclOptReport.GetInfo(FD))) {
// Temporarily apply arg location to ensure SourceLocToDebugLoc
// picks up the expected file.
ApplyDebugLocation TempApplyLoc(*this, ORI.value().KernelArgLoc);
llvm::DiagnosticLocation DL =
SourceLocToDebugLoc(ORI.value().KernelArgLoc);
StringRef NameInDesc = ORI.value().KernelArgDescName;
Expand Down
23 changes: 23 additions & 0 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1618,6 +1618,23 @@ static std::vector<OptSpecifier> getUnsupportedOpts(void) {
return UnsupportedOpts;
}

// Currently supported options by SYCL NativeCPU device compilation
static inline bool SupportedByNativeCPU(const SYCLToolChain &TC,
const OptSpecifier &Opt) {
if (!TC.IsSYCLNativeCPU)
return false;

switch (Opt.getID()) {
case options::OPT_fcoverage_mapping:
case options::OPT_fno_coverage_mapping:
case options::OPT_fprofile_instr_generate:
case options::OPT_fprofile_instr_generate_EQ:
case options::OPT_fno_profile_instr_generate:
return true;
}
return false;
}

SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple,
const ToolChain &HostTC, const ArgList &Args)
: ToolChain(D, Triple, Args), HostTC(HostTC),
Expand All @@ -1629,6 +1646,9 @@ SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple,
// Diagnose unsupported options only once.
for (OptSpecifier Opt : getUnsupportedOpts()) {
if (const Arg *A = Args.getLastArg(Opt)) {
// Native CPU can support options unsupported by other targets.
if (SupportedByNativeCPU(*this, Opt))
continue;
// All sanitizer options are not currently supported, except
// AddressSanitizer
if (A->getOption().getID() == options::OPT_fsanitize_EQ &&
Expand Down Expand Up @@ -1669,6 +1689,9 @@ SYCLToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
bool Unsupported = false;
for (OptSpecifier UnsupportedOpt : getUnsupportedOpts()) {
if (Opt.matches(UnsupportedOpt)) {
// NativeCPU should allow most normal cpu options.
if (SupportedByNativeCPU(*this, Opt.getID()))
continue;
if (Opt.getID() == options::OPT_fsanitize_EQ &&
A->getValues().size() == 1) {
std::string SanitizeVal = A->getValue();
Expand Down
5 changes: 4 additions & 1 deletion clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3625,8 +3625,11 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
BodyStmts.insert(BodyStmts.end(), FinalizeStmts.begin(),
FinalizeStmts.end());

SourceLocation LL = NewBody ? NewBody->getBeginLoc() : SourceLocation();
SourceLocation LR = NewBody ? NewBody->getEndLoc() : SourceLocation();

return CompoundStmt::Create(SemaSYCLRef.getASTContext(), BodyStmts,
FPOptionsOverride(), {}, {});
FPOptionsOverride(), LL, LR);
}

void annotateHierarchicalParallelismAPICalls() {
Expand Down
6 changes: 6 additions & 0 deletions clang/test/Driver/sycl-native-cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,9 @@

// RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -### 2>&1 | FileCheck -check-prefix=CHECK-NONATIVECPU %s
// CHECK-NONATIVECPU-NOT: "-D" "__SYCL_NATIVE_CPU__"

// Checking that coverage testing options are accepted by native_cpu, and that device and host compilation invocations receive the same options
// RUN: %clangxx -fsycl -fsycl-targets=native_cpu -Werror -fno-profile-instr-generate -fprofile-instr-generate -fno-coverage-mapping -fcoverage-mapping -### %s 2>&1 | FileCheck %s --check-prefix=CHECK_COV_INVO
// CHECK_COV_INVO:{{.*}}clang{{.*}}-fsycl-is-device{{.*}}"-fsycl-is-native-cpu" "-D" "__SYCL_NATIVE_CPU__"{{.*}}"-fprofile-instrument=clang"{{.*}}"-fcoverage-mapping" "-fcoverage-compilation-dir={{.*}}"
// CHECK_COV_INVO:{{.*}}clang{{.*}}"-fsycl-is-host"{{.*}}"-fprofile-instrument=clang"{{.*}}"-fcoverage-mapping" "-fcoverage-compilation-dir={{.*}}"

22 changes: 22 additions & 0 deletions clang/test/SemaSYCL/kernel_functor_location.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// RUN: %clang_cc1 -fsycl-is-device -ast-dump %s | FileCheck %s
//
// Checks that the compound statement of the implicitly generated kernel body
// has a valid source location (containing "line"). Previously this location
// was invalid containing "<<invalid sloc>>" which causes asserts in the
// llvm profiling tools.

#include "Inputs/sycl.hpp"

struct Functor {
void operator()() const {}
};

// CHECK: FunctionDecl {{.*}} _ZTS7Functor 'void ()'
// CHECK-NEXT: |-CompoundStmt {{.*}} <{{.*}}line{{.*}}>

int main() {

sycl::queue().submit([&](sycl::handler &cgh) {
cgh.single_task(Functor{});
});
}
8 changes: 4 additions & 4 deletions devops/dependencies-igc-dev.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"linux": {
"igc_dev": {
"github_tag": "igc-dev-7dad678",
"version": "7dad678",
"updated_at": "2024-11-24T10:48:51Z",
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2229466354/zip",
"github_tag": "igc-dev-6ee988a",
"version": "6ee988a",
"updated_at": "2024-11-26T15:44:10Z",
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2239640503/zip",
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
}
}
Expand Down
8 changes: 8 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ constexpr char GENX_KERNEL_METADATA[] = "genx.kernels";
// sycl/ext/oneapi/experimental/invoke_simd.hpp::__builtin_invoke_simd
// overloads instantiations:
constexpr char INVOKE_SIMD_PREF[] = "_Z33__regcall3____builtin_invoke_simd";
// The regexp for ESIMD intrinsics:
// /^_Z(\d+)__esimd_\w+/
static constexpr char ESIMD_INTRIN_PREF0[] = "_Z";
static constexpr char ESIMD_INTRIN_PREF1[] = "__esimd_";

bool isSlmAllocatorConstructor(const Function &F);
bool isSlmAllocatorDestructor(const Function &F);
Expand Down Expand Up @@ -133,5 +137,9 @@ struct UpdateUint64MetaDataToMaxValue {
// functions has changed its attribute to alwaysinline.
bool prepareForAlwaysInliner(Module &M);

// Remove mangling from an ESIMD intrinsic function.
// Returns empty on pattern match failure.
StringRef stripMangling(StringRef FName);

} // namespace esimd
} // namespace llvm
10 changes: 10 additions & 0 deletions llvm/lib/SYCLLowerIR/ESIMD/ESIMDUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,16 @@ void UpdateUint64MetaDataToMaxValue::operator()(Function *F) const {
Node->replaceOperandWith(Key, getMetadata(New));
}
}
StringRef stripMangling(StringRef FName) {

// See if the Name represents an ESIMD intrinsic and demangle only if it
// does.
if (!FName.consume_front(ESIMD_INTRIN_PREF0))
return "";
// now skip the digits
FName = FName.drop_while([](char C) { return std::isdigit(C); });
return FName.starts_with("__esimd") ? FName : "";
}

} // namespace esimd
} // namespace llvm
13 changes: 4 additions & 9 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,6 @@ enum class lsc_subopcode : uint8_t {
read_state_info = 0x1e,
fence = 0x1f,
};
// The regexp for ESIMD intrinsics:
// /^_Z(\d+)__esimd_\w+/
static constexpr char ESIMD_INTRIN_PREF0[] = "_Z";
static constexpr char ESIMD_INTRIN_PREF1[] = "__esimd_";
static constexpr char ESIMD_INSERTED_VSTORE_FUNC_NAME[] = "_Z14__esimd_vstorev";
static constexpr char SPIRV_INTRIN_PREF[] = "__spirv_BuiltIn";
struct ESIMDIntrinDesc {
Expand Down Expand Up @@ -2178,12 +2174,11 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
}
StringRef Name = Callee->getName();

// See if the Name represents an ESIMD intrinsic and demangle only if it
// does.
if (!Name.consume_front(ESIMD_INTRIN_PREF0) && !isDevicelibFunction(Name))
if (!isDevicelibFunction(Name))
Name = stripMangling(Name);

if (Name.empty())
continue;
// now skip the digits
Name = Name.drop_while([](char C) { return std::isdigit(C); });

// process ESIMD builtins that go through special handling instead of
// the translation procedure
Expand Down
18 changes: 16 additions & 2 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMDKernelAttrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
// Finds and adds sycl_explicit_simd attributes to wrapper functions that wrap
// ESIMD kernel functions

#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h"
#include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h"
#include "llvm/SYCLLowerIR/SYCLUtils.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "LowerESIMDKernelAttrs"

Expand All @@ -34,7 +35,20 @@ PreservedAnalyses
SYCLFixupESIMDKernelWrapperMDPass::run(Module &M, ModuleAnalysisManager &MAM) {
bool Modified = false;
for (Function &F : M) {
if (llvm::esimd::isESIMD(F)) {
bool ShouldConsiderESIMD = llvm::esimd::isESIMD(F);
if (!ShouldConsiderESIMD) {
for (Instruction &I : instructions(F)) {
auto *CI = dyn_cast_or_null<CallInst>(&I);
if (!CI)
continue;
auto *CalledF = CI->getCalledFunction();
if (CalledF && !esimd::stripMangling(CalledF->getName()).empty()) {
ShouldConsiderESIMD = true;
break;
}
}
}
if (ShouldConsiderESIMD) {
// TODO: Keep track of traversed functions to avoid repeating traversals
// over same function.
sycl::utils::traverseCallgraphUp(
Expand Down
17 changes: 17 additions & 0 deletions llvm/test/SYCLLowerIR/ESIMD/prop_metadata.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
; This test verifies that we propagate the ESIMD attribute to a function that
; doesn't call any ESIMD-attribute functions but calls an ESIMD intrinsic

; RUN: opt -passes=lower-esimd-kernel-attrs -S < %s | FileCheck %s

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"

; CHECK: define dso_local spir_func void @FUNC() !sycl_explicit_simd
define dso_local spir_func void @FUNC() {
%a_1 = alloca <16 x float>
%1 = load <16 x float>, ptr %a_1
%ret_val = call spir_func <8 x float> @_Z16__esimd_rdregionIfLi16ELi8ELi0ELi8ELi1ELi0EEN2cm3gen13__vector_typeIT_XT1_EE4typeENS2_IS3_XT0_EE4typeEt(<16 x float> %1, i16 zeroext 0)
ret void
}

declare dso_local spir_func <8 x float> @_Z16__esimd_rdregionIfLi16ELi8ELi0ELi8ELi1ELi0EEN2cm3gen13__vector_typeIT_XT1_EE4typeENS2_IS3_XT0_EE4typeEt(<16 x float> %0, i16 zeroext %1)
2 changes: 1 addition & 1 deletion sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime")
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
Expand Down
12 changes: 6 additions & 6 deletions sycl/cmake/modules/UnifiedRuntimeTag.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# commit 0a90db9b2c36960c9b28ce18557ca15760724c4d
# Merge: c4d9fdb4 6e0bdeb9
# commit eb076da108a49ef1426f38690547a71905f58015
# Merge: d8d8ee90 46832dfd
# Author: Callum Fare <[email protected]>
# Date: Wed Nov 27 12:16:44 2024 +0000
# Merge pull request #2369 from Bensuo/ben/kernel-binary-update-l0
# [CMDBUF] Implement kernel binary update for L0 adapter
set(UNIFIED_RUNTIME_TAG 0a90db9b2c36960c9b28ce18557ca15760724c4d)
# Date: Fri Nov 29 15:54:31 2024 +0000
# Merge pull request #2396 from kswiecicki/init-results-fix
# [L0] Add nullopt check before init results access
set(UNIFIED_RUNTIME_TAG eb076da108a49ef1426f38690547a71905f58015)
37 changes: 28 additions & 9 deletions sycl/doc/design/SYCLNativeCPU.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# SYCL Native CPU

The SYCL Native CPU flow aims at treating the host CPU as a "first class citizen", providing a SYCL implementation that targets CPUs of various different architectures, with no other dependencies than DPC++ itself, while bringing performances comparable to state-of-the-art CPU backends.
The SYCL Native CPU flow aims at treating the host CPU as a "first class citizen", providing a SYCL implementation that targets CPUs of various different architectures, with no other dependencies than DPC++ itself, while bringing performances comparable to state-of-the-art CPU backends. SYCL Native CPU also provides some initial/experimental support for LLVM's [source-based code coverage tools](https://clang.llvm.org/docs/SourceBasedCodeCoverage.html) (see also section [Code coverage](#code-coverage)).

# Compiler and runtime options

The SYCL Native CPU flow is enabled by setting `native_cpu` as a `sycl-target` (please note that currently doing so overrides any other SYCL target specified in the compiler invocation):
The SYCL Native CPU flow is enabled by setting `native_cpu` as a `sycl-target`:

```
clang++ -fsycl -fsycl-targets=native_cpu <input> -o <output>
Expand All @@ -28,9 +28,16 @@ clang++ <device-ir> -o <device-o>
clang++ -L<sycl-lib-path> -lsycl <device-o> <host-o> -o <output>
```

Note that SYCL Native CPU co-exists alongside the other SYCL targets. For example, the following command line builds SYCL code simultaneously for SYCL Native CPU and for OpenCL.

```
clang++ -fsycl -fsycl-targets=native_cpu,spir64 <input> -o <output>
```
The application can then run on either SYCL target by setting the DPC++ `ONEAPI_DEVICE_SELECTOR` environment variable accordingly.

## Configuring DPC++ with SYCL Native CPU

SYCL Native CPU needs to be enabled explictly when configuring DPC++, using `--native_cpu`, e.g.
SYCL Native CPU needs to be enabled explicitly when configuring DPC++, using `--native_cpu`, e.g.

```
python buildbot/configure.py \
Expand Down Expand Up @@ -86,7 +93,19 @@ Whole Function Vectorization is enabled by default, and can be controlled throug
* `-mllvm -sycl-native-cpu-no-vecz`: disable Whole Function Vectorization.
* `-mllvm -sycl-native-cpu-vecz-width`: sets the vector width to the specified value, defaults to 8.

For more details on how the Whole Function Vectorizer is integrated for SYCL Native CPU, refer to the [Technical details[(#technical-details) section.
For more details on how the Whole Function Vectorizer is integrated for SYCL Native CPU, refer to the [Technical details](#technical-details) section.

# Code coverage

SYCL Native CPU has experimental support for LLVM's source-based [code coverage](https://clang.llvm.org/docs/SourceBasedCodeCoverage.html). This enables coverage testing across device and host code.
Example usage:

```bash
clang.exe -fsycl -fsycl-targets=native_cpu -fprofile-instr-generate -fcoverage-mapping %fname% -o vector-add.exe
.\vector-add.exe
llvm-profdata merge -sparse default.profraw -o foo.profdata
llvm-cov show .\vector-add.exe -instr-profile=foo.profdata
```

## Ongoing work

Expand All @@ -95,7 +114,7 @@ For more details on how the Whole Function Vectorizer is integrated for SYCL Nat
* Subgroup support
* Performance optimizations

### Please note that Windows support is temporarily disabled due to some implementation details, it will be reinstantiated soon.
### Please note that Windows is partially supported but temporarily disabled due to some implementation details, it will be re-enabled soon.

# Technical details

Expand Down Expand Up @@ -140,13 +159,13 @@ entry:
}
```

For the SYCL Native CPU target, the device compiler is in charge of materializing the SPIRV builtins (such as `@__spirv_BuiltInGlobalInvocationId`), so that they can be correctly updated by the runtime when executing the kernel. This is performed by the [PrepareSYCLNativeCPU pass](https://github.com/intel/llvm/blob/sycl/llvm/lib/SYCLLowerIR/PrepareSYCLNativeCPU.cpp).
For the SYCL Native CPU target, the device compiler is in charge of materializing the SPIRV builtins (such as `@__spirv_BuiltInGlobalInvocationId`), so that they can be correctly updated by the runtime when executing the kernel. This is performed by the [PrepareSYCLNativeCPU pass](https://github.com/intel/llvm/blob/sycl/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp).
The PrepareSYCLNativeCPUPass also emits a `subhandler` function, which receives the kernel arguments from the SYCL runtime (packed in a vector), unpacks them, and forwards only the used ones to the actual kernel.


## PrepareSYCLNativeCPU Pass

This pass will add a pointer to a `nativecpu_state` struct as kernel argument to all the kernel functions, and it will replace all the uses of SPIRV builtins with the return value of appropriately defined functions, which will read the requested information from the `__nativecpu_state` struct. The `__nativecpu_state` struct and the builtin functions are defined in [native_cpu.hpp](https://github.com/intel/llvm/blob/sycl/sycl/include/sycl/detail/native_cpu.hpp).
This pass will add a pointer to a `native_cpu::state` struct as kernel argument to all the kernel functions, and it will replace all the uses of SPIRV builtins with the return value of appropriately defined functions, which will read the requested information from the `native_cpu::state` struct. The `native_cpu::state` struct is defined in the [native_cpu UR adapter](https://github.com/oneapi-src/unified-runtime/blob/main/source/adapters/native_cpu/nativecpu_state.hpp) and the builtin functions are defined in the [native_cpu device library](https://github.com/intel/llvm/blob/sycl/libdevice/nativecpu_utils.cpp).


The resulting IR is:
Expand Down Expand Up @@ -188,11 +207,11 @@ entry:
}
```

As you can see, the `subhandler` steals the kernel's function name, and receives two pointer arguments: the first one points to the kernel arguments from the SYCL runtime, and the second one to the `__nativecpu_state` struct.
As you can see, the `subhandler` steals the kernel's function name, and receives two pointer arguments: the first one points to the kernel arguments from the SYCL runtime, and the second one to the `nativecpu::state` struct.

## Handling barriers

On SYCL Native CPU, calls to `__spirv_ControlBarrier` are handled using the `WorkItemLoopsPass` from the oneAPI Construction Kit. This pass handles barriers by splitting the kernel between calls calls to `__spirv_ControlBarrier`, and creating a wrapper that runs the subkernels over the local range. In order to correctly interface to the oneAPI Construction Kit pass pipeline, SPIRV builtins are converted to `mux` builtins (used by the OCK) by the `ConvertToMuxBuiltinsSYCLNativeCPUPass`.
On SYCL Native CPU, calls to `__spirv_ControlBarrier` are handled using the `WorkItemLoopsPass` from the oneAPI Construction Kit. This pass handles barriers by splitting the kernel between calls to `__spirv_ControlBarrier`, and creating a wrapper that runs the subkernels over the local range. In order to correctly interface to the oneAPI Construction Kit pass pipeline, SPIRV builtins are defined in the device library to call the corresponding `mux` builtins (used by the OCK).

## Vectorization

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
- [__regcall Calling convention](#__regcall-calling-convention)
- [Inline assembly](#inline-assembly)
- [Device aspect](#device-aspect)
- [Device Information Descriptors](#device-information-descriptors)
- [Device queries and conditional dispatching of the code](#device-queries-and-conditional-dispatching-of-the-code)
- [Implementation restrictions](#implementation-restrictions)
- [Features not supported with the ESIMD extension](#features-not-supported-with-the-esimd-extension)
Expand Down Expand Up @@ -1018,6 +1019,11 @@ The new aspect has the following behavior when queried via `device::has()`:
|--------|-------------|
|`aspect::ext_intel_esimd` | Indicates that the device supports the `sycl_ext_intel_esimd` extension as defined in this document. |

## Device Information Descriptors
| Device Descriptors | Return Type | Description |
| ------------------ | ----------- | ----------- |
| `ext::intel::esimd::info::device::has_2d_block_io_support` | bool | Returns a boolean indicating whether 2D load/store/prefetch instructions are supported by the device. |

## Examples
### Vector addition (USM)
```cpp
Expand Down
Loading

0 comments on commit 11360d3

Please sign in to comment.