diff --git a/.github/workflows/sycl-windows-run-tests.yml b/.github/workflows/sycl-windows-run-tests.yml index 65c2df1290771..9186392f8fa46 100644 --- a/.github/workflows/sycl-windows-run-tests.yml +++ b/.github/workflows/sycl-windows-run-tests.yml @@ -88,6 +88,7 @@ jobs: cmake --build build-e2e --target check-sycl-e2e - name: Detect hung tests shell: powershell + if: always() run: | $exitCode = 0 $hungTests = Get-Process | Where-Object { ($_.Path -match "llvm\\install") -or ($_.Path -match "llvm\\build-e2e") } diff --git a/clang/include/clang/AST/PrettyPrinter.h b/clang/include/clang/AST/PrettyPrinter.h index a50216615c4a9..beadfea6e3ae1 100644 --- a/clang/include/clang/AST/PrettyPrinter.h +++ b/clang/include/clang/AST/PrettyPrinter.h @@ -68,17 +68,18 @@ struct PrintingPolicy { SuppressStrongLifetime(false), SuppressLifetimeQualifiers(false), SuppressTypedefs(false), SuppressFinalSpecifier(false), SuppressTemplateArgsInCXXConstructors(false), - SuppressDefaultTemplateArgs(true), Bool(LO.Bool), - Nullptr(LO.CPlusPlus11 || LO.C23), NullptrTypeInNamespace(LO.CPlusPlus), - Restrict(LO.C99), Alignof(LO.CPlusPlus11), UnderscoreAlignof(LO.C11), + SuppressDefaultTemplateArgs(true), EnforceDefaultTemplateArgs(false), + Bool(LO.Bool), Nullptr(LO.CPlusPlus11 || LO.C23), + NullptrTypeInNamespace(LO.CPlusPlus), Restrict(LO.C99), + Alignof(LO.CPlusPlus11), UnderscoreAlignof(LO.C11), UseVoidForZeroParams(!LO.CPlusPlus), SplitTemplateClosers(!LO.CPlusPlus11), TerseOutput(false), PolishForDeclaration(false), Half(LO.Half), MSWChar(LO.MicrosoftExt && !LO.WChar), IncludeNewlines(true), MSVCFormatting(false), ConstantsAsWritten(false), SuppressImplicitBase(false), FullyQualifiedName(false), - SuppressDefinition(false), SuppressDefaultTemplateArguments(false), - PrintCanonicalTypes(false), + EnforceScopeForElaboratedTypes(false), SuppressDefinition(false), + SuppressDefaultTemplateArguments(false), PrintCanonicalTypes(false), SkipCanonicalizationOfTemplateTypeParms(false), PrintInjectedClassNameWithArguments(true), UsePreferredNames(true), AlwaysIncludeTypeForTemplateArgument(false), @@ -241,6 +242,11 @@ struct PrintingPolicy { LLVM_PREFERRED_TYPE(bool) unsigned SuppressDefaultTemplateArgs : 1; + /// When true, print template arguments that match the default argument for + /// the parameter, even if they're not specified in the source. + LLVM_PREFERRED_TYPE(bool) + unsigned EnforceDefaultTemplateArgs : 1; + /// Whether we can use 'bool' rather than '_Bool' (even if the language /// doesn't actually have 'bool', because, e.g., it is defined as a macro). LLVM_PREFERRED_TYPE(bool) @@ -339,6 +345,10 @@ struct PrintingPolicy { LLVM_PREFERRED_TYPE(bool) unsigned FullyQualifiedName : 1; + /// Enforce fully qualified name printing for elaborated types. + LLVM_PREFERRED_TYPE(bool) + unsigned EnforceScopeForElaboratedTypes : 1; + /// When true does not print definition of a type. E.g. /// \code /// template class C0 : public C1 {...} diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 636ddaddf8769..49eb096cf369f 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -101,7 +101,7 @@ class ElaboratedTypePolicyRAII { SuppressTagKeyword = Policy.SuppressTagKeyword; SuppressScope = Policy.SuppressScope; Policy.SuppressTagKeyword = true; - Policy.SuppressScope = true; + Policy.SuppressScope = !Policy.EnforceScopeForElaboratedTypes; } ~ElaboratedTypePolicyRAII() { @@ -1728,8 +1728,10 @@ void TypePrinter::printElaboratedBefore(const ElaboratedType *T, Policy.SuppressScope = OldSupressScope; return; } - if (Qualifier && !(Policy.SuppressTypedefs && - T->getNamedType()->getTypeClass() == Type::Typedef)) + if (Qualifier && + !(Policy.SuppressTypedefs && + T->getNamedType()->getTypeClass() == Type::Typedef) && + !Policy.EnforceScopeForElaboratedTypes) Qualifier->print(OS, Policy); } @@ -2220,15 +2222,6 @@ static void printArgument(const TemplateArgument &A, const PrintingPolicy &PP, A.print(PP, OS, IncludeType); } -static void printArgument(const TemplateArgumentLoc &A, - const PrintingPolicy &PP, llvm::raw_ostream &OS, - bool IncludeType) { - const TemplateArgument::ArgKind &Kind = A.getArgument().getKind(); - if (Kind == TemplateArgument::ArgKind::Type) - return A.getTypeSourceInfo()->getType().print(OS, PP); - return A.getArgument().print(PP, OS, IncludeType); -} - static bool isSubstitutedTemplateArgument(ASTContext &Ctx, TemplateArgument Arg, TemplateArgument Pattern, ArrayRef Args, @@ -2399,15 +2392,40 @@ template static void printTo(raw_ostream &OS, ArrayRef Args, const PrintingPolicy &Policy, const TemplateParameterList *TPL, bool IsPack, unsigned ParmIndex) { - // Drop trailing template arguments that match default arguments. - if (TPL && Policy.SuppressDefaultTemplateArgs && - !Policy.PrintCanonicalTypes && !Args.empty() && !IsPack && + llvm::SmallVector ArgsToPrint; + for (const TA &A : Args) + ArgsToPrint.push_back(getArgument(A)); + if (TPL && !Policy.PrintCanonicalTypes && !IsPack && Args.size() <= TPL->size()) { - llvm::SmallVector OrigArgs; - for (const TA &A : Args) - OrigArgs.push_back(getArgument(A)); - while (!Args.empty() && getArgument(Args.back()).getIsDefaulted()) - Args = Args.drop_back(); + // Drop trailing template arguments that match default arguments. + if (Policy.SuppressDefaultTemplateArgs) { + while (!ArgsToPrint.empty() && + getArgument(ArgsToPrint.back()).getIsDefaulted()) + ArgsToPrint.pop_back(); + } else if (Policy.EnforceDefaultTemplateArgs) { + for (unsigned I = Args.size(); I < TPL->size(); ++I) { + auto Param = TPL->getParam(I); + if (auto *TTPD = dyn_cast(Param)) { + // If we met a non default-argument past provided list of arguments, + // it is either a pack which must be the last arguments, or provided + // argument list was problematic. Bail out either way. Do the same + // for each kind of template argument. + if (!TTPD->hasDefaultArgument()) + break; + ArgsToPrint.push_back(getArgument(TTPD->getDefaultArgument())); + } else if (auto *TTPD = dyn_cast(Param)) { + if (!TTPD->hasDefaultArgument()) + break; + ArgsToPrint.push_back(getArgument(TTPD->getDefaultArgument())); + } else if (auto *NTTPD = dyn_cast(Param)) { + if (!NTTPD->hasDefaultArgument()) + break; + ArgsToPrint.push_back(getArgument(NTTPD->getDefaultArgument())); + } else { + llvm_unreachable("unexpected template parameter"); + } + } + } } const char *Comma = Policy.MSVCFormatting ? "," : ", "; @@ -2416,7 +2434,7 @@ printTo(raw_ostream &OS, ArrayRef Args, const PrintingPolicy &Policy, bool NeedSpace = false; bool FirstArg = true; - for (const auto &Arg : Args) { + for (const auto &Arg : ArgsToPrint) { // Print the argument into a string. SmallString<128> Buf; llvm::raw_svector_ostream ArgOS(Buf); diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp index f6d555aaa8ce3..d026805d22f4a 100644 --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -688,8 +688,11 @@ class ObjectFileHandler final : public FileHandler { return std::move(Err); // If we are dealing with a bitcode file do not add special globals - // llvm.used and llvm.compiler.used to the list of defined symbols. - if (SF->isIR() && (Name == "llvm.used" || Name == "llvm.compiler.used")) + // llvm.used and llvm.compiler.used and __AsanDeviceGlobalMetadata to + // the list of defined symbols. + if (SF->isIR() && + (Name == "llvm.used" || Name == "llvm.compiler.used" || + Name == "__AsanDeviceGlobalMetadata")) continue; // Add symbol name with the target prefix to the buffer. diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 57949dbc3f1d2..2d2c5dab453bd 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10674,12 +10674,8 @@ static void getTripleBasedSPIRVTransOpts(Compilation &C, ArgStringList &TranslatorArgs) { bool IsCPU = Triple.isSPIR() && Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64; - // Enable NonSemanticShaderDebugInfo.200 for CPU AOT and for non-Windows - const bool IsWindowsMSVC = - Triple.isWindowsMSVCEnvironment() || - C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment(); - const bool EnableNonSemanticDebug = - IsCPU || (!IsWindowsMSVC && !C.getDriver().IsFPGAHWMode()); + // Enable NonSemanticShaderDebugInfo.200 for non-FPGA targets. + const bool EnableNonSemanticDebug = !C.getDriver().IsFPGAHWMode(); if (EnableNonSemanticDebug) { TranslatorArgs.push_back( "-spirv-debug-info-version=nonsemantic-shader-200"); diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 1d903aa1fc17c..6831938fd4c6d 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -650,8 +650,11 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, addLibraries(SYCLDeviceBfloat16FallbackLib); } + // Link in ITT annotations library unless fsycl-no-instrument-device-code + // is specified. This ensures that we are ABI-compatible with the + // instrumented device code, which was the default not so long ago. if (Args.hasFlag(options::OPT_fsycl_instrument_device_code, - options::OPT_fno_sycl_instrument_device_code, false)) + options::OPT_fno_sycl_instrument_device_code, true)) addLibraries(SYCLDeviceAnnotationLibs); #if !defined(_WIN32) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 7d53638c8eff3..242e6c8a9d7d4 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -1522,7 +1522,7 @@ class KernelObjVisitor { void visitParam(ParmVarDecl *Param, QualType ParamTy, HandlerTys &...Handlers) { if (isSyclSpecialType(ParamTy, SemaSYCLRef)) - KP_FOR_EACH(handleOtherType, Param, ParamTy); + KP_FOR_EACH(handleSyclSpecialType, Param, ParamTy); else if (ParamTy->isStructureOrClassType()) { if (KP_FOR_EACH(handleStructType, Param, ParamTy)) { CXXRecordDecl *RD = ParamTy->getAsCXXRecordDecl(); @@ -2075,8 +2075,11 @@ class SyclKernelFieldChecker : public SyclKernelFieldHandler { } bool handleSyclSpecialType(ParmVarDecl *PD, QualType ParamTy) final { - Diag.Report(PD->getLocation(), diag::err_bad_kernel_param_type) << ParamTy; - IsInvalid = true; + if (!SemaSYCL::isSyclType(ParamTy, SYCLTypeAttr::work_group_memory)) { + Diag.Report(PD->getLocation(), diag::err_bad_kernel_param_type) + << ParamTy; + IsInvalid = true; + } return isValid(); } @@ -2228,8 +2231,8 @@ class SyclKernelUnionChecker : public SyclKernelFieldHandler { } bool handleSyclSpecialType(ParmVarDecl *PD, QualType ParamTy) final { - // TODO - unsupportedFreeFunctionParamType(); + if (!SemaSYCL::isSyclType(ParamTy, SYCLTypeAttr::work_group_memory)) + unsupportedFreeFunctionParamType(); // TODO return true; } @@ -3013,9 +3016,26 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { return handleSpecialType(FD, FieldTy); } - bool handleSyclSpecialType(ParmVarDecl *, QualType) final { - // TODO - unsupportedFreeFunctionParamType(); + bool handleSyclSpecialType(ParmVarDecl *PD, QualType ParamTy) final { + if (SemaSYCL::isSyclType(ParamTy, SYCLTypeAttr::work_group_memory)) { + const auto *RecordDecl = ParamTy->getAsCXXRecordDecl(); + assert(RecordDecl && "The type must be a RecordDecl"); + CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, InitMethodName); + assert(InitMethod && "The type must have the __init method"); + // Don't do -1 here because we count on this to be the first parameter + // added (if any). + size_t ParamIndex = Params.size(); + for (const ParmVarDecl *Param : InitMethod->parameters()) { + QualType ParamTy = Param->getType(); + addParam(Param, ParamTy.getCanonicalType()); + // Propagate add_ir_attributes_kernel_parameter attribute. + if (const auto *AddIRAttr = + Param->getAttr()) + Params.back()->addAttr(AddIRAttr->clone(SemaSYCLRef.getASTContext())); + } + LastParamIndex = ParamIndex; + } else // TODO + unsupportedFreeFunctionParamType(); return true; } @@ -3291,9 +3311,7 @@ class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { } bool handleSyclSpecialType(ParmVarDecl *PD, QualType ParamTy) final { - // TODO - unsupportedFreeFunctionParamType(); - return true; + return handleSpecialType(ParamTy); } bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS, @@ -4442,6 +4460,45 @@ class FreeFunctionKernelBodyCreator : public SyclKernelFieldHandler { {}); } + MemberExpr *buildMemberExpr(Expr *Base, ValueDecl *Member) { + DeclAccessPair MemberDAP = DeclAccessPair::make(Member, AS_none); + MemberExpr *Result = SemaSYCLRef.SemaRef.BuildMemberExpr( + Base, /*IsArrow */ false, FreeFunctionSrcLoc, NestedNameSpecifierLoc(), + FreeFunctionSrcLoc, Member, MemberDAP, + /*HadMultipleCandidates*/ false, + DeclarationNameInfo(Member->getDeclName(), FreeFunctionSrcLoc), + Member->getType(), VK_LValue, OK_Ordinary); + return Result; + } + + void createSpecialMethodCall(const CXXRecordDecl *RD, StringRef MethodName, + Expr *MemberBaseExpr, + SmallVectorImpl &AddTo) { + CXXMethodDecl *Method = getMethodByName(RD, MethodName); + if (!Method) + return; + unsigned NumParams = Method->getNumParams(); + llvm::SmallVector ParamDREs(NumParams); + llvm::ArrayRef KernelParameters = + DeclCreator.getParamVarDeclsForCurrentField(); + for (size_t I = 0; I < NumParams; ++I) { + QualType ParamType = KernelParameters[I]->getOriginalType(); + ParamDREs[I] = SemaSYCLRef.SemaRef.BuildDeclRefExpr( + KernelParameters[I], ParamType, VK_LValue, FreeFunctionSrcLoc); + } + MemberExpr *MethodME = buildMemberExpr(MemberBaseExpr, Method); + QualType ResultTy = Method->getReturnType(); + ExprValueKind VK = Expr::getValueKindForType(ResultTy); + ResultTy = ResultTy.getNonLValueExprType(SemaSYCLRef.getASTContext()); + llvm::SmallVector ParamStmts; + const auto *Proto = cast(Method->getType()); + SemaSYCLRef.SemaRef.GatherArgumentsForCall(FreeFunctionSrcLoc, Method, + Proto, 0, ParamDREs, ParamStmts); + AddTo.push_back(CXXMemberCallExpr::Create( + SemaSYCLRef.getASTContext(), MethodME, ParamStmts, ResultTy, VK, + FreeFunctionSrcLoc, FPOptionsOverride())); + } + public: static constexpr const bool VisitInsideSimpleContainers = false; @@ -4461,9 +4518,53 @@ class FreeFunctionKernelBodyCreator : public SyclKernelFieldHandler { return true; } - bool handleSyclSpecialType(ParmVarDecl *, QualType) final { - // TODO - unsupportedFreeFunctionParamType(); + // Default inits the type, then calls the init-method in the body. + // A type may not have a public default constructor as per its spec so + // typically if this is the case the default constructor will be private and + // in such cases we must manually override the access specifier from private + // to public just for the duration of this default initialization. + // TODO: Revisit this approach once https://github.com/intel/llvm/issues/16061 + // is closed. + bool handleSyclSpecialType(ParmVarDecl *PD, QualType ParamTy) final { + if (SemaSYCL::isSyclType(ParamTy, SYCLTypeAttr::work_group_memory)) { + const auto *RecordDecl = ParamTy->getAsCXXRecordDecl(); + AccessSpecifier DefaultConstructorAccess; + auto DefaultConstructor = + std::find_if(RecordDecl->ctor_begin(), RecordDecl->ctor_end(), + [](auto it) { return it->isDefaultConstructor(); }); + DefaultConstructorAccess = DefaultConstructor->getAccess(); + DefaultConstructor->setAccess(AS_public); + + QualType Ty = PD->getOriginalType(); + ASTContext &Ctx = SemaSYCLRef.SemaRef.getASTContext(); + VarDecl *WorkGroupMemoryClone = VarDecl::Create( + Ctx, DeclCreator.getKernelDecl(), FreeFunctionSrcLoc, + FreeFunctionSrcLoc, PD->getIdentifier(), PD->getType(), + Ctx.getTrivialTypeSourceInfo(Ty), SC_None); + InitializedEntity VarEntity = + InitializedEntity::InitializeVariable(WorkGroupMemoryClone); + InitializationKind InitKind = + InitializationKind::CreateDefault(FreeFunctionSrcLoc); + InitializationSequence InitSeq(SemaSYCLRef.SemaRef, VarEntity, InitKind, + std::nullopt); + ExprResult Init = InitSeq.Perform(SemaSYCLRef.SemaRef, VarEntity, + InitKind, std::nullopt); + WorkGroupMemoryClone->setInit( + SemaSYCLRef.SemaRef.MaybeCreateExprWithCleanups(Init.get())); + WorkGroupMemoryClone->setInitStyle(VarDecl::CallInit); + DefaultConstructor->setAccess(DefaultConstructorAccess); + + Stmt *DS = new (SemaSYCLRef.getASTContext()) + DeclStmt(DeclGroupRef(WorkGroupMemoryClone), FreeFunctionSrcLoc, + FreeFunctionSrcLoc); + BodyStmts.push_back(DS); + Expr *MemberBaseExpr = SemaSYCLRef.SemaRef.BuildDeclRefExpr( + WorkGroupMemoryClone, Ty, VK_PRValue, FreeFunctionSrcLoc); + createSpecialMethodCall(RecordDecl, InitMethodName, MemberBaseExpr, + BodyStmts); + ArgExprs.push_back(MemberBaseExpr); + } else // TODO + unsupportedFreeFunctionParamType(); return true; } @@ -4748,9 +4849,11 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { return true; } - bool handleSyclSpecialType(ParmVarDecl *, QualType) final { - // TODO - unsupportedFreeFunctionParamType(); + bool handleSyclSpecialType(ParmVarDecl *PD, QualType ParamTy) final { + if (SemaSYCL::isSyclType(ParamTy, SYCLTypeAttr::work_group_memory)) + addParam(PD, ParamTy, SYCLIntegrationHeader::kind_work_group_memory); + else + unsupportedFreeFunctionParamType(); // TODO return true; } @@ -6227,7 +6330,6 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) { O << "#include \n"; O << "#include \n"; O << "#include \n"; - O << "\n"; LangOptions LO; @@ -6502,6 +6604,7 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) { O << "\n"; O << "// Forward declarations of kernel and its argument types:\n"; + Policy.SuppressDefaultTemplateArgs = false; FwdDeclEmitter.Visit(K.SyclKernel->getType()); O << "\n"; @@ -6509,16 +6612,48 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) { O << "extern \"C\" "; std::string ParmList; bool FirstParam = true; + Policy.SuppressDefaultTemplateArgs = false; + Policy.PrintCanonicalTypes = true; for (ParmVarDecl *Param : K.SyclKernel->parameters()) { if (FirstParam) FirstParam = false; else ParmList += ", "; - ParmList += Param->getType().getCanonicalType().getAsString(); + ParmList += Param->getType().getCanonicalType().getAsString(Policy); } FunctionTemplateDecl *FTD = K.SyclKernel->getPrimaryTemplate(); + Policy.PrintCanonicalTypes = false; Policy.SuppressDefinition = true; Policy.PolishForDeclaration = true; + Policy.FullyQualifiedName = true; + Policy.EnforceScopeForElaboratedTypes = true; + + // Now we need to print the declaration of the kernel itself. + // Example: + // template struct Arg { + // T val; + // }; + // For the following free function kernel: + // template + // SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + // (ext::oneapi::experimental::nd_range_kernel<1>)) + // void foo(Arg arg) {} + // Integration header must contain the following declaration: + // template + // void foo(Arg arg); + // SuppressDefaultTemplateArguments is a downstream addition that suppresses + // default template arguments in the function declaration. It should be set + // to true to emit function declaration that won't cause any compilation + // errors when present in the integration header. + // To print Arg in the function declaration and shim functions we + // need to disable default arguments printing suppression via community flag + // SuppressDefaultTemplateArgs, otherwise they will be suppressed even for + // canonical types or if even written in the original source code. + Policy.SuppressDefaultTemplateArguments = true; + // EnforceDefaultTemplateArgs is a downstream addition that forces printing + // template arguments that match default template arguments while printing + // template-ids, even if the source code doesn't reference them. + Policy.EnforceDefaultTemplateArgs = true; if (FTD) { FTD->print(O, Policy); } else { @@ -6547,6 +6682,8 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) { } O << ";\n"; O << "}\n"; + Policy.SuppressDefaultTemplateArgs = true; + Policy.EnforceDefaultTemplateArgs = false; // Generate is_kernel, is_single_task_kernel and nd_range_kernel functions. O << "namespace sycl {\n"; diff --git a/clang/test/CodeGenSYCL/free_function_default_template_arguments.cpp b/clang/test/CodeGenSYCL/free_function_default_template_arguments.cpp new file mode 100644 index 0000000000000..62a121d218b8b --- /dev/null +++ b/clang/test/CodeGenSYCL/free_function_default_template_arguments.cpp @@ -0,0 +1,132 @@ +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -sycl-std=2020 -fsycl-int-header=%t.h %s +// RUN: FileCheck -input-file=%t.h %s + +// This test checks integration header contents for free functions kernels with +// parameter types that have default template arguments. + +#include "mock_properties.hpp" +#include "sycl.hpp" + +namespace ns { + +struct notatuple { + int a; +}; + +namespace ns1 { +template +class hasDefaultArg { + +}; +} + +template struct Arg { + T val; +}; + +[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel", + 2)]] void +simple(Arg){ +} + +} + +[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel", + 2)]] void +simple1(ns::Arg>){ +} + + +template +[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]] void +templated(ns::Arg, T end) { +} + +template void templated(ns::Arg, int); + +using namespace ns; + +template +[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]] void +templated2(Arg, T end) { +} + +template void templated2(Arg, int); + +template +[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]] void +templated3(Arg, int, int>, T end) { +} + +template void templated3(Arg, int, int>, int); + + +namespace sycl { +template struct X {}; +template <> struct X {}; +namespace detail { +struct Y {}; +} // namespace detail +template <> struct X {}; +} // namespace sycl +using namespace sycl; +template > struct Arg1 { T val; }; + +[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel", + 2)]] void +foo(Arg1 arg) { + arg.val = 42; +} + +// CHECK: Forward declarations of kernel and its argument types: +// CHECK-NEXT: namespace ns { +// CHECK-NEXT: struct notatuple; +// CHECK-NEXT: } +// CHECK-NEXT: namespace ns { +// CHECK-NEXT: template struct Arg; +// CHECK-NEXT: } + +// CHECK: void ns::simple(ns::Arg); +// CHECK-NEXT: static constexpr auto __sycl_shim1() { +// CHECK-NEXT: return (void (*)(struct ns::Arg))simple; +// CHECK-NEXT: } + +// CHECK: Forward declarations of kernel and its argument types: +// CHECK: namespace ns { +// CHECK: namespace ns1 { +// CHECK-NEXT: template class hasDefaultArg; +// CHECK-NEXT: } + +// CHECK: void simple1(ns::Arg, int, 12, ns::notatuple>); +// CHECK-NEXT: static constexpr auto __sycl_shim2() { +// CHECK-NEXT: return (void (*)(struct ns::Arg, int, 12, struct ns::notatuple>))simple1; +// CHECK-NEXT: } + +// CHECK: template void templated(ns::Arg, T end); +// CHECK-NEXT: static constexpr auto __sycl_shim3() { +// CHECK-NEXT: return (void (*)(struct ns::Arg, int))templated; +// CHECK-NEXT: } + +// CHECK: template void templated2(ns::Arg, T end); +// CHECK-NEXT: static constexpr auto __sycl_shim4() { +// CHECK-NEXT: return (void (*)(struct ns::Arg, int))templated2; +// CHECK-NEXT: } + +// CHECK: template void templated3(ns::Arg, int, int>, T end); +// CHECK-NEXT: static constexpr auto __sycl_shim5() { +// CHECK-NEXT: return (void (*)(struct ns::Arg, int, int>, int))templated3; +// CHECK-NEXT: } + +// CHECK Forward declarations of kernel and its argument types: +// CHECK: namespace sycl { namespace detail { +// CHECK-NEXT: struct Y; +// CHECK-NEXT: }} +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: template struct X; +// CHECK-NEXT: } +// CHECK-NEXT: template struct Arg1; + +// CHECK: void foo(Arg1 > arg); +// CHECK-NEXT: static constexpr auto __sycl_shim6() { +// CHECK-NEXT: return (void (*)(struct Arg1 >))foo; +// CHECK-NEXT: } diff --git a/clang/test/CodeGenSYCL/free_function_int_header.cpp b/clang/test/CodeGenSYCL/free_function_int_header.cpp index ccaf85aa897ca..6a196dedc2fc2 100644 --- a/clang/test/CodeGenSYCL/free_function_int_header.cpp +++ b/clang/test/CodeGenSYCL/free_function_int_header.cpp @@ -2,7 +2,7 @@ // RUN: FileCheck -input-file=%t.h %s // // This test checks integration header contents for free functions with scalar, -// pointer and non-decomposed struct parameters. +// pointer, non-decomposed struct parameters and work group memory parameters. #include "mock_properties.hpp" #include "sycl.hpp" @@ -96,6 +96,12 @@ void ff_7(KArgWithPtrArray KArg) { template void ff_7(KArgWithPtrArray KArg); +__attribute__((sycl_device)) +[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 0)]] +void ff_8(sycl::work_group_memory) { +} + + // CHECK: const char* const kernel_names[] = { // CHECK-NEXT: {{.*}}__sycl_kernel_ff_2Piii // CHECK-NEXT: {{.*}}__sycl_kernel_ff_2Piiii @@ -105,6 +111,7 @@ template void ff_7(KArgWithPtrArray KArg); // CHECK-NEXT: {{.*}}__sycl_kernel_ff_410NoPointers8Pointers3Agg // CHECK-NEXT: {{.*}}__sycl_kernel_ff_6I3Agg7DerivedEvT_T0_i // CHECK-NEXT: {{.*}}__sycl_kernel_ff_7ILi3EEv16KArgWithPtrArrayIXT_EE +// CHECK-NEXT: {{.*}}__sycl_kernel_ff_8N4sycl3_V117work_group_memoryIiEE // CHECK-NEXT: "" // CHECK-NEXT: }; @@ -148,6 +155,9 @@ template void ff_7(KArgWithPtrArray KArg); // CHECK: //--- _Z18__sycl_kernel_ff_7ILi3EEv16KArgWithPtrArrayIXT_EE // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 48, 0 }, +// CHECK: //--- _Z18__sycl_kernel_ff_8N4sycl3_V117work_group_memoryIiEE +// CHECK-NEXT: { kernel_param_kind_t::kind_work_group_memory, 8, 0 }, + // CHECK: { kernel_param_kind_t::kind_invalid, -987654321, -987654321 }, // CHECK-NEXT: }; @@ -294,6 +304,26 @@ template void ff_7(KArgWithPtrArray KArg); // CHECK-NEXT: }; // CHECK-NEXT: } +// CHECK: Definition of _Z18__sycl_kernel_ff_8N4sycl3_V117work_group_memoryIiEE as a free function kernel + +// CHECK: Forward declarations of kernel and its argument types: +// CHECK: template class work_group_memory; + +// CHECK: void ff_8(sycl::work_group_memory); +// CHECK-NEXT: static constexpr auto __sycl_shim9() { +// CHECK-NEXT: return (void (*)(class sycl::work_group_memory))ff_8; +// CHECK-NEXT: } +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: template <> +// CHECK-NEXT: struct ext::oneapi::experimental::is_kernel<__sycl_shim9()> { +// CHECK-NEXT: static constexpr bool value = true; +// CHECK-NEXT: }; +// CHECK-NEXT: template <> +// CHECK-NEXT: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim9()> { +// CHECK-NEXT: static constexpr bool value = true; +// CHECK-NEXT: }; +// CHECK-NEXT: } + // CHECK: #include // CHECK: Definition of kernel_id of _Z18__sycl_kernel_ff_2Piii @@ -359,3 +389,11 @@ template void ff_7(KArgWithPtrArray KArg); // CHECK-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"_Z18__sycl_kernel_ff_7ILi3EEv16KArgWithPtrArrayIXT_EE"}); // CHECK-NEXT: } // CHECK-NEXT: } + +// CHECK: Definition of kernel_id of _Z18__sycl_kernel_ff_8N4sycl3_V117work_group_memoryIiEE +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: template <> +// CHECK-NEXT: kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim9()>() { +// CHECK-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"_Z18__sycl_kernel_ff_8N4sycl3_V117work_group_memoryIiEE"}); +// CHECK-NEXT: } +// CHECK-NEXT: } diff --git a/clang/test/CodeGenSYCL/free_function_kernel_params.cpp b/clang/test/CodeGenSYCL/free_function_kernel_params.cpp index a11d55f483966..2e78116824ad2 100644 --- a/clang/test/CodeGenSYCL/free_function_kernel_params.cpp +++ b/clang/test/CodeGenSYCL/free_function_kernel_params.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -triple spir64 \ // RUN: -emit-llvm %s -o - | FileCheck %s // This test checks parameter IR generation for free functions with parameters -// of non-decomposed struct type. +// of non-decomposed struct type and work group memory type. #include "sycl.hpp" @@ -56,3 +56,18 @@ template void ff_6(KArgWithPtrArray KArg); // CHECK: %struct.KArgWithPtrArray = type { [3 x ptr addrspace(4)], [3 x i32], [3 x i32] } // CHECK: define dso_local spir_kernel void @{{.*}}__sycl_kernel{{.*}}(ptr noundef byval(%struct.NoPointers) align 4 %__arg_S1, ptr noundef byval(%struct.__generated_Pointers) align 8 %__arg_S2, ptr noundef byval(%struct.__generated_Agg) align 8 %__arg_S3) // CHECK: define dso_local spir_kernel void @{{.*}}__sycl_kernel_ff_6{{.*}}(ptr noundef byval(%struct.__generated_KArgWithPtrArray) align 8 %__arg_KArg) + +__attribute__((sycl_device)) +[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 0)]] +void ff_7(sycl::work_group_memory mem) { +} + +// CHECK: define dso_local spir_kernel void @{{.*}}__sycl_kernel_ff_7{{.*}}(ptr addrspace(3) noundef align 4 %__arg_Ptr) +// CHECK: %__arg_Ptr.addr = alloca ptr addrspace(3), align 8 +// CHECK-NEXT: %mem = alloca %"class.sycl::_V1::work_group_memory", align 8 +// CHECK: %__arg_Ptr.addr.ascast = addrspacecast ptr %__arg_Ptr.addr to ptr addrspace(4) +// CHECK-NEXT: %mem.ascast = addrspacecast ptr %mem to ptr addrspace(4) +// CHECK: store ptr addrspace(3) %__arg_Ptr, ptr addrspace(4) %__arg_Ptr.addr.ascast, align 8 +// CHECK-NEXT: [[REGISTER:%[a-zA-Z0-9_]+]] = load ptr addrspace(3), ptr addrspace(4) %__arg_Ptr.addr.ascast, align 8 +// CHECK-NEXT: call spir_func void @{{.*}}work_group_memory{{.*}}__init{{.*}}(ptr addrspace(4) noundef align 8 dereferenceable_or_null(8) %mem.ascast, ptr addrspace(3) noundef [[REGISTER]]) + diff --git a/clang/test/Driver/sycl-instrumentation-old-model.c b/clang/test/Driver/sycl-instrumentation-old-model.c index 3e07d6d1ca298..bf1471fad84ef 100644 --- a/clang/test/Driver/sycl-instrumentation-old-model.c +++ b/clang/test/Driver/sycl-instrumentation-old-model.c @@ -20,12 +20,16 @@ // CHECK-SPIRV-SAME: "{{.*}}libsycl-itt-stubs.bc" // CHECK-HOST-NOT: "-cc1"{{.*}} "-fsycl-is-host"{{.*}} "-fsycl-instrument-device-code" -// ITT annotations in device code are disabled by default. +// ITT annotations in device code are disabled by default. However, for SYCL offloading, +// we still link ITT annotations libraries to ensure ABI compatibility with previous release. // RUN: %clangxx -fsycl --no-offload-new-driver -fsycl-targets=spir64 -### %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK-NONPASSED %s +// RUN: | FileCheck -check-prefixes=CHECK-ITT-LINK-ONLY %s // RUN: %clangxx -fsycl --no-offload-new-driver -fsycl-targets=nvptx64-nvidia-cuda -nocudalib -### %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHECK-NONPASSED %s +// CHECK-ITT-LINK-ONLY-NOT: "-fsycl-instrument-device-code" +// CHECK-ITT-LINK-ONLY: llvm-link{{.*}} {{.*}}libsycl-itt-{{.*}} + // RUN: %clangxx -fsycl --no-offload-new-driver -fno-sycl-instrument-device-code -fsycl-targets=spir64 -### %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHECK-NONPASSED %s // RUN: %clangxx -fsycl --no-offload-new-driver -fsycl-targets=nvptx64-nvidia-cuda -fno-sycl-instrument-device-code -nocudalib -### %s 2>&1 \ diff --git a/clang/test/Driver/sycl-instrumentation.c b/clang/test/Driver/sycl-instrumentation.c index ccb3d857d46af..c2dbf8b6b83f7 100644 --- a/clang/test/Driver/sycl-instrumentation.c +++ b/clang/test/Driver/sycl-instrumentation.c @@ -19,12 +19,16 @@ // CHECK-SPIRV-SAME: libsycl-itt-compiler-wrappers.new.o // CHECK-SPIRV-SAME: libsycl-itt-stubs.new.o -// ITT annotations in device code are disabled by default. +// ITT annotations in device code are disabled by default. However, for SYCL offloading, +// we still link ITT annotations libraries to ensure ABI compatibility with previous release. // RUN: %clangxx -fsycl --offload-new-driver -fsycl-targets=spir64 -### %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK-NONPASSED %s +// RUN: | FileCheck -check-prefixes=CHECK-ITT-LINK-ONLY %s // RUN: %clangxx -fsycl --offload-new-driver -fsycl-targets=nvptx64-nvidia-cuda -nocudalib -### %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHECK-NONPASSED %s +// CHECK-ITT-LINK-ONLY-NOT: "-fsycl-instrument-device-code" +// CHECK-ITT-LINK-ONLY: clang-linker-wrapper{{.*}} {{.*}}libsycl-itt-{{.*}} + // RUN: %clangxx -fsycl --offload-new-driver -fno-sycl-instrument-device-code -fsycl-targets=spir64 -### %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHECK-NONPASSED %s // RUN: %clangxx -fsycl --offload-new-driver -fsycl-targets=nvptx64-nvidia-cuda -fno-sycl-instrument-device-code -nocudalib -### %s 2>&1 \ diff --git a/clang/test/Driver/sycl-offload-new-driver.c b/clang/test/Driver/sycl-offload-new-driver.c index b6732b3e9312e..dd656192b80f3 100644 --- a/clang/test/Driver/sycl-offload-new-driver.c +++ b/clang/test/Driver/sycl-offload-new-driver.c @@ -34,7 +34,7 @@ // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl --offload-new-driver \ // RUN: --sysroot=%S/Inputs/SYCL -### %s 2>&1 \ // RUN: | FileCheck -check-prefix WRAPPER_OPTIONS %s -// WRAPPER_OPTIONS: clang-linker-wrapper{{.*}} "-sycl-device-libraries=libsycl-crt.new.o,libsycl-complex.new.o,libsycl-complex-fp64.new.o,libsycl-cmath.new.o,libsycl-cmath-fp64.new.o,libsycl-imf.new.o,libsycl-imf-fp64.new.o,libsycl-imf-bf16.new.o,libsycl-fallback-cassert.new.o,libsycl-fallback-cstring.new.o,libsycl-fallback-complex.new.o,libsycl-fallback-complex-fp64.new.o,libsycl-fallback-cmath.new.o,libsycl-fallback-cmath-fp64.new.o,libsycl-fallback-imf.new.o,libsycl-fallback-imf-fp64.new.o,libsycl-fallback-imf-bf16.new.o" +// WRAPPER_OPTIONS: clang-linker-wrapper{{.*}} "-sycl-device-libraries=libsycl-crt.new.o,libsycl-complex.new.o,libsycl-complex-fp64.new.o,libsycl-cmath.new.o,libsycl-cmath-fp64.new.o,libsycl-imf.new.o,libsycl-imf-fp64.new.o,libsycl-imf-bf16.new.o,libsycl-fallback-cassert.new.o,libsycl-fallback-cstring.new.o,libsycl-fallback-complex.new.o,libsycl-fallback-complex-fp64.new.o,libsycl-fallback-cmath.new.o,libsycl-fallback-cmath-fp64.new.o,libsycl-fallback-imf.new.o,libsycl-fallback-imf-fp64.new.o,libsycl-fallback-imf-bf16.new.o,libsycl-itt-user-wrappers.new.o,libsycl-itt-compiler-wrappers.new.o,libsycl-itt-stubs.new.o" // WRAPPER_OPTIONS-SAME: "-sycl-device-library-location={{.*}}/lib" /// Verify phases used to generate SPIR-V instead of LLVM-IR diff --git a/clang/test/Driver/sycl-spirv-default-options-old-model.c b/clang/test/Driver/sycl-spirv-default-options-old-model.c new file mode 100644 index 0000000000000..66f63a69737e5 --- /dev/null +++ b/clang/test/Driver/sycl-spirv-default-options-old-model.c @@ -0,0 +1,36 @@ +// Test for default llvm-spirv options + +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64-unknown-unknown %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT + +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64-unknown-unknown %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga-unknown-unknown %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT +// RUN: %clang -target x86_64-unknown-linux-gnu -fintelfpga %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-FPGA-HW +// RUN: %clang -target x86_64-unknown-linux-gnu -fintelfpga -Xshardware %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-FPGA-HW +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga-unknown-unknown -Xssimulation %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-FPGA-HW +// RUN: %clang -target x86_64-unknown-linux-gnu -fintelfpga -Xssimulation %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-FPGA-HW +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga-unknown-unknown -Xsemulator %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT +// RUN: %clang -target x86_64-unknown-linux-gnu -fintelfpga -Xsemulator %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen-unknown-unknown %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen-unknown-unknown %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT + +// CHECK-DEFAULT: llvm-spirv{{.*}}-spirv-debug-info-version=nonsemantic-shader-200 +// CHECK-DEFAULT-NOT: -ocl-100 + +// CHECL-FPGA-HW: llvm-spirv{{.*}}-ocl-100 +// CHECK-FPGA-HW-NOT: spirv-debug-info-version=nonsemantic-shader-200 + diff --git a/clang/test/Driver/sycl-spirv-default-options.c b/clang/test/Driver/sycl-spirv-default-options.c new file mode 100644 index 0000000000000..c3ced7858e52f --- /dev/null +++ b/clang/test/Driver/sycl-spirv-default-options.c @@ -0,0 +1,17 @@ +// Generate .o file as SYCL device library file. +// +// RUN: touch %t.devicelib.cpp +// RUN: %clang %t.devicelib.cpp -fsycl -fsycl-targets=spir64-unknown-unknown -c --offload-new-driver -o %t_1.devicelib.o +// RUN: %clang %t.devicelib.cpp -fsycl -fsycl-targets=spir64_gen-unknown-unknown -c --offload-new-driver -o %t_2.devicelib.o +// RUN: %clang %t.devicelib.cpp -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown -c --offload-new-driver -o %t_3.devicelib.o + +// Test for default llvm-spirv options + +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --offload-new-driver \ +// RUN: -fsycl-targets=spir64-unknown-unknown -c %s -o %t_1.o +// RUN: clang-linker-wrapper -sycl-device-libraries=%t_1.devicelib.o \ +// RUN: "--host-triple=x86_64-unknown-linux-gnu" "--linker-path=/usr/bin/ld" \ +// RUN: "--" "-o" "a.out" %t_1.o --dry-run 2>&1 | FileCheck %s + +// CHECK: llvm-spirv{{.*}}-spirv-debug-info-version=nonsemantic-shader-200 +// CHECK-NOT: ocl-100 diff --git a/clang/test/SemaSYCL/free_function_kernel_params.cpp b/clang/test/SemaSYCL/free_function_kernel_params.cpp index 2de4f896a1513..da229145a34ad 100644 --- a/clang/test/SemaSYCL/free_function_kernel_params.cpp +++ b/clang/test/SemaSYCL/free_function_kernel_params.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -ast-dump \ // RUN: %s -o - | FileCheck %s // This test checks parameter rewriting for free functions with parameters -// of type scalar, pointer and non-decomposed struct. +// of type scalar, pointer, non-decomposed struct and work group memory. #include "sycl.hpp" @@ -171,3 +171,23 @@ template void ff_6(Agg S1, Derived1 S2, int); // CHECK-NEXT: DeclRefExpr {{.*}} '__generated_Derived1' lvalue ParmVar {{.*}} '__arg_S2' '__generated_Derived1' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' // CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '__arg_end' 'int' + +__attribute__((sycl_device)) +[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 0)]] +void ff_7(sycl::work_group_memory mem) { +} +// CHECK: FunctionDecl {{.*}}__sycl_kernel{{.*}}'void (__local int *)' +// CHECK-NEXT: ParmVarDecl {{.*}} used __arg_Ptr '__local int *' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: DeclStmt +// CHECK-NEXT: VarDecl {{.*}} used mem 'sycl::work_group_memory' callinit +// CHECK-NEXT: CXXConstructExpr {{.*}} 'sycl::work_group_memory' 'void () noexcept' +// CHECK-NEXT: CXXMemberCallExpr {{.*}} 'void' +// CHECK-NEXT: MemberExpr {{.*}} 'void (__local int *)' lvalue .__init +// CHECK-NEXT: DeclRefExpr {{.*}} 'sycl::work_group_memory' Var {{.*}} 'mem' 'sycl::work_group_memory' +// CHECK-NEXT: ImplicitCastExpr {{.*}} '__local int *' +// CHECK-NEXT: DeclRefExpr {{.*}} '__local int *' lvalue ParmVar {{.*}} '__arg_Ptr' '__local int *' +// CHECK-NEXT: CallExpr {{.*}} 'void' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(sycl::work_group_memory)' +// CHECK-NEXT: DeclRefExpr {{.*}} 'void (sycl::work_group_memory)' lvalue Function {{.*}} 'ff_7' 'void (sycl::work_group_memory)' +// CHECK-NEXT: DeclRefExpr {{.*}} 'sycl::work_group_memory' Var {{.*}} 'mem' 'sycl::work_group_memory' diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 2f7993c6411f4..e4a37591384c7 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -830,19 +830,7 @@ getTripleBasedSPIRVTransOpts(const ArgList &Args, const llvm::Triple Triple) { bool IsCPU = Triple.isSPIR() && Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64; - // Enable NonSemanticShaderDebugInfo.200 for CPU AOT and for non-Windows - const bool IsWindowsMSVC = Triple.isWindowsMSVCEnvironment() || - Args.hasArg(OPT_sycl_is_windows_msvc_env); - const bool EnableNonSemanticDebug = IsCPU || !IsWindowsMSVC; - if (EnableNonSemanticDebug) { - TranslatorArgs.push_back( - "-spirv-debug-info-version=nonsemantic-shader-200"); - } else { - TranslatorArgs.push_back("-spirv-debug-info-version=ocl-100"); - // Prevent crash in the translator if input IR contains DIExpression - // operations which don't have mapping to OpenCL.DebugInfo.100 spec. - TranslatorArgs.push_back("-spirv-allow-extra-diexpressions"); - } + TranslatorArgs.push_back("-spirv-debug-info-version=nonsemantic-shader-200"); std::string UnknownIntrinsics("-spirv-allow-unknown-intrinsics=llvm.genx."); if (IsCPU) UnknownIntrinsics += ",llvm.fpbuiltin"; diff --git a/devops/dependencies-igc-dev.json b/devops/dependencies-igc-dev.json index 6f3cbc7da4273..72a9ef49b2e89 100644 --- a/devops/dependencies-igc-dev.json +++ b/devops/dependencies-igc-dev.json @@ -1,10 +1,10 @@ { "linux": { "igc_dev": { - "github_tag": "igc-dev-ad75a20", - "version": "ad75a20", - "updated_at": "2024-11-10T01:11:34Z", - "url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2167439771/zip", + "github_tag": "igc-dev-0b4b682", + "version": "0b4b682", + "updated_at": "2024-11-17T01:09:50Z", + "url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2197388704/zip", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" } } diff --git a/devops/dependencies.json b/devops/dependencies.json index 944a3ffb7e8d2..755a1f10625fb 100644 --- a/devops/dependencies.json +++ b/devops/dependencies.json @@ -25,21 +25,21 @@ "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" }, "tbb": { - "github_tag": "v2021.12.0", - "version": "2021.12.0", - "url": "https://github.com/oneapi-src/oneTBB/releases/download/v2021.12.0/oneapi-tbb-2021.12.0-lin.tgz", + "github_tag": "v2022.0.0", + "version": "2022.0.0", + "url": "https://github.com/oneapi-src/oneTBB/releases/download/v2022.0.0/oneapi-tbb-2022.0.0-lin.tgz", "root": "{DEPS_ROOT}/tbb/lin" }, "oclcpu": { - "github_tag": "2024-WW25", - "version": "2024.18.6.0.02", - "url": "https://github.com/intel/llvm/releases/download/2024-WW25/oclcpuexp-2024.18.6.0.02_rel.tar.gz", + "github_tag": "2024-WW43", + "version": "2024.18.10.0.08", + "url": "https://github.com/intel/llvm/releases/download/2024-WW43/oclcpuexp-2024.18.10.0.08_rel.tar.gz", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclcpu" }, "fpgaemu": { - "github_tag": "2024-WW25", - "version": "2024.18.6.0.02", - "url": "https://github.com/intel/llvm/releases/download/2024-WW25/fpgaemu-2024.18.6.0.02_rel.tar.gz", + "github_tag": "2024-WW43", + "version": "2024.18.10.0.08", + "url": "https://github.com/intel/llvm/releases/download/2024-WW43/fpgaemu-2024.18.10.0.08_rel.tar.gz", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclfpgaemu" }, "fpga": { @@ -53,21 +53,21 @@ "root": "" }, "tbb": { - "github_tag": "v2021.12.0", - "version": "2021.12.0", - "url": "https://github.com/oneapi-src/oneTBB/releases/download/v2021.12.0/oneapi-tbb-2021.12.0-win.zip", + "github_tag": "v2022.0.0", + "version": "2022.0.0", + "url": "https://github.com/oneapi-src/oneTBB/releases/download/v2022.0.0/oneapi-tbb-2022.0.0-win.zip", "root": "{DEPS_ROOT}/tbb/win" }, "oclcpu": { - "github_tag": "2024-WW25", - "version": "2024.18.6.0.02", - "url": "https://github.com/intel/llvm/releases/download/2024-WW25/win-oclcpuexp-2024.18.6.0.02_rel.zip", + "github_tag": "2024-WW43", + "version": "2024.18.10.0.08", + "url": "https://github.com/intel/llvm/releases/download/2024-WW43/win-oclcpuexp-2024.18.10.0.08_rel.zip", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclcpu" }, "fpgaemu": { - "github_tag": "2024-WW25", - "version": "2024.18.6.0.02", - "url": "https://github.com/intel/llvm/releases/download/2024-WW25/win-fpgaemu-2024.18.6.0.02_rel.zip", + "github_tag": "2024-WW43", + "version": "2024.18.10.0.08", + "url": "https://github.com/intel/llvm/releases/download/2024-WW43/win-fpgaemu-2024.18.10.0.08_rel.zip", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclfpgaemu" }, "fpga": { diff --git a/devops/scripts/install_drivers.sh b/devops/scripts/install_drivers.sh index f27c7f9c471e7..6efbc792d9ffc 100755 --- a/devops/scripts/install_drivers.sh +++ b/devops/scripts/install_drivers.sh @@ -81,6 +81,10 @@ InstallTBB () { if [ "$TBB_INSTALLED" = false ]; then mkdir -p $INSTALL_LOCATION cd $INSTALL_LOCATION + if [ -d "$INSTALL_LOCATION/oneapi-tbb" ]; then + echo "$INSTALL_LOCATION/oneapi-tbb exists and will be removed!" + rm -Rf $INSTALL_LOCATION/oneapi-tbb; + fi echo "Installing TBB..." echo "TBB version $TBB_TAG" get_release oneapi-src/onetbb $TBB_TAG \ diff --git a/libdevice/nativecpu_utils.cpp b/libdevice/nativecpu_utils.cpp index 65e6c4c1fd0d1..c3e8bb61657a7 100644 --- a/libdevice/nativecpu_utils.cpp +++ b/libdevice/nativecpu_utils.cpp @@ -17,7 +17,7 @@ #include "device.h" #include #include -#include +#include // including state definition from Native CPU UR adapter #include "nativecpu_state.hpp" diff --git a/llvm/include/llvm/SYCLLowerIR/SanitizeDeviceGlobal.h b/llvm/include/llvm/SYCLLowerIR/SanitizeDeviceGlobal.h deleted file mode 100644 index a0e7b2999b480..0000000000000 --- a/llvm/include/llvm/SYCLLowerIR/SanitizeDeviceGlobal.h +++ /dev/null @@ -1,23 +0,0 @@ -//===-- SanitizeDeviceGlobal.h - instrument device global for sanitizer ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// This pass adds red zone to each image scope device global and record the -// information like size, red zone size and beginning address. The information -// will be used by address sanitizer. -//===----------------------------------------------------------------------===// - -#include "llvm/IR/PassManager.h" - -namespace llvm { - -class SanitizeDeviceGlobalPass - : public PassInfoMixin { -public: - PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); -}; - -} // namespace llvm diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index 0ce2a91f91a29..9f0b7fe7e43b0 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -69,7 +69,6 @@ add_llvm_component_library(LLVMSYCLLowerIR SYCLPropagateJointMatrixUsage.cpp SYCLVirtualFunctionsAnalysis.cpp SYCLUtils.cpp - SanitizeDeviceGlobal.cpp LocalAccessorToSharedMemory.cpp GlobalOffset.cpp diff --git a/llvm/lib/SYCLLowerIR/SYCLJointMatrixTransform.cpp b/llvm/lib/SYCLLowerIR/SYCLJointMatrixTransform.cpp index 629b27d61f24b..231ec9a818c19 100644 --- a/llvm/lib/SYCLLowerIR/SYCLJointMatrixTransform.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLJointMatrixTransform.cpp @@ -26,8 +26,9 @@ static constexpr char MATRIX_TYPE[] = "spirv.CooperativeMatrixKHR"; // its users and operands to make LLVM IR more SPIR-V friendly. bool transformAccessChain(Function *F) { bool ModuleChanged = false; - for (auto I : F->users()) { - auto *CI = dyn_cast(I); + for (auto I = F->user_begin(), E = F->user_end(); I != E;) { + User *U = *I++; + auto *CI = dyn_cast(U); if (!CI) continue; diff --git a/llvm/lib/SYCLLowerIR/SanitizeDeviceGlobal.cpp b/llvm/lib/SYCLLowerIR/SanitizeDeviceGlobal.cpp deleted file mode 100644 index 81415b0f6f9dc..0000000000000 --- a/llvm/lib/SYCLLowerIR/SanitizeDeviceGlobal.cpp +++ /dev/null @@ -1,144 +0,0 @@ -//===-- SanitizeDeviceGlobal.cpp - instrument device global for sanitizer -===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// This pass adds red zone to each image scope device global and record the -// information like size, red zone size and beginning address. The information -// will be used by address sanitizer. -// TODO: Do this in AddressSanitizer pass when urProgramGetGlobalVariablePointer -// is implemented. -//===----------------------------------------------------------------------===// - -#include "llvm/SYCLLowerIR/SanitizeDeviceGlobal.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/SYCLLowerIR/DeviceGlobals.h" - -#define DEBUG_TYPE "SanitizeDeviceGlobal" - -using namespace llvm; - -namespace { - -// Add extra red zone to each image scope device globals if the module has been -// instrumented by sanitizer pass. And record their infomation like size, red -// zone size, beginning address. -static bool instrumentDeviceGlobal(Module &M) { - auto &DL = M.getDataLayout(); - IRBuilder<> IRB(M.getContext()); - SmallVector GlobalsToRemove; - SmallVector NewDeviceGlobals; - SmallVector DeviceGlobalMetadata; - - constexpr uint64_t MaxRZ = 1 << 18; - constexpr uint64_t MinRZ = 32; - - Type *IntTy = Type::getIntNTy(M.getContext(), DL.getPointerSizeInBits()); - - // Device global meta data is described by a structure - // size_t device_global_size - // size_t device_global_size_with_red_zone - // size_t beginning address of the device global - StructType *StructTy = StructType::get(IntTy, IntTy, IntTy); - - for (auto &G : M.globals()) { - // Non image scope device globals are implemented by device USM, and the - // out-of-bounds check for them will be done by sanitizer USM part. So we - // exclude them here. - if (!isDeviceGlobalVariable(G) || !hasDeviceImageScopeProperty(G)) - continue; - - Type *Ty = G.getValueType(); - const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); - const uint64_t RightRedzoneSize = [&] { - // The algorithm for calculating red zone size comes from - // llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp - uint64_t RZ = 0; - if (SizeInBytes <= MinRZ / 2) { - // Reduce redzone size for small size objects, e.g. int, char[1]. - // Optimize when SizeInBytes is less than or equal to half of MinRZ. - RZ = MinRZ - SizeInBytes; - } else { - // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * - // SizeInBytes. - RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, MaxRZ); - - // Round up to multiple of MinRZ. - if (SizeInBytes % MinRZ) - RZ += MinRZ - (SizeInBytes % MinRZ); - } - - assert((RZ + SizeInBytes) % MinRZ == 0); - return RZ; - }(); - Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); - StructType *NewTy = StructType::get(Ty, RightRedZoneTy); - Constant *NewInitializer = ConstantStruct::get( - NewTy, G.getInitializer(), Constant::getNullValue(RightRedZoneTy)); - - // Create a new global variable with enough space for a redzone. - GlobalVariable *NewGlobal = new GlobalVariable( - M, NewTy, G.isConstant(), G.getLinkage(), NewInitializer, "", &G, - G.getThreadLocalMode(), G.getAddressSpace()); - NewGlobal->copyAttributesFrom(&G); - NewGlobal->setComdat(G.getComdat()); - NewGlobal->setAlignment(Align(MinRZ)); - NewGlobal->copyMetadata(&G, 0); - - Value *Indices2[2]; - Indices2[0] = IRB.getInt32(0); - Indices2[1] = IRB.getInt32(0); - - G.replaceAllUsesWith( - ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true)); - NewGlobal->takeName(&G); - GlobalsToRemove.push_back(&G); - NewDeviceGlobals.push_back(NewGlobal); - DeviceGlobalMetadata.push_back(ConstantStruct::get( - StructTy, ConstantInt::get(IntTy, SizeInBytes), - ConstantInt::get(IntTy, SizeInBytes + RightRedzoneSize), - ConstantExpr::getPointerCast(NewGlobal, IntTy))); - } - - if (GlobalsToRemove.empty()) - return false; - - // Create global to record number of device globals - GlobalVariable *NumOfDeviceGlobals = new GlobalVariable( - M, IntTy, false, GlobalValue::ExternalLinkage, - ConstantInt::get(IntTy, NewDeviceGlobals.size()), - "__AsanDeviceGlobalCount", nullptr, GlobalValue::NotThreadLocal, 1); - NumOfDeviceGlobals->setUnnamedAddr(GlobalValue::UnnamedAddr::Local); - - // Create meta data global to record device globals' information - ArrayType *ArrayTy = ArrayType::get(StructTy, NewDeviceGlobals.size()); - Constant *MetadataInitializer = - ConstantArray::get(ArrayTy, DeviceGlobalMetadata); - GlobalVariable *AsanDeviceGlobalMetadata = new GlobalVariable( - M, MetadataInitializer->getType(), false, GlobalValue::ExternalLinkage, - MetadataInitializer, "__AsanDeviceGlobalMetadata", nullptr, - GlobalValue::NotThreadLocal, 1); - AsanDeviceGlobalMetadata->setUnnamedAddr(GlobalValue::UnnamedAddr::Local); - - for (auto *G : GlobalsToRemove) - G->eraseFromParent(); - - return true; -} - -} - -namespace llvm { - -PreservedAnalyses SanitizeDeviceGlobalPass::run(Module &M, - ModuleAnalysisManager &MAM) { - bool Modified = false; - - Modified |= instrumentDeviceGlobal(M); - - return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all(); -} - -} diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 84d839ee2fba9..67e98bb67013f 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -465,6 +465,10 @@ static cl::opt cl::desc("instrument generic pointer"), cl::Hidden, cl::init(true)); +static cl::opt ClDeviceGlobals("asan-device-globals", + cl::desc("instrument device globals"), + cl::Hidden, cl::init(true)); + // Debug flags. static cl::opt ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, @@ -970,6 +974,7 @@ class ModuleAddressSanitizer { private: void initializeCallbacks(); + void instrumentDeviceGlobal(IRBuilder<> &IRB); void instrumentGlobals(IRBuilder<> &IRB, bool *CtorComdat); void InstrumentGlobalsCOFF(IRBuilder<> &IRB, ArrayRef ExtendedGlobals, @@ -1556,12 +1561,27 @@ static bool isJointMatrixAccess(Value *V) { return false; } +static bool isUnsupportedDeviceGlobal(GlobalVariable *G) { + // Non image scope device globals are implemented by device USM, and the + // out-of-bounds check for them will be done by sanitizer USM part. So we + // exclude them here. + if (!G->hasAttribute("sycl-device-image-scope")) + return true; + + Attribute Attr = G->getAttribute("sycl-device-image-scope"); + return (!Attr.isStringAttribute() || Attr.getValueAsString() == "false"); +} + static bool isUnsupportedSPIRAccess(Value *Addr, Instruction *Inst) { // Skip SPIR-V built-in varibles auto *OrigValue = Addr->stripInBoundsOffsets(); if (OrigValue->getName().starts_with("__spirv_BuiltIn")) return true; + GlobalVariable *GV = dyn_cast(OrigValue); + if (GV && isUnsupportedDeviceGlobal(GV)) + return true; + // Ignore load/store for target ext type since we can't know exactly what size // it is. if (auto *SI = dyn_cast(Inst)) @@ -2766,6 +2786,71 @@ Instruction *ModuleAddressSanitizer::CreateAsanModuleDtor() { return ReturnInst::Create(*C, AsanDtorBB); } +void ModuleAddressSanitizer::instrumentDeviceGlobal(IRBuilder<> &IRB) { + auto &DL = M.getDataLayout(); + SmallVector GlobalsToRemove; + SmallVector DeviceGlobalMetadata; + + Type *IntptrTy = M.getDataLayout().getIntPtrType(*C, kSpirOffloadGlobalAS); + + // Device global meta data is described by a structure + // size_t device_global_size + // size_t device_global_size_with_red_zone + // size_t beginning address of the device global + StructType *StructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy); + + for (auto &G : M.globals()) { + if (isUnsupportedDeviceGlobal(&G)) + continue; + + Type *Ty = G.getValueType(); + const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); + const uint64_t RightRedzoneSize = getRedzoneSizeForGlobal(SizeInBytes); + Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); + StructType *NewTy = StructType::get(Ty, RightRedZoneTy); + Constant *NewInitializer = ConstantStruct::get( + NewTy, G.getInitializer(), Constant::getNullValue(RightRedZoneTy)); + + // Create a new global variable with enough space for a redzone. + GlobalVariable *NewGlobal = new GlobalVariable( + M, NewTy, G.isConstant(), G.getLinkage(), NewInitializer, "", &G, + G.getThreadLocalMode(), G.getAddressSpace()); + NewGlobal->copyAttributesFrom(&G); + NewGlobal->setComdat(G.getComdat()); + NewGlobal->setAlignment(Align(getMinRedzoneSizeForGlobal())); + NewGlobal->copyMetadata(&G, 0); + + Value *Indices2[2]; + Indices2[0] = IRB.getInt32(0); + Indices2[1] = IRB.getInt32(0); + + G.replaceAllUsesWith( + ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true)); + NewGlobal->takeName(&G); + GlobalsToRemove.push_back(&G); + DeviceGlobalMetadata.push_back(ConstantStruct::get( + StructTy, ConstantInt::get(IntptrTy, SizeInBytes), + ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), + ConstantExpr::getPointerCast(NewGlobal, IntptrTy))); + } + + if (GlobalsToRemove.empty()) + return; + + // Create meta data global to record device globals' information + ArrayType *ArrayTy = ArrayType::get(StructTy, DeviceGlobalMetadata.size()); + Constant *MetadataInitializer = + ConstantArray::get(ArrayTy, DeviceGlobalMetadata); + GlobalVariable *AsanDeviceGlobalMetadata = new GlobalVariable( + M, MetadataInitializer->getType(), false, GlobalValue::AppendingLinkage, + MetadataInitializer, "__AsanDeviceGlobalMetadata", nullptr, + GlobalValue::NotThreadLocal, 1); + AsanDeviceGlobalMetadata->setUnnamedAddr(GlobalValue::UnnamedAddr::Local); + + for (auto *G : GlobalsToRemove) + G->eraseFromParent(); +} + void ModuleAddressSanitizer::InstrumentGlobalsCOFF( IRBuilder<> &IRB, ArrayRef ExtendedGlobals, ArrayRef MetadataInitializers) { @@ -3234,6 +3319,11 @@ bool ModuleAddressSanitizer::instrumentModule() { auto *MD = M.getOrInsertNamedMetadata("device.sanitizer"); Metadata *MDVals[] = {MDString::get(Ctx, "asan")}; MD->addOperand(MDNode::get(Ctx, MDVals)); + + if (ClDeviceGlobals) { + IRBuilder<> IRB(*C); + instrumentDeviceGlobal(IRB); + } } const uint64_t Priority = GetCtorAndDtorPriority(TargetTriple); diff --git a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/device_global.ll b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/device_global.ll new file mode 100644 index 0000000000000..a30eca4bc75be --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/device_global.ll @@ -0,0 +1,13 @@ +; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -asan-constructor-kind=none -S | FileCheck %s + +; check that image scope device globals can be correctly instrumented. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +@dev_global = addrspace(1) global { [4 x i32] } zeroinitializer #0 + +; CHECK: @dev_global = addrspace(1) global { { [4 x i32] }, [16 x i8] } +; CHECK: @__AsanDeviceGlobalMetadata = appending local_unnamed_addr addrspace(1) global [1 x { i64, i64, i64 }] [{ i64, i64, i64 } { i64 16, i64 32, i64 ptrtoint (ptr addrspace(1) @dev_global to i64) }] + +attributes #0 = { "sycl-device-global-size"="16" "sycl-device-image-scope" } diff --git a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/device_global_non_image_scope.ll b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/device_global_non_image_scope.ll new file mode 100644 index 0000000000000..735c437c47169 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/device_global_non_image_scope.ll @@ -0,0 +1,11 @@ +; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -asan-constructor-kind=none -S | FileCheck %s + +; check non image scope device globals will not be instrumented. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +@dev_global = addrspace(1) global { ptr addrspace(1), [4 x i32] } zeroinitializer + +; CHECK: @dev_global = addrspace(1) global { ptr addrspace(1), [4 x i32] } +; CHECK-NOT: @__AsanDeviceGlobalMetadata diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 3800c5875e44f..0a15c42dc4333 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -40,7 +40,6 @@ #include "llvm/SYCLLowerIR/ModuleSplitter.h" #include "llvm/SYCLLowerIR/SYCLJointMatrixTransform.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" -#include "llvm/SYCLLowerIR/SanitizeDeviceGlobal.h" #include "llvm/SYCLLowerIR/SpecConstants.h" #include "llvm/SYCLLowerIR/Support.h" #include "llvm/Support/CommandLine.h" @@ -791,11 +790,6 @@ processInputModule(std::unique_ptr M) { if (M->getTargetTriple().find("spir") != std::string::npos) Modified |= removeDeviceGlobalFromCompilerUsed(*M.get()); - // Instrument each image scope device globals if the module has been - // instrumented by sanitizer pass. - if (isModuleUsingAsan(*M)) - Modified |= runModulePass(*M); - // Transform Joint Matrix builtin calls to align them with SPIR-V friendly // LLVM IR specification. Modified |= runModulePass(*M); diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 58cf57c93e779..08140a1f64900 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,7 @@ -# commit 3a5b23c8b475712f9107c1d5ab41f27a1465578e -# Merge: f9f71f17 1696524d +# commit 20e501a036e5e16368eb5c8f1e92289efb3ea735 +# Merge: dcf35a10 ca878c58 # Author: Piotr Balcer -# Date: Thu Nov 14 14:38:05 2024 +0100 -# Merge pull request #2253 from pbalcer/low-power-events -# add low-power events experimental extension spec -set(UNIFIED_RUNTIME_TAG 66c80c9c639cf149de0aac911be875f9bc1fcd30) +# Date: Thu Nov 21 10:55:59 2024 +0100 +# Merge pull request #1975 from Bensuo/fabio/immediate_append_exp +# [Command-Buffers] Implement new command-list enqueue path +set(UNIFIED_RUNTIME_TAG 20e501a036e5e16368eb5c8f1e92289efb3ea735) diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index 3172bc2446aee..5ee2c40542ced 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -14,6 +14,7 @@ compiler and runtime. | `SYCL_CACHE_DISABLE_PERSISTENT (deprecated)` | Any(\*) | Has no effect. | | `SYCL_CACHE_PERSISTENT` | Integer | Controls persistent device compiled code cache. Turns it on if set to '1' and turns it off if set to '0'. When cache is enabled SYCL runtime will try to cache and reuse JIT-compiled binaries. Default is off. | | `SYCL_CACHE_IN_MEM` | '1' or '0' | Enable ('1') or disable ('0') in-memory caching of device compiled code. When cache is enabled SYCL runtime will try to cache and reuse JIT-compiled binaries. Default is '1'. | +| `SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD` | Positive integer | `SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD` accepts an integer that specifies the maximum size of the in-memory program cache in bytes. Eviction is performed when the cache size exceeds the threshold. The default value is 0 which means that eviction is disabled. | | `SYCL_CACHE_EVICTION_DISABLE` | Any(\*) | Switches persistent cache eviction off when the variable is set. | | `SYCL_CACHE_MAX_SIZE` | Positive integer | Persistent cache eviction is triggered once total size of cached images exceeds the value in megabytes (default - 8 192 for 8 GB). Set to 0 to disable size-based cache eviction. | | `SYCL_CACHE_THRESHOLD` | Positive integer | Persistent cache eviction threshold in days (default value is 7 for 1 week). Set to 0 for disabling time-based cache eviction. | diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_work_group_memory.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_work_group_memory.asciidoc similarity index 98% rename from sycl/doc/extensions/proposed/sycl_ext_oneapi_work_group_memory.asciidoc rename to sycl/doc/extensions/experimental/sycl_ext_oneapi_work_group_memory.asciidoc index 296b77acf82fb..2cbc9d0b2d28b 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_work_group_memory.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_work_group_memory.asciidoc @@ -49,12 +49,10 @@ This extension also depends on the following other SYCL extensions: == Status -This is a proposed extension specification, intended to gather community -feedback. -Interfaces defined in this specification may not be implemented yet or may be -in a preliminary state. -The specification itself may also change in incompatible ways before it is -finalized. +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in {dpcpp}, but they are not finalized and may +change incompatibly in future versions of {dpcpp} without prior notice. *Shipping software products should not rely on APIs defined in this specification.* diff --git a/sycl/include/sycl/aliases.hpp b/sycl/include/sycl/aliases.hpp index 1640c11db6078..fc359b7659802 100644 --- a/sycl/include/sycl/aliases.hpp +++ b/sycl/include/sycl/aliases.hpp @@ -9,7 +9,6 @@ #pragma once #include // for __SYCL2020_DEPRECATED -#include // for half #include // for uint8_t, int16_t, int32_t diff --git a/sycl/include/sycl/backend.hpp b/sycl/include/sycl/backend.hpp index 24750732b08ff..fc477128b57b4 100644 --- a/sycl/include/sycl/backend.hpp +++ b/sycl/include/sycl/backend.hpp @@ -15,7 +15,6 @@ #include // for buffer_allocator #include // for context, get_na... #include // for InteropFeatureS... -#include // for _cl_event #include // for __SYCL_DEPRECATED #include // for __SYCL_EXPORT #include // for createSyclObjFr... @@ -23,13 +22,10 @@ #include // for event, get_native #include // for make_error_code #include // for SYCL_BACKEND_OP... -#include // for buffer #include // for image, image_al... -#include // for kernel, get_native #include // for kernel_bundle #include // for bundle_state #include // for platform, get_n... -#include // for property_list #include // for queue, get_native #include // for ur_native_handle_t @@ -60,13 +56,12 @@ namespace sycl { inline namespace _V1 { +class property_list; + namespace detail { // TODO each backend can have its own custom errc enumeration // but the details for this are not fully specified yet enum class backend_errc : unsigned int {}; - -// Convert from UR backend to SYCL backend enum -backend convertUrBackend(ur_platform_backend_t UrBackend); } // namespace detail template class backend_traits { @@ -78,14 +73,6 @@ template class backend_traits { using return_type = typename detail::BackendReturn::type; }; -template -using backend_input_t = - typename backend_traits::template input_type; - -template -using backend_return_t = - typename backend_traits::template return_type; - namespace detail { template struct BufferInterop { diff --git a/sycl/include/sycl/builtins_esimd.hpp b/sycl/include/sycl/builtins_esimd.hpp index fa720a405f9be..49566ce118eba 100644 --- a/sycl/include/sycl/builtins_esimd.hpp +++ b/sycl/include/sycl/builtins_esimd.hpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include // TODO Decide whether to mark functions with this attribute. #define __NOEXC /*noexcept*/ diff --git a/sycl/include/sycl/builtins_utils_vec.hpp b/sycl/include/sycl/builtins_utils_vec.hpp index 178c696495c8e..9504b8d3aa295 100644 --- a/sycl/include/sycl/builtins_utils_vec.hpp +++ b/sycl/include/sycl/builtins_utils_vec.hpp @@ -13,8 +13,9 @@ #include #include +#include #include // for marray -#include // for vec +#include // for vec namespace sycl { inline namespace _V1 { diff --git a/sycl/include/sycl/detail/backend_traits.hpp b/sycl/include/sycl/detail/backend_traits.hpp index adbbab78642ff..87c00ce6d63d3 100644 --- a/sycl/include/sycl/detail/backend_traits.hpp +++ b/sycl/include/sycl/detail/backend_traits.hpp @@ -8,10 +8,9 @@ #pragma once -#include - namespace sycl { inline namespace _V1 { +enum class backend : char; namespace detail { template struct interop; diff --git a/sycl/include/sycl/detail/device_filter.hpp b/sycl/include/sycl/detail/device_filter.hpp index 5574bf69a3484..9ca26333ab15a 100644 --- a/sycl/include/sycl/detail/device_filter.hpp +++ b/sycl/include/sycl/detail/device_filter.hpp @@ -8,7 +8,6 @@ #pragma once -#include #include #include @@ -18,6 +17,7 @@ namespace sycl { inline namespace _V1 { +enum class backend : char; namespace detail { // --------------------------------------- diff --git a/sycl/include/sycl/detail/id_queries_fit_in_int.hpp b/sycl/include/sycl/detail/id_queries_fit_in_int.hpp index d3ce74dfdfc0a..3f12b47bd4296 100644 --- a/sycl/include/sycl/detail/id_queries_fit_in_int.hpp +++ b/sycl/include/sycl/detail/id_queries_fit_in_int.hpp @@ -23,6 +23,8 @@ #ifndef __SYCL_DEVICE_ONLY__ #include +#include +#include #include #include diff --git a/sycl/include/sycl/detail/image_accessor_util.hpp b/sycl/include/sycl/detail/image_accessor_util.hpp index d87038a8c9ce3..9b1c519301bf6 100644 --- a/sycl/include/sycl/detail/image_accessor_util.hpp +++ b/sycl/include/sycl/detail/image_accessor_util.hpp @@ -23,7 +23,7 @@ #include // for image_channel_type #include // for range #include // for addressing_mode, coor... -#include // for vec, operator*, round... +#include // for vec, operator*, round... #include // for int32_t, uint16_t #include // for size_t diff --git a/sycl/include/sycl/detail/optional.hpp b/sycl/include/sycl/detail/optional.hpp new file mode 100644 index 0000000000000..da9ff4d900000 --- /dev/null +++ b/sycl/include/sycl/detail/optional.hpp @@ -0,0 +1,147 @@ +//==-------- optional.hpp - limited variant of std::optional -------- C++ --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===--------------------------------------------------------------------=== // + +#pragma once + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +// ABI-stable implementation of optional to avoid reliance on potentially +// differing implementations of std::optional when crossing the library +// boundary. +template class optional { +public: + constexpr optional() noexcept {} + constexpr optional(std::nullopt_t) noexcept : optional() {} + + template + constexpr optional(const optional &Other) + : ContainsValue{Other.ContainsValue} { + new (Storage) T(Other.Value); + } + template + constexpr optional(optional &&Other) + : ContainsValue{std::move(Other.ContainsValue)} { + new (Storage) T(std::move(Other.Value)); + } + + constexpr optional(T &&Value) : ContainsValue{true} { + new (Storage) T(std::move(Value)); + } + + constexpr optional(const T &Value) : ContainsValue{true} { + new (Storage) T(Value); + } + + template + constexpr optional(const std::optional &Other) : ContainsValue{Other} { + if (Other) + new (Storage) T(*Other); + } + + ~optional() { + if (has_value()) + reinterpret_cast(Storage)->~T(); + } + + optional &operator=(std::nullopt_t) noexcept { + if (has_value()) + reinterpret_cast(Storage)->~T(); + ContainsValue = false; + return *this; + } + + template optional &operator=(const optional &Other) { + if (has_value()) + reinterpret_cast(Storage)->~T(); + ContainsValue = Other; + new (Storage) T(Other.Value); + return *this; + } + template optional &operator=(optional &&Other) noexcept { + if (has_value()) + reinterpret_cast(Storage)->~T(); + ContainsValue = Other; + new (Storage) T(std::move(Other.Value)); + return *this; + } + + optional &operator=(T &&Value) { + if (has_value()) + reinterpret_cast(Storage)->~T(); + ContainsValue = true; + new (Storage) T(std::move(Value)); + return *this; + } + + optional &operator=(const T &Value) { + if (has_value()) + reinterpret_cast(Storage)->~T(); + ContainsValue = true; + new (Storage) T(Value); + return *this; + } + + template optional &operator=(const std::optional &Other) { + if (has_value()) + reinterpret_cast(Storage)->~T(); + ContainsValue = Other; + if (Other) + new (Storage) T(*Other); + return *this; + } + + constexpr bool has_value() const noexcept { return ContainsValue; } + constexpr explicit operator bool() const noexcept { return has_value(); } + + constexpr T &value() & { + if (!has_value()) + throw std::bad_optional_access{}; + return *reinterpret_cast(Storage); + } + constexpr const T &value() const & { + if (!has_value()) + throw std::bad_optional_access{}; + return *reinterpret_cast(Storage); + } + constexpr T &&value() && { + if (!has_value()) + throw std::bad_optional_access{}; + return std::move(*reinterpret_cast(Storage)); + } + constexpr const T &&value() const && { + if (!has_value()) + throw std::bad_optional_access{}; + return std::move(*reinterpret_cast(Storage)); + } + + template constexpr T value_or(U &&DefaultVal) { + return has_value() ? value() : static_cast(std::forward(DefaultVal)); + } + template constexpr T value_or(U &&DefaultVal) const { + return has_value() ? std::move(value()) + : static_cast(std::forward(DefaultVal)); + } + + constexpr T &operator*() & { return value(); } + constexpr const T &operator*() const & { return value(); } + constexpr T &&operator*() && { return value(); } + constexpr const T &&operator*() const && { return value(); } + +private: + alignas(alignof(T)) char Storage[sizeof(T)] = {0}; + bool ContainsValue = false; +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/detail/ur.hpp b/sycl/include/sycl/detail/ur.hpp index 70b6517b43748..1ed65046c0c1b 100644 --- a/sycl/include/sycl/detail/ur.hpp +++ b/sycl/include/sycl/detail/ur.hpp @@ -14,7 +14,6 @@ #pragma once -#include #include #include #include @@ -42,6 +41,7 @@ struct trace_event_data_t; namespace sycl { inline namespace _V1 { +enum class backend : char; class context; namespace detail { diff --git a/sycl/include/sycl/detail/util.hpp b/sycl/include/sycl/detail/util.hpp index d858aba279f41..56d4b12df2bc9 100644 --- a/sycl/include/sycl/detail/util.hpp +++ b/sycl/include/sycl/detail/util.hpp @@ -15,6 +15,9 @@ #include #include +#include +#include +#include #include namespace sycl { @@ -83,6 +86,40 @@ template <> struct ABINeutralT> { }; template using ABINeutralT_t = typename ABINeutralT::type; + +template auto convert_to_abi_neutral(ParamT &&Info) { + using ParamNoRef = std::remove_reference_t; + if constexpr (std::is_same_v) { + return detail::string{Info}; + } else if constexpr (std::is_same_v>) { + std::vector Res; + Res.reserve(Info.size()); + for (std::string &Str : Info) { + Res.push_back(detail::string{Str}); + } + return Res; + } else { + return std::forward(Info); + } +} + +template auto convert_from_abi_neutral(ParamT &&Info) { + using ParamNoRef = std::remove_reference_t; + if constexpr (std::is_same_v) { + return Info.c_str(); + } else if constexpr (std::is_same_v>) { + std::vector Res; + Res.reserve(Info.size()); + for (detail::string &Str : Info) { + Res.push_back(Str.c_str()); + } + return Res; + } else { + return std::forward(Info); + } +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index d49a1f1d1ff9a..57b193b4987b3 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -8,7 +8,6 @@ #pragma once -#include #include #include #include @@ -21,9 +20,12 @@ #include #include #include -#include #include +#ifdef __SYCL_INTERNAL_API +#include +#endif + #include #include #include @@ -35,7 +37,7 @@ namespace sycl { inline namespace _V1 { // Forward declarations -class device_selector; +class platform; template auto get_native(const SyclObjectT &Obj) -> backend_return_t; diff --git a/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp index 89636fda85019..2240379d5eed7 100644 --- a/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include diff --git a/sycl/include/sycl/ext/intel/experimental/fpga_annotated_properties.hpp b/sycl/include/sycl/ext/intel/experimental/fpga_annotated_properties.hpp index ea9f531c764b9..bbf55d469809a 100644 --- a/sycl/include/sycl/ext/intel/experimental/fpga_annotated_properties.hpp +++ b/sycl/include/sycl/ext/intel/experimental/fpga_annotated_properties.hpp @@ -319,28 +319,25 @@ struct propagateToPtrAnnotation : std::true_type {}; //===----------------------------------------------------------------------===// // namespace detail { -template struct checkValidFPGAPropertySet { - using list = std::tuple; - static constexpr bool has_BufferLocation = - ContainsProperty::value; +template struct checkValidFPGAPropertySet { + template + static constexpr bool has_one_of = + ((property_list_t::template has_property() || ...)); + + static constexpr bool has_BufferLocation = has_one_of; static constexpr bool has_InterfaceConfig = - ContainsProperty::value || - ContainsProperty::value || - ContainsProperty::value || - ContainsProperty::value || - ContainsProperty::value || - ContainsProperty::value; + has_one_of; static constexpr bool value = !(!has_BufferLocation && has_InterfaceConfig); }; -template struct checkHasConduitAndRegisterMap { - using list = std::tuple; +template struct checkHasConduitAndRegisterMap { static constexpr bool has_Conduit = - ContainsProperty::value; + property_list_t::template has_property(); static constexpr bool has_RegisterMap = - ContainsProperty::value; + property_list_t::template has_property(); static constexpr bool value = !(has_Conduit && has_RegisterMap); }; } // namespace detail diff --git a/sycl/include/sycl/ext/intel/experimental/grf_size_properties.hpp b/sycl/include/sycl/ext/intel/experimental/grf_size_properties.hpp index 40b36f7bc9383..e63cc02d60b96 100644 --- a/sycl/include/sycl/ext/intel/experimental/grf_size_properties.hpp +++ b/sycl/include/sycl/ext/intel/experimental/grf_size_properties.hpp @@ -56,29 +56,26 @@ template struct ConflictingProperties : std::bool_constant< - ContainsProperty< - sycl::ext::intel::experimental::grf_size_automatic_key, - Properties>::value || - ContainsProperty::value> {}; + Properties::template has_property< + sycl::ext::intel::experimental::grf_size_automatic_key>() || + Properties::template has_property< + sycl::detail::register_alloc_mode_key>()> {}; template struct ConflictingProperties< sycl::ext::intel::experimental::grf_size_automatic_key, Properties> - : std::bool_constant< - ContainsProperty::value || - ContainsProperty::value> {}; + : std::bool_constant() || + Properties::template has_property< + sycl::detail::register_alloc_mode_key>()> {}; template struct ConflictingProperties : std::bool_constant< - ContainsProperty::value || - ContainsProperty< - sycl::ext::intel::experimental::grf_size_automatic_key, - Properties>::value> {}; + Properties::template has_property< + sycl::ext::intel::experimental::grf_size_key>() || + Properties::template has_property< + sycl::ext::intel::experimental::grf_size_automatic_key>()> {}; } // namespace ext::oneapi::experimental::detail } // namespace _V1 diff --git a/sycl/include/sycl/ext/intel/experimental/pipes.hpp b/sycl/include/sycl/ext/intel/experimental/pipes.hpp index 3311d7cd66e07..c09ad05547759 100644 --- a/sycl/include/sycl/ext/intel/experimental/pipes.hpp +++ b/sycl/include/sycl/ext/intel/experimental/pipes.hpp @@ -376,21 +376,29 @@ class pipe : public pipe_base { static constexpr int32_t m_Capacity = _min_capacity; static constexpr int32_t m_ready_latency = - oneapi::experimental::detail::ValueOrDefault< - _propertiesT, ready_latency_key>::template get(0); + oneapi::experimental::detail::get_property_or( + ready_latency<0>) + .value; + static constexpr int32_t m_bits_per_symbol = - oneapi::experimental::detail::ValueOrDefault< - _propertiesT, bits_per_symbol_key>::template get(8); + oneapi::experimental::detail::get_property_or( + bits_per_symbol<8>) + .value; static constexpr bool m_uses_valid = - oneapi::experimental::detail::ValueOrDefault< - _propertiesT, uses_valid_key>::template get(true); + oneapi::experimental::detail::get_property_or(uses_valid_on) + .value; static constexpr bool m_first_symbol_in_high_order_bits = - oneapi::experimental::detail::ValueOrDefault< - _propertiesT, - first_symbol_in_high_order_bits_key>::template get(0); - static constexpr protocol_name m_protocol = oneapi::experimental::detail:: - ValueOrDefault<_propertiesT, protocol_key>::template get( - protocol_name::avalon_streaming_uses_ready); + oneapi::experimental::detail::get_property_or< + first_symbol_in_high_order_bits_key, _propertiesT>( + first_symbol_in_high_order_bits_off) + .value; + static constexpr protocol_name m_protocol = + oneapi::experimental::detail::get_property_or( + protocol_avalon_streaming_uses_ready) + .value; public: static constexpr struct ConstantPipeStorageExp m_Storage = { diff --git a/sycl/include/sycl/ext/intel/experimental/task_sequence.hpp b/sycl/include/sycl/ext/intel/experimental/task_sequence.hpp index b62fca60fef57..cb3434d377a5f 100644 --- a/sycl/include/sycl/ext/intel/experimental/task_sequence.hpp +++ b/sycl/include/sycl/ext/intel/experimental/task_sequence.hpp @@ -109,23 +109,28 @@ class task_sequence< __spv::__spirv_TaskSequenceINTEL *taskSequence; #endif static constexpr int32_t pipelined = - oneapi::experimental::detail::ValueOrDefault< - property_list_t, pipelined_key>::template get(-1); - static constexpr int32_t fpga_cluster = - has_property() - ? static_cast< - typename std::underlying_type::type>( - oneapi::experimental::detail::ValueOrDefault:: - template get( - fpga_cluster_options_enum::stall_free)) - : -1; + oneapi::experimental::detail::get_property_or( + intel::experimental::pipelined<-1>) + .value; + static constexpr int32_t fpga_cluster = []() constexpr { + if constexpr (has_property()) + return static_cast< + typename std::underlying_type::type>( + get_property().value); + else + return -1; + }(); static constexpr uint32_t response_capacity = - oneapi::experimental::detail::ValueOrDefault< - property_list_t, response_capacity_key>::template get(0); + oneapi::experimental::detail::get_property_or( + intel::experimental::response_capacity<0>) + .value; static constexpr uint32_t invocation_capacity = - oneapi::experimental::detail::ValueOrDefault< - property_list_t, invocation_capacity_key>::template get(0); + oneapi::experimental::detail::get_property_or( + intel::experimental::invocation_capacity<0>) + .value; }; } // namespace ext::intel::experimental diff --git a/sycl/include/sycl/ext/intel/fpga_device_selector.hpp b/sycl/include/sycl/ext/intel/fpga_device_selector.hpp index f3d6b1bb00d70..91ec593fe6f6a 100644 --- a/sycl/include/sycl/ext/intel/fpga_device_selector.hpp +++ b/sycl/include/sycl/ext/intel/fpga_device_selector.hpp @@ -10,6 +10,7 @@ #include #include +#include #include #include diff --git a/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp b/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp index 7ff2845f6bde4..14969a309e4da 100644 --- a/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp +++ b/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp @@ -9,8 +9,7 @@ #pragma once #include // for async_han... -#include // for backend_i... -#include // for backend +#include // for backend #include // for buffer_al... #include // for buffer #include // for context diff --git a/sycl/include/sycl/ext/oneapi/dot_product.hpp b/sycl/include/sycl/ext/oneapi/dot_product.hpp index 4fda07052e25a..cec308ba26b55 100644 --- a/sycl/include/sycl/ext/oneapi/dot_product.hpp +++ b/sycl/include/sycl/ext/oneapi/dot_product.hpp @@ -11,7 +11,7 @@ #pragma once #include -#include +#include namespace sycl { inline namespace _V1 { diff --git a/sycl/include/sycl/ext/oneapi/experimental/annotated_arg/annotated_arg.hpp b/sycl/include/sycl/ext/oneapi/experimental/annotated_arg/annotated_arg.hpp index 91baca3e14e3f..b71f4fc4e0f08 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/annotated_arg/annotated_arg.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/annotated_arg/annotated_arg.hpp @@ -66,8 +66,7 @@ annotated_arg(T, Args...) -> annotated_arg::type>; template -annotated_arg(annotated_arg, - properties>) +annotated_arg(annotated_arg, detail::properties_t) -> annotated_arg< T, detail::merged_properties_t>>; @@ -214,13 +213,13 @@ __SYCL_TYPE(annotated_arg) annotated_arg> { "The property list contains invalid property."); // check the set if FPGA specificed properties are used static constexpr bool hasValidFPGAProperties = - detail::checkValidFPGAPropertySet::value; + detail::checkValidFPGAPropertySet::value; static_assert(hasValidFPGAProperties, "FPGA Interface properties (i.e. awidth, dwidth, etc.) " "can only be set with BufferLocation together."); // check if conduit and register_map properties are specified together static constexpr bool hasConduitAndRegisterMapProperties = - detail::checkHasConduitAndRegisterMap::value; + detail::checkHasConduitAndRegisterMap::value; static_assert(hasConduitAndRegisterMapProperties, "The properties conduit and register_map cannot be " "specified at the same time."); @@ -447,13 +446,13 @@ __SYCL_TYPE(annotated_arg) annotated_arg> { "The property list contains invalid property."); // check the set if FPGA specificed properties are used static constexpr bool hasValidFPGAProperties = - detail::checkValidFPGAPropertySet::value; + detail::checkValidFPGAPropertySet::value; static_assert(hasValidFPGAProperties, "FPGA Interface properties (i.e. awidth, dwidth, etc.) " "can only be set with BufferLocation together."); // check if conduit and register_map properties are specified together static constexpr bool hasConduitAndRegisterMapProperties = - detail::checkHasConduitAndRegisterMap::value; + detail::checkHasConduitAndRegisterMap::value; static_assert(hasConduitAndRegisterMapProperties, "The properties conduit and register_map cannot be " "specified at the same time."); diff --git a/sycl/include/sycl/ext/oneapi/experimental/annotated_ptr/annotated_ptr.hpp b/sycl/include/sycl/ext/oneapi/experimental/annotated_ptr/annotated_ptr.hpp index c9a59a8d85c0c..28318364b33f2 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/annotated_ptr/annotated_ptr.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/annotated_ptr/annotated_ptr.hpp @@ -50,10 +50,6 @@ template constexpr bool is_ann_ref_v = is_ann_ref_impl>::value; -template -using contains_alignment = - detail::ContainsProperty>; - // filter properties that are applied on annotations template using annotation_filter = decltype(filter_properties( @@ -240,8 +236,7 @@ annotated_ptr(T *, Args...) -> annotated_ptr::type>; template -annotated_ptr(annotated_ptr, - properties>) +annotated_ptr(annotated_ptr, detail::properties_t) -> annotated_ptr< T, detail::merged_properties_t>>; #endif // __cpp_deduction_guides @@ -392,38 +387,38 @@ __SYCL_TYPE(annotated_ptr) annotated_ptr> { // turned off for these operators to make sure the complete error notes are // printed // clang-format off - template ::value, + template (), class = std::enable_if_t> reference operator[](std::ptrdiff_t idx) const noexcept { return reference(m_Ptr + idx); } - template ::value, + template (), class = std::enable_if_t> auto operator[](std::ptrdiff_t idx) const noexcept -> decltype("operator[] is not available when alignment is specified!") = delete; - template ::value, + template (), class = std::enable_if_t> annotated_ptr operator+(size_t offset) const noexcept { return annotated_ptr(m_Ptr + offset); } - template ::value, + template (), class = std::enable_if_t> auto operator+(size_t offset) const noexcept -> decltype("operator+ is not available when alignment is specified!") = delete; - template ::value, + template (), class = std::enable_if_t> annotated_ptr &operator++() noexcept { m_Ptr += 1; return *this; } - template ::value, + template (), class = std::enable_if_t> auto operator++() noexcept -> decltype("operator++ is not available when alignment is specified!") = delete; - template ::value, + template (), class = std::enable_if_t> annotated_ptr operator++(int) noexcept { auto tmp = *this; @@ -431,22 +426,22 @@ __SYCL_TYPE(annotated_ptr) annotated_ptr> { return tmp; } - template ::value, + template (), class = std::enable_if_t> auto operator++(int) noexcept -> decltype("operator++ is not available when alignment is specified!") = delete; - template ::value, + template (), class = std::enable_if_t> annotated_ptr &operator--() noexcept { m_Ptr -= 1; return *this; } - template ::value, + template (), class = std::enable_if_t> auto operator--() noexcept -> decltype("operator-- is not available when alignment is specified!") = delete; - template ::value, + template (), class = std::enable_if_t> annotated_ptr operator--(int) noexcept { auto tmp = *this; @@ -454,7 +449,7 @@ __SYCL_TYPE(annotated_ptr) annotated_ptr> { return tmp; } - template ::value, + template (), class = std::enable_if_t> auto operator--(int) noexcept -> decltype("operator-- is not available when alignment is specified!") = delete; @@ -485,13 +480,13 @@ __SYCL_TYPE(annotated_ptr) annotated_ptr> { "The property list contains invalid property."); // check the set if FPGA specificed properties are used static constexpr bool hasValidFPGAProperties = - detail::checkValidFPGAPropertySet::value; + detail::checkValidFPGAPropertySet::value; static_assert(hasValidFPGAProperties, "FPGA Interface properties (i.e. awidth, dwidth, etc.) " "can only be set with BufferLocation together."); // check if conduit and register_map properties are specified together static constexpr bool hasConduitAndRegisterMapProperties = - detail::checkHasConduitAndRegisterMap::value; + detail::checkHasConduitAndRegisterMap::value; static_assert(hasConduitAndRegisterMapProperties, "The properties conduit and register_map cannot be " "specified at the same time."); diff --git a/sycl/include/sycl/ext/oneapi/experimental/annotated_ptr/annotated_ptr_properties.hpp b/sycl/include/sycl/ext/oneapi/experimental/annotated_ptr/annotated_ptr_properties.hpp index fcdd05f3a497e..55dfa02fef68f 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/annotated_ptr/annotated_ptr_properties.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/annotated_ptr/annotated_ptr_properties.hpp @@ -55,24 +55,26 @@ struct PropertyMetaInfo> { static constexpr sycl::usm::alloc value = Kind; }; -template struct IsUsmKindDevice : std::false_type {}; -template -struct IsUsmKindDevice> - : detail::ContainsProperty, - std::tuple> {}; - -template struct IsUsmKindHost : std::false_type {}; -template -struct IsUsmKindHost> - : detail::ContainsProperty, - std::tuple> {}; - -template struct IsUsmKindShared : std::false_type {}; -template -struct IsUsmKindShared> - : detail::ContainsProperty, - std::tuple> {}; +template +inline constexpr bool is_usm_kind = []() constexpr { + if constexpr (PropertyListT::template has_property()) + return PropertyListT::template get_property() == + usm_kind; + else + return false; +}(); +template +struct IsUsmKindDevice + : std::bool_constant> { +}; +template +struct IsUsmKindHost + : std::bool_constant> {}; +template +struct IsUsmKindShared + : std::bool_constant> { +}; } // namespace detail } // namespace experimental diff --git a/sycl/include/sycl/ext/oneapi/experimental/annotated_usm/alloc_base.hpp b/sycl/include/sycl/ext/oneapi/experimental/annotated_usm/alloc_base.hpp index 6f2b408e43ffe..bde1b32f602e9 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/annotated_usm/alloc_base.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/annotated_usm/alloc_base.hpp @@ -41,9 +41,8 @@ template ::value, annotated_ptr> -aligned_alloc_annotated(size_t alignment, size_t numBytes, - const device &syclDevice, const context &syclContext, - sycl::usm::alloc kind, +aligned_alloc_annotated(size_t align, size_t numBytes, const device &syclDevice, + const context &syclContext, sycl::usm::alloc kind, const propertyListA &propList = propertyListA{}) { detail::ValidAllocPropertyList::value; @@ -53,12 +52,12 @@ aligned_alloc_annotated(size_t alignment, size_t numBytes, static_cast(propList); constexpr size_t alignFromPropList = - detail::GetAlignFromPropList::value; + detail::get_property_or(alignment<0>).value; const property_list &usmPropList = get_usm_property_list(); - if constexpr (detail::HasUsmKind::value) { + if constexpr (propertyListA::template has_property()) { constexpr sycl::usm::alloc usmKind = - detail::GetUsmKindFromPropList::value; + propertyListA::template get_property().value; if (usmKind != kind) { throw sycl::exception( sycl::make_error_code(sycl::errc::invalid), @@ -72,7 +71,7 @@ aligned_alloc_annotated(size_t alignment, size_t numBytes, "Unknown USM allocation kind was specified."); void *rawPtr = - sycl::aligned_alloc(combine_align(alignment, alignFromPropList), numBytes, + sycl::aligned_alloc(combine_align(align, alignFromPropList), numBytes, syclDevice, syclContext, kind, usmPropList); return annotated_ptr(rawPtr); } @@ -83,9 +82,8 @@ template ::value, annotated_ptr> -aligned_alloc_annotated(size_t alignment, size_t count, - const device &syclDevice, const context &syclContext, - sycl::usm::alloc kind, +aligned_alloc_annotated(size_t align, size_t count, const device &syclDevice, + const context &syclContext, sycl::usm::alloc kind, const propertyListA &propList = propertyListA{}) { detail::ValidAllocPropertyList::value; @@ -95,12 +93,12 @@ aligned_alloc_annotated(size_t alignment, size_t count, static_cast(propList); constexpr size_t alignFromPropList = - detail::GetAlignFromPropList::value; + detail::get_property_or(alignment<0>).value; const property_list &usmPropList = get_usm_property_list(); - if constexpr (detail::HasUsmKind::value) { + if constexpr (propertyListA::template has_property()) { constexpr sycl::usm::alloc usmKind = - detail::GetUsmKindFromPropList::value; + propertyListA::template get_property().value; if (usmKind != kind) { throw sycl::exception( sycl::make_error_code(sycl::errc::invalid), @@ -113,7 +111,7 @@ aligned_alloc_annotated(size_t alignment, size_t count, throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), "Unknown USM allocation kind was specified."); - size_t combinedAlign = combine_align(alignment, alignFromPropList); + size_t combinedAlign = combine_align(align, alignFromPropList); T *rawPtr = sycl::aligned_alloc(combinedAlign, count, syclDevice, syclContext, kind, usmPropList); return annotated_ptr(rawPtr); @@ -212,7 +210,9 @@ std::enable_if_t< malloc_annotated(size_t numBytes, const device &syclDevice, const context &syclContext, const propertyListA &propList) { constexpr sycl::usm::alloc usmKind = - detail::GetUsmKindFromPropList::value; + detail::get_property_or( + usm_kind) + .value; static_assert(usmKind != sycl::usm::alloc::unknown, "USM kind is not specified. Please specify it as an argument " "or in the input property list."); @@ -228,7 +228,9 @@ std::enable_if_t< malloc_annotated(size_t count, const device &syclDevice, const context &syclContext, const propertyListA &propList) { constexpr sycl::usm::alloc usmKind = - detail::GetUsmKindFromPropList::value; + detail::get_property_or( + usm_kind) + .value; static_assert(usmKind != sycl::usm::alloc::unknown, "USM kind is not specified. Please specify it as an argument " "or in the input property list."); diff --git a/sycl/include/sycl/ext/oneapi/experimental/annotated_usm/alloc_util.hpp b/sycl/include/sycl/ext/oneapi/experimental/annotated_usm/alloc_util.hpp index 6548f3d3c3673..9217a9c567299 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/annotated_usm/alloc_util.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/annotated_usm/alloc_util.hpp @@ -25,54 +25,6 @@ namespace detail { // Type traits for USM allocation with property support //// -// Merge a property list with the usm_kind property -template -using MergeUsmKind = - detail::merged_properties_t})>; - -// Check if a property list contains the a certain property -template struct HasProperty {}; - -template -struct HasProperty> - : detail::ContainsProperty> {}; - -template -using HasAlign = HasProperty; -template -using HasUsmKind = HasProperty; -template -using HasBufferLocation = HasProperty; - -template -struct GetPropertyValueFromPropList> - : GetPropertyValueFromPropList> {}; - -// Get the value of alignment from a property list -// If alignment is not present in the property list, set to default value 0 -template -using GetAlignFromPropList = - GetPropertyValueFromPropList), - PropertyListT>; -// Get the value of usm_kind from a property list -// The usm_kind is sycl::usm::alloc::unknown by default -template -using GetUsmKindFromPropList = - GetPropertyValueFromPropList), - PropertyListT>; -// Get the value of buffer_location from a property list -// The buffer location is -1 by default -template -using GetBufferLocationFromPropList = GetPropertyValueFromPropList< - buffer_location_key, int, - decltype(sycl::ext::intel::experimental::buffer_location<-1>), - PropertyListT>; - // Check if a runtime property is valid template struct IsRuntimePropertyValid : std::false_type {}; @@ -91,13 +43,13 @@ struct ValidAllocPropertyList> IsRuntimePropertyValid::value) && ValidAllocPropertyList< T, detail::properties_t>::value> { + static_assert(is_property_value_v); + static constexpr bool is_compile_time = std::is_empty_v; // check if a compile-time property is valid for annotated_ptr - static_assert(!detail::IsCompileTimePropertyValue::value || - is_valid_property::value, + static_assert(!is_compile_time || is_valid_property::value, "Found invalid compile-time property in the property list."); // check if a runtime property is valid for malloc - static_assert(!detail::IsRuntimeProperty::value || - IsRuntimePropertyValid::value, + static_assert(is_compile_time || IsRuntimePropertyValid::value, "Found invalid runtime property in the property list."); }; @@ -112,15 +64,15 @@ template <> struct GetCompileTimeProperties { template struct GetCompileTimeProperties> { using type = - std::conditional_t::value, - detail::properties_t, empty_properties_t>; + std::conditional_t, detail::properties_t, + empty_properties_t>; }; template struct GetCompileTimeProperties> { using filtered_this_property_t = - std::conditional_t::value, - detail::properties_t, empty_properties_t>; + std::conditional_t, detail::properties_t, + empty_properties_t>; using filtered_other_properties_t = typename GetCompileTimeProperties>::type; using type = detail::merged_properties_t::type; - static_assert(!HasUsmKind::value || - GetUsmKindFromPropList::value == Kind, - "Input property list contains conflicting USM kind."); + static_assert( + detail::get_property_or(usm_kind) + .value == Kind, + "Input property list contains conflicting USM kind."); using type = detail::merged_properties_t, // runtime). Right now only the `buffer_location` has its corresponding USM // runtime property and is transformable template inline property_list get_usm_property_list() { - if constexpr (detail::HasBufferLocation::value) { + if constexpr (PropertyListT::template has_property()) { return property_list{ sycl::ext::intel::experimental::property::usm::buffer_location( - detail::GetBufferLocationFromPropList::value)}; + PropertyListT::template get_property().value)}; } return {}; } diff --git a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp index ed513ae3d2098..f82014fe3e209 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp @@ -8,9 +8,10 @@ #pragma once -#include // for ceil, cos, exp, exp10, exp2 -#include // For simplify_if_swizzle, is_swizzle -#include // sycl::detail::memcpy +#include // for ceil, cos, exp, exp10, exp2 +#include // For simplify_if_swizzle, is_swizzle +#include // sycl::detail::memcpy +#include #include // for bfloat16, bfloat16ToBits #include // for marray diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index facc486ca2f84..d42df1fee26c8 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -15,7 +15,7 @@ #include // for is_svgenfloath, is_sv... #include // detail::memcpy #include // for marray -#include // for vec +#include // for vec #include // for size_t #include // for printf diff --git a/sycl/include/sycl/ext/oneapi/experimental/cuda/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/cuda/builtins.hpp index 067e238c2e36c..3609c282a5319 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/cuda/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/cuda/builtins.hpp @@ -10,7 +10,7 @@ #define SYCL_EXT_ONEAPI_CUDA_TEX_CACHE_READ 1 -#include +#include #if defined(_WIN32) || defined(_WIN64) #define ATTRIBUTE_EXT_VEC_TYPE(N) __declspec(ext_vector_type(N)) diff --git a/sycl/include/sycl/ext/oneapi/experimental/non_uniform_groups.hpp b/sycl/include/sycl/ext/oneapi/experimental/non_uniform_groups.hpp index af68ce0e10e0f..bbe619834dcdc 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/non_uniform_groups.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/non_uniform_groups.hpp @@ -10,7 +10,7 @@ #include // for sub_group_mask #include // for marray -#include // for vec +#include // for vec #include // for size_t #include // for uint32_t diff --git a/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp b/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp index 441e32a085990..29c25d6a0860b 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp @@ -10,7 +10,7 @@ #include #include -#include +#include namespace sycl { inline namespace _V1 { diff --git a/sycl/include/sycl/ext/oneapi/experimental/raw_kernel_arg.hpp b/sycl/include/sycl/ext/oneapi/experimental/raw_kernel_arg.hpp index e744181906a24..d53095d066e77 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/raw_kernel_arg.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/raw_kernel_arg.hpp @@ -14,6 +14,9 @@ namespace sycl { inline namespace _V1 { class handler; +namespace detail { +class dynamic_parameter_impl; +} namespace ext::oneapi::experimental { diff --git a/sycl/include/sycl/ext/oneapi/experimental/virtual_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/virtual_functions.hpp index 4e1d0e13eb623..9b13f6e3ed123 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/virtual_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/virtual_functions.hpp @@ -1,8 +1,11 @@ #pragma once #include +#include #include +#include + namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { diff --git a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp index 0be24c912907b..c03bdef7efceb 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp @@ -1,5 +1,4 @@ //===-------------------- work_group_memory.hpp ---------------------------===// -// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -8,10 +7,18 @@ #pragma once +#include +#include +#include +#include + +#include #include namespace sycl { inline namespace _V1 { +class handler; + namespace detail { template struct is_unbounded_array : std::false_type {}; @@ -38,14 +45,25 @@ namespace ext::oneapi::experimental { struct indeterminate_t {}; inline constexpr indeterminate_t indeterminate; - template +class work_group_memory; + +template class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory : sycl::detail::work_group_memory_impl { public: using value_type = std::remove_all_extents_t; private: + // At the moment we do not have a way to set properties nor property values to + // set for work group memory. So, we check here for diagnostic purposes that + // the property list is empty. + // TODO: Remove this function and its occurrences in this file once properties + // have been created for work group memory. + void check_props_empty() const { + static_assert(std::is_same_v && + "Work group memory class does not support properties yet!"); + } using decoratedPtr = typename sycl::detail::DecoratedType< value_type, access::address_space::local_space>::type *; @@ -62,18 +80,22 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory #endif public: - work_group_memory(const indeterminate_t &) {}; + work_group_memory(const indeterminate_t &) { check_props_empty(); }; work_group_memory(const work_group_memory &rhs) = default; work_group_memory &operator=(const work_group_memory &rhs) = default; template >> work_group_memory(handler &) - : sycl::detail::work_group_memory_impl(sizeof(DataT)) {} + : sycl::detail::work_group_memory_impl(sizeof(DataT)) { + check_props_empty(); + } template >> work_group_memory(size_t num, handler &) : sycl::detail::work_group_memory_impl( - num * sizeof(std::remove_extent_t)) {} + num * sizeof(std::remove_extent_t)) { + check_props_empty(); + } template multi_ptr get_multi_ptr() const { @@ -90,6 +112,9 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory } private: + friend class sycl::handler; // needed in order for handler class to be aware + // of the private inheritance with + // work_group_memory_impl as base class decoratedPtr ptr = nullptr; }; } // namespace ext::oneapi::experimental diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp index 9f91607456dd6..f79f56f698a22 100644 --- a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -104,6 +104,11 @@ struct property_value, constexpr size_t operator[](int Dim) const { return std::array{Dim0, Dims...}[Dim]; } + +private: + constexpr size_t size() const { return sizeof...(Dims) + 1; } + + template friend struct detail::ConflictingProperties; }; template @@ -190,6 +195,11 @@ struct property_value{Dim0, Dims...}[Dim]; } + +private: + constexpr size_t size() const { return sizeof...(Dims) + 1; } + + template friend struct detail::ConflictingProperties; }; template <> @@ -389,78 +399,53 @@ struct HasKernelPropertiesGetMethod().get(std::declval())); }; -// Trait for property compile-time meta names and values. -template struct WGSizePropertyMetaInfo { - static constexpr std::array WGSize = {}; - static constexpr size_t LinearSize = 0; -}; - -template -struct WGSizePropertyMetaInfo> { - static constexpr std::array WGSize = {Dim0, - Dims...}; - static constexpr size_t LinearSize = (Dim0 * ... * Dims); -}; - -template -struct WGSizePropertyMetaInfo> { - static constexpr std::array WGSize = {Dim0, - Dims...}; - static constexpr size_t LinearSize = (Dim0 * ... * Dims); -}; - -// Get the value of a work-group size related property from a property list -template -struct GetWGPropertyFromPropList {}; - -template -struct GetWGPropertyFromPropList> { - using prop_val_t = std::conditional_t< - ContainsProperty>::value, - typename FindCompileTimePropertyValueType< - PropKey, std::tuple>::type, - void>; - static constexpr auto WGSize = - WGSizePropertyMetaInfo>::WGSize; - static constexpr size_t LinearSize = - WGSizePropertyMetaInfo>::LinearSize; -}; - // If work_group_size and max_work_group_size coexist, check that the // dimensionality matches and that the required work-group size doesn't // trivially exceed the maximum size. template -struct ConflictingProperties - : std::false_type { - using WGSizeVal = GetWGPropertyFromPropList; - using MaxWGSizeVal = - GetWGPropertyFromPropList; - // If work_group_size_key doesn't exist in the list of properties, WGSize is - // an empty array and so Dims == 0. - static constexpr size_t Dims = WGSizeVal::WGSize.size(); - static_assert( - Dims == 0 || Dims == MaxWGSizeVal::WGSize.size(), - "work_group_size and max_work_group_size dimensionality must match"); - static_assert(Dims < 1 || WGSizeVal::WGSize[0] <= MaxWGSizeVal::WGSize[0], - "work_group_size must not exceed max_work_group_size"); - static_assert(Dims < 2 || WGSizeVal::WGSize[1] <= MaxWGSizeVal::WGSize[1], - "work_group_size must not exceed max_work_group_size"); - static_assert(Dims < 3 || WGSizeVal::WGSize[2] <= MaxWGSizeVal::WGSize[2], - "work_group_size must not exceed max_work_group_size"); +struct ConflictingProperties { + static constexpr bool value = []() constexpr { + if constexpr (Properties::template has_property()) { + constexpr auto wg_size = + Properties::template get_property(); + constexpr auto max_wg_size = + Properties::template get_property(); + static_assert( + wg_size.size() == max_wg_size.size(), + "work_group_size and max_work_group_size dimensionality must match"); + if constexpr (wg_size.size() == max_wg_size.size()) { + constexpr auto Dims = wg_size.size(); + static_assert(Dims < 1 || wg_size[0] <= max_wg_size[0], + "work_group_size must not exceed max_work_group_size"); + static_assert(Dims < 2 || wg_size[1] <= max_wg_size[1], + "work_group_size must not exceed max_work_group_size"); + static_assert(Dims < 3 || wg_size[2] <= max_wg_size[2], + "work_group_size must not exceed max_work_group_size"); + } + } + return false; + }(); }; // If work_group_size and max_linear_work_group_size coexist, check that the // required linear work-group size doesn't trivially exceed the maximum size. template -struct ConflictingProperties - : std::false_type { - using WGSizeVal = GetWGPropertyFromPropList; - using MaxLinearWGSizeVal = - GetPropertyValueFromPropList; - static_assert(WGSizeVal::WGSize.empty() || - WGSizeVal::LinearSize <= MaxLinearWGSizeVal::value, - "work_group_size must not exceed max_linear_work_group_size"); +struct ConflictingProperties { + static constexpr bool value = []() constexpr { + if constexpr (Properties::template has_property()) { + constexpr auto wg_size = + Properties::template get_property(); + constexpr auto dims = wg_size.size(); + constexpr auto linear_size = wg_size[0] * (dims > 1 ? wg_size[1] : 1) * + (dims > 2 ? wg_size[2] : 1); + constexpr auto max_linear_wg_size = + Properties::template get_property(); + static_assert( + linear_size < max_linear_wg_size.value, + "work_group_size must not exceed max_linear_work_group_size"); + } + return false; + }(); }; } // namespace detail diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 190a16ce5d4c3..7a840df39f2fc 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -21,71 +21,29 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { -namespace detail { - -// Checks if a tuple of properties contains a property. -template -struct ContainsProperty : std::false_type {}; -template -struct ContainsProperty> - : ContainsProperty> {}; -template -struct ContainsProperty> : std::true_type {}; -template -struct ContainsProperty< - PropT, std::tuple, Rest...>> - : std::true_type {}; +template class __SYCL_EBO properties; -// Finds the full property_value type of a property in a tuple of properties. -// type is void if the type was not found in the tuple of properties. -template -struct FindCompileTimePropertyValueType { - using type = void; -}; -template -struct FindCompileTimePropertyValueType> { - using type = - typename FindCompileTimePropertyValueType>::type; -}; -template -struct FindCompileTimePropertyValueType< - CTPropertyT, - std::tuple, Rest...>> { - using type = property_value; -}; +namespace detail { -template -static constexpr std::enable_if_t< - HasProperty, - typename FindCompileTimePropertyValueType::type> -get_property() { - return {}; -} +// NOTE: Meta-function to implement CTAD rules isn't allowed to return +// `properties` and it's impossible to return a pack as well. As +// such, we're forced to have an extra level of `detail::properties_type_list` +// for the purpose of providing CTAD rules. +template struct properties_type_list; -template -static constexpr std::enable_if_t get_property() { - return; -} +// This is used in a separate `properties` specialization to report friendlier +// errors. +template struct invalid_properties_type_list {}; -// Get the value of a property from a property list -template -struct GetPropertyValueFromPropList {}; - -template -struct GetPropertyValueFromPropList> { - using prop_val_t = std::conditional_t< - ContainsProperty>::value, - typename FindCompileTimePropertyValueType< - PropKey, std::tuple>::type, - DefaultPropVal>; - static constexpr ConstType value = - PropertyMetaInfo>::value; -}; +// Helper for reconstructing a properties type. This assumes that +// PropertyValueTs is sorted and contains only valid properties. +// +// It also allows us to hide details of `properties` implementation from the +// code that uses/defines them (with the exception of ESIMD which is extremely +// hacky in its own esimd::properties piggybacking on these ones). +template +using properties_t = + properties>; template inline constexpr bool properties_are_unique = []() constexpr { @@ -130,9 +88,6 @@ constexpr bool properties_are_valid_for_ctad = []() constexpr { } }(); -template struct properties_type_list; -template struct invalid_properties_type_list {}; - template struct properties_sorter { // Not using "auto" due to MSVC bug in v19.36 and older. v19.37 and later is // able to compile "auto" just fine. See https://godbolt.org/z/eW3rjjs7n. @@ -182,8 +137,6 @@ template <> struct properties_sorter<> { } // namespace detail -template class __SYCL_EBO properties; - // Empty property list. template <> class __SYCL_EBO properties> { template @@ -241,20 +194,17 @@ class __SYCL_EBO } } } + + template static constexpr bool has_property() { + return false; + } }; -// NOTE: Meta-function to implement CTAD rules isn't allowed to return -// `properties` and it's impossible to return a pack as well. As -// such, we're forced to have an extra level of `detail::properties_type_list` -// for the purpose of providing CTAD rules. template class __SYCL_EBO properties> : private property_tys... { static_assert(detail::properties_are_sorted, "Properties must be sorted!"); - static_assert( - detail::NoConflictingProperties>::value, - "Conflicting properties in property list."); using property_tys::get_property_impl...; template friend class __SYCL_EBO properties; @@ -282,6 +232,9 @@ class __SYCL_EBO properties> detail::property_key_tag{})); public: + // Definition is out-of-class so that `properties` would be complete there and + // its interfaces could be used in `ConflictingProperties`' partial + // specializations. template < typename... unsorted_property_tys, typename = std::enable_if_t< @@ -291,8 +244,7 @@ class __SYCL_EBO properties> ...))>, typename = std::enable_if_t< detail::properties_are_unique>> - constexpr properties(unsorted_property_tys... props) - : unsorted_property_tys(props)... {} + constexpr properties(unsorted_property_tys... props); template static constexpr bool has_property() { return std::is_base_of_v, @@ -318,6 +270,17 @@ class __SYCL_EBO properties> } }; +template +template +constexpr properties>::properties( + unsorted_property_tys... props) + : unsorted_property_tys(props)... { + static_assert(((!detail::ConflictingProperties::value && + ...)), + "Conflicting properties in property list."); +} + // Deduction guides template -using properties_t = - properties>; +template