Skip to content

Commit

Permalink
Implement work_group_static / work_group_scratch_memory (#15061)
Browse files Browse the repository at this point in the history
This patch partially implements `work_group_static` and updates the proposal.

Implemented:

- `work_group_static` to handle static allocation in kernel.
- `get_dynamic_work_group_memory` to handle runtime allocation, but only
on CUDA

`work_group_static` is implemented by exposing `SYCLScope(WorkGroup)` so
that the class can be decorated with the attribute; lowering then reuses the
existing mechanism to place the variable in local memory.

`get_dynamic_work_group_memory` uses a new builtin function,
`__sycl_dynamicLocalMemoryPlaceholder`, which is lowered into a reference
to a zero-sized array global variable when targeting NVPTX. The approach for
SPIR will need to differ from this lowering.

UR change oneapi-src/unified-runtime#1968,
oneapi-src/unified-runtime#2403

---------

Signed-off-by: Lukas Sommer <[email protected]>
Signed-off-by: Victor Lomuller <[email protected]>
Co-authored-by: Lukas Sommer <[email protected]>
Co-authored-by: Atharva Dubey <[email protected]>
Co-authored-by: Marcos Maronas <[email protected]>
Co-authored-by: Callum Fare <[email protected]>
Co-authored-by: Martin Morrison-Grant <[email protected]>
  • Loading branch information
6 people authored and KornevNikita committed Feb 24, 2025
1 parent 141b054 commit 54d534f
Show file tree
Hide file tree
Showing 65 changed files with 1,809 additions and 381 deletions.
10 changes: 5 additions & 5 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1667,12 +1667,12 @@ def SYCLIntelESimdVectorize : InheritableAttr {
}

def SYCLScope : Attr {
// No spelling, as this attribute can't be created in the source code.
let Spellings = [];
let Spellings = [CXX11<"__sycl_detail__", "wg_scope">];
let Args = [EnumArgument<"level", "Level", /*is_string=*/false,
["work_group", "work_item"],
["WorkGroup", "WorkItem"]>];
let Subjects = SubjectList<[Function, Var]>;
["WorkGroup", "WorkItem"],
/*optional=*/true>];
let Subjects = SubjectList<[Function, Var, CXXRecord]>;
let LangOpts = [SYCLIsDevice];

let AdditionalMembers = [{
Expand All @@ -1685,7 +1685,7 @@ def SYCLScope : Attr {
}
}];

let Documentation = [InternalOnly];
let Documentation = [SYCLWGScopeDocs];
}

def SYCLDeviceIndirectlyCallable : InheritableAttr {
Expand Down
12 changes: 12 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -4097,6 +4097,18 @@ function pointer for the specified function.
}];
}

def SYCLWGScopeDocs : Documentation {
let Category = DocCatFunction;
let Heading = "__sycl_detail__::wg_scope";
let Content = [{
This attribute can only be applied to records with a trivial default constructor and destructor.
Types with this attribute cannot be used for non-static data members.
It indicates that any block-scope or namespace-scope variable of a type holding this attribute
will be allocated in local memory. Variables declared at block scope behave
as if they were implicitly declared static.
}];
}

def SYCLDeviceDocs : Documentation {
let Category = DocCatFunction;
let Heading = "sycl_device";
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -12547,6 +12547,11 @@ def err_sycl_external_global : Error<
def warn_sycl_kernel_too_big_args : Warning<
"size of kernel arguments (%0 bytes) may exceed the supported maximum "
"of %1 bytes on some devices">, InGroup<SyclStrict>, ShowInSystemHeader;
def err_sycl_wg_scope : Error<
"SYCL work group scope only applies to class with a trivial "
"%select{default constructor|destructor}0">;
def err_sycl_field_with_wg_scope : Error<
"non-static data member is of a type with a SYCL work group scope attribute applied to it">;
def err_sycl_virtual_types : Error<
"no class with a vtable can be used in a SYCL kernel or any code included in the kernel">;
def note_sycl_recursive_function_declared_here: Note<"function implemented using recursion declared here">;
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Sema/SemaSYCL.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ class SemaSYCL : public SemaBase {

void CheckSYCLKernelCall(FunctionDecl *CallerFunc,
ArrayRef<const Expr *> Args);
void CheckSYCLScopeAttr(CXXRecordDecl *Decl);

/// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
/// context is "used as device code".
Expand Down Expand Up @@ -478,6 +479,7 @@ class SemaSYCL : public SemaBase {
const ParsedAttr &AL);
void handleSYCLIntelMaxWorkGroupsPerMultiprocessor(Decl *D,
const ParsedAttr &AL);
void handleSYCLScopeAttr(Decl *D, const ParsedAttr &AL);

void checkSYCLAddIRAttributesFunctionAttrConflicts(Decl *D);

Expand Down
6 changes: 5 additions & 1 deletion clang/lib/CodeGen/CGDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,11 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
if (D.getType().getAddressSpace() == LangAS::opencl_local)
return CGM.getOpenCLRuntime().EmitWorkGroupLocalVarDecl(*this, D);

if (D.getAttr<SYCLScopeAttr>() && D.getAttr<SYCLScopeAttr>()->isWorkGroup())
SYCLScopeAttr *ScopeAttr = D.getAttr<SYCLScopeAttr>();
if (!ScopeAttr)
if (auto *RD = D.getType()->getAsCXXRecordDecl())
ScopeAttr = RD->getAttr<SYCLScopeAttr>();
if (ScopeAttr && ScopeAttr->isWorkGroup())
return CGM.getSYCLRuntime().emitWorkGroupLocalVarDecl(*this, D);

assert(D.hasLocalStorage());
Expand Down
5 changes: 4 additions & 1 deletion clang/lib/CodeGen/CGSYCLRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,10 @@ void CGSYCLRuntime::emitWorkGroupLocalVarDecl(CodeGenFunction &CGF,
const VarDecl &D) {
#ifndef NDEBUG
SYCLScopeAttr *Scope = D.getAttr<SYCLScopeAttr>();
assert(Scope && Scope->isWorkGroup() && "work group scope expected");
if (!Scope)
if (auto *RD = D.getType()->getAsCXXRecordDecl())
Scope = RD->getAttr<SYCLScopeAttr>();
assert((Scope && Scope->isWorkGroup()) && "work group scope expected");
#endif // NDEBUG
// generate global variable in the address space selected by the clang CodeGen
// (should be local)
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5789,6 +5789,9 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {

if (LangOpts.SYCLIsDevice && D) {
auto *Scope = D->getAttr<SYCLScopeAttr>();
if (!Scope)
if (auto *RD = D->getType()->getAsCXXRecordDecl())
Scope = RD->getAttr<SYCLScopeAttr>();
if (Scope && Scope->isWorkGroup())
return LangAS::sycl_local;
}
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/Sema/Sema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1849,7 +1849,9 @@ class DeferredDiagnosticsEmitter
if (!S.SYCL().checkAllowedSYCLInitializer(VD) &&
!S.SYCL()
.isTypeDecoratedWithDeclAttribute<
SYCLGlobalVariableAllowedAttr>(VD->getType())) {
SYCLGlobalVariableAllowedAttr>(VD->getType()) &&
!S.SYCL().isTypeDecoratedWithDeclAttribute<SYCLScopeAttr>(
VD->getType())) {
S.Diag(Loc, diag::err_sycl_restrict)
<< SemaSYCL::KernelConstStaticVariable;
return;
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7870,6 +7870,8 @@ NamedDecl *Sema::ActOnVariableDeclarator(
// attribute.
if (SCSpec == DeclSpec::SCS_static && !R.isConstant(Context) &&
!SYCL().isTypeDecoratedWithDeclAttribute<SYCLGlobalVariableAllowedAttr>(
NewVD->getType()) &&
!SYCL().isTypeDecoratedWithDeclAttribute<SYCLScopeAttr>(
NewVD->getType()))
SYCL().DiagIfDeviceCode(D.getIdentifierLoc(), diag::err_sycl_restrict)
<< SemaSYCL::KernelNonConstStaticDataVariable;
Expand Down Expand Up @@ -18551,6 +18553,14 @@ FieldDecl *Sema::CheckFieldDecl(DeclarationName Name, QualType T,
InvalidDecl = true;
}

if (LangOpts.SYCLIsDevice) {
const CXXRecordDecl *RD = T->getAsCXXRecordDecl();
if (RD && RD->hasAttr<SYCLScopeAttr>()) {
Diag(Loc, diag::err_sycl_field_with_wg_scope);
InvalidDecl = true;
}
}

if (LangOpts.OpenCL) {
// OpenCL v1.2 s6.9b,r & OpenCL v2.0 s6.12.5 - The following types cannot be
// used as structure or union field: image, sampler, event or block types.
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6705,6 +6705,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_SYCLDevice:
S.SYCL().handleSYCLDeviceAttr(D, AL);
break;
case ParsedAttr::AT_SYCLScope:
S.SYCL().handleSYCLScopeAttr(D, AL);
break;
case ParsedAttr::AT_SYCLDeviceIndirectlyCallable:
S.SYCL().handleSYCLDeviceIndirectlyCallableAttr(D, AL);
break;
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Sema/SemaDeclCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7218,6 +7218,9 @@ void Sema::CheckCompletedCXXClass(Scope *S, CXXRecordDecl *Record) {
else if (Record->hasAttr<CUDADeviceBuiltinTextureTypeAttr>())
checkCUDADeviceBuiltinTextureClassTemplate(*this, Record);
}
if (getLangOpts().SYCLIsDevice && Record->hasAttr<SYCLScopeAttr>()) {
SYCL().CheckSYCLScopeAttr(Record);
}
}

/// Look up the special member function that would be called by a special
Expand Down
8 changes: 6 additions & 2 deletions clang/lib/Sema/SemaExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,15 +235,19 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
VD->getStorageClass() == SC_Static &&
!VD->hasAttr<SYCLGlobalVarAttr>() &&
!SemaSYCL::isTypeDecoratedWithDeclAttribute<
SYCLGlobalVariableAllowedAttr>(VD->getType()))
SYCLGlobalVariableAllowedAttr>(VD->getType()) &&
!SemaSYCL::isTypeDecoratedWithDeclAttribute<SYCLScopeAttr>(
VD->getType()))
SYCL().DiagIfDeviceCode(*Locs.begin(), diag::err_sycl_restrict)
<< SemaSYCL::KernelNonConstStaticDataVariable;
// Non-const globals are not allowed in SYCL except for ESIMD or with the
// SYCLGlobalVar or SYCLGlobalVariableAllowed attribute.
else if (IsRuntimeEvaluated && !IsEsimdPrivateGlobal && !IsConst &&
VD->hasGlobalStorage() && !VD->hasAttr<SYCLGlobalVarAttr>() &&
!SemaSYCL::isTypeDecoratedWithDeclAttribute<
SYCLGlobalVariableAllowedAttr>(VD->getType()))
SYCLGlobalVariableAllowedAttr>(VD->getType()) &&
!SemaSYCL::isTypeDecoratedWithDeclAttribute<SYCLScopeAttr>(
VD->getType()))
SYCL().DiagIfDeviceCode(*Locs.begin(), diag::err_sycl_restrict)
<< SemaSYCL::KernelGlobalVariable;
// ESIMD globals cannot be used in a SYCL context.
Expand Down
22 changes: 22 additions & 0 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5207,6 +5207,28 @@ void SemaSYCL::CheckSYCLKernelCall(FunctionDecl *KernelFunc,
KernelFunc->setInvalidDecl();
}

/// Validate a record that carries the SYCL scope attribute.
///
/// The record must have a trivial default constructor and a trivial
/// destructor. When one of these requirements is not met, a single
/// err_sycl_wg_scope diagnostic is emitted at the record's location and the
/// attribute is removed from the record.
///
/// \param Decl the record to check; it must already hold a SYCLScopeAttr.
void SemaSYCL::CheckSYCLScopeAttr(CXXRecordDecl *Decl) {
  assert(Decl->hasAttr<SYCLScopeAttr>());

  // Nothing is checked while the record is still in a dependent context.
  if (Decl->isDependentContext())
    return;

  // Selector for the %select in err_sycl_wg_scope:
  //   0 -> default constructor, 1 -> destructor, -1 -> nothing to report.
  // Only one problem is ever reported: emitting both diagnostics would attach
  // the accompanying note to the first one only, which is confusing. The
  // constructor check takes precedence over the destructor check.
  int FailedRequirement = -1;
  if (!Decl->hasTrivialDefaultConstructor())
    FailedRequirement = 0;
  else if (!Decl->hasTrivialDestructor())
    FailedRequirement = 1;

  if (FailedRequirement < 0)
    return;

  Diag(Decl->getLocation(), diag::err_sycl_wg_scope) << FailedRequirement;
  // The attribute is dropped once the record has been diagnosed.
  Decl->dropAttr<SYCLScopeAttr>();
}

// For a wrapped parallel_for, copy attributes from original
// kernel to wrapped kernel.
void SemaSYCL::copySYCLKernelAttrs(CXXMethodDecl *CallOperator) {
Expand Down
15 changes: 15 additions & 0 deletions clang/lib/Sema/SemaSYCLDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3109,6 +3109,21 @@ void SemaSYCL::handleSYCLRegisterNumAttr(Decl *D, const ParsedAttr &AL) {
D->addAttr(::new (Context) SYCLRegisterNumAttr(Context, AL, RegNo));
}

/// Handle a parsed SYCL scope attribute appearing on declaration \p D.
///
/// The attribute must be written without arguments and is only accepted on a
/// C++ record declared with the `class` or `struct` keyword. On success a
/// SYCLScopeAttr with the WorkGroup level is attached to \p D; otherwise
/// err_attribute_wrong_decl_type_str is reported at the start of the
/// attribute's range and nothing is attached.
void SemaSYCL::handleSYCLScopeAttr(Decl *D, const ParsedAttr &AL) {
  if (!AL.checkExactlyNumArgs(SemaRef, 0))
    return;

  const auto *Record = dyn_cast<CXXRecordDecl>(D);
  const bool IsClassOrStruct =
      Record && (Record->isClass() || Record->isStruct());

  if (!IsClassOrStruct) {
    SemaRef.Diag(AL.getRange().getBegin(),
                 diag::err_attribute_wrong_decl_type_str)
        << AL << AL.isRegularKeywordAttribute() << "classes";
    return;
  }

  // The source spelling takes no argument, so the level is always WorkGroup.
  auto *ScopeAttr = SYCLScopeAttr::Create(SemaRef.getASTContext(),
                                          SYCLScopeAttr::Level::WorkGroup, AL);
  D->addAttr(ScopeAttr);
}

void SemaSYCL::checkSYCLAddIRAttributesFunctionAttrConflicts(Decl *D) {
const auto *AddIRFuncAttr = D->getAttr<SYCLAddIRAttributesFunctionAttr>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@
// CHECK-NEXT: SYCLIntelSchedulerTargetFmaxMhz (SubjectMatchRule_function)
// CHECK-NEXT: SYCLIntelUseStallEnableClusters (SubjectMatchRule_function)
// CHECK-NEXT: SYCLRegisterNum (SubjectMatchRule_variable_is_global)
// CHECK-NEXT: SYCLScope (SubjectMatchRule_function, SubjectMatchRule_variable, SubjectMatchRule_record)
// CHECK-NEXT: SYCLSimd (SubjectMatchRule_function, SubjectMatchRule_variable_is_global)
// CHECK-NEXT: SYCLSpecialClass (SubjectMatchRule_record)
// CHECK-NEXT: SYCLType (SubjectMatchRule_record, SubjectMatchRule_enum)
Expand Down
94 changes: 94 additions & 0 deletions clang/test/SemaSYCL/sycl_wg_scope.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Verify the use of wg_scope is correctly diagnosed.
// RUN: %clang_cc1 -fsycl-is-device -verify %s

// Accepted cases: no diagnostic is attached to any of the classes below.
// G1: empty class.
class [[__sycl_detail__::wg_scope]] G1 {};
// G2: explicitly defaulted default constructor next to an additional
// user-provided constructor taking an int.
class [[__sycl_detail__::wg_scope]] G2 {
G2() = default;
G2(int i) : i(i) {}
int i;
};

// G3: explicitly defaulted destructor.
class [[__sycl_detail__::wg_scope]] G3 {
~G3() = default;
};

// Rejected cases: the diagnostic is reported on the class declaration line.
// B4: user-provided default constructor.
class [[__sycl_detail__::wg_scope]] B4 { // expected-error {{SYCL work group scope only applies to class with a trivial default constructor}}
B4() {}
};

// B5: user-provided destructor.
class [[__sycl_detail__::wg_scope]] B5 { // expected-error {{SYCL work group scope only applies to class with a trivial destructor}}
~B5() {}
};

// B6: both special members are user-provided; only the default constructor
// diagnostic is checked for.
class [[__sycl_detail__::wg_scope]] B6 { // expected-error {{SYCL work group scope only applies to class with a trivial default constructor}}
B6() {}
~B6() {}
};

// Class template carrying the attribute. Whether an instantiation is accepted
// depends on the member type T; diagnostics are reported at the #B7 marker.
template <typename T> class [[__sycl_detail__::wg_scope]] B7 { // #B7
public:
T obj;
};

// Member types used to instantiate the templates in this file.
// Valid: no user-provided special members.
struct Valid {};
// InvalidCtor: user-provided default constructor.
struct InvalidCtor {
InvalidCtor() {}
};
// InvalidDtor: user-provided destructor.
struct InvalidDtor {
~InvalidDtor() {}
};
// InvalidCDtor: user-provided default constructor and destructor.
struct InvalidCDtor {
InvalidCDtor() {}
~InvalidCDtor() {}
};

// Accepted instantiation.
B7<Valid> b7;
// Rejected instantiations. Each one checks a single error at #B7 plus the
// instantiation note on the variable declaration that follows the directives.
// expected-error@#B7 {{SYCL work group scope only applies to class with a trivial default constructor}}
// expected-note@+1 {{in instantiation of template class 'B7<InvalidCtor>' requested here}}
B7<InvalidCtor> b9;
// expected-error@#B7 {{SYCL work group scope only applies to class with a trivial destructor}}
// expected-note@+1 {{in instantiation of template class 'B7<InvalidDtor>' requested here}}
B7<InvalidDtor> b10;
// expected-error@#B7 {{SYCL work group scope only applies to class with a trivial default constructor}}
// expected-note@+1 {{in instantiation of template class 'B7<InvalidCDtor>' requested here}}
B7<InvalidCDtor> b11;

// Same shape as B7, but the default constructor and destructor are explicitly
// defaulted. Diagnostics are reported at the #B12 marker.
template <typename T> class [[__sycl_detail__::wg_scope]] B12 { // #B12
public:
B12() = default;
~B12() = default;
T obj;
};

// Accepted instantiation.
B12<Valid> b12;
// Rejected instantiation: the member type has a user-provided default
// constructor.
// expected-error@#B12 {{SYCL work group scope only applies to class with a trivial default constructor}}
// expected-note@+1 {{in instantiation of template class 'B12<InvalidCtor>' requested here}}
B12<InvalidCtor> b13;

// A type carrying the attribute (G1) used directly as a non-static data
// member is diagnosed on the field.
class B14 {
G1 field; // expected-error {{non-static data member is of a type with a SYCL work group scope attribute applied to it}}
};

// The same field check when the member type comes from a template argument;
// the diagnostic is reported at the #B15-field marker.
template <typename T> class B15 {
T field; // #B15-field
};

// expected-error@#B15-field {{non-static data member is of a type with a SYCL work group scope attribute applied to it}}
// expected-note@+1 {{in instantiation of template class 'B15<G1>' requested here}}
B15<G1> b15;

// Namespace-scope variables of the attributed type G1, with and without the
// static storage class specifier. No diagnostic is attached to them.
G1 g16;
static G1 g17;

// A static data member of the attributed type; no diagnostic is attached.
struct Wrap {
static G1 g18;
};

// Device function that declares block-scope variables of the attributed type
// (g19 without and g20 with the static specifier) and references the
// namespace-scope variables and the static data member declared earlier in
// this file. No diagnostic is attached to any of these declarations or uses.
__attribute__((sycl_device)) void ref_func() {
G1 g19;
static G1 g20;

(void)g16;
(void)g17;
(void)Wrap::g18;
}
5 changes: 5 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/LowerWGLocalMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ class SYCLLowerWGLocalMemoryPass
ModulePass *createSYCLLowerWGLocalMemoryLegacyPass();
void initializeSYCLLowerWGLocalMemoryLegacyPass(PassRegistry &);

namespace sycl {
std::vector<std::pair<StringRef, int>>
getKernelNamesUsingImplicitLocalMem(const Module &M);
}

} // namespace llvm

#endif // LLVM_SYCLLOWERIR_LOWERWGLOCALMEMORY_H
1 change: 1 addition & 0 deletions llvm/include/llvm/Support/PropertySetIO.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ class PropertySetRegistry {
static constexpr char SYCL_DEVICE_REQUIREMENTS[] = "SYCL/device requirements";
static constexpr char SYCL_HOST_PIPES[] = "SYCL/host pipes";
static constexpr char SYCL_VIRTUAL_FUNCTIONS[] = "SYCL/virtual functions";
static constexpr char SYCL_IMPLICIT_LOCAL_ARG[] = "SYCL/implicit local arg";

/// Function for bulk addition of an entire property set in the given
/// \p Category .
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h"
#include "llvm/SYCLLowerIR/DeviceGlobals.h"
#include "llvm/SYCLLowerIR/HostPipes.h"
#include "llvm/SYCLLowerIR/LowerWGLocalMemory.h"
#include "llvm/SYCLLowerIR/ModuleSplitter.h"
#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h"
#include "llvm/SYCLLowerIR/SYCLKernelParamOptInfo.h"
Expand Down Expand Up @@ -388,6 +389,13 @@ PropSetRegTy computeModuleProperties(const Module &M,
for (const StringRef &FName : FuncNames)
PropSet.add(PropSetRegTy::SYCL_ASSERT_USED, FName, true);
}
{
std::vector<std::pair<StringRef, int>> ArgPos =
getKernelNamesUsingImplicitLocalMem(M);
for (const auto &FuncAndArgPos : ArgPos)
PropSet.add(PropSetRegTy::SYCL_IMPLICIT_LOCAL_ARG, FuncAndArgPos.first,
FuncAndArgPos.second);
}

{
if (isModuleUsingAsan(M))
Expand Down
Loading

0 comments on commit 54d534f

Please sign in to comment.