diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index cc09932d58b7..1d6c8ffb57b8 100755 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1117,8 +1117,22 @@ static void InitializePredefinedMacros(const TargetInfo &TI, Builder.defineMacro("SYCL_EXTERNAL", "__attribute__((sycl_device))"); // Defines a macro that switches on SPIR intrinsics in SYCL runtime, used // by Xilinx FPGA devices for the moment - if (LangOpts.SYCLXOCCDevice) + if (LangOpts.SYCLXOCCDevice) { Builder.defineMacro("__SYCL_SPIR_DEVICE__"); + switch (TI.getTriple().getSubArch()) { + case llvm::Triple::FPGASubArch_sw_emu: + Builder.defineMacro("__SYCL_XILINX_SW_EMU_MODE__"); + break; + case llvm::Triple::FPGASubArch_hw_emu: + Builder.defineMacro("__SYCL_XILINX_HW_EMU_MODE__"); + break; + case llvm::Triple::FPGASubArch_hw: + Builder.defineMacro("__SYCL_XILINX_HW_MODE__"); + break; + default: + break; + } + } if (TI.getTriple().isNVPTX()) { Builder.defineMacro("__SYCL_NVPTX__", "1"); diff --git a/llvm/lib/SYCL/LowerSYCLMetaData.cpp b/llvm/lib/SYCL/LowerSYCLMetaData.cpp index 6b8f728a20a3..71d29b407671 100644 --- a/llvm/lib/SYCL/LowerSYCLMetaData.cpp +++ b/llvm/lib/SYCL/LowerSYCLMetaData.cpp @@ -94,11 +94,10 @@ struct LSMDState { ResultMD.push_back(MDNode::get( Ctx, {MDString::get(Ctx, "llvm.loop.pipeline.enable"), ConstantAsMetadata::get( - ConstantInt::get(Type::getInt32Ty(Ctx), 1)), + ConstantInt::get(Type::getInt32Ty(Ctx), -1)), ConstantAsMetadata::get( ConstantInt::getFalse(Type::getInt1Ty(Ctx))), - ConstantAsMetadata::get( - ConstantInt::get(Type::getInt8Ty(Ctx), -1))})); + })); MDNode *MDN = MDNode::getDistinct(Ctx, ResultMD); BB->getTerminator()->setMetadata(LLVMContext::MD_loop, MDN); BB->getTerminator() @@ -162,6 +161,9 @@ struct LowerSYCLMetaData : public ModulePass { bool runOnModule(Module &M) override { return LSMDState(M).run(); } + virtual StringRef getPassName() const override { + return 
"LowerSYCLMetaData"; + } }; } diff --git a/llvm/lib/SYCL/PrepareSYCLOpt.cpp b/llvm/lib/SYCL/PrepareSYCLOpt.cpp index 072629261592..518a113ec17b 100644 --- a/llvm/lib/SYCL/PrepareSYCLOpt.cpp +++ b/llvm/lib/SYCL/PrepareSYCLOpt.cpp @@ -15,6 +15,7 @@ #include #include +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" @@ -48,6 +49,8 @@ struct PrepareSYCLOpt : public ModulePass { assert(F.use_empty()); continue; } + if (F.isIntrinsic()) + continue; F.setCallingConv(CallingConv::SPIR_FUNC); for (Value* V : F.users()) { if (auto* Call = dyn_cast(V)) @@ -72,14 +75,48 @@ struct PrepareSYCLOpt : public ModulePass { I->eraseFromParent(); } + /// This will change array partition such that after the O3 pipeline it + /// matched very closely what v++ generates. + /// This will change the type of the alloca referenced by the array partition + /// into an array. and change the argument received by xlx_array_partition + /// into a pointer on an array. 
+  void lowerArrayPartition(Module &M) {
+    Function* Func = Intrinsic::getDeclaration(&M, Intrinsic::sideeffect);
+    for (Use& U : Func->uses()) {
+      auto* Usr = dyn_cast<CallBase>(U.getUser());
+      if (!Usr)
+        continue;
+      if (!Usr->getOperandBundle("xlx_array_partition"))
+        continue;
+      Use& Ptr = U.getUser()->getOperandUse(0);
+      Value* Obj = getUnderlyingObject(Ptr);
+      if (!isa<AllocaInst>(Obj))
+        continue; // skip only this call; later partitions still need lowering
+      auto* Alloca = cast<AllocaInst>(Obj);
+      auto *Replacement =
+          new AllocaInst(Ptr->getType()->getPointerElementType(), 0,
+                         ConstantInt::get(Type::getInt32Ty(M.getContext()), 1),
+                         Align(128), "");
+      Replacement->insertAfter(Alloca);
+      Instruction* Cast = BitCastInst::Create(
+          Instruction::BitCast, Replacement, Alloca->getType());
+      Cast->insertAfter(Replacement);
+      Alloca->replaceAllUsesWith(Cast); // old users now go through the bitcast
+      Value* Zero = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
+      Instruction* GEP = GetElementPtrInst::Create(nullptr, Replacement, {Zero});
+      GEP->insertAfter(Cast);
+      Ptr.set(GEP); // call operand 0 now uses the GEP on the new alloca
+    }
+  }
+
   bool runOnModule(Module &M) override {
     turnNonKernelsIntoPrivate(M);
     setCallingConventions(M);
+    lowerArrayPartition(M);
     removeAnnotationsIntrisic(M);
     return true;
   }
 };
-
 }
 
 namespace llvm {
diff --git a/sycl/doc/GettingStartedXilinxFPGA.md b/sycl/doc/GettingStartedXilinxFPGA.md
index 1b1a99a98e89..14395364c768 100644
--- a/sycl/doc/GettingStartedXilinxFPGA.md
+++ b/sycl/doc/GettingStartedXilinxFPGA.md
@@ -472,6 +472,16 @@ sudo rmmod xocl
 sudo modprobe xocl
 ```
 
+## Xilinx Macros
+
+``__SYCL_XILINX_SW_EMU_MODE__`` will be defined when compiling device code in sw_emu mode
+
+``__SYCL_XILINX_HW_EMU_MODE__`` will be defined when compiling device code in hw_emu mode
+
+``__SYCL_XILINX_HW_MODE__`` will be defined when compiling device code in hw mode
+
+when compiling host code none of them will be defined.
+ ## Xilinx FPGA SYCL compiler architecture [Architecture of the Xilinx SYCL diff --git a/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp b/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp index bed968bdc311..c51c88e66292 100644 --- a/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp +++ b/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp @@ -57,7 +57,7 @@ void dataflow(T&& functor) { */ template __SYCL_DEVICE_ANNOTATE("xilinx_pipeline") -ALWAYS_INLINE void pipeline(T&& functor) { +__SYCL_ALWAYS_INLINE void pipeline(T&& functor) { /// the std::forward can make a difference when the operator() is l or r value /// specified. std::forward(functor)(); diff --git a/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp b/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp index d2bab320d628..983d0745c225 100644 --- a/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp +++ b/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp @@ -52,8 +52,11 @@ namespace partition { /// This fuction is currently empty but the LowerSYCLMetaData Pass will fill /// it with the required IR. template +#if defined(__SYCL_XILINX_HW_EMU_MODE__) || defined(__SYCL_XILINX_HW_MODE__) __SYCL_DEVICE_ANNOTATE("xilinx_partition_array") - ALWAYS_INLINE inline void xilinx_partition_array(Ptr, int, int, int) {} +#endif + __SYCL_ALWAYS_INLINE + inline void xilinx_partition_array(Ptr, int, int, int) {} /** Represent a cyclic partition.