From f92a2bd535bef520b07df7b2e79e01e64dfdf791 Mon Sep 17 00:00:00 2001 From: Gauthier Harnisch Date: Wed, 30 Sep 2020 08:29:21 -0700 Subject: [PATCH] [SYCL] Fix edge_detection and add it to the lit test-suite --- llvm/lib/SYCL/InSPIRation.cpp | 73 ------------------ sycl/include/CL/sycl/detail/defines.hpp | 12 +++ .../CL/sycl/xilinx/fpga/opt_decorate_func.hpp | 8 +- .../CL/sycl/xilinx/fpga/partition_array.hpp | 27 ++++--- sycl/test/lit.cfg.py | 30 +++++-- .../{disabled => }/edge_detection/README.rst | 0 .../edge_detection/data/input/CF000221.jpg | Bin .../edge_detection/data/input/eiffel.bmp | Bin .../edge_detection/data/input/lola.bmp | Bin .../edge_detection/data/input/vase.bmp | Bin .../edge_detection/data/output/lola.bmp | Bin .../edge_detection/edge_detection.cpp | 23 ++++-- .../edge_detection_with_webcam.cpp | 4 +- sycl/tools/sycl-xocc/bin/sycl-xocc | 2 +- 14 files changed, 74 insertions(+), 105 deletions(-) rename sycl/test/xocc_tests/{disabled => }/edge_detection/README.rst (100%) rename sycl/test/xocc_tests/{disabled => }/edge_detection/data/input/CF000221.jpg (100%) rename sycl/test/xocc_tests/{disabled => }/edge_detection/data/input/eiffel.bmp (100%) rename sycl/test/xocc_tests/{disabled => }/edge_detection/data/input/lola.bmp (100%) rename sycl/test/xocc_tests/{disabled => }/edge_detection/data/input/vase.bmp (100%) rename sycl/test/xocc_tests/{disabled => }/edge_detection/data/output/lola.bmp (100%) rename sycl/test/xocc_tests/{disabled => }/edge_detection/edge_detection.cpp (86%) rename sycl/test/xocc_tests/{disabled => }/edge_detection/edge_detection_with_webcam.cpp (96%) diff --git a/llvm/lib/SYCL/InSPIRation.cpp b/llvm/lib/SYCL/InSPIRation.cpp index 499e23b8f3f6..8954b2b2a354 100644 --- a/llvm/lib/SYCL/InSPIRation.cpp +++ b/llvm/lib/SYCL/InSPIRation.cpp @@ -348,78 +348,6 @@ struct InSPIRation : public ModulePass { } } - /// This currently looks through the arguments passed to the SSDM intrinsic - /// call and checks the instruction to see if it is an address space cast to - /// a generic, if it is, it will take the concrete segment of the cast and - /// replace the operand with it. It looks like the below example: - /// - /// Before: - /// %10 = getelementptr inbounds %"struct._ZTSN2cl4sycl6xilinx15partition_ - /// arrayIcLm9ENS1_9partition8completeILm0EEEEE.cl::sycl::xilinx::partition_ - /// array", %"struct._ZTSN2cl4sycl6xilinx15partition_arrayIcLm9ENS1_9 - /// partition8completeILm0EEEEE.cl::sycl::xilinx::partition_array"* %1, - /// i64 0, i32 0, i64 0 - /// %11 = addrspacecast i8* %10 to i8 addrspace(4)* - /// call spir_func void (...) @_ssdm_SpecArrayPartition(i8 addrspace(4)* %11, - /// i64 0, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, - /// i64 0), i32 0, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str.2, - /// i64 0, i64 0)) #4 - /// - /// After: - /// %10 = getelementptr inbounds %"struct._ZTSN2cl4sycl6xilinx15partition_ - /// arrayIcLm9ENS1_9partition8completeILm0EEEEE.cl::sycl::xilinx::partition - /// _array", %"struct._ZTSN2cl4sycl6xilinx15partition_arrayIcLm9ENS1_9 - /// partition8completeILm0EEEEE.cl::sycl::xilinx::partition_array"* %1, - /// i64 0, i32 0, i64 0 - /// call spir_func void (...) @_ssdm_SpecArrayPartition(i8* %10, i64 0, - /// i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), - /// i32 0, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str.2, i64 0, - /// i64 0)) #4 - /// - /// It's simply collapsing away the cast right now, this doesn't consider the - /// possibility of interactions with other possible address space casts that - /// depend on it (we simply replace all uses with the non-geneirc variant). - /// For now we hope that these are erased or eliminated by either of the AS - /// Fixer passes or DCE passes. - /// - /// Note: Unsure how robust this is with the small sample size we currently - /// have to test against. So it's a WIP. If the situation becomes untenable - /// we can likely revert the accessor class back to it's prior form by - /// reverting this pull: https://github.com/intel/llvm/pull/348 (commit: - /// 609999c4e1aeca05aff010ce5e2eb08dde08fd69). This may cause address space - /// leakage however, but should result in more overall address space - /// consistency/stability due to the addition of more concrete address spaces. - void handleSpecArrayPartition(CallInst *CI) { - for (Use &Op : CI->operands()) { - if (Op->getType()->isPointerTy()) { - if (auto *ASC = dyn_cast(Op)) { - if (ASC->getDestAddressSpace() == /*Generic AS*/ 4) { - Op.set(nullptr); - Op.set(ASC->getPointerOperand()); - } - } - } - } - } - - /// SSDM intrinsics are black boxes, the InferAddressSpace pass will not touch - /// them (this is in part due to the fact it doesn't deal with Calls and in - /// part because SSDMs are declared with no implementation and no arguments), - /// this will result in left over generic casts. This function is here - /// to deal with the cases of the left over generics caused by SSDM intrinsics - /// as if they're left in the compilation will fail. - /// - /// In the future if we ever define an LLVM target backend similar to AMDGPU - /// and we end up with a lot of these edge cases we could move this to the - /// InferAddressSpaces pass and teach it to deal with these SSDM calls as - /// Intrinsics. - void ssdmAddressSpaceFix(Function &F) { - for (auto &I : instructions(F)) - if (auto *Call = dyn_cast(&I)) - if (Call->getIntrinsicID() == Intrinsic::sideeffect) - handleSpecArrayPartition(Call); - } - // Hopeful list/probably impractical asks for xocc: // 1) Make XML generator/reader a little kinder towards arguments with no // names if possible @@ -486,7 +414,6 @@ struct InSPIRation : public ModulePass { // It doesn't require application to the SPIR intrinsics as we're // linking against the HLS SPIR library, which is already conformant. giveNameToArguments(F); - ssdmAddressSpaceFix(F); } else if (isTransitiveNonIntrinsicFunc(F) && F.isDeclaration()) { // push back intrinsics to make sure we handle naming after changing the diff --git a/sycl/include/CL/sycl/detail/defines.hpp b/sycl/include/CL/sycl/detail/defines.hpp index a34b53e9bfe2..3205ff506fca 100644 --- a/sycl/include/CL/sycl/detail/defines.hpp +++ b/sycl/include/CL/sycl/detail/defines.hpp @@ -63,3 +63,15 @@ #else #define __SYCL_INLINE_CONSTEXPR static constexpr #endif + +#if defined(__SYCL_DEVICE_ONLY__) +#define __SYCL_DEVICE_ANNOTATE(...) __attribute__((annotate(__VA_ARGS__))) +#else +#define __SYCL_DEVICE_ANNOTATE(...) +#endif + +#if defined(__SYCL_DEVICE_ONLY__) +#define __SYCL_DEVICE_ADDRSPACE(AS) __attribute__((address_space(AS))) +#else +#define __SYCL_DEVICE_ADDRSPACE(AS) +#endif diff --git a/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp b/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp index 67e98928ebfe..e3954ed22aaa 100644 --- a/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp +++ b/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp @@ -18,12 +18,6 @@ #include "CL/sycl/xilinx/fpga/ssdm_inst.hpp" #include "CL/sycl/detail/defines.hpp" -#if defined(__SYCL_DEVICE_ONLY__) -#define SYCL_DEVICE_ANNOTATE(...) __attribute__((annotate(__VA_ARGS__))) -#else -#define SYCL_DEVICE_ANNOTATE(...) -#endif - __SYCL_INLINE_NAMESPACE(cl) { namespace sycl::xilinx { @@ -59,7 +53,7 @@ void dataflow(T functor) { pipeline way. */ template -SYCL_DEVICE_ANNOTATE("xilinx_pipeline") __attribute__((always_inline)) +__SYCL_DEVICE_ANNOTATE("xilinx_pipeline") __attribute__((always_inline)) void pipeline(T functor) { functor(); } diff --git a/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp b/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp index b94d5a8c4eaf..c2553300271c 100644 --- a/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp +++ b/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp @@ -51,8 +51,9 @@ namespace partition { /// This fuction is currently empty but the LowerSYCLMetaData Pass will fill /// it with the required IR. - SYCL_DEVICE_ANNOTATE("xilinx_partition_array") __attribute__((always_inline)) - inline void xilinx_partition_array(void*, int, int, int) {} + template + __SYCL_DEVICE_ANNOTATE("xilinx_partition_array") __attribute__((always_inline)) + inline void xilinx_partition_array(Ptr, int, int, int) {} /** Represent a cyclic partition. @@ -130,6 +131,7 @@ namespace partition { */ template struct complete { + static_assert(PDim >= 1, "array can't 0 dimension"); static constexpr auto partition_dim = PDim; static constexpr auto partition_type = type::complete; }; @@ -197,16 +199,21 @@ struct partition_array { // Add the intrinsic according expressing to the target compiler the // partitioning to use if constexpr (partition_type == partition::type::cyclic) - partition::xilinx_partition_array(&(*this)[0], partition_type, - PartitionType::physical_mem_num, - PartitionType::partition_dim); + partition::xilinx_partition_array( + (ValueType __SYCL_DEVICE_ADDRSPACE(0 /*stack*/)(*)[Size])(&elems) & + elems, + partition_type, PartitionType::physical_mem_num, + PartitionType::partition_dim); if constexpr (partition_type == partition::type::block) - partition::xilinx_partition_array(&(*this)[0], partition_type, - PartitionType::ele_in_each_physical_mem, - PartitionType::partition_dim); + partition::xilinx_partition_array( + (ValueType __SYCL_DEVICE_ADDRSPACE(0 /*stack*/)(*)[Size])(&elems) & + elems, + partition_type, PartitionType::ele_in_each_physical_mem, + PartitionType::partition_dim); if constexpr (partition_type == partition::type::complete) - partition::xilinx_partition_array(&(*this)[0], partition_type, 0, - PartitionType::partition_dim); + partition::xilinx_partition_array( + (ValueType __SYCL_DEVICE_ADDRSPACE(0 /*stack*/)(*)[Size])(&elems), + partition_type, 0, PartitionType::partition_dim); } /// A constructor from some container diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index 8b0970ecc3e2..aca4635755df 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -32,6 +32,8 @@ # test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) +timeout=600 + # test_exec_root: The root path where tests should be run. config.test_exec_root = os.path.join(config.sycl_obj_root, 'test') @@ -80,7 +82,6 @@ config.substitutions.append( ('%sycl_be', backend) ) xocc=lit_config.params.get('XOCC', "off") -lit_config.note("XOCC={}".format(xocc)) get_device_count_by_type_path = os.path.join(config.llvm_tools_dir, "get_device_count_by_type") @@ -248,6 +249,13 @@ def getDeviceCount(device_type): required_env = ['HOME', 'USER', 'XILINX_XRT', 'XILINX_SDX', 'XILINX_PLATFORM', 'EMCONFIG_PATH', 'LIBRARY_PATH'] has_error=False config.available_features.add("xocc") + config.available_features.add(xocc_target) + pkg_opencv4 = subprocess.run(["pkg-config", "--libs", "--cflags", "opencv4"], stdout=subprocess.PIPE) + has_opencv4 = not pkg_opencv4.returncode + lit_config.note("has opencv4: {}".format(has_opencv4)) + if has_opencv4: + config.available_features.add("opencv4") + config.substitutions.append( ('%opencv4_flags', pkg_opencv4.stdout.decode('utf-8')[:-1]) ) for env in required_env: if env not in os.environ: lit_config.note("missing environnement variable: {}".format(env)) @@ -258,13 +266,25 @@ def getDeviceCount(device_type): if xocc == "only": config.name = 'SYCL-XOCC' config.test_source_root = config.test_source_root + "/xocc_tests" + run_if_hw="echo" + run_if_hw_emu="echo" + run_if_sw_emu="echo" + if xocc_target == "hw": + timeout = 10800 # 3h + run_if_hw="" + if xocc_target == "hw_emu": + timeout = 1800 # 30min + run_if_hw_emu="" + if xocc_target == "sw_emu": + run_if_sw_emu="" + config.substitutions.append( ('%run_if_hw', run_if_hw) ) + config.substitutions.append( ('%run_if_hw_emu', run_if_hw_emu) ) + config.substitutions.append( ('%run_if_sw_emu', run_if_sw_emu) ) + # Set timeout for test = 10 mins try: import psutil - lit_config.maxIndividualTestTime = 600 - if "XCL_EMULATION_MODE" in os.environ: - if os.environ["XCL_EMULATION_MODE"] == "hw": - lit_config.maxIndividualTestTime = 10800 # 3h + lit_config.maxIndividualTestTime = timeout except ImportError: pass diff --git a/sycl/test/xocc_tests/disabled/edge_detection/README.rst b/sycl/test/xocc_tests/edge_detection/README.rst similarity index 100% rename from sycl/test/xocc_tests/disabled/edge_detection/README.rst rename to sycl/test/xocc_tests/edge_detection/README.rst diff --git a/sycl/test/xocc_tests/disabled/edge_detection/data/input/CF000221.jpg b/sycl/test/xocc_tests/edge_detection/data/input/CF000221.jpg similarity index 100% rename from sycl/test/xocc_tests/disabled/edge_detection/data/input/CF000221.jpg rename to sycl/test/xocc_tests/edge_detection/data/input/CF000221.jpg diff --git a/sycl/test/xocc_tests/disabled/edge_detection/data/input/eiffel.bmp b/sycl/test/xocc_tests/edge_detection/data/input/eiffel.bmp similarity index 100% rename from sycl/test/xocc_tests/disabled/edge_detection/data/input/eiffel.bmp rename to sycl/test/xocc_tests/edge_detection/data/input/eiffel.bmp diff --git a/sycl/test/xocc_tests/disabled/edge_detection/data/input/lola.bmp b/sycl/test/xocc_tests/edge_detection/data/input/lola.bmp similarity index 100% rename from sycl/test/xocc_tests/disabled/edge_detection/data/input/lola.bmp rename to sycl/test/xocc_tests/edge_detection/data/input/lola.bmp diff --git a/sycl/test/xocc_tests/disabled/edge_detection/data/input/vase.bmp b/sycl/test/xocc_tests/edge_detection/data/input/vase.bmp similarity index 100% rename from sycl/test/xocc_tests/disabled/edge_detection/data/input/vase.bmp rename to sycl/test/xocc_tests/edge_detection/data/input/vase.bmp diff --git a/sycl/test/xocc_tests/disabled/edge_detection/data/output/lola.bmp b/sycl/test/xocc_tests/edge_detection/data/output/lola.bmp similarity index 100% rename from sycl/test/xocc_tests/disabled/edge_detection/data/output/lola.bmp rename to sycl/test/xocc_tests/edge_detection/data/output/lola.bmp diff --git a/sycl/test/xocc_tests/disabled/edge_detection/edge_detection.cpp b/sycl/test/xocc_tests/edge_detection/edge_detection.cpp similarity index 86% rename from sycl/test/xocc_tests/disabled/edge_detection/edge_detection.cpp rename to sycl/test/xocc_tests/edge_detection/edge_detection.cpp index dad6353fcea6..ce255f2a9a01 100644 --- a/sycl/test/xocc_tests/disabled/edge_detection/edge_detection.cpp +++ b/sycl/test/xocc_tests/edge_detection/edge_detection.cpp @@ -1,4 +1,12 @@ -// RUN: true +// REQUIRES: xocc && opencv4 + +// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%sycl_triple -o %t.out %s %opencv4_flags +// RUN: %run_if_hw %ACC_RUN_PLACEHOLDER %t.out %S/data/input/eiffel.bmp +// RUN: %run_if_hw %ACC_RUN_PLACEHOLDER %t.out %S/data/input/lola.bmp +// RUN: %run_if_hw %ACC_RUN_PLACEHOLDER %t.out %S/data/input/vase.bmp +// RUN: %run_if_sw_emu %ACC_RUN_PLACEHOLDER %t.out %S/data/input/eiffel.bmp +// RUN: %run_if_sw_emu %ACC_RUN_PLACEHOLDER %t.out %S/data/input/lola.bmp +// RUN: %run_if_sw_emu %ACC_RUN_PLACEHOLDER %t.out %S/data/input/vase.bmp /* Attempt at translating SDAccel Examples edge_detection example to SYCL @@ -29,7 +37,7 @@ #include #include -#include "../../utilities/device_selectors.hpp" +#include "../utilities/device_selectors.hpp" using namespace cl::sycl; @@ -50,9 +58,9 @@ int main(int argc, char* argv[]) { // using fixed constexpr values stays more true to the original implementation // however you can in theory just use input.rows/cols to support a wider range // of image sizes. - constexpr auto height = 1895; // input.rows; - constexpr auto width = 1024; // input.cols; - constexpr auto area = height * width; + const size_t height = input.rows; + const size_t width = input.cols; + const size_t area = height * width; selector_defines::CompiledForDeviceSelector selector; queue q {selector, property::queue::enable_profiling()}; @@ -71,6 +79,7 @@ int main(int argc, char* argv[]) { std::cout << "Max Energy = " << ceil(log2((long long)iMax * 2 * 3 * 3)) + 1 << " Bits \n"; std::cout << "Image Dimensions: " << input.size() << "\n"; + std::cout << "Used Size: " << height << "x" << width << " = " << area << "\n"; // mapping the enqueueTask call to a single_task, interested in seeing if a // parallel_for without a fixed 1-1-1 mapping is workable on an FPGA though.. @@ -92,10 +101,10 @@ int main(int argc, char* argv[]) { // to parallel_fors with local sizes [=]() { auto gX = xilinx::partition_array>({-1, 0, 1, -2, 0, 2, -1, 0, 1}); + xilinx::partition::complete<1>>({-1, 0, 1, -2, 0, 2, -1, 0, 1}); auto gY = xilinx::partition_array>({1, 2, 1, 0, 0, 0, -1, -2, -1}); + xilinx::partition::complete<1>>({1, 2, 1, 0, 0, 0, -1, -2, -1}); int magX, magY, gI, pIndex; diff --git a/sycl/test/xocc_tests/disabled/edge_detection/edge_detection_with_webcam.cpp b/sycl/test/xocc_tests/edge_detection/edge_detection_with_webcam.cpp similarity index 96% rename from sycl/test/xocc_tests/disabled/edge_detection/edge_detection_with_webcam.cpp rename to sycl/test/xocc_tests/edge_detection/edge_detection_with_webcam.cpp index 6408a20a7fea..c1e74add010d 100644 --- a/sycl/test/xocc_tests/disabled/edge_detection/edge_detection_with_webcam.cpp +++ b/sycl/test/xocc_tests/edge_detection/edge_detection_with_webcam.cpp @@ -66,10 +66,10 @@ int main(int argc, char* argv[]) { cgh.single_task>([=] { xilinx::partition_array - > gX + > gX { {-1, 0, 1, -2, 0, 2, -1, 0, 1} }; xilinx::partition_array - > gY + > gY { {1, 2, 1, 0, 0, 0, -1, -2, -1} }; for (size_t x = 1; x < width - 1; ++x) { diff --git a/sycl/tools/sycl-xocc/bin/sycl-xocc b/sycl/tools/sycl-xocc/bin/sycl-xocc index e1c56c67d491..9ff4edd2ef34 100755 --- a/sycl/tools/sycl-xocc/bin/sycl-xocc +++ b/sycl/tools/sycl-xocc/bin/sycl-xocc @@ -229,7 +229,7 @@ if [[ ! ${#KERNEL_NAME_ARRAY[@]} -eq 0 ]]; then --save-temps -o "$OUTPUT_FILE_NAME" $LINKER_LIST $@ cd $CWD - cp -r $LINKER_WD/* . + cp -rf $LINKER_WD/* . # This step is unconnected to the generation of the final xcl binary, it dumps # useful debug information from the final generated xcl binary into the temp