diff --git a/clang b/clang index 82f205fe0ca..a529fd030a5 160000 --- a/clang +++ b/clang @@ -1 +1 @@ -Subproject commit 82f205fe0cad94c44d5ff594721fb83b2f570dd8 +Subproject commit a529fd030a56e926233e6f6a33f70555bfa4abcd diff --git a/compiler b/compiler index 3776c25f6a9..f4a98ef88d7 160000 --- a/compiler +++ b/compiler @@ -1 +1 @@ -Subproject commit 3776c25f6a942adc5c4e5d68a88f1e3d7cb51ef6 +Subproject commit f4a98ef88d739610e493bc35b761fd96371f7023 diff --git a/lib/clamp-device.in b/lib/clamp-device.in index aed0f38941a..a3e2673fe34 100755 --- a/lib/clamp-device.in +++ b/lib/clamp-device.in @@ -8,6 +8,13 @@ # enable bash debugging KMDBSCRIPT="${KMDBSCRIPT:=0}" +if [ $KMDBSCRIPT == "1" ] +then + set -x +fi +# directory where files are dumped +KMDUMPDIR="${KMDUMPDIR:=.}" + # dump the LLVM bitcode KMDUMPLLVM="${KMDUMPLLVM:=0}" @@ -24,11 +31,6 @@ KMOPTOPT="${KMOPTOPT:="-O3"}" # if not set, then "-O2" will be passed to llc KMOPTLLC="${KMOPTLLC:="-O2"}" -if [ $KMDBSCRIPT == "1" ] -then - set -x -fi - # check command line arguments if [ "$#" -lt 2 ]; then echo "Usage: $0 input_LLVM output_hsaco_kernel (--amdgpu-target=(GPU family name)" >&2 @@ -85,6 +87,16 @@ do AMDGPU_TARGET="${ARG#*=}" continue ;; + --dump-isa) + KMDUMPISA=1 + ;; + --dump-llvm) + KMDUMPLLVM=1 + ;; + --dump-dir=*) + KMDUMPDIR="${ARG#*=}" + continue + ;; esac done @@ -116,7 +128,7 @@ if [ $RETVAL != 0 ]; then fi if [ $KMDUMPLLVM == "1" ]; then - cp $2.promote.bc ./dump.promote.bc + cp $2.promote.bc ${KMDUMPDIR}/dump.promote.bc fi HCC_EXTRA_ARCH_FILE="" @@ -153,7 +165,7 @@ if [ $RETVAL != 0 ]; then fi if [ $KMDUMPLLVM == "1" ]; then - cp $2.linked.bc ./dump.linked.bc + cp $2.linked.bc ${KMDUMPDIR}/dump.linked.bc fi # Optimization notes: @@ -168,7 +180,7 @@ if [ $RETVAL != 0 ]; then fi if [ $KMDUMPLLVM == "1" ]; then - cp $2.opt.bc ./dump-$AMDGPU_TARGET.opt.bc + cp $2.opt.bc ${KMDUMPDIR}/dump-$AMDGPU_TARGET.opt.bc fi $LLC $KMOPTLLC -mtriple amdgcn--amdhsa -mcpu=$AMDGPU_TARGET -filetype=obj -o $2.isabin $2.opt.bc @@ -183,7 +195,7 @@ fi if [ $KMDUMPISA == "1" ]; then cp $2.isabin ./dump-$AMDGPU_TARGET.isabin $LLC $KMOPTLLC -mtriple amdgcn--amdhsa -mcpu=$AMDGPU_TARGET -filetype=asm -o $2.isa $2.opt.bc - mv $2.isa ./dump-$AMDGPU_TARGET.isa + mv $2.isa ${KMDUMPDIR}/dump-$AMDGPU_TARGET.isa fi $LLD -shared $2.isabin -o $2 @@ -196,7 +208,7 @@ if [ $RETVAL != 0 ]; then fi if [ $KMDUMPISA == "1" ]; then - cp $2 ./dump-$AMDGPU_TARGET.hsaco + cp $2 ${KMDUMPDIR}/dump-$AMDGPU_TARGET.hsaco fi # remove temp file diff --git a/lib/clamp-link.in b/lib/clamp-link.in index 088957d3f18..0ca497c3c07 100755 --- a/lib/clamp-link.in +++ b/lib/clamp-link.in @@ -112,14 +112,30 @@ done for ARG in "$@" do - ###################### - # Parse AMDGPU target - ###################### case $ARG in + ###################### + # Parse AMDGPU target + ###################### --amdgpu-target=*) AMDGPU_TARGET_ARRAY+=("${ARG#*=}") continue ;; + ################################################ + # Parse dump options and export them for called + # scripts e.g. clamp-device + ################################################ + -dump-isa) + export KMDUMPISA=1 + continue + ;; + -dump-llvm) + export KMDUMPLLVM=1 + continue + ;; + -dump-dir=*) + export KMDUMPDIR="${ARG#*=}" + continue + ;; esac ##################################### diff --git a/tests/Unit/AMDGPU/register-control.cpp b/tests/Unit/AMDGPU/register-control.cpp new file mode 100644 index 00000000000..cde79d7024e --- /dev/null +++ b/tests/Unit/AMDGPU/register-control.cpp @@ -0,0 +1,35 @@ +// RUN: %hc %s -o %t.out -Xlinker -dump-llvm -Xlinker -dump-dir=%T +// RUN: %llvm-dis %T/dump*.opt.bc +// RUN: cat %T/dump*.opt.ll| %FileCheck %s +// RUN: %t.out + +#include +#include + +#define GRID_SIZE (1024) + +int main() { + using namespace hc; + array table(GRID_SIZE); + extent<1> ex(GRID_SIZE); + // CHECK-LABEL: define amdgpu_kernel void @"_ZZ4mainEN3$_019__cxxamp_trampolineEPjii" + // CHECK-SAME:({{[^)]*}}){{[^#]*}}#[[ATTR0:[0-9]+]] + // CHECK: attributes #[[ATTR0]] = {{{.*}}"amdgpu-flat-work-group-size"="1,10" "amdgpu-max-work-group-dim"="10,1,1" "amdgpu-waves-per-eu"="5,6" + auto k = [&](index<1>& idx) [[hc]] + [[hc_waves_per_eu(5,6)]] + [[hc_flat_workgroup_size(1,10)]] + [[hc_max_workgroup_dim(10,1,1)]]{ + table(idx) = idx[0]; + }; + parallel_for_each(ex, k ).wait(); + + // verify result + bool ret = true; + std::vector result = table; + for (int i = 0; i < GRID_SIZE; ++i) { + ret &= (result[i] == i); + } + + return !(ret == true); +} + diff --git a/tests/Unit/GridLaunch/register-control.cpp b/tests/Unit/GridLaunch/register-control.cpp new file mode 100644 index 00000000000..a2f7ebf1cd1 --- /dev/null +++ b/tests/Unit/GridLaunch/register-control.cpp @@ -0,0 +1,63 @@ +// RUN: %hc -lhc_am %s -o %t.out -Xlinker -dump-llvm -Xlinker -dump-dir=%T +// RUN: %llvm-dis %T/dump*.opt.bc +// RUN: cat %T/dump*.opt.ll | %FileCheck %s +// RUN: %t.out + +#include "grid_launch.hpp" +#include "hc_am.hpp" +#include "hc.hpp" +#include + +#define GRID_SIZE 256 +#define TILE_SIZE 16 + +const int SIZE = GRID_SIZE*TILE_SIZE; + +// CHECK-LABEL: define amdgpu_kernel void @_ZN12_GLOBAL__N_138_Z7kernel116grid_launch_parmPi_functor19__cxxamp_trampolineEiiiiiiPi +// CHECK-SAME:({{[^)]*}}){{[^#]*}}#[[ATTR0:[0-9]+]] +// CHECK: attributes #[[ATTR0]] = {{{.*}}"amdgpu-flat-work-group-size"="1,10" "amdgpu-max-work-group-dim"="10,1,1" "amdgpu-waves-per-eu"="5,6" + +__attribute__((hc_grid_launch)) void kernel1(grid_launch_parm lp, int *x) +[[hc_waves_per_eu(5,6)]] +[[hc_flat_workgroup_size(1,10)]] +[[hc_max_workgroup_dim(10,1,1)]] +{ + int i = hc_get_workitem_id(0) + hc_get_group_id(0)*lp.group_dim.x; + + x[i] = i; +} + +int main(void) { + + int *data1 = (int *)malloc(SIZE*sizeof(int)); + + auto acc = hc::accelerator(); + int* data1_d = (int*)hc::am_alloc(SIZE*sizeof(int), acc, 0); + + grid_launch_parm lp; + grid_launch_init(&lp); + + lp.grid_dim.x = GRID_SIZE; + lp.group_dim.x = TILE_SIZE; + + hc::completion_future cf; + lp.cf = &cf; + kernel1(lp, data1_d); + lp.cf->wait(); + + static hc::accelerator_view av = acc.get_default_view(); + av.copy(data1_d, data1, SIZE*sizeof(int)); + + bool ret = 0; + for(int i = 0; i < SIZE; ++i) { + if(data1[i] != i) { + ret = 1; + break; + } + } + + hc::am_free(data1_d); + free(data1); + + return ret; +}