Skip to content

Commit

Permalink
Merge branch 'hcc-register-control' of https://github.com/yxsamliu/hcc
Browse files Browse the repository at this point in the history
…into yxsamliu-hcc-register-control

Conflicts:
	compiler
  • Loading branch information
whchung committed Feb 10, 2017
2 parents 887b9b4 + 1d65b8a commit f1281a7
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 15 deletions.
2 changes: 1 addition & 1 deletion clang
Submodule clang updated from 82f205 to a529fd
2 changes: 1 addition & 1 deletion compiler
32 changes: 22 additions & 10 deletions lib/clamp-device.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@
# enable bash debugging
KMDBSCRIPT="${KMDBSCRIPT:=0}"

if [ $KMDBSCRIPT == "1" ]
then
set -x
fi
# directory where files are dumped
KMDUMPDIR="${KMDUMPDIR:=.}"

# dump the LLVM bitcode
KMDUMPLLVM="${KMDUMPLLVM:=0}"

Expand All @@ -24,11 +31,6 @@ KMOPTOPT="${KMOPTOPT:="-O3"}"
# if not set, then "-O2" will be passed to llc
KMOPTLLC="${KMOPTLLC:="-O2"}"

if [ $KMDBSCRIPT == "1" ]
then
set -x
fi

# check command line arguments
if [ "$#" -lt 2 ]; then
echo "Usage: $0 input_LLVM output_hsaco_kernel (--amdgpu-target=(GPU family name)" >&2
Expand Down Expand Up @@ -85,6 +87,16 @@ do
AMDGPU_TARGET="${ARG#*=}"
continue
;;
--dump-isa)
KMDUMPISA=1
;;
--dump-llvm)
KMDUMPLLVM=1
;;
--dump-dir=*)
KMDUMPDIR="${ARG#*=}"
continue
;;
esac
done

Expand Down Expand Up @@ -116,7 +128,7 @@ if [ $RETVAL != 0 ]; then
fi

if [ $KMDUMPLLVM == "1" ]; then
cp $2.promote.bc ./dump.promote.bc
cp $2.promote.bc ${KMDUMPDIR}/dump.promote.bc
fi

HCC_EXTRA_ARCH_FILE=""
Expand Down Expand Up @@ -153,7 +165,7 @@ if [ $RETVAL != 0 ]; then
fi

if [ $KMDUMPLLVM == "1" ]; then
cp $2.linked.bc ./dump.linked.bc
cp $2.linked.bc ${KMDUMPDIR}/dump.linked.bc
fi

# Optimization notes:
Expand All @@ -168,7 +180,7 @@ if [ $RETVAL != 0 ]; then
fi

if [ $KMDUMPLLVM == "1" ]; then
cp $2.opt.bc ./dump-$AMDGPU_TARGET.opt.bc
cp $2.opt.bc ${KMDUMPDIR}/dump-$AMDGPU_TARGET.opt.bc
fi

$LLC $KMOPTLLC -mtriple amdgcn--amdhsa -mcpu=$AMDGPU_TARGET -filetype=obj -o $2.isabin $2.opt.bc
Expand All @@ -183,7 +195,7 @@ fi
if [ $KMDUMPISA == "1" ]; then
cp $2.isabin ./dump-$AMDGPU_TARGET.isabin
$LLC $KMOPTLLC -mtriple amdgcn--amdhsa -mcpu=$AMDGPU_TARGET -filetype=asm -o $2.isa $2.opt.bc
mv $2.isa ./dump-$AMDGPU_TARGET.isa
mv $2.isa ${KMDUMPDIR}/dump-$AMDGPU_TARGET.isa
fi

$LLD -shared $2.isabin -o $2
Expand All @@ -196,7 +208,7 @@ if [ $RETVAL != 0 ]; then
fi

if [ $KMDUMPISA == "1" ]; then
cp $2 ./dump-$AMDGPU_TARGET.hsaco
cp $2 ${KMDUMPDIR}/dump-$AMDGPU_TARGET.hsaco
fi

# remove temp file
Expand Down
22 changes: 19 additions & 3 deletions lib/clamp-link.in
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,30 @@ done
for ARG in "$@"
do

######################
# Parse AMDGPU target
######################
case $ARG in
######################
# Parse AMDGPU target
######################
--amdgpu-target=*)
AMDGPU_TARGET_ARRAY+=("${ARG#*=}")
continue
;;
################################################
# Parse dump options and export them for called
# scripts e.g. clamp-device
################################################
-dump-isa)
export KMDUMPISA=1
continue
;;
-dump-llvm)
export KMDUMPLLVM=1
continue
;;
-dump-dir=*)
export KMDUMPDIR="${ARG#*=}"
continue
;;
esac

#####################################
Expand Down
35 changes: 35 additions & 0 deletions tests/Unit/AMDGPU/register-control.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// RUN: %hc %s -o %t.out -Xlinker -dump-llvm -Xlinker -dump-dir=%T
// RUN: %llvm-dis %T/dump*.opt.bc
// RUN: cat %T/dump*.opt.ll| %FileCheck %s
// RUN: %t.out

#include <hc.hpp>
#include <vector>

#define GRID_SIZE (1024)

int main() {
using namespace hc;
array<unsigned int, 1> table(GRID_SIZE);
extent<1> ex(GRID_SIZE);
// CHECK-LABEL: define amdgpu_kernel void @"_ZZ4mainEN3$_019__cxxamp_trampolineEPjii"
// CHECK-SAME:({{[^)]*}}){{[^#]*}}#[[ATTR0:[0-9]+]]
// CHECK: attributes #[[ATTR0]] = {{{.*}}"amdgpu-flat-work-group-size"="1,10" "amdgpu-max-work-group-dim"="10,1,1" "amdgpu-waves-per-eu"="5,6"
auto k = [&](index<1>& idx) [[hc]]
[[hc_waves_per_eu(5,6)]]
[[hc_flat_workgroup_size(1,10)]]
[[hc_max_workgroup_dim(10,1,1)]]{
table(idx) = idx[0];
};
parallel_for_each(ex, k ).wait();

// verify result
bool ret = true;
std::vector<unsigned int> result = table;
for (int i = 0; i < GRID_SIZE; ++i) {
ret &= (result[i] == i);
}

return !(ret == true);
}

63 changes: 63 additions & 0 deletions tests/Unit/GridLaunch/register-control.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// RUN: %hc -lhc_am %s -o %t.out -Xlinker -dump-llvm -Xlinker -dump-dir=%T
// RUN: %llvm-dis %T/dump*.opt.bc
// RUN: cat %T/dump*.opt.ll | %FileCheck %s
// RUN: %t.out

#include "grid_launch.hpp"
#include "hc_am.hpp"
#include "hc.hpp"
#include <iostream>

#define GRID_SIZE 256
#define TILE_SIZE 16

const int SIZE = GRID_SIZE*TILE_SIZE;

// CHECK-LABEL: define amdgpu_kernel void @_ZN12_GLOBAL__N_138_Z7kernel116grid_launch_parmPi_functor19__cxxamp_trampolineEiiiiiiPi
// CHECK-SAME:({{[^)]*}}){{[^#]*}}#[[ATTR0:[0-9]+]]
// CHECK: attributes #[[ATTR0]] = {{{.*}}"amdgpu-flat-work-group-size"="1,10" "amdgpu-max-work-group-dim"="10,1,1" "amdgpu-waves-per-eu"="5,6"

__attribute__((hc_grid_launch)) void kernel1(grid_launch_parm lp, int *x)
[[hc_waves_per_eu(5,6)]]
[[hc_flat_workgroup_size(1,10)]]
[[hc_max_workgroup_dim(10,1,1)]]
{
int i = hc_get_workitem_id(0) + hc_get_group_id(0)*lp.group_dim.x;

x[i] = i;
}

int main(void) {

int *data1 = (int *)malloc(SIZE*sizeof(int));

auto acc = hc::accelerator();
int* data1_d = (int*)hc::am_alloc(SIZE*sizeof(int), acc, 0);

grid_launch_parm lp;
grid_launch_init(&lp);

lp.grid_dim.x = GRID_SIZE;
lp.group_dim.x = TILE_SIZE;

hc::completion_future cf;
lp.cf = &cf;
kernel1(lp, data1_d);
lp.cf->wait();

static hc::accelerator_view av = acc.get_default_view();
av.copy(data1_d, data1, SIZE*sizeof(int));

bool ret = 0;
for(int i = 0; i < SIZE; ++i) {
if(data1[i] != i) {
ret = 1;
break;
}
}

hc::am_free(data1_d);
free(data1);

return ret;
}

0 comments on commit f1281a7

Please sign in to comment.