Skip to content

Commit

Permalink
OpenMP 4 patches, atomic version
Browse files Browse the repository at this point in the history
  • Loading branch information
reguly authored and bozbez committed Dec 11, 2024
1 parent 81636ff commit 10243db
Show file tree
Hide file tree
Showing 10 changed files with 131 additions and 62 deletions.
4 changes: 4 additions & 0 deletions makefiles/compilers/c/clang.mk
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@ CONFIG_CC := clang
CONFIG_CXX := clang++

CONFIG_CXXLINK ?= -lc++

CONFIG_CPP_HAS_OMP_OFFLOAD ?= true
OMP_OFFLOAD_CXXFLAGS = -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a
CONFIG_OMP_OFFLOAD_CXXFLAGS = -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a
4 changes: 3 additions & 1 deletion makefiles/compilers/c/cray.mk
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,6 @@ CONFIG_OMP_CPPFLAGS ?= -fopenmp
CONFIG_CPP_HAS_OMP ?= true

# CONFIG_OMP_OFFLOAD_CPPFLAGS ?=
CONFIG_CPP_HAS_OMP_OFFLOAD ?= false
CONFIG_CPP_HAS_OMP_OFFLOAD ?= true
OMP_OFFLOAD_CXXFLAGS = -fopenmp
CONFIG_OMP_OFFLOAD_CXXFLAGS = -fopenmp
4 changes: 2 additions & 2 deletions op2/include/op_cuda_rt_support.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ void cutilDeviceInit_mpi(int argc, char **argv, int mpi_rank);
* routines to move arrays to/from GPU device
*/

void op_mvHostToDevice(void **map, int size);
void op_mvHostToDevice(void **map, size_t size);

void op_cpHostToDevice(void **data_d, void **data_h, int size);
void op_cpHostToDevice(void **data_d, void **data_h, size_t size);

void op_cuda_get_data(op_dat dat);

Expand Down
4 changes: 2 additions & 2 deletions op2/include/op_hip_rt_support.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ void cutilDeviceInit_mpi(int argc, char **argv, int mpi_rank);
* routines to move arrays to/from GPU device
*/

void op_mvHostToDevice(void **map, int size);
void op_mvHostToDevice(void **map, size_t size);

void op_cpHostToDevice(void **data_d, void **data_h, int size);
void op_cpHostToDevice(void **data_d, void **data_h, size_t size);

void op_cuda_get_data(op_dat dat);

Expand Down
30 changes: 15 additions & 15 deletions op2/src/hip/op_hip_decl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,20 +113,20 @@ op_dat op_decl_dat_char(op_set set, int dim, char const *type, int size,
if (strstr(type, ":soa") != NULL || (OP_auto_soa && dim > 1)) {
char *temp_data = (char *)malloc(dat->size * set_size * sizeof(char));
int element_size = dat->size / dat->dim;
for (int i = 0; i < dat->dim; i++) {
for (int j = 0; j < set_size; j++) {
for (int c = 0; c < element_size; c++) {
for (size_t i = 0; i < dat->dim; i++) {
for (size_t j = 0; j < set_size; j++) {
for (size_t c = 0; c < element_size; c++) {
temp_data[element_size * i * set_size + element_size * j + c] =
data[dat->size * j + element_size * i + c];
}
}
}
op_cpHostToDevice((void **)&(dat->data_d), (void **)&(temp_data),
dat->size * set_size);
(size_t)dat->size * set_size);
free(temp_data);
} else {
op_cpHostToDevice((void **)&(dat->data_d), (void **)&(dat->data),
dat->size * set_size);
(size_t)dat->size * set_size);
}

return dat;
Expand All @@ -142,7 +142,7 @@ op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size,
dat->user_managed = 0;

op_cpHostToDevice((void **)&(dat->data_d), (void **)&(dat->data),
dat->size * set->size);
(size_t)dat->size * set->size);

return dat;
}
Expand All @@ -162,14 +162,14 @@ op_map op_decl_map(op_set from, op_set to, int dim, int *imap,
char const *name) {
op_map map = op_decl_map_core(from, to, dim, imap, name);
int set_size = map->from->size + map->from->exec_size;
int *temp_map = (int *)malloc(map->dim * set_size * sizeof(int));
for (int i = 0; i < map->dim; i++) {
for (int j = 0; j < set_size; j++) {
int *temp_map = (int *)malloc((size_t)map->dim * set_size * sizeof(int));
for (size_t i = 0; i < map->dim; i++) {
for (size_t j = 0; j < set_size; j++) {
temp_map[i * set_size + j] = map->map[map->dim * j + i];
}
}
op_cpHostToDevice((void **)&(map->map_d), (void **)&(temp_map),
map->dim * set_size * sizeof(int));
(size_t)map->dim * set_size * sizeof(int));
free(temp_map);
return map;
}
Expand Down Expand Up @@ -304,20 +304,20 @@ void op_upload_all() {
if (strstr(dat->type, ":soa") != NULL || (OP_auto_soa && dat->dim > 1)) {
char *temp_data = (char *)malloc(dat->size * set_size * sizeof(char));
int element_size = dat->size / dat->dim;
for (int i = 0; i < dat->dim; i++) {
for (int j = 0; j < set_size; j++) {
for (int c = 0; c < element_size; c++) {
for (size_t i = 0; i < dat->dim; i++) {
for (size_t j = 0; j < set_size; j++) {
for (size_t c = 0; c < element_size; c++) {
temp_data[element_size * i * set_size + element_size * j + c] =
dat->data[dat->size * j + element_size * i + c];
}
}
}
cutilSafeCall(hipMemcpy(dat->data_d, temp_data, dat->size * set_size,
cutilSafeCall(hipMemcpy(dat->data_d, temp_data, (size_t)dat->size * set_size,
hipMemcpyHostToDevice));
dat->dirty_hd = 0;
free(temp_data);
} else {
cutilSafeCall(hipMemcpy(dat->data_d, dat->data, dat->size * set_size,
cutilSafeCall(hipMemcpy(dat->data_d, dat->data, (size_t)dat->size * set_size,
hipMemcpyHostToDevice));
dat->dirty_hd = 0;
}
Expand Down
4 changes: 2 additions & 2 deletions op2/src/hip/op_hip_rt_support.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ void __cutilCheckMsg(const char *errorMessage, const char *file,
// routines to move arrays to/from GPU device
//

void op_mvHostToDevice(void **map, int size) {
void op_mvHostToDevice(void **map, size_t size) {
if (!OP_hybrid_gpu || size == 0)
return;
void *tmp;
Expand All @@ -104,7 +104,7 @@ void op_mvHostToDevice(void **map, int size) {
*map = tmp;
}

void op_cpHostToDevice(void **data_d, void **data_h, int size) {
void op_cpHostToDevice(void **data_d, void **data_h, size_t size) {
if (!OP_hybrid_gpu)
return;
if (*data_d != NULL) cutilSafeCall(hipFree(*data_d));
Expand Down
4 changes: 2 additions & 2 deletions op2/src/mpi/op_mpi_hip_decl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ void op_mv_halo_list_device() {

op_cpHostToDevice((void **)&(export_exec_list_d[set->index]),
(void **)&(OP_export_exec_list[set->index]->list),
OP_export_exec_list[set->index]->size * sizeof(int));
(size_t)OP_export_exec_list[set->index]->size * sizeof(int));
}

if (export_nonexec_list_d != NULL) {
Expand All @@ -302,7 +302,7 @@ void op_mv_halo_list_device() {

op_cpHostToDevice((void **)&(export_nonexec_list_d[set->index]),
(void **)&(OP_export_nonexec_list[set->index]->list),
OP_export_nonexec_list[set->index]->size * sizeof(int));
(size_t)OP_export_nonexec_list[set->index]->size * sizeof(int));
}

//for grouped, we need the disps array on device too
Expand Down
6 changes: 3 additions & 3 deletions op2/src/mpi/op_mpi_part_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1083,7 +1083,7 @@ static void migrate_all(int my_rank, int comm_size) {
//MPI_Isend(sbuf[i], (size_t)dat->size/sizeof(double) * exp->sizes[i], MPI_DOUBLE, exp->ranks[i],
// d, OP_PART_WORLD, &request_send[i]);
if ((size_t)dat->size * exp->sizes[i] > (size_t)INT_MAX) printf("Integer overflow at %s: %d\n",__FILE__,__LINE__);
MPI_Isend(sbuf[i], (size_t)dat->size * exp->sizes[i], MPI_CHAR, exp->ranks[i],
MPI_Isend(sbuf[i], (size_t)dat->size/8 * exp->sizes[i], MPI_DOUBLE, exp->ranks[i],
d, OP_PART_WORLD, &request_send[i]);
}

Expand All @@ -1096,8 +1096,8 @@ static void migrate_all(int my_rank, int comm_size) {
// MPI_DOUBLE, imp->ranks[i], d, OP_PART_WORLD,
// MPI_STATUS_IGNORE);
if ((size_t)dat->size * imp->sizes[i] > (size_t)INT_MAX) printf("Integer overflow at %s: %d\n",__FILE__,__LINE__);
MPI_Recv(&rbuf[(size_t)imp->disps[i] * (size_t)dat->size], (size_t)dat->size * imp->sizes[i],
MPI_CHAR, imp->ranks[i], d, OP_PART_WORLD,
MPI_Recv(&rbuf[(size_t)imp->disps[i] * (size_t)dat->size], (size_t)dat->size/8 * imp->sizes[i],
MPI_DOUBLE, imp->ranks[i], d, OP_PART_WORLD,
MPI_STATUS_IGNORE);
}

Expand Down
11 changes: 11 additions & 0 deletions op2/src/openmp4/op_openmp4_rt_support.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,17 @@ void op_mpi_wait_all_cuda(int nargs, op_arg *args) {
(void)args;
}

int op_mpi_halo_exchanges_grouped(op_set set, int nargs, op_arg *args, int device){
(void)device;
return device == 1 ? op_mpi_halo_exchanges(set, nargs, args) : op_mpi_halo_exchanges_cuda(set, nargs, args);
}

void op_mpi_wait_all_grouped(int nargs, op_arg *args, int device) {
(void)device;
(void)nargs;
(void)args;
}

void op_mpi_reset_halos(int nargs, op_arg *args) {
(void)nargs;
(void)args;
Expand Down
Loading

0 comments on commit 10243db

Please sign in to comment.