Skip to content

Commit

Permalink
Merge pull request #123 from jamtrott/jamtrott/pddrive_check_superlu_acc_offload
Browse files Browse the repository at this point in the history

Skip cublas initialisation when GPU offloading is disabled at runtime (i.e., SUPERLU_ACC_OFFLOAD=0)
  • Loading branch information
xiaoyeli authored Oct 21, 2022
2 parents 98695a4 + 0520fc1 commit 324d65f
Show file tree
Hide file tree
Showing 6 changed files with 201 additions and 168 deletions.
62 changes: 34 additions & 28 deletions EXAMPLE/pddrive.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,26 +155,29 @@ int main(int argc, char *argv[])
SUPERLU_FREE(usermap);

#ifdef GPU_ACC
/* Binding each MPI to a GPU device */
char *ttemp;
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");

if (ttemp) {
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices
gpuSetDevice(rank % devs); // Set device to be used for GPU executions
}
int superlu_acc_offload = get_acc_offload();
if (superlu_acc_offload) {
/* Binding each MPI to a GPU device */
char *ttemp;
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");

if (ttemp) {
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices
gpuSetDevice(rank % devs); // Set device to be used for GPU executions
}

// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
#endif
// printf("grid.iam %5d, myrank %5d\n",grid.iam,myrank);
// fflush(stdout);
Expand All @@ -186,15 +189,18 @@ int main(int argc, char *argv[])
superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid);

#ifdef GPU_ACC
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
int superlu_acc_offload = get_acc_offload();
if (superlu_acc_offload) {
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
#endif
}

Expand Down
61 changes: 33 additions & 28 deletions EXAMPLE/pddrive3d.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,26 +222,28 @@ main (int argc, char *argv[])
SUPERLU_FREE(usermap);

#ifdef GPU_ACC
/* Binding each MPI to a GPU device */
char *ttemp;
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");

if (ttemp) {
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices
gpuSetDevice(rank % devs); // Set device to be used for GPU executions
int superlu_acc_offload = get_acc_offload();
if (superlu_acc_offload) {
/* Binding each MPI to a GPU device */
char *ttemp;
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");

if (ttemp) {
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices
gpuSetDevice(rank % devs); // Set device to be used for GPU executions
}
// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);

#endif

// printf("grid.iam %5d, myrank %5d\n",grid.iam,myrank);
Expand All @@ -253,15 +255,18 @@ main (int argc, char *argv[])
------------------------------------------------------------ */
superlu_gridinit3d (MPI_COMM_WORLD, nprow, npcol, npdep, &grid);
#ifdef GPU_ACC
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
int superlu_acc_offload = get_acc_offload();
if (superlu_acc_offload) {
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
#endif
}

Expand Down
62 changes: 34 additions & 28 deletions EXAMPLE/psdrive.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,26 +155,29 @@ int main(int argc, char *argv[])
SUPERLU_FREE(usermap);

#ifdef GPU_ACC
/* Binding each MPI to a GPU device */
char *ttemp;
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");

if (ttemp) {
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices
gpuSetDevice(rank % devs); // Set device to be used for GPU executions
}
int superlu_acc_offload = get_acc_offload();
if (superlu_acc_offload) {
/* Binding each MPI to a GPU device */
char *ttemp;
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");

if (ttemp) {
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices
gpuSetDevice(rank % devs); // Set device to be used for GPU executions
}

// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
#endif
// printf("grid.iam %5d, myrank %5d\n",grid.iam,myrank);
// fflush(stdout);
Expand All @@ -186,15 +189,18 @@ int main(int argc, char *argv[])
superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid);

#ifdef GPU_ACC
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
int superlu_acc_offload = get_acc_offload();
if (superlu_acc_offload) {
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
#endif
}

Expand Down
61 changes: 33 additions & 28 deletions EXAMPLE/psdrive3d.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,26 +222,28 @@ main (int argc, char *argv[])
SUPERLU_FREE(usermap);

#ifdef GPU_ACC
/* Binding each MPI to a GPU device */
char *ttemp;
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");

if (ttemp) {
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices
gpuSetDevice(rank % devs); // Set device to be used for GPU executions
int superlu_acc_offload = get_acc_offload();
if (superlu_acc_offload) {
/* Binding each MPI to a GPU device */
char *ttemp;
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");

if (ttemp) {
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices
gpuSetDevice(rank % devs); // Set device to be used for GPU executions
}
// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);

#endif

// printf("grid.iam %5d, myrank %5d\n",grid.iam,myrank);
Expand All @@ -253,15 +255,18 @@ main (int argc, char *argv[])
------------------------------------------------------------ */
superlu_gridinit3d (MPI_COMM_WORLD, nprow, npcol, npdep, &grid);
#ifdef GPU_ACC
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
int superlu_acc_offload = get_acc_offload();
if (superlu_acc_offload) {
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
#endif
}

Expand Down
62 changes: 34 additions & 28 deletions EXAMPLE/pzdrive.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,26 +154,29 @@ int main(int argc, char *argv[])
SUPERLU_FREE(usermap);

#ifdef GPU_ACC
/* Binding each MPI to a GPU device */
char *ttemp;
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");

if (ttemp) {
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices
gpuSetDevice(rank % devs); // Set device to be used for GPU executions
}
int superlu_acc_offload = get_acc_offload();
if (superlu_acc_offload) {
/* Binding each MPI to a GPU device */
char *ttemp;
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");

if (ttemp) {
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices
gpuSetDevice(rank % devs); // Set device to be used for GPU executions
}

// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
#endif
// printf("grid.iam %5d, myrank %5d\n",grid.iam,myrank);
// fflush(stdout);
Expand All @@ -185,15 +188,18 @@ int main(int argc, char *argv[])
superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid);

#ifdef GPU_ACC
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
int superlu_acc_offload = get_acc_offload();
if (superlu_acc_offload) {
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
#endif
}

Expand Down
Loading

0 comments on commit 324d65f

Please sign in to comment.