diff --git a/EXAMPLE/pddrive.c b/EXAMPLE/pddrive.c index 2b4b4381..1fc7dcce 100755 --- a/EXAMPLE/pddrive.c +++ b/EXAMPLE/pddrive.c @@ -155,26 +155,29 @@ int main(int argc, char *argv[]) SUPERLU_FREE(usermap); #ifdef GPU_ACC - /* Binding each MPI to a GPU device */ - char *ttemp; - ttemp = getenv ("SUPERLU_BIND_MPI_GPU"); - - if (ttemp) { - int devs, rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm - gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices - gpuSetDevice(rank % devs); // Set device to be used for GPU executions - } + int superlu_acc_offload = get_acc_offload(); + if (superlu_acc_offload) { + /* Binding each MPI to a GPU device */ + char *ttemp; + ttemp = getenv ("SUPERLU_BIND_MPI_GPU"); + + if (ttemp) { + int devs, rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm + gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices + gpuSetDevice(rank % devs); // Set device to be used for GPU executions + } - // This is to initialize GPU, which can be costly. - double t1 = SuperLU_timer_(); - gpuFree(0); - double t2 = SuperLU_timer_(); - if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); - gpublasHandle_t hb; - gpublasCreate(&hb); - if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); - gpublasDestroy(hb); + // This is to initialize GPU, which can be costly. + double t1 = SuperLU_timer_(); + gpuFree(0); + double t2 = SuperLU_timer_(); + if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); + gpublasHandle_t hb; + gpublasCreate(&hb); + if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); + gpublasDestroy(hb); + } #endif // printf("grid.iam %5d, myrank %5d\n",grid.iam,myrank); // fflush(stdout); @@ -186,15 +189,18 @@ int main(int argc, char *argv[]) superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); #ifdef GPU_ACC - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - double t1 = SuperLU_timer_(); - gpuFree(0); - double t2 = SuperLU_timer_(); - if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); - gpublasHandle_t hb; - gpublasCreate(&hb); - if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); - gpublasDestroy(hb); + int superlu_acc_offload = get_acc_offload(); + if (superlu_acc_offload) { + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + double t1 = SuperLU_timer_(); + gpuFree(0); + double t2 = SuperLU_timer_(); + if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); + gpublasHandle_t hb; + gpublasCreate(&hb); + if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); + gpublasDestroy(hb); + } #endif } diff --git a/EXAMPLE/pddrive3d.c b/EXAMPLE/pddrive3d.c index fa7fb1af..ec444b48 100755 --- a/EXAMPLE/pddrive3d.c +++ b/EXAMPLE/pddrive3d.c @@ -222,26 +222,28 @@ main (int argc, char *argv[]) SUPERLU_FREE(usermap); #ifdef GPU_ACC - /* Binding each MPI to a GPU device */ - char *ttemp; - ttemp = getenv ("SUPERLU_BIND_MPI_GPU"); - - if (ttemp) { - int devs, rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm - gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices - gpuSetDevice(rank % devs); // Set device to be used for GPU executions + int superlu_acc_offload = get_acc_offload(); + if (superlu_acc_offload) { + /* Binding each MPI to a GPU device */ + char *ttemp; + ttemp = getenv ("SUPERLU_BIND_MPI_GPU"); + + if (ttemp) { + int devs, rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm + gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices + gpuSetDevice(rank % devs); // Set device to be used for GPU executions + } + // This is to initialize GPU, which can be costly. + double t1 = SuperLU_timer_(); + gpuFree(0); + double t2 = SuperLU_timer_(); + if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); + gpublasHandle_t hb; + gpublasCreate(&hb); + if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); + gpublasDestroy(hb); } - // This is to initialize GPU, which can be costly. - double t1 = SuperLU_timer_(); - gpuFree(0); - double t2 = SuperLU_timer_(); - if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); - gpublasHandle_t hb; - gpublasCreate(&hb); - if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); - gpublasDestroy(hb); - #endif // printf("grid.iam %5d, myrank %5d\n",grid.iam,myrank); @@ -253,15 +255,18 @@ main (int argc, char *argv[]) ------------------------------------------------------------ */ superlu_gridinit3d (MPI_COMM_WORLD, nprow, npcol, npdep, &grid); #ifdef GPU_ACC - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - double t1 = SuperLU_timer_(); - gpuFree(0); - double t2 = SuperLU_timer_(); - if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); - gpublasHandle_t hb; - gpublasCreate(&hb); - if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); - gpublasDestroy(hb); + int superlu_acc_offload = get_acc_offload(); + if (superlu_acc_offload) { + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + double t1 = SuperLU_timer_(); + gpuFree(0); + double t2 = SuperLU_timer_(); + if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); + gpublasHandle_t hb; + gpublasCreate(&hb); + if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); + gpublasDestroy(hb); + } #endif } diff --git a/EXAMPLE/psdrive.c b/EXAMPLE/psdrive.c index 47be729f..175f0deb 100644 --- a/EXAMPLE/psdrive.c +++ b/EXAMPLE/psdrive.c @@ -155,26 +155,29 @@ int main(int argc, char *argv[]) SUPERLU_FREE(usermap); #ifdef GPU_ACC - /* Binding each MPI to a GPU device */ - char *ttemp; - ttemp = getenv ("SUPERLU_BIND_MPI_GPU"); - - if (ttemp) { - int devs, rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm - gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices - gpuSetDevice(rank % devs); // Set device to be used for GPU executions - } + int superlu_acc_offload = get_acc_offload(); + if (superlu_acc_offload) { + /* Binding each MPI to a GPU device */ + char *ttemp; + ttemp = getenv ("SUPERLU_BIND_MPI_GPU"); + + if (ttemp) { + int devs, rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm + gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices + gpuSetDevice(rank % devs); // Set device to be used for GPU executions + } - // This is to initialize GPU, which can be costly. - double t1 = SuperLU_timer_(); - gpuFree(0); - double t2 = SuperLU_timer_(); - if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); - gpublasHandle_t hb; - gpublasCreate(&hb); - if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); - gpublasDestroy(hb); + // This is to initialize GPU, which can be costly. + double t1 = SuperLU_timer_(); + gpuFree(0); + double t2 = SuperLU_timer_(); + if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); + gpublasHandle_t hb; + gpublasCreate(&hb); + if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); + gpublasDestroy(hb); + } #endif // printf("grid.iam %5d, myrank %5d\n",grid.iam,myrank); // fflush(stdout); @@ -186,15 +189,18 @@ int main(int argc, char *argv[]) superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); #ifdef GPU_ACC - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - double t1 = SuperLU_timer_(); - gpuFree(0); - double t2 = SuperLU_timer_(); - if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); - gpublasHandle_t hb; - gpublasCreate(&hb); - if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); - gpublasDestroy(hb); + int superlu_acc_offload = get_acc_offload(); + if (superlu_acc_offload) { + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + double t1 = SuperLU_timer_(); + gpuFree(0); + double t2 = SuperLU_timer_(); + if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); + gpublasHandle_t hb; + gpublasCreate(&hb); + if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); + gpublasDestroy(hb); + } #endif } diff --git a/EXAMPLE/psdrive3d.c b/EXAMPLE/psdrive3d.c index 91997525..e90e4162 100644 --- a/EXAMPLE/psdrive3d.c +++ b/EXAMPLE/psdrive3d.c @@ -222,26 +222,28 @@ main (int argc, char *argv[]) SUPERLU_FREE(usermap); #ifdef GPU_ACC - /* Binding each MPI to a GPU device */ - char *ttemp; - ttemp = getenv ("SUPERLU_BIND_MPI_GPU"); - - if (ttemp) { - int devs, rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm - gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices - gpuSetDevice(rank % devs); // Set device to be used for GPU executions + int superlu_acc_offload = get_acc_offload(); + if (superlu_acc_offload) { + /* Binding each MPI to a GPU device */ + char *ttemp; + ttemp = getenv ("SUPERLU_BIND_MPI_GPU"); + + if (ttemp) { + int devs, rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm + gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices + gpuSetDevice(rank % devs); // Set device to be used for GPU executions + } + // This is to initialize GPU, which can be costly. + double t1 = SuperLU_timer_(); + gpuFree(0); + double t2 = SuperLU_timer_(); + if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); + gpublasHandle_t hb; + gpublasCreate(&hb); + if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); + gpublasDestroy(hb); } - // This is to initialize GPU, which can be costly. - double t1 = SuperLU_timer_(); - gpuFree(0); - double t2 = SuperLU_timer_(); - if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); - gpublasHandle_t hb; - gpublasCreate(&hb); - if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); - gpublasDestroy(hb); - #endif // printf("grid.iam %5d, myrank %5d\n",grid.iam,myrank); @@ -253,15 +255,18 @@ main (int argc, char *argv[]) ------------------------------------------------------------ */ superlu_gridinit3d (MPI_COMM_WORLD, nprow, npcol, npdep, &grid); #ifdef GPU_ACC - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - double t1 = SuperLU_timer_(); - gpuFree(0); - double t2 = SuperLU_timer_(); - if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); - gpublasHandle_t hb; - gpublasCreate(&hb); - if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); - gpublasDestroy(hb); + int superlu_acc_offload = get_acc_offload(); + if (superlu_acc_offload) { + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + double t1 = SuperLU_timer_(); + gpuFree(0); + double t2 = SuperLU_timer_(); + if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); + gpublasHandle_t hb; + gpublasCreate(&hb); + if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); + gpublasDestroy(hb); + } #endif } diff --git a/EXAMPLE/pzdrive.c b/EXAMPLE/pzdrive.c index 789472a4..ad445725 100755 --- a/EXAMPLE/pzdrive.c +++ b/EXAMPLE/pzdrive.c @@ -154,26 +154,29 @@ int main(int argc, char *argv[]) SUPERLU_FREE(usermap); #ifdef GPU_ACC - /* Binding each MPI to a GPU device */ - char *ttemp; - ttemp = getenv ("SUPERLU_BIND_MPI_GPU"); - - if (ttemp) { - int devs, rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm - gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices - gpuSetDevice(rank % devs); // Set device to be used for GPU executions - } + int superlu_acc_offload = get_acc_offload(); + if (superlu_acc_offload) { + /* Binding each MPI to a GPU device */ + char *ttemp; + ttemp = getenv ("SUPERLU_BIND_MPI_GPU"); + + if (ttemp) { + int devs, rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm + gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices + gpuSetDevice(rank % devs); // Set device to be used for GPU executions + } - // This is to initialize GPU, which can be costly. - double t1 = SuperLU_timer_(); - gpuFree(0); - double t2 = SuperLU_timer_(); - if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); - gpublasHandle_t hb; - gpublasCreate(&hb); - if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); - gpublasDestroy(hb); + // This is to initialize GPU, which can be costly. + double t1 = SuperLU_timer_(); + gpuFree(0); + double t2 = SuperLU_timer_(); + if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); + gpublasHandle_t hb; + gpublasCreate(&hb); + if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); + gpublasDestroy(hb); + } #endif // printf("grid.iam %5d, myrank %5d\n",grid.iam,myrank); // fflush(stdout); @@ -185,15 +188,18 @@ int main(int argc, char *argv[]) superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); #ifdef GPU_ACC - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - double t1 = SuperLU_timer_(); - gpuFree(0); - double t2 = SuperLU_timer_(); - if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); - gpublasHandle_t hb; - gpublasCreate(&hb); - if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); - gpublasDestroy(hb); + int superlu_acc_offload = get_acc_offload(); + if (superlu_acc_offload) { + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + double t1 = SuperLU_timer_(); + gpuFree(0); + double t2 = SuperLU_timer_(); + if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); + gpublasHandle_t hb; + gpublasCreate(&hb); + if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); + gpublasDestroy(hb); + } #endif } diff --git a/EXAMPLE/pzdrive3d.c b/EXAMPLE/pzdrive3d.c index 9d20fc0f..b78480c3 100755 --- a/EXAMPLE/pzdrive3d.c +++ b/EXAMPLE/pzdrive3d.c @@ -222,26 +222,28 @@ main (int argc, char *argv[]) SUPERLU_FREE(usermap); #ifdef GPU_ACC - /* Binding each MPI to a GPU device */ - char *ttemp; - ttemp = getenv ("SUPERLU_BIND_MPI_GPU"); - - if (ttemp) { - int devs, rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm - gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices - gpuSetDevice(rank % devs); // Set device to be used for GPU executions + int superlu_acc_offload = get_acc_offload(); + if (superlu_acc_offload) { + /* Binding each MPI to a GPU device */ + char *ttemp; + ttemp = getenv ("SUPERLU_BIND_MPI_GPU"); + + if (ttemp) { + int devs, rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm + gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices + gpuSetDevice(rank % devs); // Set device to be used for GPU executions + } + // This is to initialize GPU, which can be costly. + double t1 = SuperLU_timer_(); + gpuFree(0); + double t2 = SuperLU_timer_(); + if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); + gpublasHandle_t hb; + gpublasCreate(&hb); + if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); + gpublasDestroy(hb); } - // This is to initialize GPU, which can be costly. - double t1 = SuperLU_timer_(); - gpuFree(0); - double t2 = SuperLU_timer_(); - if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); - gpublasHandle_t hb; - gpublasCreate(&hb); - if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); - gpublasDestroy(hb); - #endif // printf("grid.iam %5d, myrank %5d\n",grid.iam,myrank); @@ -253,15 +255,18 @@ main (int argc, char *argv[]) ------------------------------------------------------------ */ superlu_gridinit3d (MPI_COMM_WORLD, nprow, npcol, npdep, &grid); #ifdef GPU_ACC - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - double t1 = SuperLU_timer_(); - gpuFree(0); - double t2 = SuperLU_timer_(); - if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); - gpublasHandle_t hb; - gpublasCreate(&hb); - if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); - gpublasDestroy(hb); + int superlu_acc_offload = get_acc_offload(); + if (superlu_acc_offload) { + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + double t1 = SuperLU_timer_(); + gpuFree(0); + double t2 = SuperLU_timer_(); + if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1); + gpublasHandle_t hb; + gpublasCreate(&hb); + if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2); + gpublasDestroy(hb); + } #endif }