-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcuda_ring.c
114 lines (96 loc) · 3.06 KB
/
cuda_ring.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include <cuda.h>
#include <cuda_runtime.h>
#include <mpi.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
 * Evaluate a CUDA runtime call exactly once.  If the result is anything other
 * than cudaSuccess, print the source location and the CUDA error string to
 * stderr and terminate the process with a failure exit status.
 *
 * The argument is parenthesised and the temporary has a macro-private name so
 * that an expression mentioning a caller variable called `e` is not shadowed.
 */
#define CUDACHECK(CALL)                                                     \
    do {                                                                    \
        cudaError_t cudacheck_err_ = (CALL);                                \
        if (cudacheck_err_ != cudaSuccess) {                                \
            fprintf(stderr, "Cuda failure %s:%d: '%s'\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(cudacheck_err_));          \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)
#if 0
/* Current CLOCK_REALTIME reading expressed as seconds in a double. */
static double dclock(void)
{
    struct timespec now;
    double whole_seconds;
    double fractional_seconds;

    clock_gettime(CLOCK_REALTIME, &now);

    /* Convert each field to double first, then combine. */
    whole_seconds = (double)now.tv_sec;
    fractional_seconds = 1.0e-9 * now.tv_nsec;
    return whole_seconds + fractional_seconds;
}
#endif
#ifdef _OMPI_CUDA_OPA
/*
 * Replacement for MPI_Init (only compiled when _OMPI_CUDA_OPA is defined).
 *
 * Selects a CUDA device for this process -- rank modulo the number of visible
 * devices -- and then forwards to PMPI_Init.  MPI is not initialised at this
 * point, so the rank is taken from the OMPI_COMM_WORLD_RANK environment
 * variable instead of MPI_Comm_rank.
 *
 * argc, argv: passed through unchanged to PMPI_Init.
 * Returns whatever PMPI_Init returns; exits the process if no CUDA device can
 * be selected.
 */
int MPI_Init(int *argc, char ***argv)
{
    int ndevs;
    CUDACHECK(cudaGetDeviceCount(&ndevs));
    if (ndevs < 1) {
        /* Guard the modulo below against a zero device count. */
        fprintf(stderr, "MPI_Init: no CUDA device available\n");
        exit(EXIT_FAILURE);
    }

    /* getenv() yields NULL when the variable is unset (for example when the
     * program is not started by the MPI launcher); fall back to rank 0
     * rather than handing NULL to atoi(). */
    const char *rank_env = getenv("OMPI_COMM_WORLD_RANK");
    int iam = rank_env ? atoi(rank_env) : 0;

    int mydev = iam % ndevs;
    CUDACHECK(cudaSetDevice(mydev));
    return PMPI_Init(argc, argv);
}
#endif
int main(int argc, char **argv)
{
int iam, np;
long i;
long *hs, *hr;
long *ds, *dr;
MPI_Status stat;
int sendto, recvfrom;
int ndevs, mydev;
int size = argv[1] ? atol(argv[1]) : 4;
double time, t0;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &iam);
MPI_Comm_size(MPI_COMM_WORLD, &np);
#ifndef _OMPI_CUDA_OPA
CUDACHECK(cudaGetDeviceCount(&ndevs));
mydev = iam % ndevs;
CUDACHECK(cudaSetDevice(mydev));
#endif
#ifdef _DEBUG
printf("iam: %d mydev: %d\n", iam, mydev);
#endif
if (!iam) printf("size: %d\n", size);
sendto = iam + 1;
if (iam == np-1) sendto = 0;
recvfrom = iam - 1;
if (iam == 0) recvfrom = np - 1;
hs = (long*)malloc(sizeof(*hs)*size);
hr = (long*)malloc(sizeof(*hr)*size);
for (i=0; i<size; i++) {
hs[i] = 10*iam + i;
hr[i] = -1;
}
CUDACHECK(cudaMalloc((void**)&ds, sizeof(*ds)*size));
CUDACHECK(cudaMalloc((void**)&dr, sizeof(*dr)*size));
CUDACHECK(cudaMemcpy(ds, hs, sizeof(*ds)*size, cudaMemcpyDefault));
CUDACHECK(cudaMemcpy(dr, hr, sizeof(*dr)*size, cudaMemcpyDefault));
MPI_Barrier(MPI_COMM_WORLD);
t0 = MPI_Wtime();
if (!iam) {
MPI_Send(ds, size, MPI_LONG, sendto, 0, MPI_COMM_WORLD);
MPI_Recv(dr, size, MPI_LONG, recvfrom, 0, MPI_COMM_WORLD, &stat);
} else {
MPI_Recv(dr, size, MPI_LONG, recvfrom, 0, MPI_COMM_WORLD, &stat);
MPI_Send(ds, size, MPI_LONG, sendto, 0, MPI_COMM_WORLD);
}
MPI_Barrier(MPI_COMM_WORLD);
time = MPI_Wtime() - t0;
CUDACHECK(cudaMemcpy(hr, dr, sizeof(*dr)*size, cudaMemcpyDefault));
if (!iam) printf("time[s]: %lf\n", time);
#ifdef _DEBUG
printf("iam: %d: ", iam);
for (i=0; i<size; i++)
printf("%ld ", hr[i]);
printf("\n");
#endif
CUDACHECK(cudaFree(ds));
CUDACHECK(cudaFree(dr));
free(hs);
free(hr);
MPI_Finalize();
return 0;
}