mmult_mpi_omp.c
/**
* Incomplete program to multiply a matrix times a matrix using both
* MPI to distribute the computation among nodes and OMP
* to distribute the computation among threads.
*/
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/times.h>
#define min(x, y) ((x)<(y)?(x):(y))
#include "mat.h"
int mmult_omp(double *c,
              double *a, int aRows, int aCols,
              double *b, int bRows, int bCols)
{
  int i, j, k;
#pragma omp parallel default(none) \
  shared(a, b, c, aRows, aCols, bRows, bCols) private(i, j, k)
#pragma omp for
  for (i = 0; i < aRows; i++) {
    for (j = 0; j < bCols; j++) {
      c[i*bCols + j] = 0;
    }
    for (k = 0; k < aCols; k++) {
      for (j = 0; j < bCols; j++) {
        c[i*bCols + j] += a[i*aCols + k] * b[k*bCols + j];
      }
    }
  }
  return 0;
}
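/*
 * Row-distribution scheme used below: rank 0 (the master) broadcasts B,
 * then hands one row of A at a time to each worker, using tag = row
 * index + 1.  A worker multiplies its row by B and sends back the
 * result row, tagged with the row index; the master then either sends
 * that worker another row or an empty message with tag 0 to stop it.
 */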
int main(int argc, char* argv[])
{
  int nrows, ncols;
  double *aa;          /* the A matrix */
  double *bb;          /* the B matrix */
  double *cc1;         /* A x B computed using the omp-mpi code you write */
  double *cc2;         /* A x B computed using the conventional algorithm */
  int myid, numprocs;
  double starttime, endtime;
  MPI_Status status;
  int numberSent = 0;  /* rows of A handed out so far */
  int master = 0;      /* rank of the master process */
  int sent = 0;        /* rank a result just arrived from */
  int row = 0;         /* row index carried in the message tag */
  double *buff;        /* one row of A */
  double *res;         /* one row of the product */
  /* insert other global variables here */
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myid);
  if (argc > 1) {
    nrows = atoi(argv[1]);
    ncols = nrows;
    if (myid == master) {
      /* Master: generate the matrices, broadcast B, and farm out rows of A. */
      aa = gen_matrix(nrows, ncols);
      bb = gen_matrix(ncols, nrows);
      cc1 = malloc(sizeof(double) * nrows * nrows);
      buff = malloc(sizeof(double) * ncols);
      res = malloc(sizeof(double) * nrows);
      starttime = MPI_Wtime();
      MPI_Bcast(bb, nrows * ncols, MPI_DOUBLE, master, MPI_COMM_WORLD);
      /* Hand out one row of A to each worker; tag = row index + 1. */
      for (int i = 0; i < min(numprocs - 1, nrows); i++) {
        for (int j = 0; j < ncols; j++) {
          buff[j] = aa[i * ncols + j];
        }
        MPI_Send(buff, ncols, MPI_DOUBLE, i + 1, i + 1, MPI_COMM_WORLD);
        numberSent++;
      }
      /* Collect one result row per row of A, storing the product into cc1
         and refilling each idle worker until every row has been assigned. */
      for (int i = 0; i < nrows; i++) {
        MPI_Recv(res, nrows, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG,
                 MPI_COMM_WORLD, &status);
        sent = status.MPI_SOURCE;
        row = status.MPI_TAG;            /* workers tag results with the row index */
        for (int l = 0; l < nrows; l++) {
          cc1[row * nrows + l] = res[l];
        }
        if (numberSent < nrows) {
          /* Give this worker the next unassigned row. */
          for (int l = 0; l < ncols; l++) {
            buff[l] = aa[numberSent * ncols + l];
          }
          MPI_Send(buff, ncols, MPI_DOUBLE, sent, numberSent + 1, MPI_COMM_WORLD);
          numberSent++;
        } else {
          /* No rows left: tag 0 tells the worker to stop. */
          MPI_Send(MPI_BOTTOM, 0, MPI_DOUBLE, sent, 0, MPI_COMM_WORLD);
        }
      }
      endtime = MPI_Wtime();
      printf("%f\n", (endtime - starttime));
      cc2 = malloc(sizeof(double) * nrows * nrows);
      mmult(cc2, aa, nrows, ncols, bb, ncols, nrows);
      compare_matrices(cc2, cc1, nrows, nrows);
    } else {
      /* Worker: allocate local buffers, receive B, then process rows
         of A until the master sends an empty message with tag 0. */
      bb = malloc(sizeof(double) * ncols * nrows);
      buff = malloc(sizeof(double) * ncols);
      res = malloc(sizeof(double) * nrows);
      MPI_Bcast(bb, nrows * ncols, MPI_DOUBLE, master, MPI_COMM_WORLD);
      if (myid <= nrows) {
        while (1) {
          MPI_Recv(buff, ncols, MPI_DOUBLE, master, MPI_ANY_TAG,
                   MPI_COMM_WORLD, &status);
          if (status.MPI_TAG == 0) {
            break;                       /* tag 0: no more rows */
          }
          row = status.MPI_TAG - 1;      /* recover the row index */
          mmult_omp(res, buff, 1, ncols, bb, ncols, nrows);
          MPI_Send(res, nrows, MPI_DOUBLE, master, row, MPI_COMM_WORLD);
        }
      }
    }
  } else {
    fprintf(stderr, "Usage: %s <size>\n", argv[0]);
  }
  MPI_Finalize();
  return 0;
}
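/*
 * A possible build/run sketch, assuming gen_matrix, mmult, and
 * compare_matrices declared in "mat.h" are provided in a companion
 * mat.c (that file name is an assumption, not part of this source):
 *
 *   mpicc -fopenmp -O2 mmult_mpi_omp.c mat.c -o mmult_mpi_omp
 *   mpirun -np 4 ./mmult_mpi_omp 1000
 *
 * At least two ranks are needed, since rank 0 only distributes rows
 * and collects results.
 */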