//
// Created by kwoodle on 5/9/20.
//
/*
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/* Simple example demonstrating how to use MPI with CUDA
 *
 * Generate some random numbers on one node.
 * Dispatch them to all nodes.
 * Compute their square root on each node's GPU.
 * Compute the average of the results using MPI.
 *
 * simpleMPI.cpp: main program, compiled with mpicxx on Linux/Mac platforms;
 * on Windows, please download the Microsoft HPC Pack SDK 2008.
 */
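// A typical build and run on Linux might look like the following; the
// companion file name, flags, and process count are illustrative
// assumptions, not taken from the sample's build scripts:
//
//   nvcc -c simpleMPI.cu -o simpleMPI_cuda.o
//   mpicxx -c simpleMPI.cpp -o simpleMPI.o
//   mpicxx simpleMPI.o simpleMPI_cuda.o -o simpleMPI -lcudart
//   mpirun -np 4 ./simpleMPI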
// MPI include
#include <mpi.h>

// System includes
#include <iostream>

using std::cout;
using std::cerr;
using std::endl;

// User include
#include "simpleMPI.h"
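
// The helpers called below (initData, computeGPU, sum) and my_abort are
// assumed to be declared in simpleMPI.h; a sketch of that interface,
// inferred from the call sites in this file rather than copied from the
// actual header:
//
//   void initData(float *data, int dataSize);       // fill with random floats
//   void computeGPU(float *hostData, int blockSize, int gridSize);
//   float sum(float *data, int size);               // host-side sum of 'size' floats
//   void my_abort(int err);                         // abort all MPI ranks
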
// Error handling macros
#define MPI_CHECK(call) \
    if ((call) != MPI_SUCCESS) { \
        cerr << "MPI error calling \"" #call "\"\n"; \
        my_abort(-1); }

// Host code
// No CUDA here, only MPI
int main(int argc, char *argv[]) {
    // Dimensions of the dataset
    int blockSize = 256;
    int gridSize = 10000;
    int dataSizePerNode = gridSize * blockSize;

    // Initialize MPI state
    MPI_CHECK(MPI_Init(&argc, &argv));

    // Get our MPI node number and node count
    int commSize, commRank;
    MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &commSize));
    MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &commRank));

    // Generate some random numbers on the root node (node 0)
    int dataSizeTotal = dataSizePerNode * commSize;
    float *dataRoot = NULL;

    if (commRank == 0)  // Are we the root node?
    {
        cout << "Running on " << commSize << " nodes" << endl;
        dataRoot = new float[dataSizeTotal];
        initData(dataRoot, dataSizeTotal);
    }

    // Allocate a buffer on each node
    float *dataNode = new float[dataSizePerNode];

    // Dispatch a portion of the input data to each node
    MPI_CHECK(MPI_Scatter(dataRoot,
                          dataSizePerNode,
                          MPI_FLOAT,
                          dataNode,
                          dataSizePerNode,
                          MPI_FLOAT,
                          0,
                          MPI_COMM_WORLD));

    if (commRank == 0) {
        // No need for root data any more
        delete[] dataRoot;
    }

    // On each node, run computation on GPU
    computeGPU(dataNode, blockSize, gridSize);

    // Reduction to the root node, computing the sum of output elements
    float sumNode = sum(dataNode, dataSizePerNode);
    float sumRoot;
    MPI_CHECK(MPI_Reduce(&sumNode, &sumRoot, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD));

    if (commRank == 0) {
        float average = sumRoot / dataSizeTotal;
        cout << "Average of square roots is: " << average << endl;
    }

    // Cleanup
    delete[] dataNode;
    MPI_CHECK(MPI_Finalize());

    if (commRank == 0) {
        cout << "PASSED\n";
    }

    return 0;
}

// Shut down MPI cleanly if something goes wrong
void my_abort(int err) {
    cout << "Test FAILED\n";
    MPI_Abort(MPI_COMM_WORLD, err);
}
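
// For reference, the GPU work invoked through computeGPU() lives in a
// companion CUDA translation unit (not shown here). A minimal sketch of
// what that step could look like, assuming a plain element-wise square-root
// kernel; the kernel name and the omission of error checking are
// illustrative simplifications, not the shipped sample code:
//
//   __global__ void sqrtKernel(float *input, float *output) {
//       int tid = blockIdx.x * blockDim.x + threadIdx.x;
//       output[tid] = sqrtf(input[tid]);
//   }
//
//   void computeGPU(float *hostData, int blockSize, int gridSize) {
//       int dataSize = blockSize * gridSize;
//
//       // Stage this node's slice of the input on the device
//       float *devInput = NULL, *devOutput = NULL;
//       cudaMalloc((void **)&devInput, dataSize * sizeof(float));
//       cudaMalloc((void **)&devOutput, dataSize * sizeof(float));
//       cudaMemcpy(devInput, hostData, dataSize * sizeof(float),
//                  cudaMemcpyHostToDevice);
//
//       // One thread per element: gridSize blocks of blockSize threads
//       sqrtKernel<<<gridSize, blockSize>>>(devInput, devOutput);
//
//       // Copy the square roots back over the host buffer and clean up
//       cudaMemcpy(hostData, devOutput, dataSize * sizeof(float),
//                  cudaMemcpyDeviceToHost);
//       cudaFree(devInput);
//       cudaFree(devOutput);
//   }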