-
Notifications
You must be signed in to change notification settings - Fork 46
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
541 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Builds the matrix-vector multiplication tutorial executables:
#   tut5      from tut5_complete.cpp
#   tut5_ipu  from tut5_ipu_hardware_complete.cpp
CXX ?= g++

# 'all' and 'clean' never create files with those names; declaring them phony
# guarantees they still run if a file called 'all' or 'clean' ever appears.
.PHONY: all clean

all: tut5 tut5_ipu

tut5: tut5_complete.cpp
	$(CXX) -std=c++11 tut5_complete.cpp -lpoplar -lpoputil -o tut5

tut5_ipu: tut5_ipu_hardware_complete.cpp
	$(CXX) -std=c++11 tut5_ipu_hardware_complete.cpp -lpoplar -lpoputil -o tut5_ipu

clean:
	rm -f tut5
	rm -f tut5_ipu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# Getting Started with Poplar | ||
|
||
## SDK Overview | ||
|
||
|
||
|
||
There are two ways to run examples. | ||
1. IPU model (Simulator) | ||
2. On IPU Hardware | ||
|
||
## Setup Poplar SDK | ||
|
||
* The Poplar SDK should be enabled by default; if it's not, enable it. | ||
```bash | ||
> source /software/graphcore/poplar_sdk/3.3.0/enable | ||
> popc --version | ||
POPLAR version 3.3.0 (de1f8de2a7) | ||
clang version 16.0.0 (2fce0648f3c328b23a6cbc664fc0dd0630122212) | ||
``` | ||
|
||
* Go to directory with GEMV code. | ||
```bash | ||
cd examples/tutorials/tutorials/poplar/tut5_matrix_vector/complete | ||
``` | ||
## Run with IPU Model | ||
|
||
* Compile `tut5_complete.cpp` with the provided `Makefile` | ||
```bash | ||
make tut5 | ||
``` | ||
* Run the executable on the CPU using the IPU model (simulator). | ||
```bash | ||
./tut5 1000 100 | ||
``` | ||
|
||
<details> | ||
<summary>Sample Output</summary> | ||
|
||
```bash | ||
./tut5 1000 100 | ||
Multiplying matrix of size 1000x100 by vector of size 100 | ||
Creating new graph object and compiling vertex program additions | ||
Constructing full compute graph and control program | ||
Running graph program to multiply matrix by vector | ||
Multiplication result OK | ||
``` | ||
</details> | ||
|
||
## Run on IPU | ||
|
||
* Compile `tut5_ipu_hardware_complete.cpp` with the provided `Makefile` | ||
```bash | ||
make tut5_ipu | ||
``` | ||
|
||
* Run executable on IPU using scheduler. | ||
```bash | ||
srun --ipus=1 ./tut5_ipu 1000 100 | ||
``` | ||
<details> | ||
<summary>Sample Output</summary> | ||
|
||
```bash | ||
srun --ipus=1 ./tut5_ipu 1000 100 | ||
srun: job 26636 queued and waiting for resources | ||
srun: job 26636 has been allocated resources | ||
Multiplying matrix of size 1000x100 by vector of size 100 | ||
Trying to attach to IPU | ||
Attached to IPU 0 | ||
Creating environment (compiling vertex programs) | ||
Constructing compute graph and control program | ||
Running graph program to multiply matrix by vector | ||
Multiplication result OK | ||
``` | ||
</details> | ||
|
||
|
||
|
||
|
||
## Next Steps | ||
|
||
Follow the [Poplar Tutorial](https://github.com/graphcore/examples/tree/master/tutorials/tutorials/poplar) for an optimized implementation of GEMV. | ||
|
||
## Useful Resources | ||
|
||
* [Poplar and PopLibs User Guide](https://docs.graphcore.ai/projects/poplar-user-guide/en/latest/) | ||
* [Poplar and PopLibs API Reference](https://docs.graphcore.ai/projects/poplar-api/en/latest/) | ||
* [Request Cerebras SDK](https://cerebras.ai/homepage-landing/developers/sdk-request/) | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
// Copyright (c) 2018 Graphcore Ltd. All rights reserved. | ||
|
||
#include <poplar/Vertex.hpp> | ||
|
||
using namespace poplar; | ||
|
||
// This file contains the definitions of the vertex types used by the | ||
// matrix-vector multiplication example. The vertex type provides a description | ||
// of how individual tasks on the IPU will perform computation. | ||
|
||
// A vertex type to perform a dot product calculation. | ||
class DotProductVertex : public Vertex { | ||
public: | ||
// These two inputs read a vector of values (that is, an ordered, contiguous | ||
// set of values in memory) from the graph. | ||
Input<Vector<float>> a; | ||
Input<Vector<float>> b; | ||
|
||
// The output is to a single scalar value in the graph. | ||
Output<float> out; | ||
|
||
// The compute method performs the dot product between inputs 'a' and | ||
// 'b' and stores the result in 'out'. | ||
bool compute() { | ||
float sum = 0; | ||
for (unsigned i = 0; i < a.size(); ++i) | ||
sum += a[i] * b[i]; | ||
*out = sum; | ||
return true; | ||
} | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
// Copyright (c) 2018 Graphcore Ltd. All rights reserved. | ||
|
||
#include <poplar/Engine.hpp>
#include <poplar/IPUModel.hpp>
#include <poputil/TileMapping.hpp>

#include <cmath>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <vector>
|
||
using namespace poplar; | ||
using namespace poplar::program; | ||
|
||
// Checks the result of multiplying the matrix by the vector.
//
//   matrix   row-major matrix data, numRows * numCols elements
//   input    the vector that was multiplied, numCols elements
//   output   the result to verify, numRows elements
//
// Each output element is compared against a dot product recomputed here.
// The comparison allows for float rounding: an element is accepted when it
// differs from the reference by no more than numCols * epsilon * sum(|terms|),
// the usual bound on the rounding error of a numCols-term float dot product.
// An exact bit-for-bit comparison would report false failures whenever the
// two sides round differently (e.g. fused multiply-add on one side only).
//
// Returns 0 and prints "Multiplication result OK" when every row matches;
// otherwise prints the first mismatching row and returns 1.
int checkResult(float *matrix, float *input, float *output, unsigned numRows,
                unsigned numCols) {
  const float epsilon = std::numeric_limits<float>::epsilon();
  for (unsigned row = 0; row < numRows; ++row) {
    // Index in size_t so row * numCols cannot wrap in 32-bit arithmetic.
    const std::size_t rowStart = static_cast<std::size_t>(row) * numCols;
    float sum = 0;
    float magnitude = 0;
    for (unsigned col = 0; col < numCols; ++col) {
      const float term = matrix[rowStart + col] * input[col];
      sum += term;
      magnitude += std::fabs(term);
    }
    const float tolerance = static_cast<float>(numCols) * epsilon * magnitude;
    // The equality test accepts identical infinities; a NaN output fails both
    // comparisons and is therefore reported as an error.
    const bool matches = output[row] == sum ||
                         std::fabs(output[row] - sum) <= tolerance;
    if (!matches) {
      std::cout << "ERROR: output " << row << ": expected=" << sum
                << ", actual=" << output[row] << "\n";
      return 1;
    }
  }
  std::cout << "Multiplication result OK\n";
  return 0;
}
|
||
// Builds and returns a device-side program that multiplies the 2-d tensor
// 'matrix' by the 1-d tensor 'in'. When the program executes, the result is
// placed in the 1-d tensor 'out'.
//
// One DotProductVertex is added to 'graph' per matrix row (matrix.dim(0)
// vertices in total); all of them belong to a single compute set.
Program buildMultiplyProgram(Graph &graph, Tensor matrix, Tensor in,
                             Tensor out) {
  // Create a compute set to hold the vertices that perform the calculation.
  ComputeSet mulCS = graph.addComputeSet("mulCS");

  // The compute set holds a vertex for every output value. Each vertex takes
  // a row of the matrix and the whole input vector as inputs and performs a
  // dot product, placing the result in one element of the output vector.
  const auto numRows = matrix.dim(0);

  // Vertices are spread round-robin over the tiles of the graph's target.
  // Using the row index directly as the tile index only works while the
  // matrix has no more rows than the target has tiles; wrapping keeps every
  // index within the target's tile range and leaves the mapping unchanged
  // for matrices that already fit.
  const unsigned numTiles = graph.getTarget().getNumTiles();

  for (unsigned i = 0; i < numRows; ++i) {
    auto v = graph.addVertex(mulCS,              // Put the vertex in the
                                                 // 'mulCS' compute set.
                             "DotProductVertex", // Create a vertex of this
                                                 // type.
                             {{"a", matrix[i]},  // Connect input 'a' of the
                                                 // vertex to a row of the
                                                 // matrix.
                              {"b", in},         // Connect input 'b' of the
                                                 // vertex to the whole
                                                 // input vector.
                              {"out", out[i]}}); // Connect the output 'out'
                                                 // of the vertex to a single
                                                 // element of the output
                                                 // vector.
    graph.setTileMapping(v, i % numTiles);
    graph.setPerfEstimate(v, 20);
  }

  // The returned program just executes the 'mulCS' compute set, that is, it
  // executes every vertex calculation in parallel.
  return Execute(mulCS);
}
|
||
int main(int argc, char **argv) { | ||
if (argc != 3) { | ||
std::cerr << "usage: " << argv[0] << " numRows numCols\n"; | ||
return 1; | ||
} | ||
|
||
unsigned numRows = std::atoi(argv[1]); | ||
unsigned numCols = std::atoi(argv[2]); | ||
std::cout << "Multiplying matrix of size " << numRows << "x" << numCols | ||
<< " by vector of size " << numCols << "\n"; | ||
|
||
// Create the IPU model device | ||
IPUModel ipuModel; | ||
Device device = ipuModel.createDevice(); | ||
Target target = device.getTarget(); | ||
|
||
std::cout | ||
<< "Creating new graph object and compiling vertex program additions\n"; | ||
|
||
Graph graph(target); | ||
graph.addCodelets("matrix-mul-codelets.cpp"); | ||
|
||
std::cout << "Constructing full compute graph and control program\n"; | ||
|
||
// Create tensors in the graph to hold the input/output data. | ||
Tensor matrix = graph.addVariable(FLOAT, {numRows, numCols}, "matrix"); | ||
Tensor inputVector = graph.addVariable(FLOAT, {numCols}, "inputVector"); | ||
Tensor outputVector = graph.addVariable(FLOAT, {numRows}, "outputVector"); | ||
poputil::mapTensorLinearly(graph, matrix); | ||
poputil::mapTensorLinearly(graph, inputVector); | ||
poputil::mapTensorLinearly(graph, outputVector); | ||
|
||
// Create host buffers for the inputs and outputs and fill the inputs | ||
// with sample data. | ||
auto hMatrix = std::vector<float>(numRows * numCols); | ||
auto hInput = std::vector<float>(numCols); | ||
auto hOutput = std::vector<float>(numRows); | ||
|
||
for (unsigned col = 0; col < numCols; ++col) { | ||
hInput[col] = col; | ||
for (unsigned row = 0; row < numRows; ++row) { | ||
hMatrix[row * numCols + col] = row * col; | ||
} | ||
} | ||
|
||
// Create a device program to multiply two tensors together. | ||
auto mulProg = buildMultiplyProgram(graph, matrix, inputVector, outputVector); | ||
|
||
// Set up data streams to copy data in and out of graph | ||
auto inStreamV = graph.addHostToDeviceFIFO("inputVector", FLOAT, numCols); | ||
auto inStreamM = | ||
graph.addHostToDeviceFIFO("inputMatrix", FLOAT, numCols * numRows); | ||
auto outStream = graph.addDeviceToHostFIFO("out", FLOAT, numRows); | ||
|
||
// Create a program that copies data from the host buffers, multiplies | ||
// the result and copies the result back to the host. | ||
auto prog = Sequence({Copy(inStreamV, inputVector), Copy(inStreamM, matrix), | ||
mulProg, Copy(outputVector, outStream)}); | ||
|
||
// Create an engine from the compute graph and control program. | ||
Engine engine(graph, prog); | ||
engine.load(device); | ||
engine.connectStream("inputVector", hInput.data()); | ||
engine.connectStream("inputMatrix", hMatrix.data()); | ||
engine.connectStream("out", hOutput.data()); | ||
|
||
// Execute the program | ||
std::cout << "Running graph program to multiply matrix by vector\n"; | ||
engine.run(); | ||
|
||
// Check the results match what is expected. | ||
return checkResult(&hMatrix[0], &hInput[0], &hOutput[0], numRows, numCols); | ||
} |
Oops, something went wrong.