Skip to content

Commit

Permalink
Merge pull request #54 from mn416/explicit-vpm
Browse files Browse the repository at this point in the history
Support explicit VPM and DMA
  • Loading branch information
mn416 authored Jul 15, 2018
2 parents e5e585b + d7c0d9c commit 92218a9
Show file tree
Hide file tree
Showing 30 changed files with 1,247 additions and 594 deletions.
51 changes: 51 additions & 0 deletions Examples/DMA.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#include "QPULib.h"

using namespace QPULib;

// Example kernel demonstrating explicit VPM and DMA access.
// It DMA-loads 16 vectors from memory at 'p' into the VPM, reads each
// vector back out of the VPM, adds 1 to it and writes the result to a
// second VPM region, and finally DMA-stores that region to memory at 'p'.
// The order of the calls below matters: each setup call configures the
// transfer that the following start/get/put calls perform.
void dma(Ptr<Int> p)
{
// Setup load of 16 vectors into VPM, starting at word address 0
// NOTE(review): the pitch of 64 is presumably the byte distance between
// consecutive rows in memory (16 lanes x 4 bytes) -- confirm.
dmaSetReadPitch(64);
dmaSetupRead(HORIZ, 16, 0);
// Start loading from memory at address 'p'
dmaStartRead(p);
// Wait until load complete
dmaWaitRead();

// Setup load of 16 vectors from VPM, starting at vector address 0
vpmSetupRead(HORIZ, 16, 0);
// Setup store to VPM, starting at vector address 16
vpmSetupWrite(HORIZ, 16);

// Read each vector, increment it, and write it back
for (int i = 0; i < 16; i++)
vpmPut(vpmGetInt() + 1);

// Setup store of 16 vectors from VPM to memory, starting at VPM word
// address 256 (presumably the same location as vector address 16 used
// for the VPM writes above, at 16 words per vector -- confirm)
dmaSetupWrite(HORIZ, 16, 256);
// Start writing to memory at address 'p'
dmaStartWrite(p);
// Wait until store complete
dmaWaitWrite();
}

int main()
{
// Construct kernel
auto k = compile(dma);

// Allocate and initialise array shared between ARM and GPU
SharedArray<int> array(256);
for (int i = 0; i < 256; i++)
array[i] = i;

// Invoke the kernel and display the result
k(&array);
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++)
printf("%i ", array[16*i + j]);
printf("\n");
}

return 0;
}
11 changes: 4 additions & 7 deletions Examples/OET.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,8 @@ using namespace QPULib;

void oet(Ptr<Int> p)
{
setReadStride(1);
setWriteStride(1);

Int evens = *p;
Int odds = *(p+1);
Int odds = *(p+16);

For (Int count = 0, count < 16, count++)
Int evens2 = min(evens, odds);
Expand All @@ -31,8 +28,8 @@ void oet(Ptr<Int> p)
odds = odds2;
End

*p = evens;
*(p+1) = odds;
*p = evens;
*(p+16) = odds;
}

int main()
Expand All @@ -48,7 +45,7 @@ int main()
// Invoke the kernel and display the result
k.call(&a);
for (int i = 0; i < 32; i++)
printf("%i: %i\n", i, a[i]);
printf("%i: %i\n", i, (i & 1) ? a[16+(i>>1)] : a[i>>1]);

return 0;
}
21 changes: 21 additions & 0 deletions Lib/Common/Queue.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#ifndef _QUEUE_H_
#define _QUEUE_H_

namespace QPULib {

// Very simple fixed-capacity FIFO queue holding up to N elements of type T.
//
// Implemented as a circular buffer with N+1 slots: one slot is always left
// unused so that "empty" (front == back) can be told apart from "full"
// (advancing 'back' would make it equal to 'front') without a counter.
//
// No bounds checking is performed. Callers must ensure !isFull() before
// enq(), and !isEmpty() before first() or deq(); calling enq() on a full
// queue makes 'back' catch up with 'front', so the queue then reports empty.
template <int N, typename T> struct Queue {
  T elems[N+1];  // Element storage (N usable slots plus one spare)
  int front;     // Index of the oldest element
  int back;      // Index of the next free slot

  // Construct an empty queue
  Queue() : front(0), back(0) {}

  // True when the queue holds no elements
  bool isEmpty() const { return front == back; }

  // True when the queue holds N elements
  bool isFull() const { return next(back) == front; }

  // Append 'elem' at the back (precondition: !isFull())
  void enq(T elem) { elems[back] = elem; back = next(back); }

  // Pointer to the oldest element (precondition: !isEmpty())
  T* first() { return &elems[front]; }
  const T* first() const { return &elems[front]; }

  // Discard the oldest element (precondition: !isEmpty())
  void deq() { front = next(front); }

 private:
  // Index following 'i', wrapping around the end of the storage
  static int next(int i) { return (i+1) % (N+1); }
};

}

#endif
1 change: 1 addition & 0 deletions Lib/Kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "Target/Satisfy.h"
#include "Target/LoadStore.h"
#include "Target/Encode.h"
#include "Target/Pretty.h"

namespace QPULib {

Expand Down
2 changes: 1 addition & 1 deletion Lib/Kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ template <typename... ts> struct Kernel {
resetFreshLabelGen();

// Reserved general-purpose variables
Int qpuId, qpuCount, readStride, writeStride;
Int qpuId, qpuCount;
qpuId = getUniformInt();
qpuCount = getUniformInt();

Expand Down
9 changes: 9 additions & 0 deletions Lib/Source/Float.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,15 @@ FloatExpr getUniformFloat()
return mkFloatExpr(e);
}

// Read vector from VPM (float version).
// Builds a fresh expression node marked as a VPM_READ variable and
// wraps it as a FloatExpr.
FloatExpr vpmGetFloat()
{
  Expr* vpmRead = mkExpr();
  vpmRead->var.tag = VPM_READ;
  vpmRead->tag = VAR;
  return mkFloatExpr(vpmRead);
}

// Add: builds the expression applying the float ADD operator to 'a' and 'b'
FloatExpr operator+(FloatExpr a, FloatExpr b)
{
  auto addOp = mkOp(ADD, FLOAT);
  return mkFloatApply(a, addOp, b);
}
Expand Down
1 change: 1 addition & 0 deletions Lib/Source/Float.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ struct Float {
// ============================================================================

FloatExpr getUniformFloat();
// Read vector from VPM
FloatExpr vpmGetFloat();

FloatExpr operator+(FloatExpr a, FloatExpr b);
FloatExpr operator-(FloatExpr a, FloatExpr b);
Expand Down
9 changes: 9 additions & 0 deletions Lib/Source/Int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,15 @@ IntExpr numQPUs()
return mkIntExpr(e);
}

// Read vector from VPM (integer version).
// Builds a fresh expression node marked as a VPM_READ variable and
// wraps it as an IntExpr.
IntExpr vpmGetInt()
{
  Expr* vpmRead = mkExpr();
  vpmRead->var.tag = VPM_READ;
  vpmRead->tag = VAR;
  return mkIntExpr(vpmRead);
}

// Vector rotation: builds the expression applying the 32-bit integer
// ROTATE operator to 'a' and 'b'
IntExpr rotate(IntExpr a, IntExpr b)
{
  auto rotateOp = mkOp(ROTATE, INT32);
  return mkIntApply(a, rotateOp, b);
}
Expand Down
1 change: 1 addition & 0 deletions Lib/Source/Int.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ IntExpr getUniformInt();
IntExpr index();
IntExpr me();
IntExpr numQPUs();
// Read vector from VPM
IntExpr vpmGetInt();

IntExpr rotate(IntExpr a, IntExpr b);
FloatExpr rotate(FloatExpr a, IntExpr b);
Expand Down
31 changes: 26 additions & 5 deletions Lib/Source/Interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ Vec evalVar(CoreState* s, Var v)
return x;
}

// VPM read
case VPM_READ:
printf("QPULib: vpmGet() not supported by interpreter\n");
break;

default:
printf("QPULib: reading from write-only variable\n");
}
Expand Down Expand Up @@ -145,7 +150,7 @@ Vec eval(CoreState* s, Expr* e)
Vec v;
for (int i = 0; i < NUM_LANES; i++) {
v.elems[i].intVal = emuHeap[hp>>2];
hp += 4*(s->readStride+1);
hp += s->readStride;
}
return v;
}
Expand Down Expand Up @@ -291,6 +296,11 @@ void assignToVar(CoreState* s, Vec cond, Var v, Vec x)
return;
}

// VPM write
case VPM_WRITE:
printf("QPULib: vpmPut() not supported by interpreter\n");
break;

// Others are read-only
case UNIFORM:
case QPU_NUM:
Expand Down Expand Up @@ -322,7 +332,7 @@ void execAssign(CoreState* s, Vec cond, Expr* lhs, Expr* rhs)
int hp = index.elems[0].intVal;
for (int i = 0; i < NUM_LANES; i++) {
emuHeap[hp>>2] = val.elems[i].intVal;
hp += 4*(s->writeStride+1);
hp += 4 + s->writeStride;
}
return;
}
Expand Down Expand Up @@ -464,7 +474,7 @@ void execStoreRequest(CoreState* s, Expr* data, Expr* addr) {
int hp = index.elems[0].intVal;
for (int i = 0; i < NUM_LANES; i++) {
emuHeap[hp>>2] = val.elems[i].intVal;
hp += 4*(s->writeStride+1);
hp += 4 + s->writeStride;
}
}

Expand Down Expand Up @@ -565,8 +575,19 @@ void exec(InterpreterState* state, CoreState* s)
else state->sema[stmt->semaId]--;
return;

// Flush outstanding stores
case FLUSH: return;
case DMA_READ_WAIT:
case DMA_WRITE_WAIT:
case SETUP_VPM_READ:
case SETUP_VPM_WRITE:
case SETUP_DMA_READ:
case SETUP_DMA_WRITE:
// Interpreter ignores these
return;

case DMA_START_READ:
case DMA_START_WRITE:
printf("QPULib: DMA access not supported by interpreter\n");
break;
}

// Unreachable
Expand Down
86 changes: 78 additions & 8 deletions Lib/Source/Pretty.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ void pretty(FILE *f, Expr* e)
fprintf(f, "QPU_NUM");
else if (e->var.tag == ELEM_NUM)
fprintf(f, "ELEM_NUM");
else if (e->var.tag == VPM_READ)
fprintf(f, "VPM_READ");
else if (e->var.tag == VPM_WRITE)
fprintf(f, "VPM_WRITE");
else if (e->var.tag == TMU0_ADDR)
fprintf(f, "TMU0_ADDR");
break;
Expand Down Expand Up @@ -264,15 +268,15 @@ void pretty(FILE *f, int indent, Stmt* s)
// Set read stride
case SET_READ_STRIDE:
indentBy(f, indent);
fprintf(f, "setReadStride(");
fprintf(f, "dmaSetReadPitch(");
pretty(f, s->stride);
fprintf(f, ")\n");
break;

// Set write stride
case SET_WRITE_STRIDE:
indentBy(f, indent);
fprintf(f, "setWriteStride(");
fprintf(f, "dmaSetWriteStride(");
pretty(f, s->stride);
fprintf(f, ")\n");
break;
Expand All @@ -295,12 +299,6 @@ void pretty(FILE *f, int indent, Stmt* s)
fprintf(f, ")\n");
break;

// Flush outstanding stores
case FLUSH:
indentBy(f, indent);
fprintf(f, "flush()\n");
break;

// Increment semaphore
case SEMA_INC:
indentBy(f, indent);
Expand All @@ -319,6 +317,78 @@ void pretty(FILE *f, int indent, Stmt* s)
fprintf(f, "hostIRQ()\n");
break;

// Setup VPM Read
case SETUP_VPM_READ:
indentBy(f, indent);
fprintf(f, "vpmSetupRead(");
fprintf(f, "numVecs=%i, ", s->setupVPMRead.numVecs);
fprintf(f, "dir=%s,", s->setupVPMRead.hor ? "HOR" : "VIR");
fprintf(f, "stride=%i,", s->setupVPMRead.stride);
pretty(f, s->setupVPMRead.addr);
fprintf(f, ");\n");
break;

// Setup VPM Write
case SETUP_VPM_WRITE:
indentBy(f, indent);
fprintf(f, "vpmSetupWrite(");
fprintf(f, "dir=%s,", s->setupVPMWrite.hor ? "HOR" : "VIR");
fprintf(f, "stride=%i,", s->setupVPMWrite.stride);
pretty(f, s->setupVPMWrite.addr);
fprintf(f, ");\n");
break;

// DMA read wait
case DMA_READ_WAIT:
indentBy(f, indent);
fprintf(f, "dmaReadWait();\n");
break;

// DMA write wait
case DMA_WRITE_WAIT:
indentBy(f, indent);
fprintf(f, "dmaWriteWait();\n");
break;

// DMA start read
case DMA_START_READ:
indentBy(f, indent);
fprintf(f, "dmaStartRead(");
pretty(f, s->startDMARead);
fprintf(f, ");\n");
break;

// DMA start write
case DMA_START_WRITE:
indentBy(f, indent);
fprintf(f, "dmaStartWrite(");
pretty(f, s->startDMAWrite);
fprintf(f, ");\n");
break;

// DMA read setup
case SETUP_DMA_READ:
indentBy(f, indent);
fprintf(f, "dmaSetupRead(");
fprintf(f, "numRows=%i,", s->setupDMARead.numRows);
fprintf(f, "rowLen=%i,", s->setupDMARead.rowLen);
fprintf(f, "dir=%s,", s->setupDMARead.hor ? "HORIZ" : "VERT");
fprintf(f, "vpitch=%i,", s->setupDMARead.vpitch);
pretty(f, s->setupDMARead.vpmAddr);
fprintf(f, ");\n");
break;

// DMA write setup
case SETUP_DMA_WRITE:
indentBy(f, indent);
fprintf(f, "dmaSetupWrite(");
fprintf(f, "numRows=%i,", s->setupDMAWrite.numRows);
fprintf(f, "rowLen=%i,", s->setupDMAWrite.rowLen);
fprintf(f, "dir=%s,", s->setupDMAWrite.hor ? "HORIZ" : "VERT");
pretty(f, s->setupDMAWrite.vpmAddr);
fprintf(f, ");\n");
break;

// Not reachable
default:
assert(false);
Expand Down
Loading

0 comments on commit 92218a9

Please sign in to comment.