-
Notifications
You must be signed in to change notification settings - Fork 212
/
printing.slang
387 lines (340 loc) · 12.8 KB
/
printing.slang
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
// printing.slang
// This file provides the GPU code for a simple library that
// allows GPU shaders to print values to `stdout`.
//
// The implementation relies on a single buffer that must
// be bound to any shader that uses GPU printing.
//
// Shared output buffer for all GPU printing in this file.
// Word 0 is the atomic allocation counter bumped by `_allocatePrintWords()`;
// encoded print commands are written starting at word 1.
RWStructuredBuffer<uint> gPrintBuffer;
//
// Encoding
// ========
//
// The print buffer is organized in terms of 32-bit (`uint`) *words*.
//
// The first word in the print buffer is used as an atomic
// counter, and must be initialized to zero before a shader starts.
// By atomically incrementing this counter, GPU threads can allocate
// space for printing commands in the buffer. All printing
// commands are stored after the first word (so, starting at
// an index of 1).
//
// A printing command starts with a single-word header, where
// the high 16 bits specify the *op* for the command, and the
// low 16 bits specify the number of *payload* words in
// the command. The payload is the words that immediately
// follow the command header.
//
// Note that the header word for a command is *not* included
// in the count of words in the low 16 bits.
//
// The opcode values need to be shared between CPU and GPU
// code, so we use a bit of preprocessor trickery here to
// generate an `enum` type with all the opcodes.
//
// Opcodes stored in the upper half of every command header word.
// The code in this file refers to `NewLine`, `UInt32`, `String`, and `PrintF`.
enum PrintingOp
{
// Each `GPU_PRINTING_OP(NAME)` expands to the enumerator `NAME ,`.
// NOTE(review): presumably "gpu-printing-ops.h" invokes the macro once
// per opcode (and may `#undef` it afterwards) - confirm against the header.
#define GPU_PRINTING_OP(NAME) NAME ,
#include "gpu-printing-ops.h"
};
// It is critical that when printing something, we allocate
// all the words it requires in the print buffer contiguously.
// For example, if the user writes:
//
// println("Thread number ", threadID, " has value ", someValue);
//
// It would be very bad if the output from different threads
// got interleaved, such that one cannot determine which value
// goes with which thread.
//
// Allocating individual print *commands* atomically is not necessarily
// enough: instead, we need to allocate the storage for all
// the commands that comprise a `print()` call at once.
//
// The core allocation operation here is `_allocatePrintWords()`,
// which allocates space for one or more print commands.
//
uint _allocatePrintWords(uint wordCount)
{
    // Slot 0 of `gPrintBuffer` holds the running total of words handed
    // out so far. Adding `wordCount` to it atomically reserves a
    // contiguous run of words, and the value the counter held *before*
    // the add is reported back through the last argument.
    //
    uint reservedBase = 0;
    InterlockedAdd(gPrintBuffer[0], wordCount, reservedBase);

    // The counter itself occupies slot 0, so the reserved words begin
    // one slot past the counter value we got back.
    //
    return 1 + reservedBase;
}
// Java-style `println`
// ====================
//
// We will start by building up a Java-style `println()` function
// that accepts zero or more values to print, and prints them
// atomically (without any other thread being able to interleave
// in the printed output), followed by a newline.
//
// We will define a wrapper around `_allocatePrintWords()`
// that captures the main idiom for `println()`.
//
uint _beginPrintln(uint wordCount)
{
    // `wordCount` covers only the words for the `println` arguments;
    // the trailing newline is not part of it. Reserve one additional
    // word so the newline command lands in the same contiguous
    // allocation as the arguments.
    //
    uint firstWord = _allocatePrintWords(wordCount + 1);

    // The newline command is a bare header (op in the upper 16 bits,
    // zero payload words) placed right after the caller's words.
    //
    uint newlineSlot = firstWord + wordCount;
    gPrintBuffer[newlineSlot] = uint(PrintingOp.NewLine) << 16;

    // Hand back the offset at which the caller should start writing
    // its own commands.
    //
    return firstWord;
}
//
// With the `_beginPrintln()` function handling all the heavy lifting,
// we can define a zero-argument `println()` trivially.
//
void println()
{
    // With no arguments there are no words to reserve beyond the
    // newline command that `_beginPrintln()` appends on its own.
    //
    uint argumentWords = 0;
    _beginPrintln(argumentWords);
}
// We could continue to build a family of overloaded `println()` functions, like:
//
// void println();
// void println(int value);
// void println(float value);
// void println(uint value);
// ...
//
// but it should be clear that this approach doesn't scale at all
// to functions with multiple arguments:
//
// void println(int a, int b);
// void println(float a, int b);
// void println(int a, float b);
// ...
//
// Using the features of the Slang language, we can build a framework
// for a more scalable solution.
//
// We start by defining an `interface` that captures the essence
// of what a type of printable values needs to support.
//
// Contract for values that can be passed to the `println()` overloads
// in this file.
interface IPrintable
{
// Returns the number of words this value occupies in the print buffer.
// The `println()` overloads use this count both to size the allocation
// and to advance the write offset past this value, so it must match
// what `writePrintWords()` actually writes.
//
uint getPrintWordCount();
// Writes this value's words into `buffer`, starting at index `offset`.
// The `println()` overloads pass `gPrintBuffer` and an offset inside
// the range they allocated for the whole call.
//
void writePrintWords(RWStructuredBuffer<uint> buffer, uint offset);
};
// With the `IPrintable` interface in place, we can now write
// a generic one-argument `println()` that works with any
// printable value.
void println<T : IPrintable>(T value)
{
    // Ask the value how many words it needs, and let `_beginPrintln()`
    // reserve that many (plus the trailing newline) in one allocation.
    // The result is the offset of the first word reserved for `value`.
    //
    uint firstWord = _beginPrintln(value.getPrintWordCount());

    // The value then encodes itself into the space reserved for it.
    //
    value.writePrintWords(gPrintBuffer, firstWord);
}
// Of course, in order to be able to print things with this `println()`
// operation, we need to have some types that implement `IPrintable`.
//
// In particular, we'd like to be able to print built-in types like
// `uint`, but we don't have access to the declaration of `uint`
// to be able to change it!
//
// It just so happens that another Slang feature, `extension`
// declarations, lets us extend a type with new methods *and*
// allows us to add new interface implementations to it.
//
// We can therefore make the existing Slang `uint` type
// printable.
extension uint : IPrintable // <-- Note: this extension also adds the `IPrintable` conformance to `uint`
{
    // A printed `uint` takes two words: the command header, then the value.
    //
    uint getPrintWordCount()
    {
        return 2;
    }

    // Emit a `UInt32` command: a header announcing one payload word,
    // followed by the value itself.
    //
    void writePrintWords(RWStructuredBuffer<uint> buffer, uint offset)
    {
        uint header = (uint(PrintingOp.UInt32) << 16) | 1;
        buffer[offset] = header;
        buffer[offset + 1] = this;
    }
}
extension String : IPrintable
{
    // A printed string takes two words: the command header, then the
    // hash of the string.
    //
    uint getPrintWordCount()
    {
        return 2;
    }

    // Emit a `String` command: a header announcing one payload word,
    // followed by the result of `getStringHash()` for this string.
    //
    void writePrintWords(RWStructuredBuffer<uint> buffer, uint offset)
    {
        uint header = (uint(PrintingOp.String) << 16) | 1;
        buffer[offset] = header;
        buffer[offset + 1] = getStringHash(this);
    }
}
// Where generics and interfaces start to pay off is when we want
// to scale up to a two-argument `println()` function that can
// work for any combination of printable types.
// Print two values, `a` and `b`.
//
// This function ensures that the values of `a` and `b`
// are written out atomically, without values printed
// from other threads spliced in between.
//
void println<A : IPrintable, B : IPrintable>(A a, B b)
{
    // Measure each argument up front so that the whole line can be
    // reserved with a single allocation.
    //
    uint wordsForA = a.getPrintWordCount();
    uint wordsForB = b.getPrintWordCount();

    // One `_beginPrintln()` call reserves the words for both values
    // (and the trailing newline) as a single contiguous range, so no
    // other thread's commands can end up between them.
    //
    uint offsetOfA = _beginPrintln(wordsForA + wordsForB);
    uint offsetOfB = offsetOfA + wordsForA;

    // Each value is written back-to-back inside the reserved range.
    //
    a.writePrintWords(gPrintBuffer, offsetOfA);
    b.writePrintWords(gPrintBuffer, offsetOfB);
}
// We can then continue to build up to `println()` functions with
// three or more arguments.
void println<A : IPrintable, B : IPrintable, C : IPrintable>(
    A a, B b, C c)
{
    // Measure all three arguments before allocating anything.
    //
    uint wordsForA = a.getPrintWordCount();
    uint wordsForB = b.getPrintWordCount();
    uint wordsForC = c.getPrintWordCount();

    // Reserve one contiguous range for the whole line, then lay the
    // three values out back-to-back inside it.
    //
    uint offsetOfA = _beginPrintln(wordsForA + wordsForB + wordsForC);
    uint offsetOfB = offsetOfA + wordsForA;
    uint offsetOfC = offsetOfB + wordsForB;

    a.writePrintWords(gPrintBuffer, offsetOfA);
    b.writePrintWords(gPrintBuffer, offsetOfB);
    c.writePrintWords(gPrintBuffer, offsetOfC);
}
// Further generalizing to four or more arguments is straightforward but tedious.
//
// A future version of Slang may support variadic functions, variadic generics,
// or some other facilities to make writing code like this easier.
// An important benefit of the approach we have taken here with an `IPrintable`
// interface is that arbitrary user-defined types can implement `IPrintable`
// and will work correctly with the existing `println()` definitions in
// this file.
// C-style `printf()`
// ==================
//
// Many developers who use C/C++ would prefer to be able to use traditional
// `printf()` with format strings. `printf`-based printing tends to be
// more readable than `println`-style alternatives, but comes at the cost
// of only supporting a more restricted set of types for printing.
//
// Similar to the `println()` case, our Slang implementation of `printf()`
// starts with an allocation function that does the behind-the-scenes
// work.
//
// Note: We use the name `printf_` here because `printf` clashes with
// HLSL's printf.
//
uint _beginPrintf(String format, uint wordCount)
{
    // Layout of a printf command:
    //
    //   [header] [format string hash] [wordCount argument words...]
    //
    // so two words are needed on top of the caller's argument words.
    //
    uint headerSlot = _allocatePrintWords(wordCount + 2);
    uint formatSlot = headerSlot + 1;
    uint argumentsStart = headerSlot + 2;

    // The payload count in the header covers everything after the
    // header word: the format hash plus the argument words.
    //
    gPrintBuffer[headerSlot] = (uint(PrintingOp.PrintF) << 16) | (wordCount + 1);
    gPrintBuffer[formatSlot] = getStringHash(format);

    // The caller writes its format arguments starting here.
    //
    return argumentsStart;
}
// Now we will define an interface for types that are allowed to
// appear as format arguments to `printf()`.
// Contract for values that can be passed as format arguments to the
// `printf_()` overloads in this file.
interface IPrintf
{
// Returns the number of payload words this argument occupies inside a
// printf command. The `printf_()` overloads use it to size the allocation
// and to advance the write offset past this argument.
uint getPrintfWordCount();
// Writes this argument's payload words into `buffer`, starting at index
// `offset`. The `printf_()` overloads pass `gPrintBuffer` and an offset
// inside the payload region returned by `_beginPrintf()`.
void writePrintfWords(RWStructuredBuffer<uint> buffer, uint offset);
};
// The extension to make `uint` compatible with `printf()` is straightforward.
extension uint : IPrintf
{
    // As a format argument, a `uint` is just its raw value: one word,
    // with no header. This differs from the `IPrintable` encoding,
    // which prefixes the value with a typed command header.
    //
    uint getPrintfWordCount()
    {
        return 1;
    }

    // Store the value directly at the requested slot.
    //
    void writePrintfWords(RWStructuredBuffer<uint> buffer, uint offset)
    {
        buffer[offset] = this;
    }
}
extension String : IPrintf
{
    // As a format argument, a string is a single word holding its hash.
    //
    uint getPrintfWordCount()
    {
        return 1;
    }

    // Store the result of `getStringHash()` at the requested slot.
    //
    void writePrintfWords(RWStructuredBuffer<uint> buffer, uint offset)
    {
        buffer[offset] = getStringHash(this);
    }
}
// A `printf()` with no format arguments can just call back to `_beginPrintf()`
void printf_(String format)
{
    // With no format arguments, the command consists only of the header
    // and the format string hash, both written by `_beginPrintf()`.
    //
    uint argumentWords = 0;
    _beginPrintf(format, argumentWords);
}
// The `printf()` cases with one or more format arguments are all quite similar.
void printf_<A : IPrintf>(String format, A a)
{
    // Find out how many payload words the argument needs.
    //
    uint wordsForA = a.getPrintfWordCount();

    // Reserve the printf command with room for that payload. The
    // returned offset is where the argument payload begins.
    //
    uint offsetOfA = _beginPrintf(format, wordsForA);

    // Encode the argument into the payload region.
    //
    a.writePrintfWords(gPrintBuffer, offsetOfA);
}
void printf_<A : IPrintf, B : IPrintf>(String format, A a, B b)
{
    // Measure both arguments before allocating the command.
    //
    uint wordsForA = a.getPrintfWordCount();
    uint wordsForB = b.getPrintfWordCount();

    // Reserve a single printf command whose payload holds both
    // arguments, then place them back-to-back in that payload.
    //
    uint offsetOfA = _beginPrintf(format, wordsForA + wordsForB);
    uint offsetOfB = offsetOfA + wordsForA;

    a.writePrintfWords(gPrintBuffer, offsetOfA);
    b.writePrintfWords(gPrintBuffer, offsetOfB);
}
// Extending this `printf()` implementation to handle more format arguments
// is straightforward, but tedious. Future versions of Slang might add
// support for variadic generics, which could make this code more compact.